Galois
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
BufferedGraph.h
Go to the documentation of this file.
1 /*
2  * This file belongs to the Galois project, a C++ library for exploiting
3  * parallelism. The code is being released under the terms of the 3-Clause BSD
4  * License (a copy is located in LICENSE.txt at the top-level directory).
5  *
6  * Copyright (C) 2018, The University of Texas at Austin. All rights reserved.
7  * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS
8  * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY,
9  * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF
10  * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF
11  * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH
12  * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances
13  * shall University be liable for incidental, special, indirect, direct or
14  * consequential damages or loss of profits, interruption of business, or
15  * related expenses which may arise from use of Software or Documentation,
16  * including but not limited to those resulting from defects in Software and/or
17  * Documentation, or loss or inaccuracy of data of any kind.
18  */
19 
26 #ifndef GALOIS_GRAPHS_BUFGRAPH_H
27 #define GALOIS_GRAPHS_BUFGRAPH_H
28 
29 #include <fstream>
30 
31 #include <boost/iterator/counting_iterator.hpp>
32 
33 #include "galois/config.h"
34 #include "galois/gIO.h"
35 #include "galois/Reduction.h"
36 
37 namespace galois {
38 namespace graphs {
39 
48 template <typename EdgeDataType>
50 private:
51  // buffers that you load data into
53  uint64_t* outIndexBuffer = nullptr;
55  uint32_t* edgeDestBuffer = nullptr;
57  EdgeDataType* edgeDataBuffer = nullptr;
58 
60  uint32_t globalSize = 0;
62  uint64_t globalEdgeSize = 0;
63 
65  uint32_t numLocalNodes = 0;
67  uint64_t numLocalEdges = 0;
68 
71  uint64_t nodeOffset = 0;
74  uint64_t edgeOffset = 0;
76  bool graphLoaded = false;
77 
78  // accumulators for tracking bytes read
80  galois::GAccumulator<uint64_t> numBytesReadOutIndex;
82  galois::GAccumulator<uint64_t> numBytesReadEdgeDest;
84  galois::GAccumulator<uint64_t> numBytesReadEdgeData;
85 
94  void loadOutIndex(std::ifstream& graphFile, uint64_t nodeStart,
95  uint64_t numNodesToLoad) {
96  if (numNodesToLoad == 0) {
97  return;
98  }
99  assert(outIndexBuffer == nullptr);
100  outIndexBuffer = (uint64_t*)malloc(sizeof(uint64_t) * numNodesToLoad);
101 
102  if (outIndexBuffer == nullptr) {
103  GALOIS_DIE("Failed to allocate memory for out index buffer.");
104  }
105 
106  // position to start of contiguous chunk of nodes to read
107  uint64_t readPosition = (4 + nodeStart) * sizeof(uint64_t);
108  graphFile.seekg(readPosition);
109 
110  uint64_t numBytesToLoad = numNodesToLoad * sizeof(uint64_t);
111  uint64_t bytesRead = 0;
112 
113  while (numBytesToLoad > 0) {
114  graphFile.read(((char*)this->outIndexBuffer) + bytesRead, numBytesToLoad);
115  size_t numRead = graphFile.gcount();
116  numBytesToLoad -= numRead;
117  bytesRead += numRead;
118  }
119 
120  assert(numBytesToLoad == 0);
121 
122  nodeOffset = nodeStart;
123  }
124 
134  void loadEdgeDest(std::ifstream& graphFile, uint64_t edgeStart,
135  uint64_t numEdgesToLoad, uint64_t numGlobalNodes) {
136  if (numEdgesToLoad == 0) {
137  return;
138  }
139 
140  assert(edgeDestBuffer == nullptr);
141  edgeDestBuffer = (uint32_t*)malloc(sizeof(uint32_t) * numEdgesToLoad);
142 
143  if (edgeDestBuffer == nullptr) {
144  GALOIS_DIE("Failed to allocate memory for edge dest buffer.");
145  }
146 
147  // position to start of contiguous chunk of edges to read
148  uint64_t readPosition = (4 + numGlobalNodes) * sizeof(uint64_t) +
149  (sizeof(uint32_t) * edgeStart);
150  graphFile.seekg(readPosition);
151 
152  uint64_t numBytesToLoad = numEdgesToLoad * sizeof(uint32_t);
153  uint64_t bytesRead = 0;
154  while (numBytesToLoad > 0) {
155  graphFile.read(((char*)this->edgeDestBuffer) + bytesRead, numBytesToLoad);
156  size_t numRead = graphFile.gcount();
157  numBytesToLoad -= numRead;
158  bytesRead += numRead;
159  }
160 
161  assert(numBytesToLoad == 0);
162  // save edge offset of this graph for later use
163  edgeOffset = edgeStart;
164  }
165 
178  template <
179  typename EdgeType,
180  typename std::enable_if<!std::is_void<EdgeType>::value>::type* = nullptr>
181  void loadEdgeData(std::ifstream& graphFile, uint64_t edgeStart,
182  uint64_t numEdgesToLoad, uint64_t numGlobalNodes,
183  uint64_t numGlobalEdges) {
184  galois::gDebug("Loading edge data");
185 
186  if (numEdgesToLoad == 0) {
187  return;
188  }
189 
190  assert(edgeDataBuffer == nullptr);
191  edgeDataBuffer =
192  (EdgeDataType*)malloc(sizeof(EdgeDataType) * numEdgesToLoad);
193 
194  if (edgeDataBuffer == nullptr) {
195  GALOIS_DIE("Failed to allocate memory for edge data buffer.");
196  }
197 
198  // position after nodes + edges
199  uint64_t baseReadPosition = (4 + numGlobalNodes) * sizeof(uint64_t) +
200  (sizeof(uint32_t) * numGlobalEdges);
201 
202  // version 1 padding TODO make version agnostic
203  if (numGlobalEdges % 2) {
204  baseReadPosition += sizeof(uint32_t);
205  }
206 
207  // jump to first byte of edge data
208  uint64_t readPosition =
209  baseReadPosition + (sizeof(EdgeDataType) * edgeStart);
210  graphFile.seekg(readPosition);
211  uint64_t numBytesToLoad = numEdgesToLoad * sizeof(EdgeDataType);
212  uint64_t bytesRead = 0;
213 
214  while (numBytesToLoad > 0) {
215  graphFile.read(((char*)this->edgeDataBuffer) + bytesRead, numBytesToLoad);
216  size_t numRead = graphFile.gcount();
217  numBytesToLoad -= numRead;
218  bytesRead += numRead;
219  }
220 
221  assert(numBytesToLoad == 0);
222  }
223 
232  template <
233  typename EdgeType,
234  typename std::enable_if<std::is_void<EdgeType>::value>::type* = nullptr>
235  void loadEdgeData(std::ifstream&, uint64_t, uint64_t, uint64_t, uint64_t) {
236  galois::gDebug("Not loading edge data");
237  // do nothing (edge data is void, i.e. no edge data)
238  }
239 
243  void resetGraphStatus() {
244  graphLoaded = false;
245  globalSize = 0;
246  globalEdgeSize = 0;
247  nodeOffset = 0;
248  edgeOffset = 0;
249  numLocalNodes = 0;
250  numLocalEdges = 0;
252  }
253 
257  void freeMemory() {
258  free(outIndexBuffer);
259  outIndexBuffer = nullptr;
260  free(edgeDestBuffer);
261  edgeDestBuffer = nullptr;
262  free(edgeDataBuffer);
263  edgeDataBuffer = nullptr;
264  }
265 
266 public:
272 
276  ~BufferedGraph() noexcept { freeMemory(); }
277 
278  // copy not allowed
280  BufferedGraph(const BufferedGraph&) = delete;
282  BufferedGraph& operator=(const BufferedGraph&) = delete;
283  // move not allowed
285  BufferedGraph(BufferedGraph&&) = delete;
288 
294  uint32_t size() const { return globalSize; }
295 
301  uint32_t sizeEdges() const { return globalEdgeSize; }
302 
304  uint64_t getNodeOffset() const { return nodeOffset; }
305 
312  void loadGraph(const std::string& filename) {
313  if (graphLoaded) {
314  GALOIS_DIE("Cannot load an buffered graph more than once.");
315  }
316 
317  std::ifstream graphFile(filename.c_str());
318  uint64_t header[4];
319  graphFile.read(((char*)header), sizeof(uint64_t) * 4);
320 
321  numLocalNodes = globalSize = header[2];
322  numLocalEdges = globalEdgeSize = header[3];
323 
324  loadOutIndex(graphFile, 0, globalSize);
325  loadEdgeDest(graphFile, 0, globalEdgeSize, globalSize);
326  // may or may not do something depending on EdgeDataType
327  loadEdgeData<EdgeDataType>(graphFile, 0, globalEdgeSize, globalSize,
328  globalEdgeSize);
329  graphLoaded = true;
330 
331  graphFile.close();
332  }
333 
348  void loadPartialGraph(const std::string& filename, uint64_t nodeStart,
349  uint64_t nodeEnd, uint64_t edgeStart, uint64_t edgeEnd,
350  uint64_t numGlobalNodes, uint64_t numGlobalEdges) {
351  if (graphLoaded) {
352  GALOIS_DIE("Cannot load an buffered graph more than once.");
353  }
354 
355  std::ifstream graphFile(filename.c_str());
356 
357  globalSize = numGlobalNodes;
358  globalEdgeSize = numGlobalEdges;
359 
360  assert(nodeEnd >= nodeStart);
361  numLocalNodes = nodeEnd - nodeStart;
362  loadOutIndex(graphFile, nodeStart, numLocalNodes);
363 
364  assert(edgeEnd >= edgeStart);
365  numLocalEdges = edgeEnd - edgeStart;
366  loadEdgeDest(graphFile, edgeStart, numLocalEdges, numGlobalNodes);
367 
368  // may or may not do something depending on EdgeDataType
369  loadEdgeData<EdgeDataType>(graphFile, edgeStart, numLocalEdges,
370  numGlobalNodes, numGlobalEdges);
371  graphLoaded = true;
372 
373  graphFile.close();
374  }
375 
377  using EdgeIterator = boost::counting_iterator<uint64_t>;
386  EdgeIterator edgeBegin(uint64_t globalNodeID) {
387  if (!graphLoaded) {
388  GALOIS_DIE("Graph hasn't been loaded yet.");
389  }
390 
391  if (numLocalNodes == 0) {
392  return EdgeIterator(0);
393  }
394  assert(nodeOffset <= globalNodeID);
395  assert(globalNodeID < (nodeOffset + numLocalNodes));
396 
397  uint64_t localNodeID = globalNodeID - nodeOffset;
398 
399  if (localNodeID != 0) {
400  numBytesReadOutIndex += sizeof(uint64_t);
401  return EdgeIterator(outIndexBuffer[localNodeID - 1]);
402  } else {
403  return EdgeIterator(edgeOffset);
404  }
405  }
406 
414  EdgeIterator edgeEnd(uint64_t globalNodeID) {
415  if (!graphLoaded) {
416  GALOIS_DIE("Graph hasn't been loaded yet.");
417  }
418 
419  if (numLocalNodes == 0) {
420  return EdgeIterator(0);
421  }
422  assert(nodeOffset <= globalNodeID);
423  assert(globalNodeID < (nodeOffset + numLocalNodes));
424 
425  numBytesReadOutIndex += sizeof(uint64_t);
426 
427  uint64_t localNodeID = globalNodeID - nodeOffset;
428  return EdgeIterator(outIndexBuffer[localNodeID]);
429  }
430 
437  uint64_t edgeDestination(uint64_t globalEdgeID) {
438  if (!graphLoaded) {
439  GALOIS_DIE("Graph hasn't been loaded yet.");
440  }
441 
442  if (numLocalEdges == 0) {
443  return 0;
444  }
445  assert(edgeOffset <= globalEdgeID);
446  assert(globalEdgeID < (edgeOffset + numLocalEdges));
447 
448  numBytesReadEdgeDest += sizeof(uint32_t);
449 
450  uint64_t localEdgeID = globalEdgeID - edgeOffset;
451  return edgeDestBuffer[localEdgeID];
452  }
453 
460  template <typename K = EdgeDataType,
461  typename std::enable_if<!std::is_void<K>::value>::type* = nullptr>
462  EdgeDataType edgeData(uint64_t globalEdgeID) {
463  if (!graphLoaded) {
464  GALOIS_DIE("Graph hasn't been loaded yet.");
465  }
466  if (edgeDataBuffer == nullptr) {
467  GALOIS_DIE("Trying to get edge data when graph has no edge data.");
468  }
469 
470  if (numLocalEdges == 0) {
471  return 0;
472  }
473 
474  assert(edgeOffset <= globalEdgeID);
475  assert(globalEdgeID < (edgeOffset + numLocalEdges));
476 
477  numBytesReadEdgeData += sizeof(EdgeDataType);
478 
479  uint64_t localEdgeID = globalEdgeID - edgeOffset;
480  return edgeDataBuffer[localEdgeID];
481  }
482 
486  template <typename K = EdgeDataType,
487  typename std::enable_if<std::is_void<K>::value>::type* = nullptr>
488  unsigned edgeData(uint64_t) {
489  galois::gWarn("Getting edge data on graph when it doesn't exist\n");
490  return 0;
491  }
492 
497  numBytesReadOutIndex.reset();
498  numBytesReadEdgeDest.reset();
499  numBytesReadEdgeData.reset();
500  }
501 
508  uint64_t getBytesRead() {
509  return numBytesReadOutIndex.reduce() + numBytesReadEdgeDest.reduce() +
510  numBytesReadEdgeData.reduce();
511  }
512 
516  void resetAndFree() {
517  freeMemory();
518  resetGraphStatus();
519  }
520 };
521 } // namespace graphs
522 } // namespace galois
523 #endif
EdgeIterator edgeEnd(uint64_t globalNodeID)
Get the index to the first edge of the node after the provided node.
Definition: BufferedGraph.h:414
void reset()
Definition: Reduction.h:113
Class that loads a portion of a Galois graph from disk directly into memory buffers for access...
Definition: BufferedGraph.h:49
EdgeIterator edgeBegin(uint64_t globalNodeID)
Get the index to the first edge of the provided node THAT THIS GRAPH HAS LOADED (not necessary the fi...
Definition: BufferedGraph.h:386
EdgeDataType edgeData(uint64_t globalEdgeID)
Get the edge data of some edge.
Definition: BufferedGraph.h:462
void gDebug(Args &&...GALOIS_USED_ONLY_IN_DEBUG(args))
Prints a debug string from a sequence of things; prints nothing if NDEBUG is defined.
Definition: gIO.h:72
uint32_t size() const
Gets the number of global nodes in the graph.
Definition: BufferedGraph.h:294
#define GALOIS_DIE(...)
Definition: gIO.h:96
void resetReadCounters()
Reset reading counters.
Definition: BufferedGraph.h:496
void loadPartialGraph(const std::string &filename, uint64_t nodeStart, uint64_t nodeEnd, uint64_t edgeStart, uint64_t edgeEnd, uint64_t numGlobalNodes, uint64_t numGlobalEdges)
Given a node/edge range to load, loads the specified portion of the graph into memory buffers using r...
Definition: BufferedGraph.h:348
~BufferedGraph() noexcept
On destruction, free allocated buffers (if necessary).
Definition: BufferedGraph.h:276
uint32_t sizeEdges() const
Gets the number of global edges in the graph.
Definition: BufferedGraph.h:301
uint64_t getNodeOffset() const
Definition: BufferedGraph.h:304
unsigned edgeData(uint64_t)
Version of above function when edge data type is void.
Definition: BufferedGraph.h:488
BufferedGraph()
Class vars should be initialized by in-class initialization; all left is to reset read counters...
Definition: BufferedGraph.h:271
uint64_t getBytesRead()
Returns the total number of bytes read from this graph so far.
Definition: BufferedGraph.h:508
BufferedGraph & operator=(const BufferedGraph &)=delete
disabled copy constructor operator
T & reduce()
Returns the final reduction value.
Definition: Reduction.h:102
void loadGraph(const std::string &filename)
Loads given Galois CSR graph into memory.
Definition: BufferedGraph.h:312
boost::counting_iterator< uint64_t > EdgeIterator
Edge iterator typedef.
Definition: BufferedGraph.h:377
void gWarn(Args &&...args)
Prints a warning string from a sequence of things.
Definition: gIO.h:63
void resetAndFree()
Free all of the in memory buffers in this object and reset graph status.
Definition: BufferedGraph.h:516
uint64_t edgeDestination(uint64_t globalEdgeID)
Get the global node id of the destination of the provided edge.
Definition: BufferedGraph.h:437