Galois
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
OfflineGraph.h
Go to the documentation of this file.
1 /*
2  * This file belongs to the Galois project, a C++ library for exploiting
3  * parallelism. The code is being released under the terms of the 3-Clause BSD
4  * License (a copy is located in LICENSE.txt at the top-level directory).
5  *
6  * Copyright (C) 2018, The University of Texas at Austin. All rights reserved.
7  * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS
8  * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY,
9  * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF
10  * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF
11  * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH
12  * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances
13  * shall University be liable for incidental, special, indirect, direct or
14  * consequential damages or loss of profits, interruption of business, or
15  * related expenses which may arise from use of Software or Documentation,
16  * including but not limited to those resulting from defects in Software and/or
17  * Documentation, or loss or inaccuracy of data of any kind.
18  */
19 
20 #ifndef _GALOIS_DIST_OFFLINE_GRAPH_
21 #define _GALOIS_DIST_OFFLINE_GRAPH_
22 
23 #include <cstdint>
24 #include <fstream>
25 #include <iostream>
26 #include <mutex>
27 #include <numeric>
28 
29 #include <fcntl.h>
30 #include <sys/mman.h>
31 
32 #include <boost/iterator/counting_iterator.hpp>
33 
34 #include "galois/config.h"
35 #include "galois/graphs/Details.h"
38 
39 namespace galois {
40 namespace graphs {
41 
42 // File format V1:
43 // version (1) {uint64_t LE}
44 // EdgeType size {uint64_t LE}
45 // numNodes {uint64_t LE}
46 // numEdges {uint64_t LE}
47 // outindexs[numNodes] {uint64_t LE} (outindex[nodeid] is index of first edge
48 // for nodeid + 1 (end interator. node 0 has an implicit start iterator of 0.
49 // outedges[numEdges] {uint32_t LE}
50 // potential padding (32bit max) to Re-Align to 64bits
51 // EdgeType[numEdges] {EdgeType size}
52 
53 // File format V2:
54 // version (2) {uint64_t LE}
55 // EdgeType size {uint64_t LE}
56 // numNodes {uint64_t LE}
57 // numEdges {uint64_t LE}
58 // outindexs[numNodes] {uint64_t LE} (outindex[nodeid] is index of first edge
59 // for nodeid + 1 (end interator. node 0 has an implicit start iterator of 0.
60 // outedges[numEdges] {uint64_t LE}
61 // EdgeType[numEdges] {EdgeType size}
62 
63 class OfflineGraph {
64  std::ifstream fileEdgeDst, fileIndex, fileEdgeData;
65  std::streamoff locEdgeDst, locIndex, locEdgeData;
66 
67  uint64_t numNodes;
68  uint64_t numEdges;
69  uint64_t sizeEdgeData;
70  size_t length;
71  bool v2;
72  uint64_t numSeeksEdgeDst, numSeeksIndex, numSeeksEdgeData;
73  uint64_t numBytesReadEdgeDst, numBytesReadIndex, numBytesReadEdgeData;
74 
76 
77  uint64_t outIndexs(uint64_t node) {
78  std::lock_guard<decltype(lock)> lg(lock);
79  std::streamoff pos = (4 + node) * sizeof(uint64_t);
80 
81  // move to correct position in file
82  if (locEdgeDst != pos) {
83  numSeeksEdgeDst++;
84  fileEdgeDst.seekg(pos, fileEdgeDst.beg);
85  locEdgeDst = pos;
86  }
87 
88  // read the value
89  uint64_t retval;
90  try {
91  fileEdgeDst.read(reinterpret_cast<char*>(&retval), sizeof(uint64_t));
92  } catch (const std::ifstream::failure& e) {
93  std::cerr << "Exception while reading edge destinations:" << e.what()
94  << "\n";
95  std::cerr << "IO error flags: EOF " << fileEdgeDst.eof() << " FAIL "
96  << fileEdgeDst.fail() << " BAD " << fileEdgeDst.bad() << "\n";
97  }
98 
99  // metadata update
100  auto numBytesRead = fileEdgeDst.gcount();
101  assert(numBytesRead == sizeof(uint64_t));
102  locEdgeDst += numBytesRead;
103  numBytesReadEdgeDst += numBytesRead;
104 
105  return retval;
106  }
107 
108  uint64_t outEdges(uint64_t edge) {
109  std::lock_guard<decltype(lock)> lg(lock);
110  std::streamoff pos = (4 + numNodes) * sizeof(uint64_t) +
111  edge * (v2 ? sizeof(uint64_t) : sizeof(uint32_t));
112 
113  // move to correct position
114  if (locIndex != pos) {
115  numSeeksIndex++;
116  fileIndex.seekg(pos, fileEdgeDst.beg);
117  locIndex = pos;
118  }
119 
120  // v2 reads 64 bits, v1 reads 32 bits
121  if (v2) {
122  uint64_t retval;
123  try {
124  fileIndex.read(reinterpret_cast<char*>(&retval), sizeof(uint64_t));
125  } catch (const std::ifstream::failure& e) {
126  std::cerr << "Exception while reading index:" << e.what() << "\n";
127  std::cerr << "IO error flags: EOF " << fileIndex.eof() << " FAIL "
128  << fileIndex.fail() << " BAD " << fileIndex.bad() << "\n";
129  }
130 
131  auto numBytesRead = fileIndex.gcount();
132  assert(numBytesRead == sizeof(uint64_t));
133  locIndex += numBytesRead;
134  numBytesReadIndex += numBytesRead;
135  return retval;
136  } else {
137  uint32_t retval;
138  try {
139  fileIndex.read(reinterpret_cast<char*>(&retval), sizeof(uint32_t));
140  } catch (const std::ifstream::failure& e) {
141  std::cerr << "Exception while reading index:" << e.what() << "\n";
142  std::cerr << "IO error flags: EOF " << fileIndex.eof() << " FAIL "
143  << fileIndex.fail() << " BAD " << fileIndex.bad() << "\n";
144  }
145 
146  auto numBytesRead = fileIndex.gcount();
147  assert(numBytesRead == sizeof(uint32_t));
148  locIndex += numBytesRead;
149  numBytesReadIndex += numBytesRead;
150  return retval;
151  }
152  }
153 
154  template <typename T>
155  T edgeData(uint64_t edge) {
156  assert(sizeof(T) <= sizeEdgeData);
157  std::lock_guard<decltype(lock)> lg(lock);
158  std::streamoff pos = (4 + numNodes) * sizeof(uint64_t) +
159  numEdges * (v2 ? sizeof(uint64_t) : sizeof(uint32_t));
160 
161  // align + move to correct position
162  pos = (pos + 7) & ~7;
163  pos += edge * sizeEdgeData;
164 
165  if (locEdgeData != pos) {
166  numSeeksEdgeData++;
167  fileEdgeData.seekg(pos, fileEdgeDst.beg);
168  locEdgeData = pos;
169  }
170 
171  T retval;
172  try {
173  fileEdgeData.read(reinterpret_cast<char*>(&retval), sizeof(T));
174  } catch (const std::ifstream::failure& e) {
175  std::cerr << "Exception while reading edge data:" << e.what() << "\n";
176  std::cerr << "IO error flags: EOF " << fileEdgeData.eof() << " FAIL "
177  << fileEdgeData.fail() << " BAD " << fileEdgeData.bad() << "\n";
178  }
179 
180  auto numBytesRead = fileEdgeData.gcount();
181  assert(numBytesRead == sizeof(T));
182  locEdgeData += numBytesRead;
183  numBytesReadEdgeData += numBytesRead;
184  /*fprintf(stderr, "READ:: %ld[", edge);
185  for(int i=0; i<sizeof(T); ++i){
186  fprintf(stderr, "%c", reinterpret_cast<char*>(&retval)[i]);
187  }
188  fprintf(stderr, "]");*/
189  return retval;
190  }
191 
192 public:
193  typedef boost::counting_iterator<uint64_t> iterator;
194  typedef boost::counting_iterator<uint64_t> edge_iterator;
195  typedef uint64_t GraphNode;
196 
197  OfflineGraph(const std::string& name)
198  : fileEdgeDst(name, std::ios_base::binary),
199  fileIndex(name, std::ios_base::binary),
200  fileEdgeData(name, std::ios_base::binary), locEdgeDst(0), locIndex(0),
201  locEdgeData(0), numSeeksEdgeDst(0), numSeeksIndex(0),
202  numSeeksEdgeData(0), numBytesReadEdgeDst(0), numBytesReadIndex(0),
203  numBytesReadEdgeData(0) {
204  if (!fileEdgeDst.is_open() || !fileEdgeDst.good())
205  throw "Bad filename";
206  if (!fileIndex.is_open() || !fileIndex.good())
207  throw "Bad filename";
208  if (!fileEdgeData.is_open() || !fileEdgeData.good())
209  throw "Bad filename";
210 
211  fileEdgeDst.exceptions(std::ifstream::eofbit | std::ifstream::failbit |
212  std::ifstream::badbit);
213  fileIndex.exceptions(std::ifstream::eofbit | std::ifstream::failbit |
214  std::ifstream::badbit);
215  fileEdgeData.exceptions(std::ifstream::eofbit | std::ifstream::failbit |
216  std::ifstream::badbit);
217 
218  uint64_t ver = 0;
219 
220  try {
221  fileEdgeDst.read(reinterpret_cast<char*>(&ver), sizeof(uint64_t));
222  fileEdgeDst.read(reinterpret_cast<char*>(&sizeEdgeData),
223  sizeof(uint64_t));
224  fileEdgeDst.read(reinterpret_cast<char*>(&numNodes), sizeof(uint64_t));
225  fileEdgeDst.read(reinterpret_cast<char*>(&numEdges), sizeof(uint64_t));
226  } catch (const std::ifstream::failure& e) {
227  std::cerr << "Exception while reading graph header:" << e.what() << "\n";
228  std::cerr << "IO error flags: EOF " << fileEdgeDst.eof() << " FAIL "
229  << fileEdgeDst.fail() << " BAD " << fileEdgeDst.bad() << "\n";
230  }
231 
232  if (ver == 0 || ver > 2)
233  throw "Bad Version";
234 
235  v2 = ver == 2;
236 
237  if (!fileEdgeDst)
238  throw "Out of data";
239 
240  // File length
241  fileEdgeDst.seekg(0, fileEdgeDst.end);
242  length = fileEdgeDst.tellg();
243  if (length < sizeof(uint64_t) * (4 + numNodes) +
244  (v2 ? sizeof(uint64_t) : sizeof(uint32_t)) * numEdges)
245  throw "File too small";
246 
247  fileEdgeDst.seekg(0, std::ios_base::beg);
248  fileEdgeData.seekg(0, std::ios_base::beg);
249  fileIndex.seekg(0, std::ios_base::beg);
250  }
251 
252  uint64_t num_seeks() {
253  // std::cout << "Seeks :: " << numSeeksEdgeDst << " , " << numSeeksEdgeData
254  // << " , " << numSeeksIndex << " \n";
255  return numSeeksEdgeDst + numSeeksEdgeData + numSeeksIndex;
256  }
257 
258  uint64_t num_bytes_read() {
259  // std::cout << "Bytes read :: " << numBytesReadEdgeDst << " , " <<
260  // numBytesReadEdgeData << " , " << numBytesReadIndex << " \n";
261  return numBytesReadEdgeDst + numBytesReadEdgeData + numBytesReadIndex;
262  }
263 
265  numSeeksEdgeDst = numSeeksEdgeData = numSeeksIndex = 0;
266  numBytesReadEdgeDst = numBytesReadEdgeData = numBytesReadIndex = 0;
267  }
268 
269  OfflineGraph(OfflineGraph&&) = default;
270 
271  size_t size() const { return numNodes; }
272  size_t sizeEdges() const { return numEdges; }
273  size_t edgeSize() const { return sizeEdgeData; }
274 
275  iterator begin() { return iterator(0); }
276  iterator end() { return iterator(numNodes); }
277 
279  if (N == 0)
280  return edge_iterator(0);
281  else
282  return edge_iterator(outIndexs(N - 1));
283  }
284 
285  edge_iterator edge_end(GraphNode N) { return edge_iterator(outIndexs(N)); }
286 
287  GraphNode getEdgeDst(edge_iterator ni) { return outEdges(*ni); }
288 
290  return internal::make_no_deref_range(edge_begin(N), edge_end(N));
291  }
292 
293  template <typename T>
295  return edgeData<T>(*ni);
296  }
297 
304  uint64_t operator[](uint64_t n) { return outIndexs(n); }
305 
306  // typedefs used by divide by node below
307  typedef std::pair<iterator, iterator> NodeRange;
308  typedef std::pair<edge_iterator, edge_iterator> EdgeRange;
309  typedef std::pair<NodeRange, EdgeRange> GraphRange;
310 
324  auto divideByNode(size_t nodeWeight, size_t edgeWeight, size_t id,
325  size_t total,
326  std::vector<unsigned> scaleFactor = std::vector<unsigned>())
327  -> GraphRange {
328  return galois::graphs::divideNodesBinarySearch<OfflineGraph>(
329  numNodes, numEdges, nodeWeight, edgeWeight, id, total, *this,
330  scaleFactor);
331  }
332 };
333 
335  std::fstream file;
336  uint64_t numNodes, numEdges;
337  bool smallData;
338  uint64_t ver;
339  std::vector<uint64_t> bufferDst;
340 
341  std::deque<uint64_t> edgeOffsets;
342 
343  std::streamoff offsetOfDst(uint64_t edge) {
344  return sizeof(uint64_t) * (4 + numNodes + edge);
345  }
346  std::streamoff offsetOfData(uint64_t edge) {
347  return sizeof(uint64_t) * (4 + numNodes + numEdges) +
348  (smallData ? sizeof(float) : sizeof(double)) * edge;
349  }
350 
351  void setEdge32(uint64_t src, uint64_t offset, uint64_t dst, uint32_t val) {
352  if (src)
353  offset += edgeOffsets[src - 1];
354  file.seekg(offsetOfDst(offset), std::ios_base::beg);
355  file.write(reinterpret_cast<char*>(&dst), sizeof(uint64_t));
356  file.seekg(offsetOfData(offset), std::ios_base::beg);
357  file.write(reinterpret_cast<char*>(&val), sizeof(uint32_t));
358  }
359 
360  void setEdge64(uint64_t src, uint64_t offset, uint64_t dst, uint64_t val) {
361  if (src)
362  offset += edgeOffsets[src - 1];
363  file.seekg(offsetOfDst(offset), std::ios_base::beg);
364  file.write(reinterpret_cast<char*>(&dst), sizeof(uint64_t));
365  file.seekg(offsetOfData(offset), std::ios_base::beg);
366  file.write(reinterpret_cast<char*>(&val), sizeof(uint64_t));
367  }
368 
369  void setEdge_sorted(uint64_t dst) {
370  if (ver == 1) {
371  uint32_t dst32 = dst;
372  file.write(reinterpret_cast<char*>(&dst32), sizeof(uint32_t));
373  } else {
374  file.write(reinterpret_cast<char*>(&dst), sizeof(uint64_t));
375  }
376  }
377 
378  void setEdge_sortedBuffer() {
379  if (ver == 1) {
380  std::vector<uint32_t> tmp(bufferDst.begin(), bufferDst.end());
381  file.write(reinterpret_cast<char*>(&tmp[0]),
382  (sizeof(uint32_t) * tmp.size()));
383  }
384  file.write(reinterpret_cast<char*>(&bufferDst[0]),
385  (sizeof(uint64_t) * bufferDst.size()));
386  }
387 
388  // void setEdge64_sorted(uint64_t dst) {
389  // file.write(reinterpret_cast<char*>(&dst), sizeof(uint32_t));
390  //}
391 
392 public:
393  OfflineGraphWriter(const std::string& name, bool use32 = false,
394  uint64_t _numNodes = 0, uint64_t _numEdges = 0)
395  : file(name, std::ios_base::in | std::ios_base::out |
396  std::ios_base::binary | std::ios_base::trunc),
397  numNodes(_numNodes), numEdges(_numEdges), smallData(use32), ver(1) {
398  if (!file.is_open() || !file.good())
399  throw "Bad filename";
400  uint64_t etSize = smallData ? sizeof(float) : sizeof(double);
401  file.write(reinterpret_cast<char*>(&ver), sizeof(uint64_t));
402  file.write(reinterpret_cast<char*>(&etSize), sizeof(uint64_t));
403  file.write(reinterpret_cast<char*>(&numNodes), sizeof(uint64_t));
404  file.write(reinterpret_cast<char*>(&numEdges), sizeof(uint64_t));
405  file.seekg(0, std::ios_base::beg);
406  }
407 
409 
410  // sets the number of nodes and edges. points to an container of edge counts
411  void setCounts(std::deque<uint64_t> edgeCounts) {
412  edgeOffsets = std::move(edgeCounts);
413  numNodes = edgeOffsets.size();
414  numEdges = std::accumulate(edgeOffsets.begin(), edgeOffsets.end(), 0);
415  std::cout << " NUM EDGES : " << numEdges << "\n";
416  std::partial_sum(edgeOffsets.begin(), edgeOffsets.end(),
417  edgeOffsets.begin());
418  // Nodes are greater than 2^32 so need ver = 2.
419  if (numNodes >= 4294967296) {
420  ver = 2;
421  } else {
422  ver = 1;
423  }
424  std::cout << " USING VERSION : " << ver << "\n";
425  uint64_t etSize = 0; // smallData ? sizeof(float) : sizeof(double);
426  file.seekg(0, std::ios_base::beg);
427  file.write(reinterpret_cast<char*>(&ver), sizeof(uint64_t));
428  file.write(reinterpret_cast<char*>(&etSize), sizeof(uint64_t));
429  // file.seekg(sizeof(uint64_t)*2, std::ios_base::beg);
430  file.write(reinterpret_cast<char*>(&numNodes), sizeof(uint64_t));
431  file.write(reinterpret_cast<char*>(&numEdges), sizeof(uint64_t));
432  for (auto i : edgeOffsets)
433  file.write(reinterpret_cast<char*>(&i), sizeof(uint64_t));
434  file.seekg(0, std::ios_base::beg);
435  }
436 
437  void setEdge(uint64_t src, uint64_t offset, uint64_t dst, uint64_t val) {
438  if (smallData)
439  setEdge32(src, offset, dst, val);
440  else
441  setEdge64(src, offset, dst, val);
442  }
443 
444  void setEdgeSorted(uint64_t dst) { setEdge_sorted(dst); }
445 
446  void seekEdgesDstStart() { file.seekg(offsetOfDst(0), std::ios_base::beg); }
447 };
448 
449 } // namespace graphs
450 } // namespace galois
451 
452 #endif //_GALOIS_DIST_OFFLINE_GRAPH_
iterator end()
Definition: OfflineGraph.h:276
void setCounts(std::deque< uint64_t > edgeCounts)
Definition: OfflineGraph.h:411
std::pair< iterator, iterator > NodeRange
Definition: OfflineGraph.h:307
T getEdgeData(edge_iterator ni)
Definition: OfflineGraph.h:294
uint64_t GraphNode
Definition: OfflineGraph.h:195
auto divideByNode(size_t nodeWeight, size_t edgeWeight, size_t id, size_t total, std::vector< unsigned > scaleFactor=std::vector< unsigned >()) -> GraphRange
Returns 2 ranges (one for nodes, one for edges) for a particular division.
Definition: OfflineGraph.h:324
OfflineGraph(const std::string &name)
Definition: OfflineGraph.h:197
size_t edgeSize() const
Definition: OfflineGraph.h:273
runtime::iterable< NoDerefIterator< edge_iterator > > edges(GraphNode N)
Definition: OfflineGraph.h:289
uint64_t num_bytes_read()
Definition: OfflineGraph.h:258
std::pair< edge_iterator, edge_iterator > EdgeRange
Definition: OfflineGraph.h:308
Definition: Iterable.h:36
Definition: OfflineGraph.h:334
edge_iterator edge_begin(GraphNode N)
Definition: OfflineGraph.h:278
std::pair< NodeRange, EdgeRange > GraphRange
Definition: OfflineGraph.h:309
iterator begin()
Definition: OfflineGraph.h:275
Definition: OfflineGraph.h:63
boost::counting_iterator< uint64_t > edge_iterator
Definition: OfflineGraph.h:194
uint64_t operator[](uint64_t n)
Accesses the prefix sum on disk.
Definition: OfflineGraph.h:304
~OfflineGraphWriter()
Definition: OfflineGraph.h:408
void setEdge(uint64_t src, uint64_t offset, uint64_t dst, uint64_t val)
Definition: OfflineGraph.h:437
OutputIt partial_sum(InputIt first, InputIt last, OutputIt d_first)
Does a partial sum from first -&gt; last and writes the results to the d_first iterator.
Definition: ParallelSTL.h:314
GraphNode getEdgeDst(edge_iterator ni)
Definition: OfflineGraph.h:287
void reset_seek_counters()
Definition: OfflineGraph.h:264
size_t size() const
Definition: OfflineGraph.h:271
SimpleLock is a spinlock.
Definition: SimpleLock.h:36
void setEdgeSorted(uint64_t dst)
Definition: OfflineGraph.h:444
uint64_t num_seeks()
Definition: OfflineGraph.h:252
size_t sizeEdges() const
Definition: OfflineGraph.h:272
OfflineGraphWriter(const std::string &name, bool use32=false, uint64_t _numNodes=0, uint64_t _numEdges=0)
Definition: OfflineGraph.h:393
boost::counting_iterator< uint64_t > iterator
Definition: OfflineGraph.h:193
edge_iterator edge_end(GraphNode N)
Definition: OfflineGraph.h:285
T accumulate(InputIterator first, InputIterator last, const T &identity, const BinaryOperation &binary_op)
Definition: ParallelSTL.h:268
void seekEdgesDstStart()
Definition: OfflineGraph.h:446