28 #ifndef _GALOIS_GLUONEDGESUB_H_
29 #define _GALOIS_GLUONEDGESUB_H_
31 #include <unordered_map>
40 #ifdef GALOIS_ENABLE_GPU
50 #ifdef GALOIS_USE_BARE_MPI
51 extern BareMPI bare_mpi;
66 template <typename GraphTy>
76 constexpr static const char* const RNAME = "GluonEdges";
89 std::vector<std::vector<size_t>> masterEdges;
92 std::vector<std::vector<size_t>>& mirrorEdges;
96 #ifdef GALOIS_USE_BARE_MPI
97 std::vector<MPI_Group> mpi_identity_groups;
104 if (userGraph.sizeEdges() > 0) {
110 void inline incrementEvilPhase() {
126 void exchangeProxyInfo() {
130 for (unsigned x = 0; x < numHosts; ++x) {
135 gSerialize(b, mirrorEdges[x]);
140 for (unsigned x = 0; x < numHosts; ++x) {
151 incrementEvilPhase();
158 void sendInfoToHost() {
161 uint64_t totalMirrorEdges =
162 userGraph.sizeEdges() - userGraph.numOwnedEdges();
163 uint64_t totalOwnedEdges = userGraph.numOwnedEdges();
166 for (unsigned x = 0; x < numHosts; ++x) {
171 gSerialize(b, totalMirrorEdges, totalOwnedEdges);
176 for (unsigned x = 0; x < numHosts; ++x) {
185 uint64_t totalMirrorFromOther;
186 uint64_t totalOwnedFromOther;
188 totalOwnedFromOther);
189 totalMirrorEdges += totalMirrorFromOther;
190 totalOwnedEdges += totalOwnedFromOther;
192 incrementEvilPhase();
194 assert(userGraph.globalEdges() == totalOwnedEdges);
198 reportProxyStats(totalMirrorEdges);
206 void setupCommunication() {
220 for (uint32_t h = 0; h < masterEdges.size(); ++h) {
224 masterEdges[h][n] = userGraph.getEdgeLID(masterEdges[h][n]);
226 #if GALOIS_COMM_STATS
232 for (uint32_t h = 0; h < mirrorEdges.size(); ++h) {
236 mirrorEdges[h][n] = userGraph.getEdgeLID(mirrorEdges[h][n]);
238 #if GALOIS_COMM_STATS
248 for (auto x = 0U; x < masterEdges.size(); ++x) {
251 std::string master_edges_str =
252 "MasterEdgesFrom_" + std::to_string(id) + "_To_" + std::to_string(x);
253 galois::runtime::reportStatCond_Tsum<MORE_DIST_STATS>(
254 RNAME, master_edges_str, masterEdges[x].size());
255 if (masterEdges[x].size() > maxSharedSize) {
256 maxSharedSize = masterEdges[x].size();
260 for (auto x = 0U; x < mirrorEdges.size(); ++x) {
263 std::string mirror_edges_str =
264 "MirrorEdgesFrom_" + std::to_string(x) + "_To_" + std::to_string(id);
265 galois::runtime::reportStatCond_Tsum<MORE_DIST_STATS>(
266 RNAME, mirror_edges_str, mirrorEdges[x].size());
267 if (mirrorEdges[x].size() > maxSharedSize) {
268 maxSharedSize = mirrorEdges[x].size();
286 void reportProxyStats(uint64_t totalMirrorEdges) {
287 float replication_factor =
288 (float)(totalMirrorEdges + userGraph.globalEdges()) /
289 (float)userGraph.globalEdges();
292 galois::runtime::reportStatCond_Single<MORE_DIST_STATS>(
293 RNAME, "TotalGlobalMirrorEdges", totalMirrorEdges);
304 #ifdef GALOIS_USE_BARE_MPI
305 if (bare_mpi == noBareMPI)
308 #ifdef GALOIS_USE_LCI
311 MPI_Comm_rank(MPI_COMM_WORLD, &taskRank);
312 if ((unsigned)taskRank != id)
315 MPI_Comm_size(MPI_COMM_WORLD, &numTasks);
316 if ((unsigned)numTasks != numHosts)
320 MPI_Group world_group;
321 MPI_Comm_group(MPI_COMM_WORLD, &world_group);
322 mpi_identity_groups.resize(numHosts);
324 for (unsigned x = 0; x < numHosts; ++x) {
325 const int g[1] = {(int)x};
326 MPI_Group_incl(world_group, 1, g, &mpi_identity_groups[x]);
331 case nonBlockingBareMPI:
334 case oneSidedBareMPI:
362 bool doNothing = false,
364 : galois::runtime::GlobalObject(this), userGraph(_userGraph), id(host),
365 substrateDataMode(_substrateDataMode), numHosts(numHosts), num_run(0),
366 num_round(0), mirrorEdges(userGraph.getMirrorEdges()) {
369 "GluonEdgeSubstrateConstructTime", RNAME);
370 edgeSubstrateSetupTimer.start();
377 masterEdges.resize(numHosts);
381 "GraphCommSetupTime", RNAME);
382 Tgraph_construct_comm.start();
383 setupCommunication();
384 Tgraph_construct_comm.stop();
386 edgeSubstrateSetupTimer.stop();
408 template <SyncType syncType>
409 void getOffsetsFromBitset(const std::string& loopName,
412 size_t& bit_set_count) const {
414 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
415 std::string offsets_timer_str(syncTypeStr + "Offsets_" +
423 std::vector<unsigned int> t_prefix_bit_counts(activeThreads);
428 unsigned int block_size = bitset_comm.size() / nthreads;
429 if ((bitset_comm.size() % nthreads) > 0)
431 assert((block_size * nthreads) >= bitset_comm.size());
433 unsigned int start = tid * block_size;
434 unsigned int end = (tid + 1) * block_size;
435 if (end > bitset_comm.size())
436 end = bitset_comm.size();
438 unsigned int count = 0;
439 for (unsigned int i = start; i < end; ++i) {
440 if (bitset_comm.test(i))
444 t_prefix_bit_counts[tid] = count;
449 t_prefix_bit_counts[i] += t_prefix_bit_counts[i - 1];
452 bit_set_count = t_prefix_bit_counts[activeThreads - 1];
456 if (bit_set_count > 0) {
457 offsets.resize(bit_set_count);
462 unsigned int block_size = bitset_comm.size() / nthreads;
463 if ((bitset_comm.size() % nthreads) > 0)
465 assert((block_size * nthreads) >= bitset_comm.size());
467 unsigned int start = tid * block_size;
468 unsigned int end = (tid + 1) * block_size;
469 if (end > bitset_comm.size())
470 end = bitset_comm.size();
472 unsigned int count = 0;
473 unsigned int t_prefix_bit_count;
475 t_prefix_bit_count = 0;
477 t_prefix_bit_count = t_prefix_bit_counts[tid - 1];
480 for (unsigned int i = start; i < end; ++i) {
481 if (bitset_comm.test(i)) {
482 offsets[t_prefix_bit_count + count] = i;
513 template <typename FnTy, SyncType syncType>
514 void getBitsetAndOffsets(const std::string& loopName,
515 const std::vector<size_t>& indices,
519 size_t& bit_set_count,
521 if (substrateDataMode != onlyData) {
523 std::string syncTypeStr =
524 (syncType == syncReduce) ? "Reduce" : "Broadcast";
525 std::string doall_str(syncTypeStr + "Bitset_" + loopName);
534 size_t lid = indices[n];
535 if (bitset_compute.test(lid)) {
539 #if GALOIS_COMM_STATS
545 getOffsetsFromBitset<syncType>(loopName, bitset_comm, offsets,
550 get_data_mode<typename FnTy::ValTy>(bit_set_count, indices.size());
567 template <SyncType syncType>
568 void convertLIDToGID(const std::string& loopName,
569 const std::vector<size_t>& indices,
571 galois::gWarn("LID to GID edge conversion is extremely inefficient at the "
573 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
574 std::string doall_str(syncTypeStr + "_LID2GID_" +
580 static_cast<uint32_t>(userGraph.getEdgeGID(indices[offsets[n]]));
582 #if GALOIS_COMM_STATS
596 template <SyncType syncType>
597 void convertGIDToLID(const std::string& loopName,
599 galois::gWarn("convert GID to LID used in sync call (not optimized)");
600 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
601 std::string doall_str(syncTypeStr + "_GID2LID_" +
606 [&](size_t n) { offsets[n] = userGraph.getEdgeLID(offsets[n]); },
607 #if GALOIS_COMM_STATS
629 SyncType syncType, typename SyncFnTy, typename BitsetFnTy, bool async,
630 typename std::enable_if<!BitsetFnTy::is_vector_bitset()>::type* = nullptr>
631 void getSendBuffer(std::string loopName, unsigned x,
633 auto& sharedEdges = (syncType == syncReduce) ? mirrorEdges : masterEdges;
635 if (BitsetFnTy::is_valid()) {
636 syncExtract<syncType, SyncFnTy, BitsetFnTy, async>(loopName, x,
639 syncExtract<syncType, SyncFnTy, async>(loopName, x, sharedEdges[x], b);
642 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
643 std::string statSendBytes_str(syncTypeStr + "SendBytes_" +
666 template <bool async, SyncType syncType, typename VecType>
667 void serializeMessage(std::string loopName, DataCommMode data_mode,
668 size_t bit_set_count, std::vector<size_t>& indices,
672 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
673 std::string serialize_timer_str(syncTypeStr + "SerializeMessage_" +
676 serialize_timer_str.c_str(), RNAME);
677 if (data_mode == noData) {
680 gSerialize(b, data_mode);
684 offsets.resize(bit_set_count);
685 convertLIDToGID<syncType>(loopName, indices, offsets);
686 val_vec.resize(bit_set_count);
688 gSerialize(b, data_mode, bit_set_count, offsets, val_vec);
691 offsets.resize(bit_set_count);
692 val_vec.resize(bit_set_count);
694 gSerialize(b, data_mode, bit_set_count, offsets, val_vec);
697 val_vec.resize(bit_set_count);
699 gSerialize(b, data_mode, bit_set_count, bit_set_comm, val_vec);
703 gSerialize(b, data_mode, val_vec);
732 template <SyncType syncType, typename VecType>
733 void deserializeMessage(std::string loopName, DataCommMode data_mode,
735 size_t& bit_set_count,
738 size_t& buf_start, size_t& retval, VecType& val_vec) {
739 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
740 std::string serialize_timer_str(syncTypeStr + "DeserializeMessage_" +
743 serialize_timer_str.c_str(), RNAME);
744 Tdeserialize.start();
752 convertGIDToLID<syncType>(loopName, offsets);
784 bool nothingToSend(unsigned host, SyncType syncType) {
785 auto& sharedEdges = (syncType == syncReduce) ? mirrorEdges : masterEdges;
786 return (sharedEdges[host].size() == 0);
798 bool nothingToRecv(unsigned host, SyncType syncType) {
799 auto& sharedEdges = (syncType == syncReduce) ? masterEdges : mirrorEdges;
800 return (sharedEdges[host].size() == 0);
817 template <typename SyncFnTy>
818 void reportRedundantSize(std::string loopName, std::string syncTypeStr,
819 uint32_t totalToSend, size_t bitSetCount,
821 size_t redundant_size =
822 (totalToSend - bitSetCount) * sizeof(typename SyncFnTy::ValTy);
823 size_t bit_set_size = (bitSetComm.get_vec().size() * sizeof(uint64_t));
825 if (redundant_size > bit_set_size) {
826 std::string statSavedBytes_str(syncTypeStr + "SavedBytes_" +
829 galois::runtime::reportStatCond_Tsum<MORE_DIST_STATS>(
830 RNAME, statSavedBytes_str, (redundant_size - bit_set_size));
850 template <typename FnTy, SyncType syncType>
851 inline typename FnTy::ValTy extractWrapper(size_t lid) {
852 if (syncType == syncReduce) {
853 auto val = FnTy::extract(lid, userGraph.getEdgeData(lid));
857 return FnTy::extract(lid, userGraph.getEdgeData(lid));
876 template <typename FnTy, SyncType syncType>
877 inline typename FnTy::ValTy extractWrapper(size_t lid, unsigned vecIndex) {
878 if (syncType == syncReduce) {
879 auto val = FnTy::extract(lid, userGraph.getEdgeData(lid), vecIndex);
880 FnTy::reset(lid, userGraph.getEdgeData(lid), vecIndex);
883 return FnTy::extract(lid, userGraph.getEdgeData(lid), vecIndex);
908 template <typename FnTy, SyncType syncType, bool identity_offsets = false,
909 bool parallelize = true>
910 void extractSubset(const std::string& loopName,
911 const std::vector<size_t>& indices, size_t size,
916 std::string syncTypeStr =
917 (syncType == syncReduce) ? "Reduce" : "Broadcast";
918 std::string doall_str(syncTypeStr + "ExtractVal_" + loopName);
922 [&](unsigned int n) {
924 if (identity_offsets)
928 size_t lid = indices[offset];
929 val_vec[n - start] = extractWrapper<FnTy, syncType>(lid);
931 #if GALOIS_COMM_STATS
936 for (unsigned n = start; n < start + size; ++n) {
938 if (identity_offsets)
943 size_t lid = indices[offset];
944 val_vec[n - start] = extractWrapper<FnTy, syncType>(lid);
976 template <typename FnTy, SyncType syncType, bool identity_offsets = false,
977 bool parallelize = true, bool vecSync = false,
978 typename std::enable_if<vecSync>::type* = nullptr>
979 void extractSubset(const std::string& loopName,
980 const std::vector<size_t>& indices, size_t size,
983 unsigned vecIndex, size_t start = 0) {
987 std::string syncTypeStr =
988 (syncType == syncReduce) ? "Reduce" : "Broadcast";
989 std::string doall_str(syncTypeStr + "ExtractValVector_" + loopName);
993 [&](unsigned int n) {
995 if (identity_offsets)
999 size_t lid = indices[offset];
1000 val_vec[n - start] = extractWrapper<FnTy, syncType>(lid, vecIndex);
1002 #if GALOIS_COMM_STATS
1007 for (unsigned n = start; n < start + size; ++n) {
1008 unsigned int offset;
1009 if (identity_offsets)
1012 offset = offsets[n];
1013 size_t lid = indices[offset];
1014 val_vec[n - start] = extractWrapper<FnTy, syncType>(lid, vecIndex);
1043 template <typename FnTy, typename SeqTy, SyncType syncType,
1044 bool identity_offsets = false, bool parallelize = true>
1045 void extractSubset(const std::string& loopName,
1046 const std::vector<size_t>& indices, size_t size,
1051 std::string syncTypeStr =
1052 (syncType == syncReduce) ? "Reduce" : "Broadcast";
1053 std::string doall_str(syncTypeStr + "ExtractVal_" + loopName);
1057 [&](unsigned int n) {
1058 unsigned int offset;
1059 if (identity_offsets)
1062 offset = offsets[n];
1064 size_t lid = indices[offset];
1065 gSerializeLazy(b, lseq, n - start,
1066 extractWrapper<FnTy, syncType>(lid));
1068 #if GALOIS_COMM_STATS
1073 for (unsigned int n = start; n < start + size; ++n) {
1074 unsigned int offset;
1075 if (identity_offsets)
1078 offset = offsets[n];
1079 size_t lid = indices[offset];
1080 gSerializeLazy(b, lseq, n - start, extractWrapper<FnTy, syncType>(lid));
1097 template <typename FnTy, SyncType syncType>
1099 if (syncType == syncReduce) {
1100 return FnTy::extract_reset_batch(x, b.getVec().data());
1102 return FnTy::extract_batch(x, b.getVec().data());
1124 template <typename FnTy, SyncType syncType>
1127 if (syncType == syncReduce) {
1128 return FnTy::extract_reset_batch(x, b.getVec().data(), &s, &data_mode);
1130 return FnTy::extract_batch(x, b.getVec().data(), &s, &data_mode);
1149 template <typename FnTy, SyncType syncType, bool async>
1150 inline void setWrapper(size_t lid, typename FnTy::ValTy val,
1152 if (syncType == syncReduce) {
1153 if (FnTy::reduce(lid, userGraph.getEdgeData(lid), val)) {
1154 if (bit_set_compute.size() != 0) {
1155 bit_set_compute.set(lid);
1160 FnTy::reduce(lid, userGraph.getEdgeData(lid), val);
1162 FnTy::setVal(lid, userGraph.getEdgeData(lid), val);
1163 assert(FnTy::extract(lid, userGraph.getEdgeData(lid)) == val);
1191 template <typename VecTy, typename FnTy, SyncType syncType, bool async,
1192 bool identity_offsets = false, bool parallelize = true>
1193 void setSubset(const std::string& loopName, const VecTy& indices, size_t size,
1197 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
1198 std::string doall_str(syncTypeStr + "SetVal_" +
1204 [&](unsigned int n) {
1205 unsigned int offset;
1206 if (identity_offsets)
1209 offset = offsets[n];
1210 auto lid = indices[offset];
1211 setWrapper<FnTy, syncType, async>(lid, val_vec[n - start],
1214 #if GALOIS_COMM_STATS
1219 for (unsigned int n = start; n < start + size; ++n) {
1220 unsigned int offset;
1221 if (identity_offsets)
1224 offset = offsets[n];
1225 auto lid = indices[offset];
1226 setWrapper<FnTy, syncType, async>(lid, val_vec[n - start],
1243 template <typename FnTy, SyncType syncType, bool async>
1245 if (syncType == syncReduce) {
1272 template <typename FnTy, SyncType syncType, bool async>
1275 if (syncType == syncReduce) {
1306 template <SyncType syncType, typename SyncFnTy, bool async,
1308 typename SyncFnTy::ValTy>::value>::type* = nullptr>
1309 void syncExtract(std::string loopName, unsigned from_id,
1310 std::vector<size_t>& indices,
1312 uint32_t num = indices.size();
1316 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
1317 std::string extract_timer_str(syncTypeStr + "Extract_" +
1321 std::string extract_batch_timer_str(syncTypeStr + "ExtractBatch_" +
1324 extract_batch_timer_str.c_str(), RNAME);
1333 (num * sizeof(typename SyncFnTy::ValTy)));
1335 Textractbatch.start();
1336 bool batch_succeeded =
1337 extractBatchWrapper<SyncFnTy, syncType>(from_id, b);
1338 Textractbatch.stop();
1340 if (!batch_succeeded) {
1342 val_vec.reserve(maxSharedSize);
1345 auto lseq = gSerializeLazySeq(
1348 extractSubset<SyncFnTy, decltype(lseq), syncType, true, true>(
1349 loopName, indices, num, offsets, b, lseq);
1352 (num * sizeof(typename SyncFnTy::ValTy)));
1364 std::string metadata_str(syncTypeStr + "MetadataMode_" +
1365 std::to_string(data_mode) + "_" +
1367 galois::runtime::reportStatCond_Single<MORE_DIST_STATS>(RNAME, metadata_str,
1388 template <SyncType syncType, typename SyncFnTy, bool async,
1390 typename SyncFnTy::ValTy>::value>::type* = nullptr>
1391 void syncExtract(std::string loopName, unsigned from_id,
1392 std::vector<size_t>& indices,
1394 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
1395 std::string extract_timer_str(syncTypeStr + "Extract_" +
1399 std::string extract_batch_timer_str(syncTypeStr + "ExtractBatch_" +
1402 extract_batch_timer_str.c_str(), RNAME);
1406 uint32_t num = indices.size();
1415 (num * sizeof(typename SyncFnTy::ValTy)));
1417 Textractbatch.start();
1418 bool batch_succeeded =
1419 extractBatchWrapper<SyncFnTy, syncType>(from_id, b);
1420 Textractbatch.stop();
1422 if (!batch_succeeded) {
1424 val_vec.reserve(maxSharedSize);
1428 extractSubset<SyncFnTy, syncType, true, true>(loopName, indices, num,
1429 dummyVector, val_vec);
1433 (num * sizeof(typename SyncFnTy::ValTy)));
1446 std::string metadata_str(syncTypeStr + "MetadataMode_" +
1447 std::to_string(data_mode) + "_" +
1449 galois::runtime::reportStatCond_Single<MORE_DIST_STATS>(RNAME, metadata_str,
1471 SyncType syncType, typename SyncFnTy, typename BitsetFnTy, bool async,
1472 typename std::enable_if<!BitsetFnTy::is_vector_bitset()>::type* = nullptr>
1473 void syncExtract(std::string loopName, unsigned from_id,
1474 std::vector<size_t>& indices,
1477 uint64_t manualBitsetCount = bit_set_compute.count();
1478 uint32_t num = indices.size();
1483 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
1484 std::string extract_timer_str(syncTypeStr + "Extract_" +
1488 std::string extract_alloc_timer_str(syncTypeStr + "ExtractAlloc_" +
1491 extract_alloc_timer_str.c_str(), RNAME);
1492 std::string extract_batch_timer_str(syncTypeStr + "ExtractBatch_" +
1495 extract_batch_timer_str.c_str(), RNAME);
1501 if (num > 0 && manualBitsetCount > 0) {
1503 size_t bit_set_count = 0;
1504 Textractalloc.start();
1505 if (substrateDataMode == gidsData) {
1507 sizeof(size_t) + (num * sizeof(unsigned int)) +
1508 sizeof(size_t) + (num * sizeof(typename SyncFnTy::ValTy)));
1511 sizeof(size_t) + (num * sizeof(unsigned int)) +
1512 sizeof(size_t) + (num * sizeof(typename SyncFnTy::ValTy)));
1513 } else if (substrateDataMode == bitsetData) {
1514 size_t bitset_alloc_size = ((num + 63) / 64) * sizeof(uint64_t);
1518 + bitset_alloc_size + sizeof(size_t) +
1519 (num * sizeof(typename SyncFnTy::ValTy)));
1521 size_t bitset_alloc_size = ((num + 63) / 64) * sizeof(uint64_t);
1525 + bitset_alloc_size + sizeof(size_t) +
1526 (num * sizeof(typename SyncFnTy::ValTy)));
1528 Textractalloc.stop();
1530 Textractbatch.start();
1531 bool batch_succeeded = extractBatchWrapper<SyncFnTy, syncType>(
1532 from_id, b, bit_set_count, data_mode);
1533 Textractbatch.stop();
1537 if (!batch_succeeded) {
1538 Textractalloc.start();
1540 bit_set_comm.reserve(maxSharedSize);
1541 offsets.reserve(maxSharedSize);
1542 val_vec.reserve(maxSharedSize);
1543 bit_set_comm.resize(num);
1546 Textractalloc.stop();
1548 getBitsetAndOffsets<SyncFnTy, syncType>(
1549 loopName, indices, bit_set_compute, bit_set_comm, offsets,
1550 bit_set_count, data_mode);
1553 bit_set_count = indices.size();
1554 extractSubset<SyncFnTy, syncType, true, true>(
1555 loopName, indices, bit_set_count, offsets, val_vec);
1556 } else if (data_mode !=
1558 extractSubset<SyncFnTy, syncType, false, true>(
1559 loopName, indices, bit_set_count, offsets, val_vec);
1561 serializeMessage<async, syncType>(loopName, data_mode, bit_set_count,
1562 indices, offsets, bit_set_comm,
1565 if (data_mode == noData) {
1568 gSerialize(b, data_mode);
1570 } else if (data_mode == gidsData) {
1571 b.resize(sizeof(DataCommMode) + sizeof(bit_set_count) +
1572 sizeof(size_t) + (bit_set_count * sizeof(unsigned int)) +
1574 (bit_set_count * sizeof(typename SyncFnTy::ValTy)));
1576 b.resize(sizeof(DataCommMode) + sizeof(bit_set_count) +
1577 sizeof(size_t) + (bit_set_count * sizeof(unsigned int)) +
1579 (bit_set_count * sizeof(typename SyncFnTy::ValTy)));
1581 size_t bitset_alloc_size = ((num + 63) / 64) * sizeof(uint64_t);
1582 b.resize(sizeof(DataCommMode) + sizeof(bit_set_count) +
1585 + bitset_alloc_size + sizeof(size_t) +
1586 (bit_set_count * sizeof(typename SyncFnTy::ValTy)));
1589 (num * sizeof(typename SyncFnTy::ValTy)));
1593 reportRedundantSize<SyncFnTy>(loopName, syncTypeStr, num, bit_set_count,
1605 std::string metadata_str(syncTypeStr + "MetadataMode_" +
1606 std::to_string(data_mode) + "_" +
1608 galois::runtime::reportStatCond_Single<MORE_DIST_STATS>(RNAME, metadata_str,
1612 #ifdef GALOIS_USE_BARE_MPI
1616 template <SyncType syncType, typename SyncFnTy, typename BitsetFnTy>
1617 void sync_mpi_send(std::string loopName) {
1618 static std::vector<galois::runtime::SendBuffer> b;
1619 static std::vector<MPI_Request> request;
1621 request.resize(numHosts, MPI_REQUEST_NULL);
1623 for (unsigned h = 1; h < numHosts; ++h) {
1624 unsigned x = (id + h) % numHosts;
1626 if (nothingToSend(x, syncType))
1630 MPI_Test(&request[x], &ready, MPI_STATUS_IGNORE);
1632 assert(b[x].size() > 0);
1633 MPI_Wait(&request[x], MPI_STATUS_IGNORE);
1635 if (b[x].size() > 0) {
1636 b[x].getVec().clear();
1639 getSendBuffer<syncType, SyncFnTy, BitsetFnTy>(loopName, x, b[x]);
1641 MPI_Isend((uint8_t*)b[x].linearData(), b[x].size(), MPI_BYTE, x, 32767,
1642 MPI_COMM_WORLD, &request[x]);
1645 if (BitsetFnTy::is_valid() && syncType == syncBroadcast) {
1646 reset_bitset(&BitsetFnTy::reset_range);
1653 template <SyncType syncType, typename SyncFnTy, typename BitsetFnTy>
1654 void sync_mpi_put(std::string loopName, const MPI_Group& mpi_access_group,
1655 const std::vector<MPI_Win>& window) {
1657 MPI_Win_start(mpi_access_group, 0, window[id]);
1659 std::vector<galois::runtime::SendBuffer> b(numHosts);
1660 std::vector<size_t> size(numHosts);
1661 uint64_t send_buffers_size = 0;
1663 for (unsigned h = 1; h < numHosts; ++h) {
1664 unsigned x = (id + h) % numHosts;
1666 if (nothingToSend(x, syncType))
1669 getSendBuffer<syncType, SyncFnTy, BitsetFnTy>(loopName, x, b[x]);
1671 size[x] = b[x].size();
1672 send_buffers_size += size[x];
1673 MPI_Put((uint8_t*)&size[x], sizeof(size_t), MPI_BYTE, x, 0,
1674 sizeof(size_t), MPI_BYTE, window[id]);
1675 MPI_Put((uint8_t*)b[x].linearData(), size[x], MPI_BYTE, x, sizeof(size_t),
1676 size[x], MPI_BYTE, window[id]);
1680 net.incrementMemUsage(send_buffers_size);
1682 MPI_Win_complete(window[id]);
1683 net.decrementMemUsage(send_buffers_size);
1685 if (BitsetFnTy::is_valid() && syncType == syncBroadcast) {
1686 reset_bitset(&BitsetFnTy::reset_range);
1701 template <SyncType syncType, typename SyncFnTy, typename BitsetFnTy,
1703 void syncNetSend(std::string loopName) {
1709 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
1710 std::string statNumMessages_str(syncTypeStr + "NumMessages_" +
1713 size_t numMessages = 0;
1714 for (unsigned h = 1; h < numHosts; ++h) {
1715 unsigned x = (id + h) % numHosts;
1717 if (nothingToSend(x, syncType))
1720 getSendBuffer<syncType, SyncFnTy, BitsetFnTy, async>(loopName, x, b);
1722 if ((!async) || (b.size() > 0)) {
1723 size_t syncTypePhase = 0;
1724 if (async && (syncType == syncBroadcast))
1735 if (BitsetFnTy::is_valid() && syncType == syncBroadcast) {
1736 reset_bitset(&BitsetFnTy::reset_range);
1752 template <SyncType syncType, typename SyncFnTy, typename BitsetFnTy,
1754 void syncSend(std::string loopName) {
1755 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
1760 syncNetSend<syncType, SyncFnTy, BitsetFnTy, async>(loopName);
1784 SyncType syncType, typename SyncFnTy, typename BitsetFnTy, bool async,
1785 typename std::enable_if<!BitsetFnTy::is_vector_bitset()>::type* = nullptr>
1787 std::string loopName) {
1788 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
1789 std::string set_timer_str(syncTypeStr + "Set_" +
1792 std::string set_batch_timer_str(syncTypeStr + "SetBatch_" +
1795 set_batch_timer_str.c_str(), RNAME);
1801 auto& sharedEdges = (syncType == syncReduce) ? masterEdges : mirrorEdges;
1802 uint32_t num = sharedEdges[from_id].size();
1811 if (data_mode != noData) {
1814 bool batch_succeeded =
1815 setBatchWrapper<SyncFnTy, syncType, async>(from_id, buf, data_mode);
1819 if (!batch_succeeded) {
1820 size_t bit_set_count = num;
1821 size_t buf_start = 0;
1825 deserializeMessage<syncType>(loopName, data_mode, num, buf,
1826 bit_set_count, offsets, bit_set_comm,
1827 buf_start, retval, val_vec);
1829 bit_set_comm.reserve(maxSharedSize);
1830 offsets.reserve(maxSharedSize);
1831 val_vec.reserve(maxSharedSize);
1836 size_t bit_set_count2;
1837 getOffsetsFromBitset<syncType>(loopName, bit_set_comm, offsets,
1839 assert(bit_set_count == bit_set_count2);
1843 setSubset<decltype(sharedEdges[from_id]), SyncFnTy, syncType, async,
1844 true, true>(loopName, sharedEdges[from_id], bit_set_count,
1845 offsets, val_vec, bit_set_compute);
1847 setSubset<decltype(sharedEdges[from_id]), SyncFnTy, syncType, async,
1848 true, true>(loopName, sharedEdges[from_id], bit_set_count,
1849 offsets, val_vec, bit_set_compute, buf_start);
1850 } else if (data_mode == gidsData) {
1851 setSubset<decltype(offsets), SyncFnTy, syncType, async, true, true>(
1852 loopName, offsets, bit_set_count, offsets, val_vec,
1855 setSubset<decltype(sharedEdges[from_id]), SyncFnTy, syncType, async,
1856 false, true>(loopName, sharedEdges[from_id],
1857 bit_set_count, offsets, val_vec,
1871 #ifdef GALOIS_USE_BARE_MPI
1875 template <SyncType syncType, typename SyncFnTy, typename BitsetFnTy>
1876 void sync_mpi_recv_post(std::string loopName,
1877 std::vector<MPI_Request>& request,
1878 const std::vector<std::vector<uint8_t>>& rb) {
1879 for (unsigned h = 1; h < numHosts; ++h) {
1880 unsigned x = (id + numHosts - h) % numHosts;
1881 if (nothingToRecv(x, syncType))
1884 MPI_Irecv((uint8_t*)rb[x].data(), rb[x].size(), MPI_BYTE, x, 32767,
1885 MPI_COMM_WORLD, &request[x]);
1892 template <SyncType syncType, typename SyncFnTy, typename BitsetFnTy>
1893 void sync_mpi_recv_wait(std::string loopName,
1894 std::vector<MPI_Request>& request,
1895 const std::vector<std::vector<uint8_t>>& rb) {
1896 for (unsigned h = 1; h < numHosts; ++h) {
1897 unsigned x = (id + numHosts - h) % numHosts;
1898 if (nothingToRecv(x, syncType))
1902 MPI_Wait(&request[x], &status);
1905 MPI_Get_count(&status, MPI_BYTE, &size);
1909 syncRecvApply<syncType, SyncFnTy, BitsetFnTy>(x, rbuf, loopName);
1916 template <SyncType syncType, typename SyncFnTy, typename BitsetFnTy>
1917 void sync_mpi_get(std::string loopName, const std::vector<MPI_Win>& window,
1918 const std::vector<std::vector<uint8_t>>& rb) {
1919 for (unsigned h = 1; h < numHosts; ++h) {
1920 unsigned x = (id + numHosts - h) % numHosts;
1921 if (nothingToRecv(x, syncType))
1924 MPI_Win_wait(window[x]);
1927 memcpy(&size, rb[x].data(), sizeof(size_t));
1930 rb[x].begin() + sizeof(size_t) + size);
1932 MPI_Win_post(mpi_identity_groups[x], 0, window[x]);
1934 syncRecvApply<syncType, SyncFnTy, BitsetFnTy>(x, rbuf, loopName);
1949 template <SyncType syncType, typename SyncFnTy, typename BitsetFnTy,
1951 void syncNetRecv(std::string loopName) {
1958 size_t syncTypePhase = 0;
1959 if (syncType == syncBroadcast)
1968 syncRecvApply<syncType, SyncFnTy, BitsetFnTy, async>(
1969 p->first, p->second, loopName);
1973 for (unsigned x = 0; x < numHosts; ++x) {
1976 if (nothingToRecv(x, syncType))
1986 syncRecvApply<syncType, SyncFnTy, BitsetFnTy, async>(
1987 p->first, p->second, loopName);
1989 incrementEvilPhase();
2003 template <SyncType syncType, typename SyncFnTy, typename BitsetFnTy,
2005 void syncRecv(std::string loopName) {
2006 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
2011 syncNetRecv<syncType, SyncFnTy, BitsetFnTy, async>(loopName);
2018 #ifdef GALOIS_USE_BARE_MPI
2022 template <SyncType syncType, typename SyncFnTy, typename BitsetFnTy>
2023 void syncNonblockingMPI(std::string loopName,
2024 bool use_bitset_to_send = true) {
2025 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
2031 static std::vector<std::vector<uint8_t>> rb;
2032 static std::vector<MPI_Request> request;
2034 if (rb.size() == 0) {
2036 auto& sharedEdges = (syncType == syncReduce) ? masterEdges : mirrorEdges;
2037 rb.resize(numHosts);
2038 request.resize(numHosts, MPI_REQUEST_NULL);
2040 for (unsigned h = 1; h < numHosts; ++h) {
2041 unsigned x = (id + numHosts - h) % numHosts;
2042 if (nothingToRecv(x, syncType))
2046 (sharedEdges[x].size() * sizeof(typename SyncFnTy::ValTy));
2047 size += sizeof(size_t);
2056 sync_mpi_recv_post<syncType, SyncFnTy, BitsetFnTy>(loopName, request, rb);
2060 if (use_bitset_to_send) {
2061 sync_mpi_send<syncType, SyncFnTy, BitsetFnTy>(loopName);
2063 sync_mpi_send<syncType, SyncFnTy, galois::InvalidBitsetFnTy>(loopName);
2068 sync_mpi_recv_wait<syncType, SyncFnTy, BitsetFnTy>(loopName, request, rb);
2075 template <SyncType syncType, typename SyncFnTy, typename BitsetFnTy>
2076 void syncOnesidedMPI(std::string loopName, bool use_bitset_to_send = true) {
2077 std::string syncTypeStr = (syncType == syncReduce) ? "Reduce" : "Broadcast";
2083 static std::vector<MPI_Win> window;
2084 static MPI_Group mpi_access_group;
2085 static std::vector<std::vector<uint8_t>> rb;
2087 if (window.size() == 0) {
2089 auto& sharedEdges = (syncType == syncReduce) ? masterEdges : mirrorEdges;
2090 window.resize(numHosts);
2091 rb.resize(numHosts);
2093 uint64_t recv_buffers_size = 0;
2094 for (unsigned x = 0; x < numHosts; ++x) {
2095 size_t size = sharedEdges[x].size() * sizeof(typename SyncFnTy::ValTy);
2096 size += sizeof(size_t);
2098 size += sizeof(size_t);
2099 recv_buffers_size += size;
2104 MPI_Info_create(&info);
2105 MPI_Info_set(info, "no_locks", "true");
2106 MPI_Info_set(info, "same_disp_unit", "true");
2108 MPI_Win_create(rb[x].data(), size, 1, info, MPI_COMM_WORLD, &window[x]);
2110 MPI_Info_free(&info);
2113 net.incrementMemUsage(recv_buffers_size);
2115 for (unsigned h = 1; h < numHosts; ++h) {
2116 unsigned x = (id + numHosts - h) % numHosts;
2117 if (nothingToRecv(x, syncType))
2121 MPI_Win_post(mpi_identity_groups[x], 0, window[x]);
2126 std::vector<int> access_hosts;
2127 for (unsigned h = 1; h < numHosts; ++h) {
2128 unsigned x = (id + h) % numHosts;
2130 if (nothingToSend(x, syncType))
2133 access_hosts.push_back(x);
2135 MPI_Group world_group;
2136 MPI_Comm_group(MPI_COMM_WORLD, &world_group);
2138 MPI_Group_incl(world_group, access_hosts.size(), access_hosts.data(),
2144 if (use_bitset_to_send) {
2145 sync_mpi_put<syncType, SyncFnTy, BitsetFnTy>(loopName, mpi_access_group,
2148 sync_mpi_put<syncType, SyncFnTy, galois::InvalidBitsetFnTy>(
2149 loopName, mpi_access_group, window);
2154 sync_mpi_get<syncType, SyncFnTy, BitsetFnTy>(loopName, window, rb);
2171 template <typename ReduceFnTy, typename BitsetFnTy, bool async>
2172 inline void reduce(std::string loopName) {
2176 TsyncReduce.start();
2178 #ifdef GALOIS_USE_BARE_MPI
2182 syncSend<syncReduce, ReduceFnTy, BitsetFnTy, async>(loopName);
2183 syncRecv<syncReduce, ReduceFnTy, BitsetFnTy, async>(loopName);
2184 #ifdef GALOIS_USE_BARE_MPI
2186 case nonBlockingBareMPI:
2187 syncNonblockingMPI<syncReduce, ReduceFnTy, BitsetFnTy>(loopName);
2189 case oneSidedBareMPI:
2190 syncOnesidedMPI<syncReduce, ReduceFnTy, BitsetFnTy>(loopName);
2208 template <typename BroadcastFnTy, typename BitsetFnTy, bool async>
2209 inline void broadcast(std::string loopName) {
2214 TsyncBroadcast.start();
2216 bool use_bitset = true;
2218 #ifdef GALOIS_USE_BARE_MPI
2223 syncSend<syncBroadcast, BroadcastFnTy, BitsetFnTy, async>(loopName);
2228 syncRecv<syncBroadcast, BroadcastFnTy, BitsetFnTy, async>(loopName);
2229 #ifdef GALOIS_USE_BARE_MPI
2231 case nonBlockingBareMPI:
2232 syncNonblockingMPI<syncBroadcast, BroadcastFnTy, BitsetFnTy>(loopName,
2235 case oneSidedBareMPI:
2236 syncOnesidedMPI<syncBroadcast, BroadcastFnTy, BitsetFnTy>(loopName,
2244 TsyncBroadcast.stop();
2255 template <typename SyncFnTy, typename BitsetFnTy, bool async>
2256 inline void sync_any_to_any(std::string loopName) {
2258 reduce<SyncFnTy, BitsetFnTy, async>(loopName);
2259 broadcast<SyncFnTy, BitsetFnTy, async>(loopName);
2278 inline void sync(std::string loopName) {
2283 sync_any_to_any<SyncFnTy, BitsetFnTy, async>(loopName);
2291 #ifdef GALOIS_ENABLE_GPU
2293 using GraphNode = typename GraphTy::GraphNode;
2294 using edge_iterator = typename GraphTy::edge_iterator;
2295 using EdgeTy = typename GraphTy::EdgeType;
2298 template <bool isVoidType,
2299 typename std::enable_if<isVoidType>::type* = nullptr>
2301 const size_t GALOIS_UNUSED(index),
2302 const edge_iterator& GALOIS_UNUSED(e)) {
2306 template <bool isVoidType,
2307 typename std::enable_if<!isVoidType>::type* = nullptr>
2309 const edge_iterator& e) {
2310 m.edge_data[index] = userGraph.getEdgeData(e);
2314 void getEdgeMarshalGraph(EdgeMarshalGraph& m, bool loadProxyEdges = true) {
2315 m.nnodes = userGraph.size();
2316 m.nedges = userGraph.sizeEdges();
2317 m.numOwned = userGraph.numMasters();
2328 if (std::is_void<EdgeTy>::value) {
2331 if (!std::is_same<EdgeTy, edge_data_type>::value) {
2332 galois::gWarn("Edge data type mismatch between CPU and GPU\n");
2341 [&](const GraphNode& nodeID) {
2344 userGraph.getGID(nodeID);
2345 m.row_start[nodeID] = *(userGraph.edge_begin(nodeID));
2346 for (auto e = userGraph.edge_begin(nodeID);
2347 e != userGraph.edge_end(nodeID); e++) {
2349 setMarshalEdge<std::is_void<EdgeTy>::value>(m, edgeID, e);
2350 m.edge_dst[edgeID] = userGraph.getEdgeDst(e);
2359 if (loadProxyEdges) {
2361 (unsigned int*)calloc(masterEdges.size(), sizeof(unsigned int));
2364 (unsigned int**)calloc(masterEdges.size(), sizeof(unsigned int*));
2367 for (uint32_t h = 0; h < masterEdges.size(); ++h) {
2370 if (masterEdges[h].size() > 0) {
2371 m.master_edges[h] = (unsigned int*)calloc(masterEdges[h].size(),
2372 sizeof(unsigned int));
2374 std::copy(masterEdges[h].begin(), masterEdges[h].end(),
2382 (unsigned int*)calloc(mirrorEdges.size(), sizeof(unsigned int));
2385 (unsigned int**)calloc(mirrorEdges.size(), sizeof(unsigned int*));
2387 for (uint32_t h = 0; h < mirrorEdges.size(); ++h) {
2390 if (mirrorEdges[h].size() > 0) {
2391 m.mirror_edges[h] = (unsigned int*)calloc(mirrorEdges[h].size(),
2392 sizeof(unsigned int));
2394 std::copy(mirrorEdges[h].begin(), mirrorEdges[h].end(),
2406 #endif // het galois def
2442 #if GALOIS_PER_ROUND_STATS
2443 return std::string(std::to_string(num_run) + "_" +
2444 std::to_string(num_round));
2446 return std::string(std::to_string(num_run));
2458 #if GALOIS_PER_ROUND_STATS
2459 return std::string(std::string(loop_name) + "_" + std::to_string(num_run) +
2460 "_" + std::to_string(num_round));
2462 return std::string(std::string(loop_name) + "_" + std::to_string(num_run));
2478 unsigned alterID) const {
2479 #if GALOIS_PER_ROUND_STATS
2480 return std::string(std::string(loop_name) + "_" + std::to_string(alterID) +
2481 "_" + std::to_string(num_run) + "_" +
2482 std::to_string(num_round));
2484 return std::string(std::string(loop_name) + "_" + std::to_string(alterID) +
2485 "_" + std::to_string(num_run));
2485 "_" + std::to_string(num_run));
2490 template <typename GraphTy>
2495 #endif // header guard
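For orientation, the following is a minimal usage sketch, not part of GluonEdgeSubstrate.h. It assumes a partitioned graph type Graph that provides the interface the substrate uses above (getMirrorEdges(), sizeEdges(), numOwnedEdges(), getEdgeLID()/getEdgeGID(), ...), and a Gluon edge sync structure Reduce_edge_data with bitset functor Bitset_edge_data; both names are hypothetical placeholders for structures normally generated with the Galois sync-structure macros. The sync() call is written with the <SyncFnTy, BitsetFnTy> template arguments suggested by the forwarding to sync_any_to_any at line 2283; consult the complete header for the exact declaration and its defaults.

// Illustrative sketch only; hypothetical names are noted in the comments.
#include "galois/graphs/GluonEdgeSubstrate.h"

template <typename Graph>
void syncEdgeData(Graph& graph, unsigned hostID, unsigned numHosts) {
  // Wrap the already-partitioned graph. The constructor (lines 362-386 above)
  // exchanges master/mirror edge proxy information across hosts and reports
  // replication statistics.
  galois::graphs::GluonEdgeSubstrate<Graph> substrate(graph, hostID, numHosts);

  // ... run a computation that writes edge data on mirror edges ...

  // Reduce mirror contributions onto master edges and broadcast the result
  // back to the mirrors. Reduce_edge_data / Bitset_edge_data are placeholder
  // sync structures, not names defined by this header.
  substrate.template sync<Reduce_edge_data, Bitset_edge_data>("EdgeDataSync");
}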