Galois
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Executor_ParaMeter.h
Go to the documentation of this file.
1 /*
2  * This file belongs to the Galois project, a C++ library for exploiting
3  * parallelism. The code is being released under the terms of the 3-Clause BSD
4  * License (a copy is located in LICENSE.txt at the top-level directory).
5  *
6  * Copyright (C) 2018, The University of Texas at Austin. All rights reserved.
7  * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS
8  * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY,
9  * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF
10  * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF
11  * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH
12  * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances
13  * shall University be liable for incidental, special, indirect, direct or
14  * consequential damages or loss of profits, interruption of business, or
15  * related expenses which may arise from use of Software or Documentation,
16  * including but not limited to those resulting from defects in Software and/or
17  * Documentation, or loss or inaccuracy of data of any kind.
18  */
19 
20 #ifndef GALOIS_RUNTIME_EXECUTOR_PARAMETER_H
21 #define GALOIS_RUNTIME_EXECUTOR_PARAMETER_H
22 
23 #include <algorithm>
24 #include <cstdio>
25 #include <cstdlib>
26 #include <ctime>
27 #include <deque>
28 #include <random>
29 #include <vector>
30 
31 #include "galois/config.h"
32 #include "galois/gIO.h"
33 #include "galois/Mem.h"
34 #include "galois/Reduction.h"
35 #include "galois/runtime/Context.h"
40 #include "galois/Traits.h"
42 
43 namespace galois {
44 namespace runtime {
45 
46 namespace ParaMeter {
47 
48 struct StepStatsBase {
49  static inline void printHeader(FILE* out) {
50  fprintf(out,
51  "LOOPNAME, STEP, PARALLELISM, WORKLIST_SIZE, NEIGHBORHOOD_SIZE\n");
52  }
53 
54  static inline void dump(FILE* out, const char* loopname, size_t step,
55  size_t parallelism, size_t wlSize, size_t nhSize) {
56  assert(out && "StepStatsBase::dump() file handle is null");
57  fprintf(out, "%s, %zu, %zu, %zu, %zu\n", loopname, step, parallelism,
58  wlSize, nhSize);
59  }
60 };
61 
64 
65  const size_t step;
67  const size_t wlSize;
68 
69  explicit OrderedStepStats(size_t _step, size_t _wlsz)
70  : Base(), step(_step), parallelism(), wlSize(_wlsz) {}
71 
72  explicit OrderedStepStats(size_t _step, size_t par, size_t _wlsz)
73  : Base(), step(_step), parallelism(), wlSize(_wlsz) {
74  parallelism += par;
75  }
76 
77  void dump(FILE* out, const char* loopname) {
78  Base::dump(out, loopname, step, parallelism.reduce(), wlSize, 0ul);
79  }
80 };
81 
84 
85  size_t step;
89 
90  UnorderedStepStats(void) : Base(), step(0) {}
91 
92  void nextStep(void) {
93  ++step;
95  wlSize.reset();
96  nhSize.reset();
97  }
98 
99  void dump(FILE* out, const char* loopname) {
100  Base::dump(out, loopname, step, parallelism.reduce(), wlSize.reduce(),
101  nhSize.reduce());
102  }
103 };
104 
105 // Single ParaMeter stats file per run of an app
106 // which includes all instances of for_each loops
107 // run with ParaMeter Executor
108 FILE* getStatsFile(void);
109 void closeStatsFile(void);
110 
111 template <typename T>
112 class FIFO_WL {
114 protected:
116 
119 
120 public:
121  FIFO_WL(void) : curr(new PTcont()), next(new PTcont()) {}
123  ~FIFO_WL(void) {
124  delete curr;
125  curr = nullptr;
126  delete next;
127  next = nullptr;
128  }
129 
131 
132  void pushNext(const T& item) { next->get().push_back(item); }
133 
134  void nextStep(void) {
135  std::swap(curr, next);
137  }
138 
139  bool empty(void) const { return next->empty_all(); }
140 };
141 
142 template <typename T>
143 class RAND_WL : public FIFO_WL<T> {
144  using Base = FIFO_WL<T>;
146 public:
147  auto iterateCurr(void) {
149  [&](int, int) {
150  auto& lwl = Base::curr->get();
151 
152  std::random_device r;
153  std::mt19937 rng(r());
154  std::shuffle(lwl.begin(), lwl.end(), rng);
155  },
156  std::make_tuple());
157 
159  }
160 };
161 
162 template <typename T>
163 class LIFO_WL : public FIFO_WL<T> {
164  using Base = FIFO_WL<T>;
165 
166 public:
167  auto iterateCurr(void) {
168 
169  // TODO: use reverse iterator instead of std::reverse
171  [&](int, int) {
172  auto& lwl = Base::curr->get();
173  std::reverse(lwl.begin(), lwl.end());
174  },
175  std::make_tuple());
176 
178  }
179 };
180 
181 enum class SchedType { FIFO, RAND, LIFO };
182 
183 template <typename T, SchedType SCHED>
184 struct ChooseWL {};
185 
186 template <typename T>
187 struct ChooseWL<T, SchedType::FIFO> {
188  using type = FIFO_WL<T>;
189 };
190 
191 template <typename T>
192 struct ChooseWL<T, SchedType::LIFO> {
193  using type = LIFO_WL<T>;
194 };
195 
196 template <typename T>
197 struct ChooseWL<T, SchedType::RAND> {
198  using type = RAND_WL<T>;
199 };
200 
201 template <class T, class FunctionTy, class ArgsTy>
203 
204  using value_type = T;
205  using GenericWL = typename get_trait_type<wl_tag, ArgsTy>::type::type;
206  using WorkListTy = typename GenericWL::template retype<T>;
207  using dbg = galois::debug<1>;
208 
209  constexpr static bool needsStats = !has_trait<no_stats_tag, ArgsTy>();
210  constexpr static bool needsPush = !has_trait<no_pushes_tag, ArgsTy>();
211  constexpr static bool needsAborts = !has_trait<no_conflicts_tag, ArgsTy>();
212  constexpr static bool needsPia = has_trait<per_iter_alloc_tag, ArgsTy>();
213  constexpr static bool needsBreak = has_trait<parallel_break_tag, ArgsTy>();
214 
215  struct IterationContext {
216  T item;
217  bool doabort;
220 
221  explicit IterationContext(const T& v) : item(v), doabort(false) {}
222 
223  void reset() {
224  doabort = false;
225  if (needsPia)
226  facing.resetAlloc();
227 
228  if (needsPush)
229  facing.getPushBuffer().clear();
230  }
231  };
232 
234 
235 private:
236  PWL m_wl;
237  FunctionTy m_func;
238  const char* loopname;
239  FILE* m_statsFile;
241  galois::GReduceLogicalOr m_broken;
242 
243  IterationContext* newIteration(const T& item) {
244  IterationContext* it = m_iterAlloc.allocate(1);
245  assert(it && "IterationContext allocation failed");
246 
247  m_iterAlloc.construct(it, item);
248 
249  it->reset();
250  return it;
251  }
252 
253  unsigned abortIteration(IterationContext* it) {
254  assert(it && "nullptr arg");
255  assert(it->doabort &&
256  "aborting an iteration without setting its doabort flag");
257 
258  unsigned numLocks = it->ctx.cancelIteration();
259  it->reset();
260 
261  m_wl.pushNext(it);
262  return numLocks;
263  }
264 
265  unsigned commitIteration(IterationContext* it) {
266  assert(it && "nullptr arg");
267 
268  if (needsPush) {
269  for (const auto& item : it->facing.getPushBuffer()) {
270  IterationContext* child = newIteration(item);
271  m_wl.pushNext(child);
272  }
273  }
274 
275  unsigned numLocks = it->ctx.commitIteration();
276  it->reset();
277 
278  m_iterAlloc.destroy(it);
279  m_iterAlloc.deallocate(it, 1);
280 
281  return numLocks;
282  }
283 
284 private:
285  void runSimpleStep(UnorderedStepStats& stats) {
287  m_wl.iterateCurr(),
288  [&, this](IterationContext* it) {
289  stats.wlSize += 1;
290 
291  setThreadContext(&(it->ctx));
292 
293  m_func(it->item, it->facing.data());
294  stats.parallelism += 1;
295  unsigned nh = commitIteration(it);
296  stats.nhSize += nh;
297 
298  setThreadContext(nullptr);
299  },
300  std::make_tuple(galois::steal(), galois::loopname("ParaM-Simple")));
301  }
302 
303  void runCautiousStep(UnorderedStepStats& stats){galois::runtime::do_all_gen(
304  m_wl.iterateCurr(),
305  [&, this](IterationContext* it) {
306  stats.wlSize += 1;
307 
308  setThreadContext(&(it->ctx));
309  bool broke = false;
310 
311  if (needsBreak) {
312  it->facing.setBreakFlag(&broke);
313  }
314 #ifdef GALOIS_USE_LONGJMP_ABORT
315  int flag = 0;
316  if ((flag = setjmp(execFrame)) == 0) {
317  m_func(it->item, it->facing.data());
318 
319  } else {
320 #elif GALOIS_USE_EXCEPTION_ABORT
321  try {
322  m_func(it->item, it->facing.data());
323 
324  } catch (const ConflictFlag& flag) {
325 #endif
326  clearConflictLock();
327  switch (flag) {
329  it->doabort = true;
330  break;
331  default:
332  std::abort();
333  }
334  }
335 
336  if (needsBreak && broke) {
337  m_broken.update(true);
338  }
339 
340  setThreadContext(nullptr);
341  },
342  std::make_tuple(galois::steal(), galois::loopname("ParaM-Expand-NH")));
343 
345  m_wl.iterateCurr(),
346  [&, this](IterationContext* it) {
347  if (it->doabort) {
348  abortIteration(it);
349 
350  } else {
351  stats.parallelism += 1;
352  unsigned nh = commitIteration(it);
353  stats.nhSize += nh;
354  }
355  },
356  std::make_tuple(galois::steal(), galois::loopname("ParaM-Commit")));
357 }
358 
359 template <typename R>
360 void execute(const R& range) {
361 
363  [&, this](const unsigned, const unsigned) {
364  auto p = range.local_pair();
365 
366  for (auto i = p.first; i != p.second; ++i) {
367  IterationContext* it = newIteration(*i);
368  m_wl.pushNext(it);
369  }
370  },
371  std::make_tuple());
372 
373  UnorderedStepStats stats;
374 
375  while (!m_wl.empty()) {
376 
377  m_wl.nextStep();
378 
379  if (needsAborts) {
380  runCautiousStep(stats);
381 
382  } else {
383  runSimpleStep(stats);
384  }
385 
386  // dbg::print("Step: ", stats.step, ", Parallelism: ",
387  // stats.parallelism.reduce());
388  assert(stats.parallelism.reduce() && "ERROR: No Progress");
389 
390  stats.dump(m_statsFile, loopname);
391  stats.nextStep();
392 
393  if (needsBreak && m_broken.reduce()) {
394  break;
395  }
396 
397  } // end while
398 
399  closeStatsFile();
400 }
401 
402 public:
403 ParaMeterExecutor(const FunctionTy& f, const ArgsTy& args)
404  : m_func(f), loopname(galois::internal::getLoopName(args)),
405  m_statsFile(getStatsFile()) {}
406 
407 // called serially once
408 template <typename RangeTy>
409 void init(const RangeTy& range) {
410  execute(range);
411 }
412 
413 // called once on each thread followed by a barrier
414 template <typename RangeTy>
415 void initThread(const RangeTy&) const {}
416 
417 void operator()(void) {}
418 
419 }; // namespace ParaMeter
420 
421 } // namespace runtime
422 } // namespace galois
423 
424 namespace worklists {
425 
426 template <class T = int, runtime::ParaMeter::SchedType SCHED =
428 class ParaMeter {
429 public:
430  template <bool _concurrent>
432 
433  template <typename _T>
435 
436  using value_type = T;
437 
438  constexpr static const runtime::ParaMeter::SchedType SCHEDULE = SCHED;
439 
443 };
444 
445 } // namespace worklists
446 
447 namespace runtime {
448 
449 // hookup into galois::for_each. Invoke galois::for_each with
450 // wl<galois::worklists::ParaMeter<> >
451 template <class T, class FunctionTy, class ArgsTy>
452 struct ForEachExecutor<galois::worklists::ParaMeter<T>, FunctionTy, ArgsTy>
453  : public ParaMeter::ParaMeterExecutor<T, FunctionTy, ArgsTy> {
454  using SuperTy = ParaMeter::ParaMeterExecutor<T, FunctionTy, ArgsTy>;
455  ForEachExecutor(const FunctionTy& f, const ArgsTy& args) : SuperTy(f, args) {}
456 };
457 
459 template <typename R, typename F, typename ArgsTuple>
460 void for_each_ParaMeter(const R& range, const F& func,
461  const ArgsTuple& argsTuple) {
462 
463  using T = typename R::values_type;
464 
466  argsTuple, std::make_tuple(wl_tag{}),
467  std::make_tuple(wl<galois::worklists::ParaMeter<>>()));
468 
469  using Tpl_ty = decltype(tpl);
470 
471  using Exec = runtime::ParaMeter::ParaMeterExecutor<T, F, Tpl_ty>;
472  Exec exec(func, tpl);
473 
474  exec.execute(range);
475 }
476 
477 } // end namespace runtime
478 } // end namespace galois
479 #endif
480 
481 /*
482  * requirements:
483  * - support random and fifo schedules, maybe lifo
484  * - write stats to a single file.
485  * - support multi-threaded execution
486  *
487  * interface:
488  * - file set by environment variable
489  * - ParaMeter invoked by choosing wl type, e.g. ParaMeter<>::with_rand, or
490  * ParaMeter<>::fifo
491  */
void pushNext(const T &item)
Definition: Executor_ParaMeter.h:132
GAccumulator< size_t > parallelism
Definition: Executor_ParaMeter.h:86
const size_t wlSize
Definition: Executor_ParaMeter.h:67
Definition: Executor_ParaMeter.h:122
auto iterateCurr(void)
Definition: Executor_ParaMeter.h:130
void setThreadContext(SimpleRuntimeContext *n)
used by the parallel code to set up conflict detection per thread
Definition: Context.cpp:31
static void dump(FILE *out, const char *loopname, size_t step, size_t parallelism, size_t wlSize, size_t nhSize)
Definition: Executor_ParaMeter.h:54
GAccumulator< size_t > wlSize
Definition: Executor_ParaMeter.h:87
void reset()
Definition: Reduction.h:113
~FIFO_WL(void)
Definition: Executor_ParaMeter.h:123
const size_t step
Definition: Executor_ParaMeter.h:65
void for_each_ParaMeter(const R &range, const F &func, const ArgsTuple &argsTuple)
invoke ParaMeter tool to execute a for_each style loop
Definition: Executor_ParaMeter.h:460
thread_local std::jmp_buf execFrame
Definition: Context.cpp:29
size_t step
Definition: Executor_ParaMeter.h:85
void init(const RangeTy &range)
Definition: Executor_ParaMeter.h:409
void nextStep(void)
Definition: Executor_ParaMeter.h:92
Definition: Traits.h:197
container_type & get()
Definition: PerThreadContainer.h:315
void closeStatsFile(void)
Definition: ParaMeter.cpp:100
Definition: Executor_ParaMeter.h:82
GAccumulator< size_t > parallelism
Definition: Executor_ParaMeter.h:66
void initThread(const RangeTy &) const
Definition: Executor_ParaMeter.h:415
Definition: Executor_ParaMeter.h:62
LocalRange< T > makeLocalRange(T &obj)
Definition: Range.h:75
Definition: Executor_ParaMeter.h:143
static void printHeader(FILE *out)
Definition: Executor_ParaMeter.h:49
auto iterateCurr(void)
Definition: Executor_ParaMeter.h:147
GAccumulator< size_t > nhSize
Definition: Executor_ParaMeter.h:88
Returns the type associated with the given trait in a tuple.
Definition: Traits.h:121
void nextStep(void)
Definition: Executor_ParaMeter.h:134
ParaMeterExecutor(const FunctionTy &f, const ArgsTy &args)
Definition: Executor_ParaMeter.h:403
void clear_all_parallel(void)
Definition: PerThreadContainer.h:423
ForEachExecutor(const FunctionTy &f, const ArgsTy &args)
Definition: Executor_ParaMeter.h:455
void on_each_gen(FunctionTy &&fn, const TupleTy &tpl)
Definition: Executor_OnEach.h:74
ParaMeter::ParaMeterExecutor< T, FunctionTy, ArgsTy > SuperTy
Definition: Executor_ParaMeter.h:454
void construct(U *p, Args &&...args) const
Definition: runtime/Mem.h:766
SchedType
Definition: Executor_ParaMeter.h:181
bool empty_all() const
Definition: PerThreadContainer.h:429
pointer allocate(size_type size)
Definition: runtime/Mem.h:754
OrderedStepStats(size_t _step, size_t _wlsz)
Definition: Executor_ParaMeter.h:69
s_wl< T, Args...> wl(Args &&...args)
Definition: Traits.h:219
Definition: Executor_ParaMeter.h:184
T value_type
Definition: Executor_ParaMeter.h:436
void do_all_gen(const R &range, F &&func, const ArgsTuple &argsTuple)
Definition: Executor_DoAll.h:530
bool empty(void) const
Definition: Executor_ParaMeter.h:139
PTcont * curr
Definition: Executor_ParaMeter.h:117
Definition: Traits.h:206
void dump(FILE *out, const char *loopname)
Definition: Executor_ParaMeter.h:99
ConflictFlag
Definition: libgalois/include/galois/runtime/Context.h:41
void destroy(pointer ptr) const
Definition: runtime/Mem.h:770
Definition: Executor_ParaMeter.h:112
void reset(Ty &var, Ty val)
Definition: AtomicHelpers.h:202
void operator()(void)
Definition: Executor_ParaMeter.h:417
void dump(FILE *out, const char *loopname)
Definition: Executor_ParaMeter.h:77
auto iterateCurr(void)
Definition: Executor_ParaMeter.h:167
Definition: gIO.h:113
Definition: libgalois/include/galois/runtime/Context.h:135
IterationContext(const T &v)
Definition: Executor_ParaMeter.h:128
T & reduce()
Returns the final reduction value.
Definition: Reduction.h:102
void deallocate(pointer ptr, size_type GALOIS_USED_ONLY_IN_DEBUG(len))
Definition: runtime/Mem.h:760
Definition: libgalois/include/galois/runtime/Context.h:42
Wrapper< T, std::deque< T >, true > LIFO
Definition: Simple.h:89
PTcont * next
Definition: Executor_ParaMeter.h:118
Wrapper< T, std::deque< T >, false > FIFO
Definition: Simple.h:83
OrderedStepStats(size_t _step, size_t par, size_t _wlsz)
Definition: Executor_ParaMeter.h:72
Definition: PerThreadContainer.h:454
Definition: Executor_ParaMeter.h:48
logical OR reduction
Definition: Reduction.h:217
FIFO_WL(void)
Definition: Executor_ParaMeter.h:121
void update(T &&rhs)
Updates the thread local value by applying the reduction operator to current and newly provided value...
Definition: Reduction.h:90
Definition: Executor_ParaMeter.h:202
FILE * getStatsFile(void)
Definition: ParaMeter.cpp:96
Definition: Executor_ParaMeter.h:163
Definition: Executor_ParaMeter.h:428
constexpr auto get_default_trait_values(std::index_sequence<> GALOIS_UNUSED(seq), S GALOIS_UNUSED(source), T GALOIS_UNUSED(tags), D GALOIS_UNUSED(defaults))
Returns a tuple that has an element from defaults[i] for every type from tags[i] missing in source...
Definition: Traits.h:148
UnorderedStepStats(void)
Definition: Executor_ParaMeter.h:90