Galois
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ParallelSTL.h
Go to the documentation of this file.
1 /*
2  * This file belongs to the Galois project, a C++ library for exploiting
3  * parallelism. The code is being released under the terms of the 3-Clause BSD
4  * License (a copy is located in LICENSE.txt at the top-level directory).
5  *
6  * Copyright (C) 2018, The University of Texas at Austin. All rights reserved.
7  * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS
8  * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY,
9  * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF
10  * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF
11  * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH
12  * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances
13  * shall University be liable for incidental, special, indirect, direct or
14  * consequential damages or loss of profits, interruption of business, or
15  * related expenses which may arise from use of Software or Documentation,
16  * including but not limited to those resulting from defects in Software and/or
17  * Documentation, or loss or inaccuracy of data of any kind.
18  */
19 
20 #ifndef GALOIS_PARALLELSTL_H
21 #define GALOIS_PARALLELSTL_H
22 
23 #include "galois/config.h"
25 #include "galois/NoDerefIterator.h"
26 #include "galois/runtime/Range.h"
27 #include "galois/Reduction.h"
28 #include "galois/Traits.h"
29 #include "galois/UserContext.h"
30 #include "galois/Threads.h"
31 #include "galois/worklists/Chunk.h"
32 
33 namespace galois {
35 // TODO: rename to gstl?
36 namespace ParallelSTL {
37 
38 template <class InputIterator, class Predicate>
39 size_t count_if(InputIterator first, InputIterator last, Predicate pred) {
40 
42 
43  galois::do_all(galois::iterate(first, last), [&](const auto& v) {
44  if (pred(v)) {
45  count += 1;
46  }
47  });
48 
49  return count.reduce();
50 }
51 
52 template <typename InputIterator, class Predicate>
54 
58  Predicate& f;
59 
60  find_if_helper(AccumulatorTy& a, Predicate& p) : accum(a), f(p) {}
61  void operator()(const InputIterator& v, UserContext<InputIterator>& ctx) {
62  if (f(*v)) {
63  *accum.getLocal() = v;
64  ctx.breakLoop();
65  }
66  }
67 };
68 
69 template <class InputIterator, class Predicate>
70 InputIterator find_if(InputIterator first, InputIterator last, Predicate pred) {
72  typedef typename HelperTy::AccumulatorTy AccumulatorTy;
74  AccumulatorTy accum;
75  HelperTy helper(accum, pred);
79  galois::parallel_break(), galois::wl<WL>());
80  for (unsigned i = 0; i < accum.size(); ++i) {
81  if (*accum.getRemote(i))
82  return **accum.getRemote(i);
83  }
84  return last;
85 }
86 
87 template <class Iterator>
88 Iterator choose_rand(Iterator first, Iterator last) {
89  size_t dist = std::distance(first, last);
90  if (dist)
91  std::advance(first, rand() % dist);
92  return first;
93 }
94 
95 template <class Compare>
96 struct sort_helper {
97  Compare comp;
98 
100  template <class value_type>
101  struct neq_to : public std::binary_function<value_type, value_type, bool> {
102  Compare comp;
103  neq_to(Compare c) : comp(c) {}
104  bool operator()(const value_type& a, const value_type& b) const {
105  return comp(a, b) || comp(b, a);
106  }
107  };
108 
109  sort_helper(Compare c) : comp(c) {}
110 
111  template <class RandomAccessIterator, class Context>
112  void operator()(std::pair<RandomAccessIterator, RandomAccessIterator> bounds,
113  Context& ctx) {
114  if (std::distance(bounds.first, bounds.second) <= 1024) {
115  std::sort(bounds.first, bounds.second, comp);
116  } else {
117  typedef
119  RandomAccessIterator pivot = choose_rand(bounds.first, bounds.second);
120  VT pv = *pivot;
121  pivot = std::partition(bounds.first, bounds.second,
122  std::bind(comp, std::placeholders::_1, pv));
123  // push the lower bit
124  if (bounds.first != pivot)
125  ctx.push(std::make_pair(bounds.first, pivot));
126  // adjust the upper bit
127  pivot =
128  std::find_if(pivot, bounds.second,
129  std::bind(neq_to<VT>(comp), std::placeholders::_1, pv));
130  // push the upper bit
131  if (bounds.second != pivot)
132  ctx.push(std::make_pair(pivot, bounds.second));
133  }
134  }
135 };
136 
137 template <typename RandomAccessIterator, class Predicate>
138 std::pair<RandomAccessIterator, RandomAccessIterator>
139 dual_partition(RandomAccessIterator first1, RandomAccessIterator last1,
140  RandomAccessIterator first2, RandomAccessIterator last2,
141  Predicate pred) {
142  typedef std::reverse_iterator<RandomAccessIterator> RI;
143  RI first3(last2), last3(first2);
144  while (true) {
145  while (first1 != last1 && pred(*first1))
146  ++first1;
147  if (first1 == last1)
148  break;
149  while (first3 != last3 && !pred(*first3))
150  ++first3;
151  if (first3 == last3)
152  break;
153  std::swap(*first1++, *first3++);
154  }
155  return std::make_pair(first1, first3.base());
156 }
157 
158 template <typename RandomAccessIterator, class Predicate>
160  typedef std::pair<RandomAccessIterator, RandomAccessIterator> RP;
162  RandomAccessIterator first, last;
163  RandomAccessIterator rfirst, rlast;
165  Predicate pred;
166  typename std::iterator_traits<RandomAccessIterator>::difference_type
168  return 1024;
169  }
170 
171  partition_helper_state(RandomAccessIterator f, RandomAccessIterator l,
172  Predicate p)
173  : first(f), last(l), rfirst(l), rlast(f), pred(p) {}
175  Lock.lock();
176  unsigned BS = std::min(BlockSize(), std::distance(first, last));
177  last -= BS;
178  RandomAccessIterator rv = last;
179  Lock.unlock();
180  return std::make_pair(rv, rv + BS);
181  }
183  Lock.lock();
184  unsigned BS = std::min(BlockSize(), std::distance(first, last));
185  RandomAccessIterator rv = first;
186  first += BS;
187  Lock.unlock();
188  return std::make_pair(rv, rv + BS);
189  }
190  void update(RP low, RP high) {
191  Lock.lock();
192  if (low.first != low.second) {
193  rfirst = std::min(rfirst, low.first);
194  rlast = std::max(rlast, low.second);
195  }
196  if (high.first != high.second) {
197  rfirst = std::min(rfirst, high.first);
198  rlast = std::max(rlast, high.second);
199  }
200  Lock.unlock();
201  }
202  };
203 
205 
207 
208  void operator()(unsigned, unsigned) {
209  RP high, low;
210  do {
211  RP parts = dual_partition(low.first, low.second, high.first, high.second,
212  state->pred);
213  low.first = parts.first;
214  high.second = parts.second;
215  if (low.first == low.second)
216  low = state->takeLow();
217  if (high.first == high.second)
218  high = state->takeHigh();
219  } while (low.first != low.second && high.first != high.second);
220  state->update(low, high);
221  }
222 };
223 
224 template <class RandomAccessIterator, class Predicate>
225 RandomAccessIterator partition(RandomAccessIterator first,
226  RandomAccessIterator last, Predicate pred) {
227  if (std::distance(first, last) <= 1024)
228  return std::partition(first, last, pred);
230  typename P::partition_helper_state s(first, last, pred);
231  on_each(P(&s));
232  if (s.rfirst == first && s.rlast == last) { // perfect !
233  // abort();
234  return s.first;
235  }
236  return std::partition(s.rfirst, s.rlast, pred);
237 }
238 
239 struct pair_dist {
240  template <typename RP>
241  bool operator()(const RP& x, const RP& y) {
242  return std::distance(x.first, x.second) > std::distance(y.first, y.second);
243  }
244 };
245 
246 template <class RandomAccessIterator, class Compare>
247 void sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp) {
248  if (std::distance(first, last) <= 1024) {
249  std::sort(first, last, comp);
250  return;
251  }
253 
254  for_each(galois::iterate({std::make_pair(first, last)}),
256  galois::wl<WL>());
257 }
258 
259 template <class RandomAccessIterator>
260 void sort(RandomAccessIterator first, RandomAccessIterator last) {
262  first, last,
263  std::less<
265 }
266 
267 template <class InputIterator, class T, typename BinaryOperation>
268 T accumulate(InputIterator first, InputIterator last, const T& identity,
269  const BinaryOperation& binary_op) {
270 
271  auto id_fn = [=]() { return identity; };
272 
273  auto r = make_reducible(binary_op, id_fn);
274 
275  do_all(galois::iterate(first, last), [&](const T& v) { r.update(v); });
276 
277  return r.reduce();
278 }
279 
280 template <class InputIterator, class T>
281 T accumulate(InputIterator first, InputIterator last, const T& identity = T()) {
282  return accumulate(first, last, identity, std::plus<T>());
283 }
284 
285 template <class InputIterator, class MapFn, class T, class ReduceFn>
286 T map_reduce(InputIterator first, InputIterator last, MapFn map_fn,
287  ReduceFn reduce_fn, const T& identity) {
288 
289  auto id_fn = [=]() { return identity; };
290 
291  auto r = make_reducible(reduce_fn, id_fn);
292 
293  galois::do_all(galois::iterate(first, last),
294  [&](const auto& v) { r.update(map_fn(v)); });
295 
296  return r.reduce();
297 }
298 
299 template <typename I>
300 std::enable_if_t<!std::is_scalar<internal::Val_ty<I>>::value> destroy(I first,
301  I last) {
302  using T = internal::Val_ty<I>;
303  do_all(iterate(first, last), [=](T& i) { (&i)->~T(); });
304 }
305 
306 template <class I>
307 std::enable_if_t<std::is_scalar<internal::Val_ty<I>>::value> destroy(I, I) {}
308 
313 template <class InputIt, class OutputIt>
314 OutputIt partial_sum(InputIt first, InputIt last, OutputIt d_first) {
315  using ValueType = typename std::iterator_traits<InputIt>::value_type;
316 
317  size_t sizeOfVector = std::distance(first, last);
318 
319  // only bother with parallel execution if vector is larger than some size
320  if (sizeOfVector >= 1024) {
321  const size_t numBlocks = galois::getActiveThreads();
322  const size_t blockSize = (sizeOfVector + numBlocks - 1) / numBlocks;
323  assert(numBlocks * blockSize >= sizeOfVector);
324 
325  std::vector<ValueType> localSums(numBlocks);
326 
327  // get the block sums
329  galois::iterate((size_t)0, numBlocks), [&](const size_t& block) {
330  // block start can extend past sizeOfVector if doesn't divide evenly
331  size_t blockStart = std::min(block * blockSize, sizeOfVector);
332  size_t blockEnd = std::min((block + 1) * blockSize, sizeOfVector);
333  assert(blockStart <= blockEnd);
334 
335  // partial accumulation of each block done now
336  std::partial_sum(first + blockStart, first + blockEnd,
337  d_first + blockStart);
338  // save the last number in this block: used for block prefix sum
339  if (blockEnd > 0) {
340  localSums[block] = *(d_first + blockEnd - 1);
341  } else {
342  localSums[block] = 0;
343  }
344  });
345 
346  // bulkPrefix[i] holds the starting sum of a particular block i
347  std::vector<ValueType> bulkPrefix(numBlocks);
348  // exclusive scan on local sums to get number to add to each block's
349  // set of indices
350  // Not using std::exclusive_scan because apparently it doesn't work for
351  // some compilers
352  ValueType runningSum = 0;
353  for (size_t i = 0; i < numBlocks; i++) {
354  bulkPrefix[i] = runningSum;
355  runningSum += localSums[i];
356  }
357 
359  galois::iterate((size_t)0, numBlocks), [&](const size_t& block) {
360  // add the sums of previous elements to blocks
361  ValueType numToAdd = bulkPrefix[block];
362  size_t blockStart = std::min(block * blockSize, sizeOfVector);
363  size_t blockEnd = std::min((block + 1) * blockSize, sizeOfVector);
364  assert(blockStart <= blockEnd);
365 
366  // transform applies addition to appropriate range
367  std::transform(d_first + blockStart, d_first + blockEnd,
368  d_first + blockStart,
369  [&](ValueType& val) { return val + numToAdd; });
370  });
371 
372  // return the iterator past the last element written
373  return d_first + sizeOfVector;
374  } else {
375  // vector is small; do it serially using standard library
376  return std::partial_sum(first, last, d_first);
377  }
378 }
379 
380 } // end namespace ParallelSTL
381 } // end namespace galois
382 #endif
galois::optional< InputIterator > ElementTy
Definition: ParallelSTL.h:55
std::pair< RandomAccessIterator, RandomAccessIterator > dual_partition(RandomAccessIterator first1, RandomAccessIterator last1, RandomAccessIterator first2, RandomAccessIterator last2, Predicate pred)
Definition: ParallelSTL.h:139
bool operator()(const RP &x, const RP &y)
Definition: ParallelSTL.h:241
Definition: Traits.h:235
void operator()(const InputIterator &v, UserContext< InputIterator > &ctx)
Definition: ParallelSTL.h:61
partition_helper_state(RandomAccessIterator f, RandomAccessIterator l, Predicate p)
Definition: ParallelSTL.h:171
std::pair< RandomAccessIterator, RandomAccessIterator > RP
Definition: ParallelSTL.h:160
Definition: ParallelSTL.h:159
unsigned int getActiveThreads() noexcept
Returns the number of threads in use.
Definition: Threads.cpp:37
Definition: ParallelSTL.h:96
RandomAccessIterator rfirst
Definition: ParallelSTL.h:163
Iterator choose_rand(Iterator first, Iterator last)
Definition: ParallelSTL.h:88
void operator()(std::pair< RandomAccessIterator, RandomAccessIterator > bounds, Context &ctx)
Definition: ParallelSTL.h:112
substrate::PerThreadStorage< ElementTy > AccumulatorTy
Definition: ParallelSTL.h:56
T * getLocal()
Definition: PerThreadStorage.h:128
void operator()(unsigned, unsigned)
Definition: ParallelSTL.h:208
substrate::SimpleLock Lock
Definition: ParallelSTL.h:164
size_t count_if(InputIterator first, InputIterator last, Predicate pred)
Definition: ParallelSTL.h:39
Definition: Traits.h:229
Compare comp
Definition: ParallelSTL.h:97
Compare comp
Definition: ParallelSTL.h:102
void unlock() const
Definition: SimpleLock.h:68
void sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp)
Definition: ParallelSTL.h:247
void update(RP low, RP high)
Definition: ParallelSTL.h:190
Definition: Traits.h:260
Galois version of boost::optional.
Definition: optional.h:34
bool operator()(const value_type &a, const value_type &b) const
Definition: ParallelSTL.h:104
const Ty max(std::atomic< Ty > &a, const Ty &b)
Definition: AtomicHelpers.h:40
Definition: PerThreadStorage.h:88
Definition: ParallelSTL.h:239
auto make_reducible(const MergeFn &mergeFn, const IdFn &idFn)
make_reducible creates a Reducible from a merge function and identity function.
Definition: Reduction.h:125
RandomAccessIterator last
Definition: ParallelSTL.h:162
This is the object passed to the user&#39;s parallel loop.
Definition: UserContext.h:37
partition_helper_state * state
Definition: ParallelSTL.h:206
RandomAccessIterator rlast
Definition: ParallelSTL.h:163
void do_all(const RangeFunc &rangeMaker, FunctionTy &&fn, const Args &...args)
Standard do-all loop.
Definition: Loops.h:71
T map_reduce(InputIterator first, InputIterator last, MapFn map_fn, ReduceFn reduce_fn, const T &identity)
Definition: ParallelSTL.h:286
OutputIt partial_sum(InputIt first, InputIt last, OutputIt d_first)
Does a partial sum from first -&gt; last and writes the results to the d_first iterator.
Definition: ParallelSTL.h:314
const Ty min(std::atomic< Ty > &a, const Ty &b)
Definition: AtomicHelpers.h:70
sort_helper(Compare c)
Definition: ParallelSTL.h:109
RandomAccessIterator partition(RandomAccessIterator first, RandomAccessIterator last, Predicate pred)
Definition: ParallelSTL.h:225
void on_each(FunctionTy &&fn, const Args &...args)
Low-level parallel loop.
Definition: Loops.h:86
NoDerefIterator< Iterator > make_no_deref_iterator(Iterator it)
Convenience function to create NoDerefIterator.
Definition: NoDerefIterator.h:48
void lock() const
Definition: SimpleLock.h:55
std::iterator_traits< RandomAccessIterator >::difference_type BlockSize()
Definition: ParallelSTL.h:167
T & reduce()
Returns the final reduction value.
Definition: Reduction.h:102
std::enable_if_t<!std::is_scalar< internal::Val_ty< I > >::value > destroy(I first, I last)
Definition: ParallelSTL.h:300
SimpleLock is a spinlock.
Definition: SimpleLock.h:36
void for_each(const RangeFunc &rangeMaker, FunctionTy &&fn, const Args &...args)
Galois unordered set iterator.
Definition: Loops.h:52
Definition: ParallelSTL.h:53
void breakLoop()
Signal break in parallel loop, current iteration continues untill natural termination.
Definition: UserContext.h:78
RandomAccessIterator first
Definition: ParallelSTL.h:162
auto iterate(C &cont)
Definition: Range.h:323
InputIterator find_if(InputIterator first, InputIterator last, Predicate pred)
Definition: ParallelSTL.h:70
AccumulatorTy & accum
Definition: ParallelSTL.h:57
find_if_helper(AccumulatorTy &a, Predicate &p)
Definition: ParallelSTL.h:60
Predicate & f
Definition: ParallelSTL.h:58
T value_type
Definition: Executor_ParaMeter.h:111
T accumulate(InputIterator first, InputIterator last, const T &identity, const BinaryOperation &binary_op)
Definition: ParallelSTL.h:268
partition_helper(partition_helper_state *s)
Definition: ParallelSTL.h:204
neq_to(Compare c)
Definition: ParallelSTL.h:103
Not equal in terms of less-than.
Definition: ParallelSTL.h:101
internal::ChunkMaster< T, ConExtLinkedQueue, true, false, ChunkSize, Concurrent > PerSocketChunkFIFO
Distributed chunked FIFO.
Definition: Chunk.h:296