20 #ifndef GALOIS_PARALLELSTL_H
21 #define GALOIS_PARALLELSTL_H
23 #include "galois/config.h"
36 namespace ParallelSTL {
38 template <
class InputIterator,
class Predicate>
39 size_t count_if(InputIterator first, InputIterator last, Predicate pred) {
52 template <
typename InputIterator,
class Predicate>
69 template <
class InputIterator,
class Predicate>
70 InputIterator
find_if(InputIterator first, InputIterator last, Predicate pred) {
72 typedef typename HelperTy::AccumulatorTy AccumulatorTy;
75 HelperTy helper(accum, pred);
80 for (
unsigned i = 0; i < accum.size(); ++i) {
81 if (*accum.getRemote(i))
82 return **accum.getRemote(i);
87 template <
class Iterator>
89 size_t dist = std::distance(first, last);
91 std::advance(first, rand() % dist);
95 template <
class Compare>
100 template <
class value_type>
101 struct neq_to :
public std::binary_function<value_type, value_type, bool> {
111 template <
class RandomAccessIterator,
class Context>
112 void operator()(std::pair<RandomAccessIterator, RandomAccessIterator> bounds,
114 if (std::distance(bounds.first, bounds.second) <= 1024) {
119 RandomAccessIterator pivot =
choose_rand(bounds.first, bounds.second);
122 std::bind(
comp, std::placeholders::_1, pv));
124 if (bounds.first != pivot)
125 ctx.push(std::make_pair(bounds.first, pivot));
131 if (bounds.second != pivot)
132 ctx.push(std::make_pair(pivot, bounds.second));
137 template <
typename RandomAccessIterator,
class Predicate>
138 std::pair<RandomAccessIterator, RandomAccessIterator>
140 RandomAccessIterator first2, RandomAccessIterator last2,
142 typedef std::reverse_iterator<RandomAccessIterator> RI;
143 RI first3(last2), last3(first2);
145 while (first1 != last1 && pred(*first1))
149 while (first3 != last3 && !pred(*first3))
153 std::swap(*first1++, *first3++);
155 return std::make_pair(first1, first3.base());
158 template <
typename RandomAccessIterator,
class Predicate>
160 typedef std::pair<RandomAccessIterator, RandomAccessIterator>
RP;
166 typename std::iterator_traits<RandomAccessIterator>::difference_type
178 RandomAccessIterator rv =
last;
180 return std::make_pair(rv, rv + BS);
185 RandomAccessIterator rv =
first;
188 return std::make_pair(rv, rv + BS);
192 if (low.first != low.second) {
196 if (high.first != high.second) {
213 low.first = parts.first;
214 high.second = parts.second;
215 if (low.first == low.second)
217 if (high.first == high.second)
219 }
while (low.first != low.second && high.first != high.second);
224 template <
class RandomAccessIterator,
class Predicate>
225 RandomAccessIterator
partition(RandomAccessIterator first,
226 RandomAccessIterator last, Predicate pred) {
227 if (std::distance(first, last) <= 1024)
230 typename P::partition_helper_state s(first, last, pred);
232 if (s.rfirst == first && s.rlast == last) {
240 template <
typename RP>
242 return std::distance(x.first, x.second) > std::distance(y.first, y.second);
246 template <
class RandomAccessIterator,
class Compare>
247 void sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp) {
248 if (std::distance(first, last) <= 1024) {
259 template <
class RandomAccessIterator>
260 void sort(RandomAccessIterator first, RandomAccessIterator last) {
267 template <
class InputIterator,
class T,
typename BinaryOperation>
268 T
accumulate(InputIterator first, InputIterator last,
const T& identity,
269 const BinaryOperation& binary_op) {
271 auto id_fn = [=]() {
return identity; };
280 template <
class InputIterator,
class T>
281 T
accumulate(InputIterator first, InputIterator last,
const T& identity = T()) {
282 return accumulate(first, last, identity, std::plus<T>());
285 template <
class InputIterator,
class MapFn,
class T,
class ReduceFn>
286 T
map_reduce(InputIterator first, InputIterator last, MapFn map_fn,
287 ReduceFn reduce_fn,
const T& identity) {
289 auto id_fn = [=]() {
return identity; };
294 [&](
const auto& v) { r.update(map_fn(v)); });
299 template <
typename I>
300 std::enable_if_t<!std::is_scalar<internal::Val_ty<I>>::value>
destroy(I first,
302 using T = internal::Val_ty<I>;
307 std::enable_if_t<std::is_scalar<internal::Val_ty<I>>::value>
destroy(I, I) {}
313 template <
class InputIt,
class OutputIt>
314 OutputIt
partial_sum(InputIt first, InputIt last, OutputIt d_first) {
317 size_t sizeOfVector = std::distance(first, last);
320 if (sizeOfVector >= 1024) {
322 const size_t blockSize = (sizeOfVector + numBlocks - 1) / numBlocks;
323 assert(numBlocks * blockSize >= sizeOfVector);
325 std::vector<ValueType> localSums(numBlocks);
331 size_t blockStart =
std::min(block * blockSize, sizeOfVector);
332 size_t blockEnd =
std::min((block + 1) * blockSize, sizeOfVector);
333 assert(blockStart <= blockEnd);
337 d_first + blockStart);
340 localSums[block] = *(d_first + blockEnd - 1);
342 localSums[block] = 0;
347 std::vector<ValueType> bulkPrefix(numBlocks);
352 ValueType runningSum = 0;
353 for (
size_t i = 0; i < numBlocks; i++) {
354 bulkPrefix[i] = runningSum;
355 runningSum += localSums[i];
361 ValueType numToAdd = bulkPrefix[block];
362 size_t blockStart =
std::min(block * blockSize, sizeOfVector);
363 size_t blockEnd =
std::min((block + 1) * blockSize, sizeOfVector);
364 assert(blockStart <= blockEnd);
367 std::transform(d_first + blockStart, d_first + blockEnd,
368 d_first + blockStart,
369 [&](ValueType& val) {
return val + numToAdd; });
373 return d_first + sizeOfVector;
galois::optional< InputIterator > ElementTy
Definition: ParallelSTL.h:55
std::pair< RandomAccessIterator, RandomAccessIterator > dual_partition(RandomAccessIterator first1, RandomAccessIterator last1, RandomAccessIterator first2, RandomAccessIterator last2, Predicate pred)
Definition: ParallelSTL.h:139
bool operator()(const RP &x, const RP &y)
Definition: ParallelSTL.h:241
void operator()(const InputIterator &v, UserContext< InputIterator > &ctx)
Definition: ParallelSTL.h:61
partition_helper_state(RandomAccessIterator f, RandomAccessIterator l, Predicate p)
Definition: ParallelSTL.h:171
std::pair< RandomAccessIterator, RandomAccessIterator > RP
Definition: ParallelSTL.h:160
Definition: ParallelSTL.h:159
unsigned int getActiveThreads() noexcept
Returns the number of threads in use.
Definition: Threads.cpp:37
Definition: ParallelSTL.h:96
RandomAccessIterator rfirst
Definition: ParallelSTL.h:163
Iterator choose_rand(Iterator first, Iterator last)
Definition: ParallelSTL.h:88
void operator()(std::pair< RandomAccessIterator, RandomAccessIterator > bounds, Context &ctx)
Definition: ParallelSTL.h:112
substrate::PerThreadStorage< ElementTy > AccumulatorTy
Definition: ParallelSTL.h:56
T * getLocal()
Definition: PerThreadStorage.h:128
void operator()(unsigned, unsigned)
Definition: ParallelSTL.h:208
substrate::SimpleLock Lock
Definition: ParallelSTL.h:164
size_t count_if(InputIterator first, InputIterator last, Predicate pred)
Definition: ParallelSTL.h:39
Compare comp
Definition: ParallelSTL.h:97
Compare comp
Definition: ParallelSTL.h:102
void unlock() const
Definition: SimpleLock.h:68
Definition: ParallelSTL.h:161
void sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp)
Definition: ParallelSTL.h:247
void update(RP low, RP high)
Definition: ParallelSTL.h:190
Galois version of boost::optional.
Definition: optional.h:34
bool operator()(const value_type &a, const value_type &b) const
Definition: ParallelSTL.h:104
const Ty max(std::atomic< Ty > &a, const Ty &b)
Definition: AtomicHelpers.h:40
Definition: PerThreadStorage.h:88
Definition: ParallelSTL.h:239
auto make_reducible(const MergeFn &mergeFn, const IdFn &idFn)
make_reducible creates a Reducible from a merge function and identity function.
Definition: Reduction.h:125
RandomAccessIterator last
Definition: ParallelSTL.h:162
This is the object passed to the user's parallel loop.
Definition: UserContext.h:37
partition_helper_state * state
Definition: ParallelSTL.h:206
RandomAccessIterator rlast
Definition: ParallelSTL.h:163
void do_all(const RangeFunc &rangeMaker, FunctionTy &&fn, const Args &...args)
Standard do-all loop.
Definition: Loops.h:71
T map_reduce(InputIterator first, InputIterator last, MapFn map_fn, ReduceFn reduce_fn, const T &identity)
Definition: ParallelSTL.h:286
OutputIt partial_sum(InputIt first, InputIt last, OutputIt d_first)
Does a partial sum from first -> last and writes the results to the d_first iterator.
Definition: ParallelSTL.h:314
const Ty min(std::atomic< Ty > &a, const Ty &b)
Definition: AtomicHelpers.h:70
sort_helper(Compare c)
Definition: ParallelSTL.h:109
RandomAccessIterator partition(RandomAccessIterator first, RandomAccessIterator last, Predicate pred)
Definition: ParallelSTL.h:225
void on_each(FunctionTy &&fn, const Args &...args)
Low-level parallel loop.
Definition: Loops.h:86
NoDerefIterator< Iterator > make_no_deref_iterator(Iterator it)
Convenience function to create NoDerefIterator.
Definition: NoDerefIterator.h:48
void lock() const
Definition: SimpleLock.h:55
std::iterator_traits< RandomAccessIterator >::difference_type BlockSize()
Definition: ParallelSTL.h:167
T & reduce()
Returns the final reduction value.
Definition: Reduction.h:102
std::enable_if_t<!std::is_scalar< internal::Val_ty< I > >::value > destroy(I first, I last)
Definition: ParallelSTL.h:300
SimpleLock is a spinlock.
Definition: SimpleLock.h:36
void for_each(const RangeFunc &rangeMaker, FunctionTy &&fn, const Args &...args)
Galois unordered set iterator.
Definition: Loops.h:52
Definition: ParallelSTL.h:53
void breakLoop()
Signal break in parallel loop, current iteration continues untill natural termination.
Definition: UserContext.h:78
RandomAccessIterator first
Definition: ParallelSTL.h:162
RP takeHigh()
Definition: ParallelSTL.h:174
auto iterate(C &cont)
Definition: Range.h:323
InputIterator find_if(InputIterator first, InputIterator last, Predicate pred)
Definition: ParallelSTL.h:70
AccumulatorTy & accum
Definition: ParallelSTL.h:57
Predicate pred
Definition: ParallelSTL.h:165
find_if_helper(AccumulatorTy &a, Predicate &p)
Definition: ParallelSTL.h:60
Predicate & f
Definition: ParallelSTL.h:58
T value_type
Definition: Executor_ParaMeter.h:111
T accumulate(InputIterator first, InputIterator last, const T &identity, const BinaryOperation &binary_op)
Definition: ParallelSTL.h:268
partition_helper(partition_helper_state *s)
Definition: ParallelSTL.h:204
neq_to(Compare c)
Definition: ParallelSTL.h:103
RP takeLow()
Definition: ParallelSTL.h:182
Not equal in terms of less-than.
Definition: ParallelSTL.h:101
internal::ChunkMaster< T, ConExtLinkedQueue, true, false, ChunkSize, Concurrent > PerSocketChunkFIFO
Distributed chunked FIFO.
Definition: Chunk.h:296