Galois
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
PerThreadChunk.h
Go to the documentation of this file.
1 /*
2  * This file belongs to the Galois project, a C++ library for exploiting
3  * parallelism. The code is being released under the terms of the 3-Clause BSD
4  * License (a copy is located in LICENSE.txt at the top-level directory).
5  *
6  * Copyright (C) 2018, The University of Texas at Austin. All rights reserved.
7  * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS
8  * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY,
9  * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF
10  * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF
11  * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH
12  * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances
13  * shall University be liable for incidental, special, indirect, direct or
14  * consequential damages or loss of profits, interruption of business, or
15  * related expenses which may arise from use of Software or Documentation,
16  * including but not limited to those resulting from defects in Software and/or
17  * Documentation, or loss or inaccuracy of data of any kind.
18  */
19 
20 #ifndef GALOIS_WORKLIST_PERTHREADCHUNK_H
21 #define GALOIS_WORKLIST_PERTHREADCHUNK_H
22 
23 #include "galois/FixedSizeRing.h"
24 #include "galois/runtime/Mem.h"
28 #include "galois/Threads.h"
30 
31 namespace galois {
32 namespace worklists {
33 
34 struct ChunkHeader {
37 };
38 
41  ChunkHeader* tail;
42 
43  void prepend(ChunkHeader* C) {
44  // Find tail of stolen stuff
45  ChunkHeader* t = C;
46  while (t->next) {
47  t = t->next;
48  }
49  head.lock();
50  t->next = head.getValue();
51  if (!t->next)
52  tail = t;
53  head.unlock_and_set(C);
54  }
55 
56 public:
57  PerThreadChunkQueue() : tail(0) {}
58 
59  bool empty() const { return !tail; }
60 
61  void push(ChunkHeader* obj) {
62  head.lock();
63  obj->next = 0;
64  if (tail) {
65  tail->next = obj;
66  tail = obj;
67  head.unlock();
68  } else {
69  assert(!head.getValue());
70  tail = obj;
71  head.unlock_and_set(obj);
72  }
73  }
74 
76  // lock free Fast path empty case
77  if (empty())
78  return 0;
79 
80  head.lock();
81  ChunkHeader* h = head.getValue();
82  if (!h) {
83  head.unlock();
84  return 0;
85  }
86  if (tail == h) {
87  tail = 0;
88  assert(!h->next);
89  head.unlock_and_clear();
90  } else {
91  head.unlock_and_set(h->next);
92  h->next = 0;
93  }
94  return h;
95  }
96 
98  // Don't do work on empty victims (lockfree check)
99  if (victim.empty())
100  return 0;
101  // Steal everything
102  victim.head.lock();
103  ChunkHeader* C = victim.head.getValue();
104  if (C)
105  victim.tail = 0;
106  victim.head.unlock_and_clear();
107  if (!C)
108  return 0; // Didn't get anything
109  ChunkHeader* retval = C;
110  C = C->next;
111  retval->next = 0;
112  if (!C)
113  return retval; // Only got one thing
114  prepend(C);
115  return retval;
116  }
117 
119  // Don't do work on empty victims (lockfree check)
120  if (victim.empty())
121  return 0;
122  // Steal half
123  victim.head.lock();
124  ChunkHeader* C = victim.head.getValue();
125  ChunkHeader* ntail = C;
126  bool count = false;
127  while (C) {
128  C = C->next;
129  if (count)
130  ntail = ntail->next;
131  count = !count;
132  }
133  if (ntail) {
134  C = ntail->next;
135  ntail->next = 0;
136  victim.tail = ntail;
137  }
138  victim.head.unlock();
139  if (!C)
140  return 0; // Didn't get anything
141  ChunkHeader* retval = C;
142  C = C->next;
143  retval->next = 0;
144  if (!C)
145  return retval; // Only got one thing
146  prepend(C);
147  return retval;
148  }
149 };
150 
153 
154  void prepend(ChunkHeader* C) {
155  // Find tail of stolen stuff
156  ChunkHeader* tail = C;
157  while (tail->next) {
158  tail = tail->next;
159  }
160  head.lock();
161  tail->next = head.getValue();
162  head.unlock_and_set(C);
163  }
164 
165 public:
166  bool empty() const { return !head.getValue(); }
167 
168  void push(ChunkHeader* obj) {
169  ChunkHeader* oldhead = 0;
170  do {
171  oldhead = head.getValue();
172  obj->next = oldhead;
173  } while (!head.CAS(oldhead, obj));
174  }
175 
177  // lock free Fast empty path
178  if (empty())
179  return 0;
180 
181  // Disable CAS
182  head.lock();
183  ChunkHeader* retval = head.getValue();
184  ChunkHeader* setval = 0;
185  if (retval) {
186  setval = retval->next;
187  retval->next = 0;
188  }
189  head.unlock_and_set(setval);
190  return retval;
191  }
192 
194  // Don't do work on empty victims (lockfree check)
195  if (victim.empty())
196  return 0;
197  // Steal everything
198  victim.head.lock();
199  ChunkHeader* C = victim.head.getValue();
200  victim.head.unlock_and_clear();
201  if (!C)
202  return 0; // Didn't get anything
203  ChunkHeader* retval = C;
204  C = C->next;
205  retval->next = 0;
206  if (!C)
207  return retval; // Only got one thing
208  prepend(C);
209  return retval;
210  }
211 
213  // Don't do work on empty victims (lockfree check)
214  if (victim.empty())
215  return 0;
216  // Steal half
217  victim.head.lock();
218  ChunkHeader* C = victim.head.getValue();
219  ChunkHeader* ntail = C;
220  bool count = false;
221  while (C) {
222  C = C->next;
223  if (count)
224  ntail = ntail->next;
225  count = !count;
226  }
227  if (ntail) {
228  C = ntail->next;
229  ntail->next = 0;
230  }
231  victim.head.unlock();
232  if (!C)
233  return 0; // Didn't get anything
234  ChunkHeader* retval = C;
235  C = C->next;
236  retval->next = 0;
237  if (!C)
238  return retval; // Only got one thing
239  prepend(C);
240  return retval;
241  }
242 };
243 
244 template <typename InnerWL>
245 class StealingQueue : private boost::noncopyable {
247 
249  ChunkHeader* doSteal() {
250  std::pair<InnerWL, unsigned>& me = *local.getLocal();
251  auto& tp = substrate::getThreadPool();
252  unsigned id = tp.getTID();
253  unsigned pkg = substrate::ThreadPool::getSocket();
254  unsigned num = galois::getActiveThreads();
255 
256  // First steal from this socket
257  for (unsigned eid = id + 1; eid < num; ++eid) {
258  if (tp.getSocket(eid) == pkg) {
259  ChunkHeader* c = me.first.stealHalfAndPop(local.getRemote(eid)->first);
260  if (c)
261  return c;
262  }
263  }
264  for (unsigned eid = 0; eid < id; ++eid) {
265  if (tp.getSocket(eid) == pkg) {
266  ChunkHeader* c = me.first.stealHalfAndPop(local.getRemote(eid)->first);
267  if (c)
268  return c;
269  }
270  }
271 
272  // Leaders can cross socket
274  unsigned eid = (id + me.second) % num;
275  ++me.second;
276  if (id != eid && tp.isLeader(eid)) {
277  ChunkHeader* c = me.first.stealAllAndPop(local.getRemote(eid)->first);
278  if (c)
279  return c;
280  }
281  }
282  return 0;
283  }
284 
285 public:
286  void push(ChunkHeader* c) { local.getLocal()->first.push(c); }
287 
289  if (ChunkHeader* c = local.getLocal()->first.pop())
290  return c;
291  return doSteal();
292  }
293 };
294 
295 template <bool IsLocallyLIFO, int ChunkSize, typename Container, typename T>
296 struct PerThreadChunkMaster : private boost::noncopyable {
297  template <typename _T>
299 
300  template <bool _concurrent>
302 
303  template <int _chunk_size>
304  using with_chunk_size =
306 
307 private:
308  class Chunk : public ChunkHeader,
309  public galois::FixedSizeRing<T, ChunkSize> {};
310 
313  Container worklist;
314 
315  Chunk* mkChunk() {
316  Chunk* ptr = alloc.allocate(1);
317  alloc.construct(ptr);
318  return ptr;
319  }
320 
321  void delChunk(Chunk* ptr) {
322  alloc.destroy(ptr);
323  alloc.deallocate(ptr, 1);
324  }
325 
326  void swapInPush(std::pair<Chunk*, Chunk*>& d) {
327  if (!IsLocallyLIFO)
328  std::swap(d.first, d.second);
329  }
330 
331  Chunk*& getPushChunk(std::pair<Chunk*, Chunk*>& d) {
332  if (!IsLocallyLIFO)
333  return d.second;
334  else
335  return d.first;
336  }
337 
338  Chunk*& getPopChunk(std::pair<Chunk*, Chunk*>& d) { return d.first; }
339 
340  bool doPush(Chunk* c, const T& val) { return c->push_back(val); }
341 
342  galois::optional<T> doPop(Chunk* c) {
343  if (!IsLocallyLIFO)
344  return c->extract_front();
345  else
346  return c->extract_back();
347  }
348 
349  void push_internal(std::pair<Chunk*, Chunk*>&, Chunk*& n, const T& val) {
350  // Simple case, space in current chunk
351  if (n && doPush(n, val))
352  return;
353  // full chunk, push
354  if (n)
355  worklist.push(static_cast<ChunkHeader*>(n));
356  // get empty chunk;
357  n = mkChunk();
358  // There better be some room in the new chunk
359  doPush(n, val);
360  }
361 
362 public:
363  typedef T value_type;
364 
366 
367  void push(value_type val) {
368  std::pair<Chunk*, Chunk*>& tld = *data.getLocal();
369  Chunk*& n = getPushChunk(tld);
370  push_internal(tld, n, val);
371  }
372 
373  template <typename Iter>
374  void push(Iter b, Iter e) {
375  std::pair<Chunk*, Chunk*>& tld = *data.getLocal();
376  Chunk*& n = getPushChunk(tld);
377  while (b != e)
378  push_internal(tld, n, *b++);
379  }
380 
381  template <typename RangeTy>
382  void push_initial(const RangeTy& range) {
383  auto rp = range.local_pair();
384  push(rp.first, rp.second);
385  }
386 
388  std::pair<Chunk*, Chunk*>& tld = *data.getLocal();
389  Chunk*& n = getPopChunk(tld);
391  // simple case, things in current chunk
392  if (n && (retval = doPop(n)))
393  return retval;
394  // empty chunk, trash it
395  if (n)
396  delChunk(n);
397  // get a new chunk
398  n = static_cast<Chunk*>(worklist.pop());
399  if (n && (retval = doPop(n)))
400  return retval;
401  // try stealing the push buffer if we can
402  swapInPush(tld);
403  if (n)
404  retval = doPop(n);
405  return retval;
406  }
407 };
408 
409 template <int ChunkSize = 64, typename T = int>
410 using PerThreadChunkLIFO =
411  PerThreadChunkMaster<true, ChunkSize, StealingQueue<PerThreadChunkStack>,
412  T>;
414 
415 template <int ChunkSize = 64, typename T = int>
416 using PerThreadChunkFIFO =
418  T>;
420 
421 } // namespace worklists
422 } // namespace galois
423 #endif
void push_initial(const RangeTy &range)
Definition: PerThreadChunk.h:382
PerThreadChunkMaster< true, ChunkSize, StealingQueue< PerThreadChunkStack >, T > PerThreadChunkLIFO
Definition: PerThreadChunk.h:412
PtrLock is a spinlock and a pointer.
Definition: PtrLock.h:42
ThreadPool & getThreadPool(void)
return a reference to system thread pool
Definition: ThreadPool.cpp:259
unsigned int getActiveThreads() noexcept
Returns the number of threads in use.
Definition: Threads.cpp:37
ChunkHeader * prev
Definition: PerThreadChunk.h:36
T * getLocal()
Definition: PerThreadStorage.h:128
#define GALOIS_ATTRIBUTE_NOINLINE
Definition: CompilerSpecific.h:46
void push(Iter b, Iter e)
Definition: PerThreadChunk.h:374
void push(ChunkHeader *obj)
Definition: PerThreadChunk.h:168
bool empty() const
Definition: PerThreadChunk.h:59
ChunkHeader * stealHalfAndPop(PerThreadChunkStack &victim)
Definition: PerThreadChunk.h:212
Definition: PerThreadChunk.h:34
ChunkHeader * next
Definition: PerThreadChunk.h:35
void construct(U *p, Args &&...args) const
Definition: runtime/Mem.h:766
Galois version of boost::optional.
Definition: optional.h:34
bool empty() const
Definition: PerThreadChunk.h:166
Ordered collection of bounded size.
Definition: FixedSizeRing.h:207
pointer allocate(size_type size)
Definition: runtime/Mem.h:754
Definition: PerThreadStorage.h:88
ChunkHeader * pop()
Definition: PerThreadChunk.h:288
Definition: PerThreadChunk.h:245
T value_type
Definition: PerThreadChunk.h:363
void destroy(pointer ptr) const
Definition: runtime/Mem.h:770
#define GALOIS_WLCOMPILECHECK(name)
Definition: WLCompileCheck.h:26
static unsigned getSocket()
Definition: ThreadPool.h:207
static bool isLeader()
Definition: ThreadPool.h:205
ChunkHeader * stealAllAndPop(PerThreadChunkStack &victim)
Definition: PerThreadChunk.h:193
Definition: PerThreadChunk.h:151
void push(ChunkHeader *c)
Definition: PerThreadChunk.h:286
void deallocate(pointer ptr, size_type GALOIS_USED_ONLY_IN_DEBUG(len))
Definition: runtime/Mem.h:760
void push(ChunkHeader *obj)
Definition: PerThreadChunk.h:61
PerThreadChunkQueue()
Definition: PerThreadChunk.h:57
PerThreadChunkMaster()
Definition: PerThreadChunk.h:365
Definition: PerThreadChunk.h:39
ChunkHeader * pop()
Definition: PerThreadChunk.h:176
galois::optional< value_type > pop()
Definition: PerThreadChunk.h:387
T * getRemote(unsigned int thread)
Definition: PerThreadStorage.h:149
ChunkHeader * pop()
Definition: PerThreadChunk.h:75
ChunkHeader * stealHalfAndPop(PerThreadChunkQueue &victim)
Definition: PerThreadChunk.h:118
void push(value_type val)
Definition: PerThreadChunk.h:367
ChunkHeader * stealAllAndPop(PerThreadChunkQueue &victim)
Definition: PerThreadChunk.h:97
Definition: PerThreadChunk.h:296