00001
00024 #ifndef GALOIS_RUNTIME_PERTHREADSTORAGE_H
00025 #define GALOIS_RUNTIME_PERTHREADSTORAGE_H
00026
00027 #include "Galois/config.h"
00028 #include "Galois/Runtime/ll/TID.h"
00029 #include "Galois/Runtime/ll/HWTopo.h"
00030 #include "Galois/Runtime/ThreadPool.h"
00031 #include "Galois/Runtime/ActiveThreads.h"
00032
00033 #include <boost/utility.hpp>
00034
00035 #include <cassert>
00036 #include <vector>
00037
00038 #include GALOIS_CXX11_STD_HEADER(utility)
00039
00040 namespace Galois {
00041 namespace Runtime {
00042
/**
 * Offset-based storage backend.
 *
 * Hands out byte offsets (allocOffset / deallocOffset) and turns an offset
 * into an address, either relative to a caller-supplied base pointer or
 * relative to one of the base pointers recorded in `heads`.
 *
 * Members that are only declared here (initCommon, nextLog2, initPerThread,
 * initPerPackage, allocOffset, deallocOffset, getRemote) are defined out of
 * line; their exact behaviour is not visible from this header.
 */
class PerBackend {
  // NOTE(review): presumably bounds on the log2 size classes used together
  // with nextLog2() -- confirm against the implementation file.
  static const unsigned MAX_SIZE = 30;
  static const unsigned MIN_SIZE = 3;

  unsigned int nextLoc;
  //! Base pointers, indexed by the `id` passed to getLocal(offset, id).
  std::vector<char*> heads;
  //! MAX_SIZE lists of offsets; created empty by the constructor.
  std::vector<std::vector<unsigned> > freeOffsets;

  void initCommon();

  static unsigned nextLog2(unsigned size);

public:
  //! Starts with nextLoc == 0, no heads, and MAX_SIZE empty free lists.
  PerBackend(): nextLoc(0), heads(), freeOffsets(MAX_SIZE) { }

  char* initPerThread();
  char* initPerPackage();

#ifdef GALOIS_USE_EXP
  char* initPerThread_cilk ();
  char* initPerPackage_cilk ();
#endif // GALOIS_USE_EXP

  unsigned allocOffset(const unsigned size);
  void deallocOffset(const unsigned offset, const unsigned size);
  void* getRemote(unsigned thread, unsigned offset);

  //! Address `offset` bytes past the caller-supplied `base`.
  void* getLocal(unsigned offset, char* base) {
    return base + offset;
  }

  //! Address `offset` bytes past heads[id]; `id` is not range-checked.
  void* getLocal(unsigned offset, unsigned id) {
    return heads[id] + offset;
  }
};
00080
00081 extern __thread char* ptsBase;
00082 PerBackend& getPTSBackend();
00083
00084 extern __thread char* ppsBase;
00085 PerBackend& getPPSBackend();
00086
00087 void initPTS();
00088
00089 #ifdef GALOIS_USE_EXP
00090 void initPTS_cilk ();
00091 #endif // GALOIS_USE_EXP
00092
00093
00094 template<typename T>
00095 class PerThreadStorage: private boost::noncopyable {
00096 protected:
00097 unsigned offset;
00098 PerBackend& b;
00099
00100 public:
00101 #if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1310
00102
00103 PerThreadStorage(): b(getPTSBackend()) {
00104
00105
00106 Galois::Runtime::getSystemThreadPool();
00107
00108 offset = b.allocOffset(sizeof(T));
00109 for (unsigned n = 0; n < LL::getMaxThreads(); ++n)
00110 new (b.getRemote(n, offset)) T();
00111 }
00112 #endif
00113
00114 template<typename... Args>
00115 PerThreadStorage(Args&&... args) :b(getPTSBackend()) {
00116
00117
00118 Galois::Runtime::getSystemThreadPool();
00119
00120 offset = b.allocOffset(sizeof(T));
00121 for (unsigned n = 0; n < LL::getMaxThreads(); ++n)
00122 new (b.getRemote(n, offset)) T(std::forward<Args>(args)...);
00123 }
00124
00125 ~PerThreadStorage() {
00126 for (unsigned n = 0; n < LL::getMaxThreads(); ++n)
00127 reinterpret_cast<T*>(b.getRemote(n, offset))->~T();
00128 b.deallocOffset(offset, sizeof(T));
00129 }
00130
00131 T* getLocal() const {
00132 void* ditem = b.getLocal(offset, ptsBase);
00133 return reinterpret_cast<T*>(ditem);
00134 }
00135
00137 T* getLocal(unsigned int thread) const {
00138 void* ditem = b.getLocal(offset, thread);
00139 return reinterpret_cast<T*>(ditem);
00140 }
00141
00142 T* getRemote(unsigned int thread) const {
00143 void* ditem = b.getRemote(thread, offset);
00144 return reinterpret_cast<T*>(ditem);
00145 }
00146
00147 unsigned size() const {
00148 return LL::getMaxThreads();
00149 }
00150 };
00151
00152 template<typename T>
00153 class PerPackageStorage: private boost::noncopyable {
00154 protected:
00155 unsigned offset;
00156 PerBackend& b;
00157
00158 public:
00159 #if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1310
00160
00161 PerPackageStorage(): b(getPPSBackend()) {
00162
00163
00164 Galois::Runtime::getSystemThreadPool();
00165
00166 offset = b.allocOffset(sizeof(T));
00167 for (unsigned n = 0; n < LL::getMaxPackages(); ++n)
00168 new (b.getRemote(LL::getLeaderForPackage(n), offset)) T();
00169 }
00170 #endif
00171
00172 template<typename... Args>
00173 PerPackageStorage(Args&&... args) :b(getPPSBackend()) {
00174
00175
00176 Galois::Runtime::getSystemThreadPool();
00177
00178 offset = b.allocOffset(sizeof(T));
00179 for (unsigned n = 0; n < LL::getMaxPackages(); ++n)
00180 new (b.getRemote(LL::getLeaderForPackage(n), offset)) T(std::forward<Args>(args)...);
00181 }
00182
00183 ~PerPackageStorage() {
00184 for (unsigned n = 0; n < LL::getMaxPackages(); ++n)
00185 reinterpret_cast<T*>(b.getRemote(LL::getLeaderForPackage(n), offset))->~T();
00186 b.deallocOffset(offset, sizeof(T));
00187 }
00188
00189 T* getLocal() const {
00190 void* ditem = b.getLocal(offset, ppsBase);
00191 return reinterpret_cast<T*>(ditem);
00192 }
00193
00195 T* getLocal(unsigned int thread) const {
00196 void* ditem = b.getLocal(offset, thread);
00197 return reinterpret_cast<T*>(ditem);
00198 }
00199
00200 T* getRemote(unsigned int thread) const {
00201 void* ditem = b.getRemote(thread, offset);
00202 return reinterpret_cast<T*>(ditem);
00203 }
00204
00205 T* getRemoteByPkg(unsigned int pkg) const {
00206 void* ditem = b.getRemote(LL::getLeaderForPackage(pkg), offset);
00207 return reinterpret_cast<T*>(ditem);
00208 }
00209
00210 unsigned size() const {
00211 return LL::getMaxThreads();
00212 }
00213 };
00214
00215 }
00216 }
00217
00218 #endif