// MCS queuing spin lock -*- C++ -*-
/*
Galois, a framework to exploit amorphous data-parallelism in irregular
programs.

Copyright (C) 2011, The University of Texas at Austin. All rights reserved.
UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS SOFTWARE
AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR ANY
PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY
WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF TRADE.
NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO THE USE OF THE
SOFTWARE OR DOCUMENTATION. Under no circumstances shall University be liable
for incidental, special, indirect, direct or consequential damages or loss of
profits, interruption of business, or related expenses which may arise from use
of Software or Documentation, including but not limited to those resulting from
defects in Software and/or Documentation, or loss or inaccuracy of data of any
kind.
00018 */ 00019 00020 #ifndef _QUEUING_LOCK_H 00021 #define _QUEUING_LOCK_H 00022 00023 #include <cassert> 00024 00025 namespace GaloisRuntime { 00026 00027 template<bool isALock> 00028 class QueuingLock; 00029 template<> 00030 class QueuingLock<true> { 00031 PerCPU_ring<int> flags; 00032 int queuelast; 00033 int myplaceL; 00034 00035 public: 00036 QueuingLock() : flags(0), queuelast(0) { 00037 flags.get(0) = 1; 00038 for (int i = 1; i < flags.size(); ++i) { 00039 flags.get(i) = 0; 00040 } 00041 } 00042 00043 void lock() 00044 { 00045 int myplace = __sync_fetch_and_add(&queuelast, 1); // get ticket 00046 while (!flags.get(myplace % getSystemThreadPool().getActiveThreads())) {}; 00047 //now in CS 00048 myplaceL = myplace; 00049 } 00050 void unlock() { 00051 flags.get(myplaceL % getSystemThreadPool().getActiveThreads()) = 0; 00052 flags.get((myplaceL + 1) % getSystemThreadPool().getActiveThreads()) = 1; 00053 } 00054 }; 00055 00056 // template<> 00057 // class QueuingLock<true> { 00058 // //K42 MCS queuing spinlock 00059 00060 // struct Element { 00061 // Element * nextThread; // guy blocked on me 00062 // union { 00063 // uintptr_t waiter; // 1 while waiting 00064 // Element* tail; // tail of blocked queue 00065 // }; 00066 // }; 00067 00068 // Element _lock; 00069 00070 // bool CompareAndStoreSynced(volatile uintptr_t* ptr, 00071 // uintptr_t oldval, 00072 // uintptr_t newval) { 00073 // bool retval; 00074 // // __sync_synchronized(); 00075 // __asm__ __volatile__ ("# KillMemory" : : : "memory"); 00076 // retval = __sync_bool_compare_and_swap(ptr, oldval, newval); 00077 // __asm__ __volatile__ ("# KillMemory" : : : "memory"); 00078 // // __sync_synchronized(); 00079 // return retval; 00080 // } 00081 00082 // template<typename T> 00083 // T FetchAndNop(volatile T* ptr) { 00084 // return *ptr; 00085 // } 00086 00087 // public: 00088 // QueuingLock() {} 00089 00090 // void lock() 00091 // { 00092 // Element waiterel; 00093 // Element* myel=&waiterel; 00094 // Element* 
el; 00095 00096 // while (1) { 00097 // el=_lock.tail; 00098 // if (el==0) { 00099 // //Lock not held 00100 // if (CompareAndStoreSynced((uintptr_t *)(&_lock.tail), 00101 // 0, (uintptr_t)(&_lock))) { 00102 // // got the lock, return 00103 // return; 00104 // } 00105 // //Try again, something changed 00106 // } else { 00107 // // lock is held 00108 // // queue on lock by first making myself tail 00109 // // and then pointing previous tail at me 00110 00111 // myel->nextThread = 0; 00112 // myel->waiter = 1; 00113 // if (CompareAndStoreSynced((uintptr_t *)(&_lock.tail), 00114 // (uintptr_t)el, (uintptr_t)(myel))) { 00115 // // queued on the lock - now complete chain 00116 // el->nextThread = myel; 00117 // while (FetchAndNop(&(myel->waiter)) != 0) { 00118 // } 00119 // // at this point, I have the lock. lock.tail 00120 // // points to me if there are no other waiters 00121 // // lock.nextThread is not in use 00122 // _lock.nextThread = 0; 00123 // // CompareAndStore "bets" that there are no 00124 // // waiters. If it succeeds, the lock is put into 00125 // // the held, nowaiters state. 
00126 // if (!CompareAndStoreSynced((uintptr_t *)(&_lock.tail), 00127 // (uintptr_t)(myel), (uintptr_t)(&_lock))) { 00128 // // failed to convert lock back to held/no waiters 00129 // // because there is another waiter 00130 // // spin on my nextThread - it may not be updated yet 00131 // while (FetchAndNop((uintptr_t*)&(myel->nextThread)) == 0) { 00132 // } 00133 // // record head of waiter list in lock, thus 00134 // // eliminating further need for myel 00135 // _lock.nextThread = 00136 // (Element *) FetchAndNop((uintptr_t*)&(myel->nextThread)); 00137 // } 00138 // // lock is held by me 00139 // return; 00140 // } 00141 // //Try again, something changed 00142 // } 00143 // } 00144 // } 00145 00146 // void unlock() 00147 // { 00148 // Element* el; 00149 // // CompareAndStore betting there are no waiters 00150 // // if it succeeds, the lock is placed back in the free state 00151 // if (!CompareAndStoreSynced((uintptr_t*)(&_lock.tail), (uintptr_t)(&_lock), 0)) { 00152 // // there is a waiter - but nextThread may not be updated yet 00153 // while ((el=(Element*)FetchAndNop((uintptr_t*)&_lock.nextThread)) == 0) { 00154 // } 00155 // el->waiter = 0; 00156 // // waiter is responsible for completeing the lock state transition 00157 // } 00158 // } 00159 // }; 00160 00161 // template<> 00162 // class QueuingLock<true> { 00163 00164 // struct qnode { 00165 // qnode* next; 00166 // bool locked; 00167 // qnode() : next(0), locked(false) {} 00168 // }; 00169 00170 // PerCPU<qnode> _I; 00171 // qnode* _lock; 00172 00173 // //No good and portable atomic xchg 00174 // qnode* atomic_xchg(qnode** ptr, qnode* val) { 00175 // do { 00176 // qnode* oldval = *ptr; 00177 // if (__sync_bool_compare_and_swap(ptr, oldval, val)) 00178 // return oldval; 00179 // } while (true); 00180 // } 00181 00182 // public: 00183 // QueuingLock() : _I(0), _lock(0) {} 00184 00185 // void lock() { 00186 // qnode* I = &_I.get(); 00187 // I->next = 0; 00188 // qnode* pred = atomic_xchg(&_lock, I); 00189 // 
if (pred) { // queue was empty 00190 // I->locked = true; 00191 // pred->next = I; 00192 // while (I->locked) { //spin 00193 // #if defined(__i386__) || defined(__amd64__) 00194 // asm volatile ( "pause"); 00195 // #endif 00196 // }; 00197 // } 00198 // } 00199 00200 // void unlock() { 00201 // qnode* I = &_I.get(); 00202 // if (!I->next) { // no know successor 00203 // if (__sync_bool_compare_and_swap(&_lock, I, 0)) 00204 // return; 00205 // while (!I->next) { // spin 00206 // #if defined(__i386__) || defined(__amd64__) 00207 // asm volatile ( "pause"); 00208 // #endif 00209 // } 00210 // } 00211 // I->next->locked = false; 00212 // } 00213 // }; 00214 00215 template<> 00216 class QueuingLock<false> { 00217 public: 00218 void lock() const {} 00219 void unlock() const {} 00220 }; 00221 00222 00223 } 00224 00225 #endif