// MCS queuing spin lock -*- C++ -*-
/*
Galois, a framework to exploit amorphous data-parallelism in irregular
programs.

Copyright (C) 2011, The University of Texas at Austin. All rights reserved.
UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS SOFTWARE
AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR ANY
PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY
WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF TRADE.
NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO THE USE OF THE
SOFTWARE OR DOCUMENTATION. Under no circumstances shall University be liable
for incidental, special, indirect, direct or consequential damages or loss of
profits, interruption of business, or related expenses which may arise from use
of Software or Documentation, including but not limited to those resulting from
defects in Software and/or Documentation, or loss or inaccuracy of data of any
kind.
00018 */ 00019 00020 #ifndef _QUEUING_LOCK_H 00021 #define _QUEUING_LOCK_H 00022 00023 #include <cassert> 00024 00025 namespace GaloisRuntime { 00026 00027 template<bool isALock> 00028 class QueuingLock; 00029 template<> 00030 class QueuingLock<true> { 00031 PerCPU_ring<int> flags; 00032 int queuelast; 00033 int myplaceL; 00034 00035 public: 00036 QueuingLock() : flags(0), queuelast(0) { 00037 flags.get(0) = 1; 00038 for (int i = 1; i < flags.size(); ++i) { 00039 flags.get(i) = 0; 00040 } 00041 } 00042 00043 void lock() 00044 { 00045 int myplace = __sync_fetch_and_add(&queuelast, 1); // get ticket 00046 while (!flags.get(myplace % getSystemThreadPool().getActiveThreads())) {}; 00047 //now in CS 00048 myplaceL = myplace; 00049 } 00050 void unlock() { 00051 flags.get(myplaceL % getSystemThreadPool().getActiveThreads()) = 0; 00052 flags.get((myplaceL + 1) % getSystemThreadPool().getActiveThreads()) = 1; 00053 } 00054 }; 00055 00056 // template<> 00057 // class QueuingLock<true> { 00058 // //K42 MCS queuing spinlock 00059 00060 // struct Element { 00061 // Element * nextThread; // guy blocked on me 00062 // union { 00063 // uintptr_t waiter; // 1 while waiting 00064 // Element* tail; // tail of blocked queue 00065 // }; 00066 // }; 00067 00068 // Element _lock; 00069 00070 // bool CompareAndStoreSynced(volatile uintptr_t* ptr, 00071 // uintptr_t oldval, 00072 // uintptr_t newval) { 00073 // bool retval; 00074 // // __sync_synchronized(); 00075 // __asm__ __volatile__ ("# KillMemory" : : : "memory"); 00076 // retval = __sync_bool_compare_and_swap(ptr, oldval, newval); 00077 // __asm__ __volatile__ ("# KillMemory" : : : "memory"); 00078 // // __sync_synchronized(); 00079 // return retval; 00080 // } 00081 00082 // template<typename T> 00083 // T FetchAndNop(volatile T* ptr) { 00084 // return *ptr; 00085 // } 00086 00087 // public: 00088 // QueuingLock() {} 00089 00090 // void lock() 00091 // { 00092 // Element waiterel; 00093 // Element* myel=&waiterel; 00094 // Element* 
el; 00095 00096 // while (1) { 00097 // el=_lock.tail; 00098 // if (el==0) { 00099 // //Lock not held 00100 // if (CompareAndStoreSynced((uintptr_t *)(&_lock.tail), 00101 // 0, (uintptr_t)(&_lock))) { 00102 // // got the lock, return 00103 // return; 00104 // } 00105 // //Try again, something changed 00106 // } else { 00107 // // lock is held 00108 // // queue on lock by first making myself tail 00109 // // and then pointing previous tail at me 00110 00111 // myel->nextThread = 0; 00112 // myel->waiter = 1; 00113 // if (CompareAndStoreSynced((uintptr_t *)(&_lock.tail), 00114 // (uintptr_t)el, (uintptr_t)(myel))) { 00115 // // queued on the lock - now complete chain 00116 // el->nextThread = myel; 00117 // while (FetchAndNop(&(myel->waiter)) != 0) { 00118 // } 00119 // // at this point, I have the lock. lock.tail 00120 // // points to me if there are no other waiters 00121 // // lock.nextThread is not in use 00122 // _lock.nextThread = 0; 00123 // // CompareAndStore "bets" that there are no 00124 // // waiters. If it succeeds, the lock is put into 00125 // // the held, nowaiters state. 
00126 // if (!CompareAndStoreSynced((uintptr_t *)(&_lock.tail), 00127 // (uintptr_t)(myel), (uintptr_t)(&_lock))) { 00128 // // failed to convert lock back to held/no waiters 00129 // // because there is another waiter 00130 // // spin on my nextThread - it may not be updated yet 00131 // while (FetchAndNop((uintptr_t*)&(myel->nextThread)) == 0) { 00132 // } 00133 // // record head of waiter list in lock, thus 00134 // // eliminating further need for myel 00135 // _lock.nextThread = 00136 // (Element *) FetchAndNop((uintptr_t*)&(myel->nextThread)); 00137 // } 00138 // // lock is held by me 00139 // return; 00140 // } 00141 // //Try again, something changed 00142 // } 00143 // } 00144 // } 00145 00146 // void unlock() 00147 // { 00148 // Element* el; 00149 // // CompareAndStore betting there are no waiters 00150 // // if it succeeds, the lock is placed back in the free state 00151 // if (!CompareAndStoreSynced((uintptr_t*)(&_lock.tail), (uintptr_t)(&_lock), 0)) { 00152 // // there is a waiter - but nextThread may not be updated yet 00153 // while ((el=(Element*)FetchAndNop((uintptr_t*)&_lock.nextThread)) == 0) { 00154 // } 00155 // el->waiter = 0; 00156 // // waiter is responsible for completeing the lock state transition 00157 // } 00158 // } 00159 // }; 00160 00161 // template<> 00162 // class QueuingLock<true> { 00163 00164 // struct qnode { 00165 // qnode* next; 00166 // bool locked; 00167 // qnode() : next(0), locked(false) {} 00168 // }; 00169 00170 // PerCPU<qnode> _I; 00171 // qnode* _lock; 00172 00173 // //No good and portable atomic xchg 00174 // qnode* atomic_xchg(qnode** ptr, qnode* val) { 00175 // do { 00176 // qnode* oldval = *ptr; 00177 // if (__sync_bool_compare_and_swap(ptr, oldval, val)) 00178 // return oldval; 00179 // } while (true); 00180 // } 00181 00182 // public: 00183 // QueuingLock() : _I(0), _lock(0) {} 00184 00185 // void lock() { 00186 // qnode* I = &_I.get(); 00187 // I->next = 0; 00188 // qnode* pred = atomic_xchg(&_lock, I); 00189 // 
if (pred) { // queue was empty 00190 // I->locked = true; 00191 // pred->next = I; 00192 // while (I->locked) { //spin 00193 // #if defined(__i386__) || defined(__amd64__) 00194 // asm volatile ( "pause"); 00195 // #endif 00196 // }; 00197 // } 00198 // } 00199 00200 // void unlock() { 00201 // qnode* I = &_I.get(); 00202 // if (!I->next) { // no know successor 00203 // if (__sync_bool_compare_and_swap(&_lock, I, 0)) 00204 // return; 00205 // while (!I->next) { // spin 00206 // #if defined(__i386__) || defined(__amd64__) 00207 // asm volatile ( "pause"); 00208 // #endif 00209 // } 00210 // } 00211 // I->next->locked = false; 00212 // } 00213 // }; 00214 00215 template<> 00216 class QueuingLock<false> { 00217 public: 00218 void lock() const {} 00219 void unlock() const {} 00220 }; 00221 00222 00223 } 00224 00225 #endif