Realm
A distributed, event-based tasking library
Loading...
Searching...
No Matches
mutex.h
Go to the documentation of this file.
1/*
2 * Copyright 2025 Stanford University, NVIDIA Corporation
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18// mutexes, condition variables, etc.
19
20#ifndef REALM_MUTEX_H
21#define REALM_MUTEX_H
22
23#include "realm/realm_config.h"
24
25#include "realm/utils.h"
26#include "realm/atomics.h"
27
28#include <stdint.h>
29
30// if enabled, we count how many times a doorbell is passed over and
31// print warnings if it seems like a lot - this has known false positives
32// (e.g. an UnfairCondVar used to wake up any one of a pool of sleeping
33// threads or an UnfairMutex under heavy contention), so do not enable
34// by default
35// TODO: control with command-line switch?
36// define REALM_ENABLE_STARVATION_CHECKS
37
38namespace Realm {
39
40 // Realm mutexes come in a few different flavors:
41 // a) UnfairMutex - provides mutual exclusion, using doorbells to pass the
42 // mutex on to a waiting thread on release, favoring the most recent
43 // waiters who are still spinning in order to avoid kernel-space
44 // operations as much as possible. This is wildly unfair, but if your
45 // need is just to protect a short critical section with minimal
46 // overhead, this should provide the best throughput
47 // b) FIFOMutex - also uses doorbells, but favors the oldest waiter, even if
48 // they've gone to sleep (and therefore need kernel assistance to wake),
49 // in order to preserve fairness
50 // c) KernelMutex - directly uses the system library/kernel mutex support
51 // (e.g. pthread_mutex), and is at the mercy of whatever scheduling
52 // algorithms those use (which are almost certainly not aware of which
53 // Realm threads have dedicated cores) - use only as a last resort
54
55 class UnfairMutex;
56 class FIFOMutex;
57 class KernelMutex;
58
59 // each has a matching CondVar - we don't set a default, use
60 // (whatever)Mutex::CondVar to get the right one
61 class UnfairCondVar;
62 class FIFOCondVar;
63 class KernelCondVar;
64
65#ifdef REALM_DEFAULT_MUTEX
66 typedef REALM_DEFAULT_MUTEX Mutex;
67#else
69#endif
70
71 // a mutual exclusion checker does not guarantee mutual exclusion, but
72 // instead tests dynamically whether mutual exclusion has been achieved
73 // through other means
74 // unlike real mutexes where "lock" has memory acquire semantics and
75 // "unlock" has memory release semantics, the checker tries to impose
76 // as little memory ordering as possible
77 // finally, although the MutexChecker is compatible with the templated
78 // AutoLock, it's encouraged to use 'MutexChecker::CheckedScope' for
79 // the better diagnostic info
81 public:
82 MutexChecker(const char *_name, void *_object = 0, int _limit = 1);
84
85 // no associated CondVar
86
88 public:
89 CheckedScope(MutexChecker &_checker, const char *_name, void *_object = 0);
91
92 protected:
93 friend class MutexChecker;
94
96 const char *name;
97 void *object;
98 };
99
100 void lock(CheckedScope *cs = 0);
101 bool trylock(CheckedScope *cs = 0);
102 void unlock(CheckedScope *cs = 0);
103
104 protected:
105 void lock_fail(int actval, CheckedScope *cs);
106 void unlock_fail(int actval, CheckedScope *cs);
107
108 const char *name;
109 void *object;
110 int limit;
112 };
113
114 // a doorbell is used to notify a thread that whatever condition it has been
115 // waiting for has been satisfied - all operations are lock-free in user space,
116 // but wait and notify may cross into kernel space if the doorbell owner goes
117 // (or tries to go) to sleep
118 // the notification process allows 31 bits worth of information to be
119 // delivered as well
121 protected:
122 // not constructed directly - use get_thread_doorbell
125
126 public:
127 // gets the doorbell for the current thread (you can't lookup anybody else's)
129
130 // each doorbell can be separately configured for how long it should spin
131 // before going to sleep
132 static const long long DOORBELL_SLEEP_IMMEDIATE = 0;
133 static const long long DOORBELL_SLEEP_NEVER = -1;
134 static const long long DOORBELL_SLEEP_DEFAULT = 10000; // 10 us
135
136 void set_sleep_timeout(long long timeout_in_ns);
137
138 // waiter interface
139 void prepare();
140 void cancel();
141 bool satisfied(); // indicates satisfaction - wait must be called but is guaranteed to
142 // be immediate
143 uint32_t wait();
144
145 // waker interface
146 bool is_sleeping() const;
147 void notify(uint32_t data);
148 // prewake does not ring the doorbell, but it does pre-wake the waiter
149 // if needed so that some latency of kernel scheduling can be hidden
150 void prewake();
152
153 protected:
154 // "slow" versions of the above when the fast case isn't met
155 uint32_t wait_slow();
158
159 static const uint32_t STATE_IDLE = 0;
160 static const uint32_t STATE_PENDING_AWAKE = 2;
161 static const uint32_t STATE_PENDING_ASLEEP = 4;
162 static const uint32_t STATE_PENDING_PREWAKE = 6;
163 static const uint32_t STATE_SATISFIED_BIT = 1; // LSB
164 static const unsigned STATE_SATISFIED_VAL_SHIFT = 1;
166
167 long long sleep_timeout;
169
170 // useful for debugging
171 uintptr_t owner_tid;
172#ifdef REALM_ENABLE_STARVATION_CHECKS
173 // lower bound on number of times this doorbell has been passed over
174 int starvation_count;
175
176 static atomic<int> starvation_limit;
177 void increase_starvation_count(int to_add, void *db_list);
178#endif
179
180 friend class DoorbellList;
182 };
183
184 // a list of doorbells that are waiting to be run - like an up/down
185 // semaphore, a doorbell list can go "negative" - a pop that preceeds
186 // a push will immediately satisfy the push rather than delaying the popper
188 public:
191
192 // returns true if the doorbell is added, false if it consumed an available
193 // "doorbell to be named later" token
194 // adding doorbells is always lock-free and can be called concurrently
196
197 // extracts the a doorbell from the list and returns it, or increments
198 // the "extra notifies" count (if 'allow_extra') and returns nullptr
199 // this is lock-free w.r.t. adders, but only one thread may be performing
200 // extraction/notification at a time
201 // comes in two flavors, so choose wisely:
202 // a) oldest-first - perhaps more "fair" but more expensive
203 // b) newest-first - easier, and maybe better for cache locality?
204 Doorbell *extract_oldest(bool prefer_spinning, bool allow_extra);
205 Doorbell *extract_newest(bool prefer_spinning, bool allow_extra);
206
207 // helper routines to extract and notify exactly 'count' doorbells -
208 // TODO: make slightly cheaper than repeated calls to extract
209 void notify_oldest(unsigned count, bool prefer_spinning);
210 void notify_newest(unsigned count, bool prefer_spinning);
211
212 protected:
213 // this field is either:
214 // a) the first Doorbell in the waiting stack, or
215 // b) 2*extra_notifies - 1, if notifies are waiting for matching doorbells
217
218#ifdef DEBUG_REALM
219 MutexChecker mutex_check;
220#endif
221 };
222
224 public:
226
228
229 // these are all inlined and very fast in the non-contended case
230 void lock();
231 bool trylock();
232 void unlock();
233
234 protected:
235 // slower lock/unlock paths when contention exists
236 void lock_slow();
238
239 friend class UnfairCondVar;
240
241 // internal state consists of an LSB indicating a lock and the rest of
242 // the bits indicating how many waiters exist - the actual waiters are
243 // (or will eventually be) in the doorbell list
246 };
247
249 public:
251
253
254 // these are all inlined and very fast in the non-contended case
255 void lock();
256 bool trylock();
257 void unlock();
258
259 protected:
260 // slower lock/unlock paths when contention exists
261 void lock_slow();
263
264 friend class FIFOCondVar;
265
266 // internal state consists of an LSB indicating a lock and the rest of
267 // the bits indicating how many waiters exist - the actual waiters are
268 // (or will eventually be) in the doorbell list
271 };
272
274 public:
277
279
280 // these call into libpthread (or the equivalent), so YMMV
281 void lock(void);
282 bool trylock(void);
283 void unlock(void);
284
285 protected:
286 friend class KernelCondVar;
287
288 // the actual implementation of the mutex is hidden
289 // but if we don't define it here, we run afoul of
290 // rules type-punning, so use macros to let mutex.cc's inclusion
291 // of this file behave a little differently
292 alignas(8) uint64_t placeholder[8]; // 64 bytes, at least 8 byte aligned
293 };
294
295 // a delegating mutex allows lock-free mutual exclusion between threads that
296 // want to work on a shared resource - a lock attempt either succeeds
297 // immediately or delegates the intended work "units" to some other thread
298 // that already holds the mutex
300 public:
302
303 // attempts to enter the mutual exclusion zone in order to do 'work_units'
304 // units of work - a non-zero return indicates success with a potentially-
305 // different number of units of work that need to be performed, while a
306 // return of zero indicates contention but a guarantee that some other
307 // thread will perform the work instead
308 // a small amount of temporary state must be preserved by the caller across
309 // the call to attempt_enter and any/all calls to attempt_exit (this is
310 // not stored in the object to avoid false sharing)
311 uint64_t attempt_enter(uint64_t work_units, uint64_t &tstate);
312
313 // attempts to exit the mutual exclusion zone once all known work has been
314 // performed - a zero return indicates success, while a non-zero return
315 // indicates the caller is still in the mutex and has been given more work
316 // to do
317 uint64_t attempt_exit(uint64_t &tstate);
318
319 protected:
320 // LSB indicates some thread is in the mutex, while the rest of the bits
321 // accumulate delegated work
323 };
324
326 public:
328
330
331 // these require that you hold the underlying mutex when you call
332 void signal();
333 void broadcast();
334 void wait();
335
336 protected:
337 // no need for atomics here - we're protected by the mutex
338 unsigned num_waiters;
340 };
341
343 public:
345
347
348 // these require that you hold the underlying mutex when you call
349 void signal();
350 void broadcast();
351 void wait();
352
353 protected:
354 // no need for atomics here - we're protected by the mutex
355 unsigned num_waiters;
357 };
358
360 public:
363
365
366 // these require that you hold the underlying mutex when you call
367 void signal(void);
368 void broadcast(void);
369 void wait(void);
370
371 // wait with a timeout - returns true if awakened by a signal and
372 // false if the timeout expires first
373 bool timedwait(long long max_nsec);
374
375 protected:
376 // the actual implementation of the condition variable is hidden
377 // but if we don't define it here, we run afoul of
378 // rules type-punning, so use macros to let mutex.cc's inclusion
379 // of this file behave a little differently
380 alignas(8) uint64_t placeholder[8]; // 64 bytes, at least 8 byte aligned
381 };
382
383 template <typename LT = Mutex>
384 class AutoLock {
385 public:
386 AutoLock(LT &_mutex);
388
389 void release();
390 void reacquire();
391
392 protected:
393 LT &mutex;
394 bool held;
395 };
396
397 // a reader/writer lock allows one or more readers OR one writer at a time
398 class RWLock {
399 public:
402
403 // should never be copied (or moved)
404 RWLock(const RWLock &) = delete;
405 RWLock(RWLock &&) = delete;
406 RWLock &operator=(const RWLock &) = delete;
407 RWLock &operator=(RWLock &&) = delete;
408
409 void wrlock();
410 bool trywrlock();
411 void rdlock();
412 bool tryrdlock();
413 void unlock();
414
415 // to allow use with AutoLock<T>, the RWLock provides "aspects" that
416 // look like normal locks
417 struct Writer {
418 Writer(RWLock &_rwlock)
419 : rwlock(_rwlock)
420 {}
421 void lock() { rwlock.wrlock(); }
423 void unlock() { rwlock.unlock(); }
424
425 protected:
427 };
428
429 struct Reader {
430 Reader(RWLock &_rwlock)
431 : rwlock(_rwlock)
432 {}
433 void lock() { rwlock.rdlock(); }
435 void unlock() { rwlock.unlock(); }
436
437 protected:
439 };
440
443
444 // allow free coercion to these aspects
445 operator Writer &() { return writer; }
446 operator Reader &() { return reader; }
447
448 protected:
451 // the actual implementation of the rwlock is hidden
452 // but if we don't define it here, we run afoul of
453 // rules type-punning, so use macros to let mutex.cc's inclusion
454 // of this file behave a little differently
455#ifdef REALM_ON_MACOS
456 // apparently pthread_rwlock_t's are LARGE on macOS
457 alignas(8) uint64_t placeholder[32]; // 256 bytes, at least 8 byte aligned
458#else
459 alignas(8) uint64_t placeholder[8]; // 64 bytes, at least 8 byte aligned
460#endif
461 };
462}; // namespace Realm
463
464#include "realm/mutex.inl"
465
466#endif
Definition mutex.h:384
AutoLock(LT &_mutex)
bool held
Definition mutex.h:394
LT & mutex
Definition mutex.h:393
Definition mutex.h:299
atomic< uint64_t > state
Definition mutex.h:322
uint64_t attempt_enter(uint64_t work_units, uint64_t &tstate)
uint64_t attempt_exit(uint64_t &tstate)
Definition mutex.h:187
void notify_newest(unsigned count, bool prefer_spinning)
Doorbell * extract_newest(bool prefer_spinning, bool allow_extra)
Doorbell * extract_oldest(bool prefer_spinning, bool allow_extra)
void notify_oldest(unsigned count, bool prefer_spinning)
bool add_doorbell(Doorbell *db)
atomic< uintptr_t > head_or_count
Definition mutex.h:216
Definition mutex.h:120
bool is_sleeping() const
atomic< uint32_t > state
Definition mutex.h:165
Doorbell * next_doorbell
Definition mutex.h:181
uintptr_t owner_tid
Definition mutex.h:171
void set_sleep_timeout(long long timeout_in_ns)
long long sleep_timeout
Definition mutex.h:167
long long next_sleep_time
Definition mutex.h:168
uint32_t wait_slow()
void notify(uint32_t data)
uint32_t wait()
static Doorbell * get_thread_doorbell()
void cancel_prewake()
Definition mutex.h:342
DoorbellList db_list
Definition mutex.h:356
FIFOMutex & mutex
Definition mutex.h:346
FIFOCondVar(FIFOMutex &_mutex)
unsigned num_waiters
Definition mutex.h:355
Definition mutex.h:248
DoorbellList db_list
Definition mutex.h:270
atomic< uint32_t > state
Definition mutex.h:269
FIFOCondVar CondVar
Definition mutex.h:252
Definition mutex.h:359
bool timedwait(long long max_nsec)
KernelMutex & mutex
Definition mutex.h:364
KernelCondVar(KernelMutex &_mutex)
Definition mutex.h:273
bool trylock(void)
KernelCondVar CondVar
Definition mutex.h:278
MutexChecker & checker
Definition mutex.h:95
const char * name
Definition mutex.h:96
CheckedScope(MutexChecker &_checker, const char *_name, void *_object=0)
void * object
Definition mutex.h:97
Definition mutex.h:80
atomic< int > cur_count
Definition mutex.h:111
int limit
Definition mutex.h:110
void lock_fail(int actval, CheckedScope *cs)
const char * name
Definition mutex.h:108
bool trylock(CheckedScope *cs=0)
void unlock_fail(int actval, CheckedScope *cs)
void * object
Definition mutex.h:109
void lock(CheckedScope *cs=0)
MutexChecker(const char *_name, void *_object=0, int _limit=1)
void unlock(CheckedScope *cs=0)
Definition mutex.h:398
RWLock & operator=(const RWLock &)=delete
RWLock(const RWLock &)=delete
bool trywrlock()
AutoLock< Reader > AutoReaderLock
Definition mutex.h:442
Writer writer
Definition mutex.h:449
AutoLock< Writer > AutoWriterLock
Definition mutex.h:441
RWLock & operator=(RWLock &&)=delete
RWLock(RWLock &&)=delete
bool tryrdlock()
uint64_t placeholder[8]
Definition mutex.h:459
Reader reader
Definition mutex.h:450
Definition mutex.h:325
DoorbellList db_list
Definition mutex.h:339
UnfairCondVar(UnfairMutex &_mutex)
UnfairMutex & mutex
Definition mutex.h:329
unsigned num_waiters
Definition mutex.h:338
Definition mutex.h:223
UnfairCondVar CondVar
Definition mutex.h:227
atomic< uint32_t > state
Definition mutex.h:244
DoorbellList db_list
Definition mutex.h:245
Definition atomics.h:31
Definition utils.h:368
#define REALM_INTERNAL_API_EXTERNAL_LINKAGE
Definition compiler_support.h:218
Definition activemsg.h:38
UnfairMutex Mutex
Definition mutex.h:68
Definition mutex.h:429
RWLock & rwlock
Definition mutex.h:438
void lock()
Definition mutex.h:433
void trylock()
Definition mutex.h:434
Reader(RWLock &_rwlock)
Definition mutex.h:430
void unlock()
Definition mutex.h:435
Definition mutex.h:417
RWLock & rwlock
Definition mutex.h:426
void unlock()
Definition mutex.h:423
void trylock()
Definition mutex.h:422
Writer(RWLock &_rwlock)
Definition mutex.h:418
void lock()
Definition mutex.h:421