//! Mutex is a synchronization primitive which enforces atomic access to a shared region of code known as the "critical section".
//! It does this by ensuring that only one thread is in the critical section at any given point in time, blocking the others.
//! Mutex can be statically initialized and is at most `@sizeOf(u64)` large.
//! Use `lock()` or `tryLock()` to enter the critical section and `unlock()` to leave it.
//!
//! Example:
//! ```
//! var m = Mutex{};
//!
//! {
//! m.lock();
//! defer m.unlock();
//! // ... critical section code
//! }
//!
//! if (m.tryLock()) {
//! defer m.unlock();
//! // ... critical section code
//! }
//! ```
const std = @import("../std.zig");
const builtin = @import("builtin");
const Mutex = @This();
const os = std.os;
const assert = std.debug.assert;
const testing = std.testing;
const Atomic = std.atomic.Atomic;
const Futex = std.Thread.Futex;
impl: Impl = .{},
/// Tries to acquire the mutex without blocking the caller's thread.
/// Returns `false` if the calling thread would have to block to acquire it.
/// Otherwise, returns `true` and the caller should `unlock()` the Mutex to release it.
pub fn tryLock(self: *Mutex) bool {
return self.impl.tryLock();
}
/// Acquires the mutex, blocking the caller's thread until it can.
/// It is undefined behavior if the mutex is already held by the caller's thread.
/// Once acquired, call `unlock()` on the Mutex to release it.
pub fn lock(self: *Mutex) void {
self.impl.lock();
}
/// Releases the mutex which was previously acquired with `lock()` or `tryLock()`.
/// It is undefined behavior if the mutex is unlocked from a different thread than the one which locked it.
pub fn unlock(self: *Mutex) void {
self.impl.unlock();
}
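// A minimal usage sketch (illustrative addition, not part of the upstream file):
// a Mutex embedded in a struct serializes access to its fields. The
// `GuardedCounter` type below is a hypothetical example using only the public
// API above.
test "Mutex - guarded field sketch" {
    const GuardedCounter = struct {
        mutex: Mutex = .{},
        count: u64 = 0,

        fn increment(self: *@This()) void {
            self.mutex.lock();
            defer self.mutex.unlock();
            self.count += 1;
        }
    };

    var counter = GuardedCounter{};
    counter.increment();
    try testing.expectEqual(@as(u64, 1), counter.count);
}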
const Impl = if (builtin.single_threaded)
SingleThreadedImpl
else if (builtin.os.tag == .windows)
WindowsImpl
else if (builtin.os.tag.isDarwin())
DarwinImpl
else
FutexImpl;
const SingleThreadedImpl = struct {
is_locked: bool = false,
fn tryLock(self: *Impl) bool {
if (self.is_locked) return false;
self.is_locked = true;
return true;
}
fn lock(self: *Impl) void {
if (!self.tryLock()) {
unreachable; // deadlock detected
}
}
fn unlock(self: *Impl) void {
assert(self.is_locked);
self.is_locked = false;
}
};
// SRWLOCK on Windows is almost always faster than a Futex solution.
// It also implements an efficient Condition with requeue support for us.
const WindowsImpl = struct {
srwlock: os.windows.SRWLOCK = .{},
fn tryLock(self: *Impl) bool {
return os.windows.kernel32.TryAcquireSRWLockExclusive(&self.srwlock) != os.windows.FALSE;
}
fn lock(self: *Impl) void {
os.windows.kernel32.AcquireSRWLockExclusive(&self.srwlock);
}
fn unlock(self: *Impl) void {
os.windows.kernel32.ReleaseSRWLockExclusive(&self.srwlock);
}
};
// os_unfair_lock on darwin supports priority inheritance and is generally faster than Futex solutions.
const DarwinImpl = struct {
oul: os.darwin.os_unfair_lock = .{},
fn tryLock(self: *Impl) bool {
return os.darwin.os_unfair_lock_trylock(&self.oul);
}
fn lock(self: *Impl) void {
os.darwin.os_unfair_lock_lock(&self.oul);
}
fn unlock(self: *Impl) void {
os.darwin.os_unfair_lock_unlock(&self.oul);
}
};
const FutexImpl = struct {
state: Atomic(u32) = Atomic(u32).init(unlocked),
const unlocked = 0b00;
const locked = 0b01;
const contended = 0b11; // must contain the `locked` bit for x86 optimization below
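    // Worked example of why `contended` must contain the `locked` bit: the
    // x86 fast path below acquires by setting only the lowest bit. With the
    // state at 0b11 that bit is already 1, so the bit-test-and-set fails as
    // it should; an encoding like 0b10 would let it falsely acquire.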
fn tryLock(self: *Impl) bool {
        // Lock with the strong compareAndSwap instead of tryCompareAndSwap: the weak CAS may fail spuriously,
        // and tryLock() must not report failure when the mutex was actually free.
return self.lockFast("compareAndSwap");
}
fn lock(self: *Impl) void {
        // Lock with tryCompareAndSwap instead of compareAndSwap: the weak CAS inlines better on LL/SC archs
        // like ARM, and a spurious failure just falls through to lockSlow() which retries anyway.
if (!self.lockFast("tryCompareAndSwap")) {
self.lockSlow();
}
}
inline fn lockFast(self: *Impl, comptime casFn: []const u8) bool {
// On x86, use `lock bts` instead of `lock cmpxchg` as:
// - they both seem to mark the cache-line as modified regardless: https://stackoverflow.com/a/63350048
        // - `lock bts` is a smaller instruction, which makes it better for inlining
if (comptime builtin.target.cpu.arch.isX86()) {
const locked_bit = @ctz(@as(u32, locked));
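            // `bitSet` atomically sets the bit and returns its previous value:
            // 0 means the locked bit was clear and this thread just acquired the mutex.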
return self.state.bitSet(locked_bit, .Acquire) == 0;
}
// Acquire barrier ensures grabbing the lock happens before the critical section
// and that the previous lock holder's critical section happens before we grab the lock.
return @field(self.state, casFn)(unlocked, locked, .Acquire, .Monotonic) == null;
}
fn lockSlow(self: *Impl) void {
@setCold(true);
// Avoid doing an atomic swap below if we already know the state is contended.
// An atomic swap unconditionally stores which marks the cache-line as modified unnecessarily.
if (self.state.load(.Monotonic) == contended) {
Futex.wait(&self.state, contended);
}
// Try to acquire the lock while also telling the existing lock holder that there are threads waiting.
//
// Once we sleep on the Futex, we must acquire the mutex using `contended` rather than `locked`.
// If not, threads sleeping on the Futex wouldn't see the state change in unlock and potentially deadlock.
// The downside is that the last mutex unlocker will see `contended` and do an unnecessary Futex wake
// but this is better than having to wake all waiting threads on mutex unlock.
//
// Acquire barrier ensures grabbing the lock happens before the critical section
// and that the previous lock holder's critical section happens before we grab the lock.
while (self.state.swap(contended, .Acquire) != unlocked) {
Futex.wait(&self.state, contended);
}
}
fn unlock(self: *Impl) void {
// Unlock the mutex and wake up a waiting thread if any.
//
// A waiting thread will acquire with `contended` instead of `locked`
// which ensures that it wakes up another thread on the next unlock().
//
// Release barrier ensures the critical section happens before we let go of the lock
// and that our critical section happens before the next lock holder grabs the lock.
const state = self.state.swap(unlocked, .Release);
assert(state != unlocked);
if (state == contended) {
Futex.wake(&self.state, 1);
}
}
};
test "Mutex - smoke test" {
var mutex = Mutex{};
try testing.expect(mutex.tryLock());
try testing.expect(!mutex.tryLock());
mutex.unlock();
mutex.lock();
try testing.expect(!mutex.tryLock());
mutex.unlock();
}
// A counter which is incremented without atomic instructions
const NonAtomicCounter = struct {
    // A counter stored directly as u128 could be lowered to xmm instructions on x86,
    // which may be atomic; split it into two u64 halves instead.
value: [2]u64 = [_]u64{ 0, 0 },
fn get(self: NonAtomicCounter) u128 {
return @bitCast(u128, self.value);
}
fn inc(self: *NonAtomicCounter) void {
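        // Write each half through a volatile pointer so the compiler cannot
        // fuse the two stores into a single 128-bit (possibly atomic) write.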
for (@bitCast([2]u64, self.get() + 1)) |v, i| {
@ptrCast(*volatile u64, &self.value[i]).* = v;
}
}
};
test "Mutex - many uncontended" {
// This test requires spawning threads.
if (builtin.single_threaded) {
return error.SkipZigTest;
}
const num_threads = 4;
const num_increments = 1000;
const Runner = struct {
mutex: Mutex = .{},
thread: std.Thread = undefined,
counter: NonAtomicCounter = .{},
fn run(self: *@This()) void {
var i: usize = num_increments;
while (i > 0) : (i -= 1) {
self.mutex.lock();
defer self.mutex.unlock();
self.counter.inc();
}
}
};
var runners = [_]Runner{.{}} ** num_threads;
for (runners) |*r| r.thread = try std.Thread.spawn(.{}, Runner.run, .{r});
for (runners) |r| r.thread.join();
for (runners) |r| try testing.expectEqual(r.counter.get(), num_increments);
}
test "Mutex - many contended" {
// This test requires spawning threads.
if (builtin.single_threaded) {
return error.SkipZigTest;
}
const num_threads = 4;
const num_increments = 1000;
const Runner = struct {
mutex: Mutex = .{},
counter: NonAtomicCounter = .{},
fn run(self: *@This()) void {
var i: usize = num_increments;
while (i > 0) : (i -= 1) {
// Occasionally hint to let another thread run.
defer if (i % 100 == 0) std.Thread.yield() catch {};
self.mutex.lock();
defer self.mutex.unlock();
self.counter.inc();
}
}
};
var runner = Runner{};
var threads: [num_threads]std.Thread = undefined;
for (threads) |*t| t.* = try std.Thread.spawn(.{}, Runner.run, .{&runner});
for (threads) |t| t.join();
try testing.expectEqual(runner.counter.get(), num_increments * num_threads);
}