forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdeadlock_detector.c
206 lines (178 loc) · 6.94 KB
/
deadlock_detector.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
/*
* deadlock_detector.c Detects potential deadlocks in a running process.
* For Linux, uses BCC, eBPF. See .py file.
*
* Copyright 2017 Facebook, Inc.
* Licensed under the Apache License, Version 2.0 (the "License")
*
* 1-Feb-2016 Kenny Yu Created this.
*/
#include <linux/sched.h>
#include <uapi/linux/ptrace.h>
// Maximum number of mutexes a single thread can hold at once.
// If the number is too big, the unrolled loops wil cause the stack
// to be too big, and the bpf verifier will fail.
#define MAX_HELD_MUTEXES 16
// Info about held mutexes. `mutex` will be 0 if not held.
struct held_mutex_t {
u64 mutex;
u64 stack_id;
};
// List of mutexes that a thread is holding. Whenever we loop over this array,
// we need to force the compiler to unroll the loop, otherwise the bcc verifier
// will fail because the loop will create a backwards edge.
struct thread_to_held_mutex_leaf_t {
struct held_mutex_t held_mutexes[MAX_HELD_MUTEXES];
};
// Map of thread ID -> array of (mutex addresses, stack id)
BPF_HASH(thread_to_held_mutexes, u32, struct thread_to_held_mutex_leaf_t, 2097152);
// Key type for edges. Represents an edge from mutex1 => mutex2.
struct edges_key_t {
u64 mutex1;
u64 mutex2;
};
// Leaf type for edges. Holds information about where each mutex was acquired.
struct edges_leaf_t {
u64 mutex1_stack_id;
u64 mutex2_stack_id;
u32 thread_pid;
char comm[TASK_COMM_LEN];
};
// Represents all edges currently in the mutex wait graph.
BPF_HASH(edges, struct edges_key_t, struct edges_leaf_t, 2097152);
// Info about parent thread when a child thread is created.
struct thread_created_leaf_t {
u64 stack_id;
u32 parent_pid;
char comm[TASK_COMM_LEN];
};
// Map of child thread pid -> info about parent thread.
BPF_HASH(thread_to_parent, u32, struct thread_created_leaf_t);
// Stack traces when threads are created and when mutexes are locked/unlocked.
BPF_STACK_TRACE(stack_traces, 655360);
// The first argument to the user space function we are tracing
// is a pointer to the mutex M held by thread T.
//
// For all mutexes N held by mutexes_held[T]
// add edge N => M (held by T)
// mutexes_held[T].add(M)
int trace_mutex_acquire(struct pt_regs *ctx, void *mutex_addr) {
// Higher 32 bits is process ID, Lower 32 bits is thread ID
u32 pid = bpf_get_current_pid_tgid();
u64 mutex = (u64)mutex_addr;
struct thread_to_held_mutex_leaf_t empty_leaf = {};
struct thread_to_held_mutex_leaf_t *leaf =
thread_to_held_mutexes.lookup_or_init(&pid, &empty_leaf);
if (!leaf) {
bpf_trace_printk(
"could not add thread_to_held_mutex key, thread: %d, mutex: %p\n", pid,
mutex);
return 1; // Could not insert, no more memory
}
// Recursive mutexes lock the same mutex multiple times. We cannot tell if
// the mutex is recursive after the mutex is already created. To avoid noisy
// reports, disallow self edges. Do one pass to check if we are already
// holding the mutex, and if we are, do nothing.
#pragma unroll
for (int i = 0; i < MAX_HELD_MUTEXES; ++i) {
if (leaf->held_mutexes[i].mutex == mutex) {
return 1; // Disallow self edges
}
}
u64 stack_id =
stack_traces.get_stackid(ctx, BPF_F_USER_STACK | BPF_F_REUSE_STACKID);
int added_mutex = 0;
#pragma unroll
for (int i = 0; i < MAX_HELD_MUTEXES; ++i) {
// If this is a free slot, see if we can insert.
if (!leaf->held_mutexes[i].mutex) {
if (!added_mutex) {
leaf->held_mutexes[i].mutex = mutex;
leaf->held_mutexes[i].stack_id = stack_id;
added_mutex = 1;
}
continue; // Nothing to do for a free slot
}
// Add edges from held mutex => current mutex
struct edges_key_t edge_key = {};
edge_key.mutex1 = leaf->held_mutexes[i].mutex;
edge_key.mutex2 = mutex;
struct edges_leaf_t edge_leaf = {};
edge_leaf.mutex1_stack_id = leaf->held_mutexes[i].stack_id;
edge_leaf.mutex2_stack_id = stack_id;
edge_leaf.thread_pid = pid;
bpf_get_current_comm(&edge_leaf.comm, sizeof(edge_leaf.comm));
// Returns non-zero on error
int result = edges.update(&edge_key, &edge_leaf);
if (result) {
bpf_trace_printk("could not add edge key %p, %p, error: %d\n",
edge_key.mutex1, edge_key.mutex2, result);
continue; // Could not insert, no more memory
}
}
// There were no free slots for this mutex.
if (!added_mutex) {
bpf_trace_printk("could not add mutex %p, added_mutex: %d\n", mutex,
added_mutex);
return 1;
}
return 0;
}
// The first argument to the user space function we are tracing
// is a pointer to the mutex M held by thread T.
//
// mutexes_held[T].remove(M)
int trace_mutex_release(struct pt_regs *ctx, void *mutex_addr) {
// Higher 32 bits is process ID, Lower 32 bits is thread ID
u32 pid = bpf_get_current_pid_tgid();
u64 mutex = (u64)mutex_addr;
struct thread_to_held_mutex_leaf_t *leaf =
thread_to_held_mutexes.lookup(&pid);
if (!leaf) {
// If the leaf does not exist for the pid, then it means we either missed
// the acquire event, or we had no more memory and could not add it.
bpf_trace_printk(
"could not find thread_to_held_mutex, thread: %d, mutex: %p\n", pid,
mutex);
return 1;
}
// For older kernels without "Bpf: allow access into map value arrays"
// (https://lkml.org/lkml/2016/8/30/287) the bpf verifier will fail with an
// invalid memory access on `leaf->held_mutexes[i]` below. On newer kernels,
// we can avoid making this extra copy in `value` and use `leaf` directly.
struct thread_to_held_mutex_leaf_t value = {};
bpf_probe_read(&value, sizeof(struct thread_to_held_mutex_leaf_t), leaf);
#pragma unroll
for (int i = 0; i < MAX_HELD_MUTEXES; ++i) {
// Find the current mutex (if it exists), and clear it.
// Note: Can't use `leaf->` in this if condition, see comment above.
if (value.held_mutexes[i].mutex == mutex) {
leaf->held_mutexes[i].mutex = 0;
leaf->held_mutexes[i].stack_id = 0;
}
}
return 0;
}
// Trace return from clone() syscall in the child thread (return value > 0).
int trace_clone(struct pt_regs *ctx, unsigned long flags, void *child_stack,
void *ptid, void *ctid, struct pt_regs *regs) {
u32 child_pid = PT_REGS_RC(ctx);
if (child_pid <= 0) {
return 1;
}
struct thread_created_leaf_t thread_created_leaf = {};
thread_created_leaf.parent_pid = bpf_get_current_pid_tgid();
thread_created_leaf.stack_id =
stack_traces.get_stackid(ctx, BPF_F_USER_STACK | BPF_F_REUSE_STACKID);
bpf_get_current_comm(&thread_created_leaf.comm,
sizeof(thread_created_leaf.comm));
struct thread_created_leaf_t *insert_result =
thread_to_parent.lookup_or_init(&child_pid, &thread_created_leaf);
if (!insert_result) {
bpf_trace_printk(
"could not add thread_created_key, child: %d, parent: %d\n", child_pid,
thread_created_leaf.parent_pid);
return 1; // Could not insert, no more memory
}
return 0;
}