Skip to content

Commit bcbe007

Browse files
committed
Move signal handler registeration into class
This avoids crashes and deadlocks if we run multiple simultaneous time-based collectors. That's _probably_ inadvisable for accuracy, but we should either support it or hard fail if it's attempted (and I'm attempting to support it for now).
1 parent efe6603 commit bcbe007

File tree

2 files changed

+76
-26
lines changed

2 files changed

+76
-26
lines changed

ext/vernier/vernier.cc

+63-26
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,66 @@ class RetainedCollector : public BaseCollector {
10621062
}
10631063
};
10641064

1065+
class GlobalSignalHandler {
1066+
static LiveSample *live_sample;
1067+
1068+
public:
1069+
static GlobalSignalHandler *get_instance() {
1070+
static GlobalSignalHandler instance;
1071+
return &instance;
1072+
}
1073+
1074+
void install() {
1075+
const std::lock_guard<std::mutex> lock(mutex);
1076+
count++;
1077+
1078+
if (count == 1) setup_signal_handler();
1079+
}
1080+
1081+
void uninstall() {
1082+
const std::lock_guard<std::mutex> lock(mutex);
1083+
count--;
1084+
1085+
if (count == 0) clear_signal_handler();
1086+
}
1087+
1088+
void record_sample(LiveSample &sample, pthread_t pthread_id) {
1089+
const std::lock_guard<std::mutex> lock(mutex);
1090+
1091+
live_sample = &sample;
1092+
if (pthread_kill(pthread_id, SIGPROF)) {
1093+
rb_bug("pthread_kill failed");
1094+
}
1095+
sample.wait();
1096+
live_sample = NULL;
1097+
}
1098+
1099+
private:
1100+
std::mutex mutex;
1101+
int count;
1102+
1103+
static void signal_handler(int sig, siginfo_t* sinfo, void* ucontext) {
1104+
assert(live_sample);
1105+
live_sample->sample_current_thread();
1106+
}
1107+
1108+
void setup_signal_handler() {
1109+
struct sigaction sa;
1110+
sa.sa_sigaction = signal_handler;
1111+
sa.sa_flags = SA_RESTART | SA_SIGINFO;
1112+
sigemptyset(&sa.sa_mask);
1113+
sigaction(SIGPROF, &sa, NULL);
1114+
}
1115+
1116+
void clear_signal_handler() {
1117+
struct sigaction sa;
1118+
sa.sa_handler = SIG_IGN;
1119+
sa.sa_flags = SA_RESTART;
1120+
sigemptyset(&sa.sa_mask);
1121+
sigaction(SIGPROF, &sa, NULL);
1122+
}
1123+
};
1124+
LiveSample *GlobalSignalHandler::live_sample;
10651125

10661126
class TimeCollector : public BaseCollector {
10671127
MarkerTable markers;
@@ -1072,8 +1132,6 @@ class TimeCollector : public BaseCollector {
10721132
atomic_bool running;
10731133
SamplerSemaphore thread_stopped;
10741134

1075-
static LiveSample *live_sample;
1076-
10771135
TimeStamp interval;
10781136

10791137
public:
@@ -1094,11 +1152,6 @@ class TimeCollector : public BaseCollector {
10941152
}
10951153
}
10961154

1097-
static void signal_handler(int sig, siginfo_t* sinfo, void* ucontext) {
1098-
assert(live_sample);
1099-
live_sample->sample_current_thread();
1100-
}
1101-
11021155
VALUE get_markers() {
11031156
VALUE list = rb_ary_new2(this->markers.list.size());
11041157

@@ -1111,7 +1164,6 @@ class TimeCollector : public BaseCollector {
11111164

11121165
void sample_thread_run() {
11131166
LiveSample sample;
1114-
live_sample = &sample;
11151167

11161168
TimeStamp next_sample_schedule = TimeStamp::Now();
11171169
while (running) {
@@ -1121,10 +1173,7 @@ class TimeCollector : public BaseCollector {
11211173
for (auto &thread : threads.list) {
11221174
//if (thread.state == Thread::State::RUNNING) {
11231175
if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend.size() == 0)) {
1124-
if (pthread_kill(thread.pthread_id, SIGPROF)) {
1125-
rb_bug("pthread_kill failed");
1126-
}
1127-
sample.wait();
1176+
GlobalSignalHandler::get_instance()->record_sample(sample, thread.pthread_id);
11281177

11291178
if (sample.sample.gc) {
11301179
// fprintf(stderr, "skipping GC sample\n");
@@ -1151,8 +1200,6 @@ class TimeCollector : public BaseCollector {
11511200
TimeStamp::Sleep(sleep_time);
11521201
}
11531202

1154-
live_sample = NULL;
1155-
11561203
thread_stopped.post();
11571204
}
11581205

@@ -1222,11 +1269,7 @@ class TimeCollector : public BaseCollector {
12221269
return false;
12231270
}
12241271

1225-
struct sigaction sa;
1226-
sa.sa_sigaction = signal_handler;
1227-
sa.sa_flags = SA_RESTART | SA_SIGINFO;
1228-
sigemptyset(&sa.sa_mask);
1229-
sigaction(SIGPROF, &sa, NULL);
1272+
GlobalSignalHandler::get_instance()->install();
12301273

12311274
running = true;
12321275

@@ -1256,11 +1299,7 @@ class TimeCollector : public BaseCollector {
12561299
running = false;
12571300
thread_stopped.wait();
12581301

1259-
struct sigaction sa;
1260-
sa.sa_handler = SIG_IGN;
1261-
sa.sa_flags = SA_RESTART;
1262-
sigemptyset(&sa.sa_mask);
1263-
sigaction(SIGPROF, &sa, NULL);
1302+
GlobalSignalHandler::get_instance()->uninstall();
12641303

12651304
rb_internal_thread_remove_event_hook(thread_hook);
12661305
rb_remove_event_hook(internal_gc_event_cb);
@@ -1318,8 +1357,6 @@ class TimeCollector : public BaseCollector {
13181357
}
13191358
};
13201359

1321-
LiveSample *TimeCollector::live_sample;
1322-
13231360
static void
13241361
collector_mark(void *data) {
13251362
BaseCollector *collector = static_cast<BaseCollector *>(data);

test/test_time_collector.rb

+13
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,19 @@ def test_sequential_threads
110110
assert_valid_result result
111111
end
112112

113+
def test_nested_collections
114+
outer_result = inner_result = nil
115+
outer_result = Vernier.trace do
116+
inner_result = Vernier.trace do
117+
sleep 0.1
118+
end
119+
sleep 0.1
120+
end
121+
122+
assert_in_epsilon 200, inner_result.weights.sum, generous_epsilon
123+
assert_in_epsilon 400, outer_result.weights.sum, generous_epsilon
124+
end
125+
113126
def generous_epsilon
114127
if ENV["GITHUB_ACTIONS"] && ENV["RUNNER_OS"] == "macOS"
115128
# Timing on macOS Actions runners seem extremely unpredictable

0 commit comments

Comments
 (0)