From 01980b3b086de1e34af1b39a80d1363cb1a07d08 Mon Sep 17 00:00:00 2001 From: Kiran Pamnany Date: Thu, 17 Oct 2024 15:35:48 -0400 Subject: [PATCH] stackwalk: fix jl_thread_suspend_and_get_state race (#56047) (#192) There was a missing re-assignment of old = -1; at the end of that loop which means in the ABA case, we accidentally actually acquire the lock on the thread despite not actually having stopped the thread; or in the counter-case, we try to run through this logic with old==-1 on the next iteration, and that isn't valid either (jl_thread_suspend_and_get_state should return failure and the loop will abort too early). Fix #56046 Co-authored-by: Jameson Nash --- src/stackwalk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stackwalk.c b/src/stackwalk.c index ab3f13c8e9746..def7dd9fd6b9f 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -881,8 +881,8 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT } bt_context_t *context = NULL; bt_context_t c; - int16_t old = -1; - while (!jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid) { + int16_t old; + for (old = -1; !jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid; old = -1) { int lockret = jl_lock_stackwalk(); // if this task is already running somewhere, we need to stop the thread it is running on and query its state if (!jl_thread_suspend_and_get_state(old, 0, &c)) {