Skip to content

Commit

Permalink
Support customization of termination signal for timeouts
Browse files Browse the repository at this point in the history
We've been having a lot of timeouts on CI recently.  Core dumps might
be helpful in tracking these down, but when we send `SIGTERM` or
`SIGKILL` we don't get core dumps.  This allows customization of which
signal is sent when one of these timeouts fires, so that CI can set it
to `SIGSEGV` or similar to force core dumps for debugging purposes.
  • Loading branch information
staticfloat committed Jun 29, 2022
1 parent b11ccae commit 89321d0
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 6 deletions.
5 changes: 4 additions & 1 deletion stdlib/Distributed/src/managers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -736,7 +736,10 @@ function kill(manager::LocalManager, pid::Int, config::WorkerConfig; exit_timeou
# Check to see if our child exited, and if not, send an actual kill signal
if !process_exited(config.process)
@warn("Failed to gracefully kill worker $(pid), sending SIGTERM")
kill(config.process, Base.SIGTERM)

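# Support overriding the termination signal via `JULIA_TEST_TIMEOUT_SIGNUM`
# (e.g. so CI can send a signal that forces a core dump); defaults to SIGTERM.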
term_signal = parse(Int, get(ENV, "JULIA_TEST_TIMEOUT_SIGNUM", "$(Base.SIGTERM)"))
kill(config.process, term_signal)

sleep(term_timeout)
if !process_exited(config.process)
Expand Down
3 changes: 2 additions & 1 deletion stdlib/LibGit2/test/libgit2.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ function challenge_prompt(cmd::Cmd, challenges; timeout::Integer=60, debug::Bool
end

if process_running(p)
kill(p)
term_signal = parse(Int, get(ENV, "JULIA_TEST_TIMEOUT_SIGNUM", "$(Base.SIGTERM)"))
kill(p, term_signal)
put!(timer, :timeout)
elseif success(p)
put!(timer, :success)
Expand Down
5 changes: 3 additions & 2 deletions stdlib/Profile/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ end
end

# Profile deadlocking in compilation (debuginfo registration)
term_signal = parse(Int, get(ENV, "JULIA_TEST_TIMEOUT_SIGNUM", "$(Base.SIGTERM)"))
let cmd = Base.julia_cmd()
script = """
using Profile
Expand All @@ -181,7 +182,7 @@ let cmd = Base.julia_cmd()
t = Timer(120) do t
# should be under 10 seconds, so give it 2 minutes then report failure
println("KILLING BY PROFILE TEST WATCHDOG\n")
kill(p, Base.SIGTERM)
kill(p, term_signal)
sleep(10)
kill(p, Base.SIGKILL)
end
Expand Down Expand Up @@ -209,7 +210,7 @@ if Sys.isbsd() || Sys.islinux()
t = Timer(120) do t
# should be under 10 seconds, so give it 2 minutes then report failure
println("KILLING BY PROFILE TEST WATCHDOG\n")
kill(p, Base.SIGTERM)
kill(p, term_signal)
sleep(10)
kill(p, Base.SIGKILL)
close(iob)
Expand Down
3 changes: 2 additions & 1 deletion stdlib/Sockets/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ using Base: Experimental
# so that we can attempt to get a "friendly" backtrace if something gets stuck
# (although this'll also terminate any attempted debugging session)
# expected test duration is about 5-10 seconds
term_signal = parse(Int, get(ENV, "JULIA_TEST_TIMEOUT_SIGNUM", "$(Base.SIGTERM)"))
function killjob(d)
Core.print(Core.stderr, d)
if Sys.islinux()
Expand All @@ -18,7 +19,7 @@ function killjob(d)
ccall(:uv_kill, Cint, (Cint, Cint), getpid(), SIGINFO)
sleep(5) # Allow time for profile to collect and print before killing
end
ccall(:uv_kill, Cint, (Cint, Cint), getpid(), Base.SIGTERM)
ccall(:uv_kill, Cint, (Cint, Cint), getpid(), term_signal)
nothing
end
sockets_watchdog_timer = Timer(t -> killjob("KILLING BY SOCKETS TEST WATCHDOG\n"), 600)
Expand Down
3 changes: 2 additions & 1 deletion test/threads.jl
Original file line number Diff line number Diff line change
Expand Up @@ -250,12 +250,13 @@ end

# Spawn another process as a watchdog. If this test fails, it'll unrecoverably
# hang in the event loop. Another process needs to kill it
term_signal = parse(Int, get(ENV, "JULIA_TEST_TIMEOUT_SIGNUM", "$(Base.SIGTERM)"))
cmd = """
@async (Base.wait_readnb(stdin, 1); exit())
sleep(100)
isopen(stdin) || exit()
println(stderr, "ERROR: Killing threads test due to watchdog expiry")
ccall(:uv_kill, Cint, (Cint, Cint), $(getpid()), Base.SIGTERM)
ccall(:uv_kill, Cint, (Cint, Cint), $(getpid()), $(term_signal))
"""
proc = open(pipeline(`$(Base.julia_cmd()) -e $cmd`; stderr=stderr); write=true)

Expand Down

0 comments on commit 89321d0

Please sign in to comment.