Skip to content

Commit

Permalink
Change timeout kill signals to SIGQUIT from SIGTERM
Browse files Browse the repository at this point in the history
We've been having a lot of timeouts on CI recently.  Coredumps might
be helpful in tracking these down, but when we send `SIGTERM` or
`SIGKILL` we don't get coredumps.  This generally changes the first kill
signal sent on a timeout from `SIGTERM` to `SIGQUIT`, whose default action is to dump core.
  • Loading branch information
staticfloat committed Nov 8, 2022
1 parent a41ae5b commit 8400403
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 7 deletions.
8 changes: 6 additions & 2 deletions stdlib/Distributed/src/managers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -735,8 +735,12 @@ function kill(manager::LocalManager, pid::Int, config::WorkerConfig; exit_timeou

# Check to see if our child exited, and if not, send an actual kill signal
if !process_exited(config.process)
@warn("Failed to gracefully kill worker $(pid), sending SIGTERM")
kill(config.process, Base.SIGTERM)
@warn("Failed to gracefully kill worker $(pid), sending SIGQUIT")

# Send `Base.SIGQUIT` rather than `Base.SIGTERM` as the first kill signal. We often want
# to get a coredump from a worker that times out (e.g. under Base.runtests() on CI), and
# sending `Base.SIGTERM` does not give us one.
kill(config.process, Base.SIGQUIT)

sleep(term_timeout)
if !process_exited(config.process)
Expand Down
2 changes: 1 addition & 1 deletion stdlib/LibGit2/test/libgit2-tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ function challenge_prompt(cmd::Cmd, challenges)
end

if process_running(p)
kill(p)
kill(p, Base.SIGQUIT)
put!(timer, :timeout)
elseif success(p)
put!(timer, :success)
Expand Down
4 changes: 2 additions & 2 deletions stdlib/Profile/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ let cmd = Base.julia_cmd()
t = Timer(120) do t
# should be under 10 seconds, so give it 2 minutes then report failure
println("KILLING BY PROFILE TEST WATCHDOG\n")
kill(p, Base.SIGTERM)
kill(p, Base.SIGQUIT)
sleep(10)
kill(p, Base.SIGKILL)
end
Expand Down Expand Up @@ -210,7 +210,7 @@ if Sys.isbsd() || Sys.islinux()
t = Timer(120) do t
# should be under 10 seconds, so give it 2 minutes then report failure
println("KILLING BY PROFILE TEST WATCHDOG\n")
kill(p, Base.SIGTERM)
kill(p, Base.SIGQUIT)
sleep(10)
kill(p, Base.SIGKILL)
close(iob)
Expand Down
2 changes: 1 addition & 1 deletion stdlib/Sockets/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ function killjob(d)
ccall(:uv_kill, Cint, (Cint, Cint), getpid(), SIGINFO)
sleep(5) # Allow time for profile to collect and print before killing
end
ccall(:uv_kill, Cint, (Cint, Cint), getpid(), Base.SIGTERM)
ccall(:uv_kill, Cint, (Cint, Cint), getpid(), Base.SIGQUIT)
nothing
end
sockets_watchdog_timer = Timer(t -> killjob("KILLING BY SOCKETS TEST WATCHDOG\n"), 600)
Expand Down
2 changes: 1 addition & 1 deletion test/threads.jl
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ cmd = """
sleep(100)
isopen(stdin) || exit()
println(stderr, "ERROR: Killing threads test due to watchdog expiry")
ccall(:uv_kill, Cint, (Cint, Cint), $(getpid()), Base.SIGTERM)
ccall(:uv_kill, Cint, (Cint, Cint), $(getpid()), Base.SIGQUIT)
"""
proc = open(pipeline(`$(Base.julia_cmd()) -e $cmd`; stderr=stderr); write=true)

Expand Down

0 comments on commit 8400403

Please sign in to comment.