Skip to content

Commit

Permalink
Replace a timeout task with timedwait()
Browse files Browse the repository at this point in the history
This should fix an exception seen in CI from the lingering timeout task:
```
 Test Summary:                                | Pass  Total  Time
Deserialization error recovery and include() |   11     11  3.9s
      From worker 4:	Unhandled Task ERROR: EOFError: read end of file
      From worker 4:	Stacktrace:
      From worker 4:	 [1] wait
      From worker 4:	   @ .\asyncevent.jl:159 [inlined]
      From worker 4:	 [2] sleep(sec::Float64)
      From worker 4:	   @ Base .\asyncevent.jl:265
      From worker 4:	 [3] (::DistributedNext.var"#34#37"{DistributedNext.Worker, Float64})()
      From worker 4:	   @ DistributedNext D:\a\DistributedNext.jl\DistributedNext.jl\src\cluster.jl:213
```
  • Loading branch information
JamesWrigley committed Jan 2, 2025
1 parent 468fcc0 commit 0d5aaa3
Showing 1 changed file with 4 additions and 10 deletions.
14 changes: 4 additions & 10 deletions src/cluster.jl
Original file line number Diff line number Diff line change
Expand Up @@ -209,16 +209,10 @@ function wait_for_conn(w)
         timeout = worker_timeout() - (time() - w.ct_time)
         timeout <= 0 && error("peer $(w.id) has not connected to $(myid())")
 
-        T = Threads.@spawn begin
-            sleep($timeout)
-            lock(w.c_state) do
-                notify(w.c_state; all=true)
-            end
-        end
-        errormonitor(T)
-        lock(w.c_state) do
-            wait(w.c_state)
-            (@atomic w.state) === WorkerState_created && error("peer $(w.id) didn't connect to $(myid()) within $timeout seconds")
+        if timedwait(() -> (@atomic w.state) === WorkerState_connected, timeout) === :timed_out
+            # Notify any waiters on the state and throw
+            @lock w.c_state notify(w.c_state)
+            error("peer $(w.id) didn't connect to $(myid()) within $timeout seconds")
         end
     end
     nothing
Expand Down

0 comments on commit 0d5aaa3

Please sign in to comment.