Skip to content

Commit

Permalink
Fix flaky test_worker_time_to_live (#6061)
Browse files Browse the repository at this point in the history
  • Loading branch information
crusaderky authored Apr 6, 2022
1 parent bddc3f2 commit cd2ce86
Showing 1 changed file with 11 additions and 8 deletions.
19 changes: 11 additions & 8 deletions distributed/tests/test_failed_workers.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,20 +464,23 @@ async def test_restart_timeout_on_long_running_task(c, s, a):
async def test_worker_time_to_live(c, s, a, b):
from distributed.scheduler import heartbeat_interval

# worker removal is also controlled by 10 * heartbeat
assert set(s.workers) == {a.address, b.address}
interval = 10 * heartbeat_interval(len(s.workers)) + 0.5

a.periodic_callbacks["heartbeat"].stop()
await asyncio.sleep(0.010)
assert set(s.workers) == {a.address, b.address}
while a.heartbeat_active:
await asyncio.sleep(0.01)

start = time()
while set(s.workers) == {a.address, b.address}:
await asyncio.sleep(interval)
assert time() < start + interval + 0.1

set(s.workers) == {b.address}
await asyncio.sleep(0.01)
assert set(s.workers) == {b.address}

# Worker removal is triggered after 10 * heartbeat
# This is 10 * 0.5s at the moment of writing.
interval = 10 * heartbeat_interval(len(s.workers))
# Currently observing an extra 0.3~0.6s on top of the interval.
# Adding some padding to prevent flakiness.
assert time() - start < interval + 2.0


@gen_cluster()
Expand Down

0 comments on commit cd2ce86

Please sign in to comment.