Skip to content

Commit 05ef55e

Browse files
committed
taskrunner: fix tests that waited on WaitCh
Now that "dead" tasks may run again, the taskrunner Run() method will not return when the task finishes running, so tests must wait for the task state to be "dead" instead of using the WaitCh, since it won't be closed until the taskrunner is killed.
1 parent 1ad918b commit 05ef55e

File tree

3 files changed

+70
-87
lines changed

3 files changed

+70
-87
lines changed

client/allocrunner/alloc_runner_test.go

+10-2
Original file line numberDiff line numberDiff line change
@@ -1213,7 +1213,11 @@ func TestAllocRunner_MoveAllocDir(t *testing.T) {
12131213
ar.Run()
12141214
defer destroy(ar)
12151215

1216-
require.Equal(t, structs.AllocClientStatusComplete, ar.AllocState().ClientStatus)
1216+
testutil.WaitForResult(func() (bool, error) {
1217+
return ar.AllocState().ClientStatus == structs.AllocClientStatusComplete, nil
1218+
}, func(_ error) {
1219+
t.Fatalf("expected alloc to be complete")
1220+
})
12171221

12181222
// Step 2. Modify its directory
12191223
task := alloc.Job.TaskGroups[0].Tasks[0]
@@ -1241,7 +1245,11 @@ func TestAllocRunner_MoveAllocDir(t *testing.T) {
12411245
ar2.Run()
12421246
defer destroy(ar2)
12431247

1244-
require.Equal(t, structs.AllocClientStatusComplete, ar2.AllocState().ClientStatus)
1248+
testutil.WaitForResult(func() (bool, error) {
1249+
return ar.AllocState().ClientStatus == structs.AllocClientStatusComplete, nil
1250+
}, func(_ error) {
1251+
t.Fatalf("expected alloc to be complete")
1252+
})
12451253

12461254
// Ensure that data from ar was moved to ar2
12471255
dataFile = filepath.Join(ar2.allocDir.SharedDir, "data", "data_file")

client/allocrunner/taskrunner/sids_hook_test.go

+1-6
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ import (
2222
"github.com/hashicorp/nomad/helper/uuid"
2323
"github.com/hashicorp/nomad/nomad/mock"
2424
"github.com/hashicorp/nomad/nomad/structs"
25-
"github.com/hashicorp/nomad/testutil"
2625
"github.com/stretchr/testify/require"
2726
"golang.org/x/sys/unix"
2827
)
@@ -297,11 +296,7 @@ func TestTaskRunner_DeriveSIToken_UnWritableTokenFile(t *testing.T) {
297296
go tr.Run()
298297

299298
// wait for task runner to finish running
300-
select {
301-
case <-tr.WaitCh():
302-
case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
303-
r.Fail("timed out waiting for task runner")
304-
}
299+
testWaitForTaskToDie(t, tr)
305300

306301
// assert task exited un-successfully
307302
finalState := tr.TaskState()

client/allocrunner/taskrunner/task_runner_test.go

+59-79
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ func TestTaskRunner_Restore_Running(t *testing.T) {
335335
defer newTR.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
336336

337337
// Wait for new task runner to exit when the process does
338-
<-newTR.WaitCh()
338+
testWaitForTaskToDie(t, newTR)
339339

340340
// Assert that the process was only started once
341341
started := 0
@@ -603,11 +603,7 @@ func TestTaskRunner_TaskEnv_Interpolated(t *testing.T) {
603603
defer cleanup()
604604

605605
// Wait for task to complete
606-
select {
607-
case <-tr.WaitCh():
608-
case <-time.After(3 * time.Second):
609-
require.Fail("timeout waiting for task to exit")
610-
}
606+
testWaitForTaskToDie(t, tr)
611607

612608
// Get the mock driver plugin
613609
driverPlugin, err := conf.DriverManager.Dispense(mockdriver.PluginID.Name)
@@ -654,7 +650,9 @@ func TestTaskRunner_TaskEnv_Chroot(t *testing.T) {
654650
go tr.Run()
655651
defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
656652

657-
// Wait for task to exit
653+
// Wait for task to exit and kill the task runner to run the stop hooks.
654+
testWaitForTaskToDie(t, tr)
655+
tr.Kill(context.Background(), structs.NewTaskEvent("kill"))
658656
timeout := 15 * time.Second
659657
if testutil.IsCI() {
660658
timeout = 120 * time.Second
@@ -703,7 +701,9 @@ func TestTaskRunner_TaskEnv_Image(t *testing.T) {
703701
tr, conf, cleanup := runTestTaskRunner(t, alloc, task.Name)
704702
defer cleanup()
705703

706-
// Wait for task to exit
704+
// Wait for task to exit and kill task runner to run the stop hooks.
705+
testWaitForTaskToDie(t, tr)
706+
tr.Kill(context.Background(), structs.NewTaskEvent("kill"))
707707
select {
708708
case <-tr.WaitCh():
709709
case <-time.After(15 * time.Second):
@@ -750,7 +750,9 @@ func TestTaskRunner_TaskEnv_None(t *testing.T) {
750750
%s
751751
`, root, taskDir, taskDir, os.Getenv("PATH"))
752752

753-
// Wait for task to exit
753+
// Wait for task to exit and kill the task runner to run the stop hooks.
754+
testWaitForTaskToDie(t, tr)
755+
tr.Kill(context.Background(), structs.NewTaskEvent("kill"))
754756
select {
755757
case <-tr.WaitCh():
756758
case <-time.After(15 * time.Second):
@@ -818,10 +820,7 @@ func TestTaskRunner_DevicePropogation(t *testing.T) {
818820
defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
819821

820822
// Wait for task to complete
821-
select {
822-
case <-tr.WaitCh():
823-
case <-time.After(3 * time.Second):
824-
}
823+
testWaitForTaskToDie(t, tr)
825824

826825
// Get the mock driver plugin
827826
driverPlugin, err := conf.DriverManager.Dispense(mockdriver.PluginID.Name)
@@ -1306,15 +1305,15 @@ func TestTaskRunner_CheckWatcher_Restart(t *testing.T) {
13061305
"Received",
13071306
"Task Setup",
13081307
"Started",
1309-
"Restart Signaled",
1308+
"Restart Running Signaled",
13101309
"Terminated",
13111310
"Restarting",
13121311
"Started",
1313-
"Restart Signaled",
1312+
"Restart Running Signaled",
13141313
"Terminated",
13151314
"Restarting",
13161315
"Started",
1317-
"Restart Signaled",
1316+
"Restart Running Signaled",
13181317
"Terminated",
13191318
"Not Restarting",
13201319
}
@@ -1328,11 +1327,7 @@ func TestTaskRunner_CheckWatcher_Restart(t *testing.T) {
13281327
// Wait until the task exits. Don't simply wait for it to run as it may
13291328
// get restarted and terminated before the test is able to observe it
13301329
// running.
1331-
select {
1332-
case <-tr.WaitCh():
1333-
case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
1334-
require.Fail(t, "timeout")
1335-
}
1330+
testWaitForTaskToDie(t, tr)
13361331

13371332
state := tr.TaskState()
13381333
actualEvents := make([]string, len(state.Events))
@@ -1421,11 +1416,7 @@ func TestTaskRunner_BlockForSIDSToken(t *testing.T) {
14211416

14221417
// task runner should exit now that it has been unblocked and it is a batch
14231418
// job with a zero sleep time
1424-
select {
1425-
case <-tr.WaitCh():
1426-
case <-time.After(15 * time.Second * time.Duration(testutil.TestMultiplier())):
1427-
r.Fail("timed out waiting for batch task to exist")
1428-
}
1419+
testWaitForTaskToDie(t, tr)
14291420

14301421
// assert task exited successfully
14311422
finalState := tr.TaskState()
@@ -1478,11 +1469,7 @@ func TestTaskRunner_DeriveSIToken_Retry(t *testing.T) {
14781469
go tr.Run()
14791470

14801471
// assert task runner blocks on SI token
1481-
select {
1482-
case <-tr.WaitCh():
1483-
case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
1484-
r.Fail("timed out waiting for task runner")
1485-
}
1472+
testWaitForTaskToDie(t, tr)
14861473

14871474
// assert task exited successfully
14881475
finalState := tr.TaskState()
@@ -1598,11 +1585,7 @@ func TestTaskRunner_BlockForVaultToken(t *testing.T) {
15981585

15991586
// TR should exit now that it's unblocked by vault as its a batch job
16001587
// with 0 sleeping.
1601-
select {
1602-
case <-tr.WaitCh():
1603-
case <-time.After(15 * time.Second * time.Duration(testutil.TestMultiplier())):
1604-
require.Fail(t, "timed out waiting for batch task to exit")
1605-
}
1588+
testWaitForTaskToDie(t, tr)
16061589

16071590
// Assert task exited successfully
16081591
finalState := tr.TaskState()
@@ -1615,6 +1598,14 @@ func TestTaskRunner_BlockForVaultToken(t *testing.T) {
16151598
require.NoError(t, err)
16161599
require.Equal(t, token, string(data))
16171600

1601+
// Kill task runner to trigger stop hooks
1602+
tr.Kill(context.Background(), structs.NewTaskEvent("kill"))
1603+
select {
1604+
case <-tr.WaitCh():
1605+
case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
1606+
require.Fail(t, "timed out waiting for task runner to exit")
1607+
}
1608+
16181609
// Check the token was revoked
16191610
testutil.WaitForResult(func() (bool, error) {
16201611
if len(vaultClient.StoppedTokens()) != 1 {
@@ -1661,17 +1652,21 @@ func TestTaskRunner_DeriveToken_Retry(t *testing.T) {
16611652
defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
16621653
go tr.Run()
16631654

1664-
// Wait for TR to exit and check its state
1655+
// Wait for TR to die and check its state
1656+
testWaitForTaskToDie(t, tr)
1657+
1658+
state := tr.TaskState()
1659+
require.Equal(t, structs.TaskStateDead, state.State)
1660+
require.False(t, state.Failed)
1661+
1662+
// Kill task runner to trigger stop hooks
1663+
tr.Kill(context.Background(), structs.NewTaskEvent("kill"))
16651664
select {
16661665
case <-tr.WaitCh():
16671666
case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
16681667
require.Fail(t, "timed out waiting for task runner to exit")
16691668
}
16701669

1671-
state := tr.TaskState()
1672-
require.Equal(t, structs.TaskStateDead, state.State)
1673-
require.False(t, state.Failed)
1674-
16751670
require.Equal(t, 1, count)
16761671

16771672
// Check that the token is on disk
@@ -1771,11 +1766,7 @@ func TestTaskRunner_Download_ChrootExec(t *testing.T) {
17711766
defer cleanup()
17721767

17731768
// Wait for task to run and exit
1774-
select {
1775-
case <-tr.WaitCh():
1776-
case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
1777-
require.Fail(t, "timed out waiting for task runner to exit")
1778-
}
1769+
testWaitForTaskToDie(t, tr)
17791770

17801771
state := tr.TaskState()
17811772
require.Equal(t, structs.TaskStateDead, state.State)
@@ -1816,11 +1807,7 @@ func TestTaskRunner_Download_RawExec(t *testing.T) {
18161807
defer cleanup()
18171808

18181809
// Wait for task to run and exit
1819-
select {
1820-
case <-tr.WaitCh():
1821-
case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
1822-
require.Fail(t, "timed out waiting for task runner to exit")
1823-
}
1810+
testWaitForTaskToDie(t, tr)
18241811

18251812
state := tr.TaskState()
18261813
require.Equal(t, structs.TaskStateDead, state.State)
@@ -1851,11 +1838,7 @@ func TestTaskRunner_Download_List(t *testing.T) {
18511838
defer cleanup()
18521839

18531840
// Wait for task to run and exit
1854-
select {
1855-
case <-tr.WaitCh():
1856-
case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
1857-
require.Fail(t, "timed out waiting for task runner to exit")
1858-
}
1841+
testWaitForTaskToDie(t, tr)
18591842

18601843
state := tr.TaskState()
18611844
require.Equal(t, structs.TaskStateDead, state.State)
@@ -1902,11 +1885,7 @@ func TestTaskRunner_Download_Retries(t *testing.T) {
19021885
tr, _, cleanup := runTestTaskRunner(t, alloc, task.Name)
19031886
defer cleanup()
19041887

1905-
select {
1906-
case <-tr.WaitCh():
1907-
case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
1908-
require.Fail(t, "timed out waiting for task to exit")
1909-
}
1888+
testWaitForTaskToDie(t, tr)
19101889

19111890
state := tr.TaskState()
19121891
require.Equal(t, structs.TaskStateDead, state.State)
@@ -2100,6 +2079,8 @@ func TestTaskRunner_RestartSignalTask_NotRunning(t *testing.T) {
21002079
case <-time.After(1 * time.Second):
21012080
}
21022081

2082+
require.Equal(t, structs.TaskStatePending, tr.TaskState().State)
2083+
21032084
// Send a signal and restart
21042085
err = tr.Signal(structs.NewTaskEvent("don't panic"), "QUIT")
21052086
require.EqualError(t, err, ErrTaskNotRunning.Error())
@@ -2110,12 +2091,7 @@ func TestTaskRunner_RestartSignalTask_NotRunning(t *testing.T) {
21102091

21112092
// Unblock and let it finish
21122093
waitCh <- struct{}{}
2113-
2114-
select {
2115-
case <-tr.WaitCh():
2116-
case <-time.After(10 * time.Second):
2117-
require.Fail(t, "timed out waiting for task to complete")
2118-
}
2094+
testWaitForTaskToDie(t, tr)
21192095

21202096
// Assert the task ran and never restarted
21212097
state := tr.TaskState()
@@ -2153,11 +2129,7 @@ func TestTaskRunner_Run_RecoverableStartError(t *testing.T) {
21532129
tr, _, cleanup := runTestTaskRunner(t, alloc, task.Name)
21542130
defer cleanup()
21552131

2156-
select {
2157-
case <-tr.WaitCh():
2158-
case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
2159-
require.Fail(t, "timed out waiting for task to exit")
2160-
}
2132+
testWaitForTaskToDie(t, tr)
21612133

21622134
state := tr.TaskState()
21632135
require.Equal(t, structs.TaskStateDead, state.State)
@@ -2202,11 +2174,7 @@ func TestTaskRunner_Template_Artifact(t *testing.T) {
22022174
go tr.Run()
22032175

22042176
// Wait for task to run and exit
2205-
select {
2206-
case <-tr.WaitCh():
2207-
case <-time.After(15 * time.Second * time.Duration(testutil.TestMultiplier())):
2208-
require.Fail(t, "timed out waiting for task runner to exit")
2209-
}
2177+
testWaitForTaskToDie(t, tr)
22102178

22112179
state := tr.TaskState()
22122180
require.Equal(t, structs.TaskStateDead, state.State)
@@ -2536,7 +2504,9 @@ func TestTaskRunner_UnregisterConsul_Retries(t *testing.T) {
25362504
tr, err := NewTaskRunner(conf)
25372505
require.NoError(t, err)
25382506
defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
2539-
tr.Run()
2507+
go tr.Run()
2508+
2509+
testWaitForTaskToDie(t, tr)
25402510

25412511
state := tr.TaskState()
25422512
require.Equal(t, structs.TaskStateDead, state.State)
@@ -2562,7 +2532,17 @@ func TestTaskRunner_UnregisterConsul_Retries(t *testing.T) {
25622532
func testWaitForTaskToStart(t *testing.T, tr *TaskRunner) {
25632533
testutil.WaitForResult(func() (bool, error) {
25642534
ts := tr.TaskState()
2565-
return ts.State == structs.TaskStateRunning, fmt.Errorf("%v", ts.State)
2535+
return ts.State == structs.TaskStateRunning, fmt.Errorf("expected task to be runnig, got %v", ts.State)
2536+
}, func(err error) {
2537+
require.NoError(t, err)
2538+
})
2539+
}
2540+
2541+
// testWaitForTaskToDie waits for the task to die or fails the test
2542+
func testWaitForTaskToDie(t *testing.T, tr *TaskRunner) {
2543+
testutil.WaitForResult(func() (bool, error) {
2544+
ts := tr.TaskState()
2545+
return ts.State == structs.TaskStateDead, fmt.Errorf("expected task to be dead, got %v", ts.State)
25662546
}, func(err error) {
25672547
require.NoError(t, err)
25682548
})

0 commit comments

Comments
 (0)