Skip to content

Commit d4642a5

Browse files
fix merge conflict (#13321)
Co-authored-by: Derek Strickland <[email protected]>
1 parent f52d570 commit d4642a5

File tree

5 files changed

+175
-16
lines changed

5 files changed

+175
-16
lines changed

.changelog/13055.txt

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
```release-note:bug
2+
lifecycle: fixed a bug where sidecar tasks were not being stopped last
3+
```

client/allocrunner/alloc_runner.go

+35-11
Original file line numberDiff line numberDiff line change
@@ -511,21 +511,21 @@ func (ar *allocRunner) handleTaskStateUpdates() {
511511
states := make(map[string]*structs.TaskState, trNum)
512512

513513
for name, tr := range ar.tasks {
514-
state := tr.TaskState()
515-
states[name] = state
514+
taskState := tr.TaskState()
515+
states[name] = taskState
516516

517517
if tr.IsPoststopTask() {
518518
continue
519519
}
520520

521521
// Capture live task runners in case we need to kill them
522-
if state.State != structs.TaskStateDead {
522+
if taskState.State != structs.TaskStateDead {
523523
liveRunners = append(liveRunners, tr)
524524
continue
525525
}
526526

527527
// Task is dead, determine if other tasks should be killed
528-
if state.Failed {
528+
if taskState.Failed {
529529
// Only set failed event if no event has been
530530
// set yet to give dead leaders priority.
531531
if killEvent == nil {
@@ -612,16 +612,16 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState {
612612
ar.logger.Warn("error stopping leader task", "error", err, "task_name", name)
613613
}
614614

615-
state := tr.TaskState()
616-
states[name] = state
615+
taskState := tr.TaskState()
616+
states[name] = taskState
617617
break
618618
}
619619

620-
// Kill the rest concurrently
620+
// Kill the rest non-sidecar or poststop tasks concurrently
621621
wg := sync.WaitGroup{}
622622
for name, tr := range ar.tasks {
623-
// Filter out poststop tasks so they run after all the other tasks are killed
624-
if tr.IsLeader() || tr.IsPoststopTask() {
623+
// Filter out poststop and sidecar tasks so that they stop after all the other tasks are killed
624+
if tr.IsLeader() || tr.IsPoststopTask() || tr.IsSidecarTask() {
625625
continue
626626
}
627627

@@ -635,9 +635,33 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState {
635635
ar.logger.Warn("error stopping task", "error", err, "task_name", name)
636636
}
637637

638-
state := tr.TaskState()
638+
taskState := tr.TaskState()
639+
mu.Lock()
640+
states[name] = taskState
641+
mu.Unlock()
642+
}(name, tr)
643+
}
644+
wg.Wait()
645+
646+
// Kill the sidecar tasks last.
647+
for name, tr := range ar.tasks {
648+
if !tr.IsSidecarTask() || tr.IsLeader() || tr.IsPoststopTask() {
649+
continue
650+
}
651+
652+
wg.Add(1)
653+
go func(name string, tr *taskrunner.TaskRunner) {
654+
defer wg.Done()
655+
taskEvent := structs.NewTaskEvent(structs.TaskKilling)
656+
taskEvent.SetKillTimeout(tr.Task().KillTimeout)
657+
err := tr.Kill(context.TODO(), taskEvent)
658+
if err != nil && err != taskrunner.ErrTaskNotRunning {
659+
ar.logger.Warn("error stopping sidecar task", "error", err, "task_name", name)
660+
}
661+
662+
taskState := tr.TaskState()
639663
mu.Lock()
640-
states[name] = state
664+
states[name] = taskState
641665
mu.Unlock()
642666
}(name, tr)
643667
}

client/allocrunner/alloc_runner_test.go

+129
Original file line numberDiff line numberDiff line change
@@ -1573,3 +1573,132 @@ func TestAllocRunner_PersistState_Destroyed(t *testing.T) {
15731573
require.NoError(t, err)
15741574
require.Nil(t, ts)
15751575
}
1576+
1577+
// TestAllocRunner_Lifecycle_Shutdown_Order asserts that a service job with 3
1578+
// lifecycle hooks (1 sidecar, 1 ephemeral, 1 poststop) starts all 4 tasks, and shuts down
1579+
// the sidecar after main, but before poststop.
1580+
func TestAllocRunner_Lifecycle_Shutdown_Order(t *testing.T) {
1581+
alloc := mock.LifecycleAllocWithPoststopDeploy()
1582+
1583+
alloc.Job.Type = structs.JobTypeService
1584+
1585+
mainTask := alloc.Job.TaskGroups[0].Tasks[0]
1586+
mainTask.Config["run_for"] = "100s"
1587+
1588+
sidecarTask := alloc.Job.TaskGroups[0].Tasks[1]
1589+
sidecarTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart
1590+
sidecarTask.Config["run_for"] = "100s"
1591+
1592+
poststopTask := alloc.Job.TaskGroups[0].Tasks[2]
1593+
ephemeralTask := alloc.Job.TaskGroups[0].Tasks[3]
1594+
1595+
alloc.Job.TaskGroups[0].Tasks = []*structs.Task{mainTask, ephemeralTask, sidecarTask, poststopTask}
1596+
1597+
conf, cleanup := testAllocRunnerConfig(t, alloc)
1598+
defer cleanup()
1599+
ar, err := NewAllocRunner(conf)
1600+
require.NoError(t, err)
1601+
defer destroy(ar)
1602+
go ar.Run()
1603+
1604+
upd := conf.StateUpdater.(*MockStateUpdater)
1605+
1606+
// Wait for main and sidecar tasks to be running, and that the
1607+
// ephemeral task ran and exited.
1608+
testutil.WaitForResult(func() (bool, error) {
1609+
last := upd.Last()
1610+
if last == nil {
1611+
return false, fmt.Errorf("No updates")
1612+
}
1613+
1614+
if last.ClientStatus != structs.AllocClientStatusRunning {
1615+
return false, fmt.Errorf("expected alloc to be running not %s", last.ClientStatus)
1616+
}
1617+
1618+
if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateRunning {
1619+
return false, fmt.Errorf("expected main task to be running not %s", s)
1620+
}
1621+
1622+
if s := last.TaskStates[sidecarTask.Name].State; s != structs.TaskStateRunning {
1623+
return false, fmt.Errorf("expected sidecar task to be running not %s", s)
1624+
}
1625+
1626+
if s := last.TaskStates[ephemeralTask.Name].State; s != structs.TaskStateDead {
1627+
return false, fmt.Errorf("expected ephemeral task to be dead not %s", s)
1628+
}
1629+
1630+
if last.TaskStates[ephemeralTask.Name].Failed {
1631+
return false, fmt.Errorf("expected ephemeral task to be successful not failed")
1632+
}
1633+
1634+
return true, nil
1635+
}, func(err error) {
1636+
t.Fatalf("error waiting for initial state:\n%v", err)
1637+
})
1638+
1639+
// Tell the alloc to stop
1640+
stopAlloc := alloc.Copy()
1641+
stopAlloc.DesiredStatus = structs.AllocDesiredStatusStop
1642+
ar.Update(stopAlloc)
1643+
1644+
// Wait for tasks to stop.
1645+
testutil.WaitForResult(func() (bool, error) {
1646+
last := upd.Last()
1647+
1648+
if s := last.TaskStates[ephemeralTask.Name].State; s != structs.TaskStateDead {
1649+
return false, fmt.Errorf("expected ephemeral task to be dead not %s", s)
1650+
}
1651+
1652+
if last.TaskStates[ephemeralTask.Name].Failed {
1653+
return false, fmt.Errorf("expected ephemeral task to be successful not failed")
1654+
}
1655+
1656+
if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateDead {
1657+
return false, fmt.Errorf("expected main task to be dead not %s", s)
1658+
}
1659+
1660+
if last.TaskStates[mainTask.Name].Failed {
1661+
return false, fmt.Errorf("expected main task to be successful not failed")
1662+
}
1663+
1664+
if s := last.TaskStates[sidecarTask.Name].State; s != structs.TaskStateDead {
1665+
return false, fmt.Errorf("expected sidecar task to be dead not %s", s)
1666+
}
1667+
1668+
if last.TaskStates[sidecarTask.Name].Failed {
1669+
return false, fmt.Errorf("expected sidecar task to be successful not failed")
1670+
}
1671+
1672+
if s := last.TaskStates[poststopTask.Name].State; s != structs.TaskStateRunning {
1673+
return false, fmt.Errorf("expected poststop task to be running not %s", s)
1674+
}
1675+
1676+
return true, nil
1677+
}, func(err error) {
1678+
t.Fatalf("error waiting for kill state:\n%v", err)
1679+
})
1680+
1681+
last := upd.Last()
1682+
require.True(t, last.TaskStates[ephemeralTask.Name].FinishedAt.Before(last.TaskStates[mainTask.Name].FinishedAt))
1683+
require.True(t, last.TaskStates[mainTask.Name].FinishedAt.Before(last.TaskStates[sidecarTask.Name].FinishedAt))
1684+
1685+
// Wait for poststop task to stop.
1686+
testutil.WaitForResult(func() (bool, error) {
1687+
last := upd.Last()
1688+
1689+
if s := last.TaskStates[poststopTask.Name].State; s != structs.TaskStateDead {
1690+
return false, fmt.Errorf("expected poststop task to be dead not %s", s)
1691+
}
1692+
1693+
if last.TaskStates[poststopTask.Name].Failed {
1694+
return false, fmt.Errorf("expected poststop task to be successful not failed")
1695+
}
1696+
1697+
return true, nil
1698+
}, func(err error) {
1699+
t.Fatalf("error waiting for poststop state:\n%v", err)
1700+
})
1701+
1702+
last = upd.Last()
1703+
require.True(t, last.TaskStates[sidecarTask.Name].FinishedAt.Before(last.TaskStates[poststopTask.Name].FinishedAt))
1704+
}

client/allocrunner/task_hook_coordinator.go

+3-5
Original file line numberDiff line numberDiff line change
@@ -179,20 +179,18 @@ func (c *taskHookCoordinator) StartPoststopTasks() {
179179
// hasNonSidecarTasks returns false if all the passed tasks are sidecar tasks
180180
func hasNonSidecarTasks(tasks []*taskrunner.TaskRunner) bool {
181181
for _, tr := range tasks {
182-
lc := tr.Task().Lifecycle
183-
if lc == nil || !lc.Sidecar {
182+
if !tr.IsSidecarTask() {
184183
return true
185184
}
186185
}
187186

188187
return false
189188
}
190189

191-
// hasSidecarTasks returns true if all the passed tasks are sidecar tasks
190+
// hasSidecarTasks returns true if any of the passed tasks are sidecar tasks
192191
func hasSidecarTasks(tasks map[string]*taskrunner.TaskRunner) bool {
193192
for _, tr := range tasks {
194-
lc := tr.Task().Lifecycle
195-
if lc != nil && lc.Sidecar {
193+
if tr.IsSidecarTask() {
196194
return true
197195
}
198196
}

client/allocrunner/taskrunner/task_runner_getters.go

+5
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ func (tr *TaskRunner) IsPoststopTask() bool {
3333
return tr.Task().Lifecycle != nil && tr.Task().Lifecycle.Hook == structs.TaskLifecycleHookPoststop
3434
}
3535

36+
// IsSidecarTask returns true if this task is a sidecar task in its task group.
37+
func (tr *TaskRunner) IsSidecarTask() bool {
38+
return tr.Task().Lifecycle != nil && tr.Task().Lifecycle.Sidecar
39+
}
40+
3641
func (tr *TaskRunner) Task() *structs.Task {
3742
tr.taskLock.RLock()
3843
defer tr.taskLock.RUnlock()

0 commit comments

Comments
 (0)