Skip to content

Commit 34d3548

Browse files
committed
lifecycle: unit test for lifecycle task behavior on restarts
Test the restart behavior of tasks with lifecycles when the allocation or tasks are restarted.
1 parent 6dcada4 commit 34d3548

File tree

3 files changed

+381
-175
lines changed

3 files changed

+381
-175
lines changed

client/allochealth/tracker_test.go

+24-1
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,30 @@ func TestTracker_Checks_Healthy(t *testing.T) {
9292
func TestTracker_Checks_PendingPostStop_Healthy(t *testing.T) {
9393
t.Parallel()
9494

95-
alloc := mock.LifecycleAllocWithPoststopDeploy()
95+
alloc := mock.LifecycleAllocFromTasks([]mock.LifecycleTaskDef{
96+
{
97+
Name: "web",
98+
RunFor: "1s",
99+
},
100+
{
101+
Name: "side",
102+
RunFor: "1s",
103+
Hook: structs.TaskLifecycleHookPrestart,
104+
IsSidecar: true,
105+
},
106+
{
107+
Name: "post",
108+
RunFor: "1s",
109+
Hook: structs.TaskLifecycleHookPoststop,
110+
},
111+
{
112+
Name: "init",
113+
RunFor: "1s",
114+
Hook: structs.TaskLifecycleHookPrestart,
115+
IsSidecar: false,
116+
},
117+
})
118+
96119
alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
97120

98121
// Synthesize running alloc and tasks

client/allocrunner/alloc_runner_test.go

+320-2
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ import (
99
"time"
1010

1111
"github.com/hashicorp/consul/api"
12+
multierror "github.com/hashicorp/go-multierror"
1213
"github.com/hashicorp/nomad/client/allochealth"
1314
"github.com/hashicorp/nomad/client/allocwatcher"
1415
cconsul "github.com/hashicorp/nomad/client/consul"
@@ -236,9 +237,9 @@ func TestAllocRunner_Lifecycle_Poststart(t *testing.T) {
236237
})
237238
}
238239

239-
// TestAllocRunner_TaskMain_KillTG asserts that when main tasks die the
240+
// TestAllocRunner_Lifecycle_TaskMain_KillTG asserts that when main tasks die the
240241
// entire task group is killed.
241-
func TestAllocRunner_TaskMain_KillTG(t *testing.T) {
242+
func TestAllocRunner_Lifecycle_TaskMain_KillTG(t *testing.T) {
242243
t.Parallel()
243244

244245
alloc := mock.BatchAlloc()
@@ -1568,3 +1569,320 @@ func TestAllocRunner_PersistState_Destroyed(t *testing.T) {
15681569
require.NoError(t, err)
15691570
require.Nil(t, ts)
15701571
}
1572+
1573+
func TestAllocRunner_Lifecycle_Restart(t *testing.T) {
1574+
1575+
// test cases can use this default or override w/ taskDefs param
1576+
alloc := mock.LifecycleAllocFromTasks([]mock.LifecycleTaskDef{
1577+
{"main", "100s", 0, "", false},
1578+
{"prestart-oneshot", "1s", 0, "prestart", false},
1579+
{"prestart-sidecar", "100s", 0, "prestart", true},
1580+
{"poststart-oneshot", "1s", 0, "poststart", false},
1581+
{"poststart-sidecar", "100s", 0, "poststart", true},
1582+
{"poststop", "1s", 0, "poststop", false},
1583+
})
1584+
alloc.Job.Type = structs.JobTypeService
1585+
rp := &structs.RestartPolicy{
1586+
Attempts: 1,
1587+
Interval: 10 * time.Minute,
1588+
Delay: 1 * time.Nanosecond,
1589+
Mode: structs.RestartPolicyModeFail,
1590+
}
1591+
1592+
testCases := []struct {
1593+
name string
1594+
taskDefs []mock.LifecycleTaskDef
1595+
action func(*allocRunner, *structs.Allocation) error
1596+
expectedErr string
1597+
expectedAfter map[string]structs.TaskState
1598+
}{
1599+
{
1600+
name: "restart entire allocation",
1601+
action: func(ar *allocRunner, alloc *structs.Allocation) error {
1602+
return ar.RestartAll(&structs.TaskEvent{})
1603+
},
1604+
// TODO: currently failed because we get an error here!
1605+
// TODO: some of these we're unsure of?
1606+
expectedAfter: map[string]structs.TaskState{
1607+
"main": structs.TaskState{State: "running", Restarts: 1},
1608+
"prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1609+
"prestart-sidecar": structs.TaskState{State: "running", Restarts: 1},
1610+
"poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1611+
"poststart-sidecar": structs.TaskState{State: "running", Restarts: 1},
1612+
"poststop": structs.TaskState{State: "pending", Restarts: 0},
1613+
},
1614+
},
1615+
1616+
{
1617+
name: "stop from server",
1618+
action: func(ar *allocRunner, alloc *structs.Allocation) error {
1619+
stopAlloc := alloc.Copy()
1620+
stopAlloc.DesiredStatus = structs.AllocDesiredStatusStop
1621+
ar.Update(stopAlloc)
1622+
return nil
1623+
},
1624+
expectedAfter: map[string]structs.TaskState{
1625+
"main": structs.TaskState{State: "dead", Restarts: 0},
1626+
"prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1627+
"prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0},
1628+
"poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1629+
"poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0},
1630+
"poststop": structs.TaskState{State: "dead", Restarts: 0},
1631+
},
1632+
},
1633+
1634+
{
1635+
name: "restart main task",
1636+
action: func(ar *allocRunner, alloc *structs.Allocation) error {
1637+
return ar.RestartTask("main", &structs.TaskEvent{})
1638+
},
1639+
expectedAfter: map[string]structs.TaskState{
1640+
"main": structs.TaskState{State: "running", Restarts: 1},
1641+
"prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1642+
"prestart-sidecar": structs.TaskState{State: "running", Restarts: 0},
1643+
"poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1644+
// TODO: poststart-sidecar is showing 0!
1645+
"poststart-sidecar": structs.TaskState{State: "running", Restarts: 1},
1646+
"poststop": structs.TaskState{State: "pending", Restarts: 0},
1647+
},
1648+
},
1649+
1650+
{
1651+
// TODO: this isn't restarting?
1652+
name: "main task fails and restarts once",
1653+
taskDefs: []mock.LifecycleTaskDef{
1654+
{"main", "5s", 1, "", false},
1655+
{"prestart-oneshot", "1s", 0, "prestart", false},
1656+
{"prestart-sidecar", "100s", 0, "prestart", true},
1657+
{"poststart-oneshot", "1s", 0, "poststart", false},
1658+
{"poststart-sidecar", "100s", 0, "poststart", true},
1659+
{"poststop", "1s", 0, "poststop", false},
1660+
},
1661+
action: func(ar *allocRunner, alloc *structs.Allocation) error {
1662+
time.Sleep(5 * time.Second) // make sure main task has exited
1663+
return nil
1664+
},
1665+
expectedAfter: map[string]structs.TaskState{
1666+
"main": structs.TaskState{State: "dead", Restarts: 1},
1667+
"prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1668+
"prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0},
1669+
"poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1670+
"poststart-sidecar": structs.TaskState{State: "dead", Restarts: 1},
1671+
"poststop": structs.TaskState{State: "pending", Restarts: 0},
1672+
},
1673+
},
1674+
1675+
{
1676+
// TODO: this isn't restarting?
1677+
name: "main stopped unexpectedly and restarts once",
1678+
taskDefs: []mock.LifecycleTaskDef{
1679+
{"main", "5s", 0, "", false},
1680+
{"prestart-oneshot", "1s", 0, "prestart", false},
1681+
{"prestart-sidecar", "100s", 0, "prestart", true},
1682+
{"poststart-oneshot", "1s", 0, "poststart", false},
1683+
{"poststart-sidecar", "100s", 0, "poststart", true},
1684+
{"poststop", "1s", 0, "poststop", false},
1685+
},
1686+
action: func(ar *allocRunner, alloc *structs.Allocation) error {
1687+
time.Sleep(5 * time.Second) // make sure main task has exited
1688+
return nil
1689+
},
1690+
expectedAfter: map[string]structs.TaskState{
1691+
"main": structs.TaskState{State: "dead", Restarts: 1},
1692+
"prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1693+
"prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0},
1694+
"poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1695+
"poststart-sidecar": structs.TaskState{State: "dead", Restarts: 1},
1696+
"poststop": structs.TaskState{State: "pending", Restarts: 0},
1697+
},
1698+
},
1699+
1700+
{
1701+
name: "failed main task cannot be restarted",
1702+
taskDefs: []mock.LifecycleTaskDef{
1703+
{"main", "5s", 1, "", false},
1704+
{"prestart-oneshot", "1s", 0, "prestart", false},
1705+
{"prestart-sidecar", "100s", 0, "prestart", true},
1706+
{"poststart-oneshot", "1s", 0, "poststart", false},
1707+
{"poststart-sidecar", "100s", 0, "poststart", true},
1708+
{"poststop", "1s", 0, "poststop", false},
1709+
},
1710+
action: func(ar *allocRunner, alloc *structs.Allocation) error {
1711+
time.Sleep(5 * time.Second) // make sure main task has exited
1712+
return ar.RestartTask("main", &structs.TaskEvent{})
1713+
},
1714+
expectedErr: "Task not running",
1715+
expectedAfter: map[string]structs.TaskState{
1716+
"main": structs.TaskState{State: "dead", Restarts: 0},
1717+
"prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1718+
"prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0},
1719+
"poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1720+
"poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0},
1721+
"poststop": structs.TaskState{State: "dead", Restarts: 0},
1722+
},
1723+
},
1724+
1725+
{
1726+
name: "restart prestart-sidecar task",
1727+
action: func(ar *allocRunner, alloc *structs.Allocation) error {
1728+
return ar.RestartTask("prestart-sidecar", &structs.TaskEvent{})
1729+
},
1730+
expectedAfter: map[string]structs.TaskState{
1731+
"main": structs.TaskState{State: "running", Restarts: 0},
1732+
"prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1733+
"prestart-sidecar": structs.TaskState{State: "running", Restarts: 1},
1734+
"poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1735+
"poststart-sidecar": structs.TaskState{State: "running", Restarts: 0},
1736+
"poststop": structs.TaskState{State: "pending", Restarts: 0},
1737+
},
1738+
},
1739+
1740+
{
1741+
name: "restart poststart-sidecar task",
1742+
action: func(ar *allocRunner, alloc *structs.Allocation) error {
1743+
return ar.RestartTask("poststart-sidecar", &structs.TaskEvent{})
1744+
},
1745+
expectedAfter: map[string]structs.TaskState{
1746+
"main": structs.TaskState{State: "running", Restarts: 0},
1747+
"prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1748+
"prestart-sidecar": structs.TaskState{State: "running", Restarts: 0},
1749+
"poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
1750+
"poststart-sidecar": structs.TaskState{State: "running", Restarts: 1},
1751+
"poststop": structs.TaskState{State: "pending", Restarts: 0},
1752+
},
1753+
},
1754+
}
1755+
1756+
for _, tc := range testCases {
1757+
tc := tc
1758+
t.Run(tc.name, func(t *testing.T) {
1759+
t.Parallel()
1760+
require := require.New(t)
1761+
1762+
alloc := alloc.Copy()
1763+
alloc.Job.TaskGroups[0].RestartPolicy = rp
1764+
if tc.taskDefs != nil {
1765+
alloc = mock.LifecycleAllocFromTasks(tc.taskDefs)
1766+
alloc.Job.Type = structs.JobTypeService
1767+
}
1768+
for _, task := range alloc.Job.TaskGroups[0].Tasks {
1769+
task.RestartPolicy = rp // tasks inherit the group policy
1770+
}
1771+
1772+
conf, cleanup := testAllocRunnerConfig(t, alloc)
1773+
defer cleanup()
1774+
ar, err := NewAllocRunner(conf)
1775+
require.NoError(err)
1776+
defer destroy(ar)
1777+
go ar.Run()
1778+
1779+
upd := conf.StateUpdater.(*MockStateUpdater)
1780+
1781+
// assert our "before" states:
1782+
// - all one-shot tasks should be dead but not failed
1783+
// - all main tasks and sidecars should be running
1784+
// - no tasks should have restarted
1785+
testutil.WaitForResult(func() (bool, error) {
1786+
last := upd.Last()
1787+
if last == nil {
1788+
return false, fmt.Errorf("no update")
1789+
}
1790+
if last.ClientStatus != structs.AllocClientStatusRunning {
1791+
return false, fmt.Errorf(
1792+
"expected alloc to be running not %s", last.ClientStatus)
1793+
}
1794+
var errs *multierror.Error
1795+
1796+
expectedBefore := map[string]string{
1797+
"main": "running",
1798+
"prestart-oneshot": "dead",
1799+
"prestart-sidecar": "running",
1800+
"poststart-oneshot": "dead",
1801+
"poststart-sidecar": "running",
1802+
"poststop": "pending",
1803+
}
1804+
1805+
for task, expected := range expectedBefore {
1806+
got, ok := last.TaskStates[task]
1807+
if !ok {
1808+
continue
1809+
}
1810+
if got.State != expected {
1811+
errs = multierror.Append(errs, fmt.Errorf(
1812+
"expected initial state of task %q to be %q not %q",
1813+
task, expected, got.State))
1814+
}
1815+
if got.Restarts != 0 {
1816+
errs = multierror.Append(errs, fmt.Errorf(
1817+
"expected no initial restarts of task %q, not %q",
1818+
task, got.Restarts))
1819+
}
1820+
if expected == "dead" && got.Failed {
1821+
errs = multierror.Append(errs, fmt.Errorf(
1822+
"expected ephemeral task %q to be dead but not failed",
1823+
task))
1824+
}
1825+
1826+
}
1827+
if errs.ErrorOrNil() != nil {
1828+
return false, errs.ErrorOrNil()
1829+
}
1830+
return true, nil
1831+
}, func(err error) {
1832+
require.NoError(err, "error waiting for initial state")
1833+
})
1834+
1835+
// perform the action
1836+
err = tc.action(ar, alloc.Copy())
1837+
if tc.expectedErr != "" {
1838+
require.EqualError(err, tc.expectedErr)
1839+
} else {
1840+
require.NoError(err)
1841+
}
1842+
1843+
// assert our "after" states
1844+
testutil.WaitForResult(func() (bool, error) {
1845+
last := upd.Last()
1846+
if last == nil {
1847+
return false, fmt.Errorf("no update")
1848+
}
1849+
var errs *multierror.Error
1850+
for task, expected := range tc.expectedAfter {
1851+
got, ok := last.TaskStates[task]
1852+
if !ok {
1853+
errs = multierror.Append(errs, fmt.Errorf(
1854+
"no final state found for task %q", task,
1855+
))
1856+
}
1857+
if got.State != expected.State {
1858+
errs = multierror.Append(errs, fmt.Errorf(
1859+
"expected final state of task %q to be %q not %q",
1860+
task, expected.State, got.State))
1861+
}
1862+
if expected.State == "dead" {
1863+
if got.FinishedAt.IsZero() || got.StartedAt.IsZero() {
1864+
errs = multierror.Append(errs, fmt.Errorf(
1865+
"expected final state of task %q to have start and finish time", task))
1866+
}
1867+
if len(got.Events) < 2 {
1868+
errs = multierror.Append(errs, fmt.Errorf(
1869+
"expected final state of task %q to include at least 2 tasks", task))
1870+
}
1871+
}
1872+
1873+
if got.Restarts != expected.Restarts {
1874+
errs = multierror.Append(errs, fmt.Errorf(
1875+
"expected final restarts of task %q to be %v not %v",
1876+
task, expected.Restarts, got.Restarts))
1877+
}
1878+
}
1879+
if errs.ErrorOrNil() != nil {
1880+
return false, errs.ErrorOrNil()
1881+
}
1882+
return true, nil
1883+
}, func(err error) {
1884+
require.NoError(err, "error waiting for final state")
1885+
})
1886+
})
1887+
}
1888+
}

0 commit comments

Comments
 (0)