Skip to content

Commit

Permalink
Merge pull request #1264 from helsaawy/he/backport-waitinitexit
Browse files Browse the repository at this point in the history
[release/0.9] Wait for waitInitExit() to return #1249
  • Loading branch information
helsaawy authored Jan 6, 2022
2 parents e6fba04 + c82af14 commit 51b804f
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 10 deletions.
5 changes: 3 additions & 2 deletions cmd/containerd-shim-runhcs-v1/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -379,8 +379,9 @@ func (p *pod) KillTask(ctx context.Context, tid, eid string, signal uint32, all
return wt.KillExec(ctx, eid, signal, all)
})

// iterate all
return false
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
}
eg.Go(func() error {
Expand Down
55 changes: 47 additions & 8 deletions cmd/containerd-shim-runhcs-v1/task_hcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -535,8 +535,9 @@ func (ht *hcsTask) KillExec(ctx context.Context, eid string, signal uint32, all
}).Warn("failed to kill exec in task")
}

// iterate all
return false
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
}
if signal == 0x9 && eid == "" && ht.host != nil {
Expand Down Expand Up @@ -577,8 +578,9 @@ func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, tim
ex.ForceExit(ctx, 1)
}

// iterate next
return false
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
}
switch state := e.State(); state {
Expand All @@ -587,6 +589,41 @@ func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, tim
case shimExecStateRunning:
return 0, 0, time.Time{}, newExecInvalidStateError(ht.id, eid, state, "delete")
}

if eid == "" {
// We are killing the init task, so we expect the container to be
// stopped after this.
//
// The task process may have already exited, and the status set to
// shimExecStateExited, but resources may still be in the process
// of being cleaned up. Wait for ht.closed to be closed. This signals
// that waitInitExit() has finished destroying container resources,
// and layers were umounted.
// If the shim exits before resources are cleaned up, those resources
// will remain locked and untracked, which leads to lingering sandboxes
// and container resources like base vhdx.
select {
case <-time.After(30 * time.Second):
log.G(ctx).Error("timed out waiting for resource cleanup")
return 0, 0, time.Time{}, errors.Wrap(hcs.ErrTimeout, "waiting for container resource cleanup")
case <-ht.closed:
}

// The init task has now exited. A ForceExit() has already been sent to
// execs. Cleanup execs and continue.
ht.execs.Range(func(key, value interface{}) bool {
if key == "" {
// Iterate next.
return true
}
ht.execs.Delete(key)

// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
}

status := e.Status()
if eid != "" {
ht.execs.Delete(eid)
Expand Down Expand Up @@ -616,8 +653,9 @@ func (ht *hcsTask) Pids(ctx context.Context) ([]runhcsopts.ProcessDetails, error
ex := value.(shimExec)
pidMap[ex.Pid()] = ex.ID()

// Iterate all
return false
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
pidMap[ht.init.Pid()] = ht.init.ID()

Expand Down Expand Up @@ -698,8 +736,9 @@ func (ht *hcsTask) waitForHostExit() {
ex := value.(shimExec)
ex.ForceExit(ctx, 1)

// iterate all
return false
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
ht.init.ForceExit(ctx, 1)
ht.closeHost(ctx)
Expand Down
12 changes: 12 additions & 0 deletions cmd/containerd-shim-runhcs-v1/task_hcs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ func Test_hcsTask_DeleteExec_InitExecID_CreatedState_Success(t *testing.T) {
// remove the 2nd exec so we just check without it.
lt.execs.Delete(second.id)

// Simulate waitInitExit() closing the host
close(lt.closed)
// try to delete the init exec
pid, status, at, err := lt.DeleteExec(context.TODO(), "")

Expand All @@ -178,6 +180,8 @@ func Test_hcsTask_DeleteExec_InitExecID_RunningState_Error(t *testing.T) {
// Start the init exec
_ = init.Start(context.TODO())

// Simulate waitInitExit() closing the host
close(lt.closed)
// try to delete the init exec
pid, status, at, err := lt.DeleteExec(context.TODO(), "")

Expand All @@ -192,6 +196,8 @@ func Test_hcsTask_DeleteExec_InitExecID_ExitedState_Success(t *testing.T) {

_ = init.Kill(context.TODO(), 0xf)

// Simulate waitInitExit() closing the host
close(lt.closed)
// try to delete the init exec
pid, status, at, err := lt.DeleteExec(context.TODO(), "")

Expand All @@ -207,6 +213,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_CreatedState_Error(t *testing.T)
// start the init exec (required to have 2nd exec)
_ = init.Start(context.TODO())

// Simulate waitInitExit() closing the host
close(lt.closed)
// try to delete the init exec
pid, status, at, err := lt.DeleteExec(context.TODO(), "")

Expand All @@ -226,6 +234,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_RunningState_Error(t *testing.T)
// put the 2nd exec into the running state
_ = second.Start(context.TODO())

// Simulate waitInitExit() closing the host
close(lt.closed)
// try to delete the init exec
pid, status, at, err := lt.DeleteExec(context.TODO(), "")

Expand All @@ -244,6 +254,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_ExitedState_Success(t *testing.T
// put the 2nd exec into the exited state
_ = second.Kill(context.TODO(), 0xf)

// Simulate waitInitExit() closing the host
close(lt.closed)
// try to delete the init exec
pid, status, at, err := lt.DeleteExec(context.TODO(), "")

Expand Down

0 comments on commit 51b804f

Please sign in to comment.