From 2ecc6d33d8d38c2e111759cdc0a7b45fda15faec Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 30 Sep 2024 16:21:35 -0400 Subject: [PATCH] docker: reset timer after collecting stats In #23966 when we switched to using the official Docker SDK client, we had to rework the stats collection loop for the new client. But we missed resetting the timer on the collection loop, which meant that we'd only collect stats once and then never again. * Ref: [NET-11202 (comment)](https://hashicorp.atlassian.net/browse/NET-11202?focusedCommentId=550814) * This has shipped in Nomad 1.9.0-beta.1 but not production yet. --- drivers/docker/stats.go | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/drivers/docker/stats.go b/drivers/docker/stats.go index d19e00b9018..cd64e966ee9 100644 --- a/drivers/docker/stats.go +++ b/drivers/docker/stats.go @@ -97,6 +97,25 @@ func (h *taskHandle) collectStats(ctx context.Context, destCh *usageSender, inte timer, cancel := helper.NewSafeTimer(interval) defer cancel() + collectOnce := func() { + defer timer.Reset(interval) + statsReader, err := h.dockerClient.ContainerStatsOneShot(ctx, h.containerID) + if err != nil && err != io.EOF { + h.logger.Debug("error collecting stats from container", "error", err) + return + } + defer statsReader.Body.Close() + + var stats containerapi.Stats + if err := json.NewDecoder(statsReader.Body).Decode(&stats); err != nil { + h.logger.Error("error decoding stats data for container", "error", err) + return + } + + resourceUsage := util.DockerStatsToTaskResourceUsage(&stats, compute) + destCh.send(resourceUsage) + } + for { select { case <-ctx.Done(): @@ -104,27 +123,7 @@ func (h *taskHandle) collectStats(ctx context.Context, destCh *usageSender, inte case <-h.doneCh: return case <-timer.C: - // ContainerStats returns a StatsResponseReader. 
Body of that reader - // contains the stats and implements io.Reader - statsReader, err := h.dockerClient.ContainerStatsOneShot(ctx, h.containerID) - if err != nil && err != io.EOF { - // An error occurred during stats collection, retry with backoff - h.logger.Debug("error collecting stats from container", "error", err) - continue - } - - var stats containerapi.Stats - - if err := json.NewDecoder(statsReader.Body).Decode(&stats); err != nil { - h.logger.Error("error unmarshalling stats data for container", "error", err) - _ = statsReader.Body.Close() - continue - } - - resourceUsage := util.DockerStatsToTaskResourceUsage(&stats, compute) - destCh.send(resourceUsage) - - _ = statsReader.Body.Close() + collectOnce() } } }