From c194a9abbfd138fb97313c51b5032a9e71b5459b Mon Sep 17 00:00:00 2001
From: Jigisha Patil <89548848+jigisha620@users.noreply.github.com>
Date: Thu, 14 Nov 2024 11:58:52 -0800
Subject: [PATCH] chore: Add unfinished_work_seconds metric (#1809)

---
Review note: the in-loop gauge update reports s.clock.Since(startTime).Seconds()
so the value matches the metric's "_seconds" unit. Converting the time.Duration
directly with float64(...) would publish nanoseconds instead. The gauge is set
to 0 once the solve loop exits, as before.

 pkg/controllers/provisioning/scheduling/metrics.go | 13 +++++++++++++
 .../provisioning/scheduling/scheduler.go           |  3 ++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/pkg/controllers/provisioning/scheduling/metrics.go b/pkg/controllers/provisioning/scheduling/metrics.go
index 43d341bb0c..45444176ea 100644
--- a/pkg/controllers/provisioning/scheduling/metrics.go
+++ b/pkg/controllers/provisioning/scheduling/metrics.go
@@ -57,6 +57,19 @@ var (
 			schedulingIDLabel,
 		},
 	)
+	UnfinishedWorkSeconds = opmetrics.NewPrometheusGauge(
+		crmetrics.Registry,
+		prometheus.GaugeOpts{
+			Namespace: metrics.Namespace,
+			Subsystem: schedulerSubsystem,
+			Name:      "unfinished_work_seconds",
+			Help:      "How many seconds of work has been done that is in progress and hasn't been observed by scheduling_duration_seconds.",
+		},
+		[]string{
+			ControllerLabel,
+			schedulingIDLabel,
+		},
+	)
 	IgnoredPodCount = opmetrics.NewPrometheusGauge(
 		crmetrics.Registry,
 		prometheus.GaugeOpts{
diff --git a/pkg/controllers/provisioning/scheduling/scheduler.go b/pkg/controllers/provisioning/scheduling/scheduler.go
index fb4b7f55e3..1d7ef740a9 100644
--- a/pkg/controllers/provisioning/scheduling/scheduler.go
+++ b/pkg/controllers/provisioning/scheduling/scheduler.go
@@ -213,6 +213,7 @@ func (s *Scheduler) Solve(ctx context.Context, pods []*corev1.Pod) Results {
 	lastLogTime := s.clock.Now()
 	batchSize := len(q.pods)
 	for {
+		UnfinishedWorkSeconds.Set(s.clock.Since(startTime).Seconds(), map[string]string{ControllerLabel: injection.GetControllerName(ctx), schedulingIDLabel: string(s.id)})
 		QueueDepth.Set(float64(len(q.pods)), map[string]string{ControllerLabel: injection.GetControllerName(ctx), schedulingIDLabel: string(s.id)})
 
 		if s.clock.Since(lastLogTime) > time.Minute {
@@ -240,7 +241,7 @@ func (s *Scheduler) Solve(ctx context.Context, pods []*corev1.Pod) Results {
 			}
 		}
 	}
-
+	UnfinishedWorkSeconds.Set(0, map[string]string{ControllerLabel: injection.GetControllerName(ctx), schedulingIDLabel: string(s.id)})
 	for _, m := range s.newNodeClaims {
 		m.FinalizeScheduling()
 	}