From a1769265eedf23f43e077fe84f93ba54a871feb3 Mon Sep 17 00:00:00 2001 From: Sunny Date: Thu, 10 Aug 2023 19:39:20 +0000 Subject: [PATCH] runtime/metrics: Delete metrics on object delete Delete the object metrics when the object is deleted. This ensures that stale metrics about a deleted object is no longer exported. As a result, the `ConditionDelete` is no longer needed. Another reason to not have `ConditionDelete` is that a condition can only be one of True, False or Unknown. This introduces new delete methods in the low level metrics Recorder. In the high level controller metrics, a list of owned finalizers is introduced which is used to determine if an object is being deleted. The existing Record*() methods are updated to check if the given object is deleted, and call record or delete based on that. The user of this API has to pass in the finalizer they write on object they maintain to the metrics helper and record the metrics at the very end of the reconciliation so that the final object state can be used to determine if the metrics can be deleted safely. To allow creating multiple instances of metrics helper, the metrics collector registration is now done using a new function MustMakeRecorder() which returns a metrics.Recorder. metrics.Recorder can be used to create multiple metrics helpers with different attributes if required, sharing the same underlying metrics recorder. Signed-off-by: Sunny --- runtime/controller/metrics.go | 73 +++++++++++++++++++++++++----- runtime/controller/metrics_test.go | 61 +++++++++++++++++++++++++ runtime/metrics/recorder.go | 37 +++++++++------ runtime/metrics/recorder_test.go | 68 ++++++++++++++++++++++++---- 4 files changed, 205 insertions(+), 34 deletions(-) create mode 100644 runtime/controller/metrics_test.go diff --git a/runtime/controller/metrics.go b/runtime/controller/metrics.go index 1446fc5fe..7138e6b96 100644 --- a/runtime/controller/metrics.go +++ b/runtime/controller/metrics.go @@ -25,12 +25,22 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/reference" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" crtlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" "github.com/fluxcd/pkg/apis/meta" "github.com/fluxcd/pkg/runtime/metrics" ) +// MustMakeRecorder attempts to register the metrics collectors in the controller-runtime metrics registry, which panics +// upon the first registration that causes an error. Which usually happens if you try to initialise a Metrics value +// twice for your controller. +func MustMakeRecorder() *metrics.Recorder { + metricsRecorder := metrics.NewRecorder() + crtlmetrics.Registry.MustRegister(metricsRecorder.Collectors()...) + return metricsRecorder +} + // Metrics is a helper struct that adds the capability for recording GitOps Toolkit standard metrics to a reconciler. // // Use it by embedding it in your reconciler struct: @@ -50,23 +60,30 @@ import ( type Metrics struct { Scheme *runtime.Scheme MetricsRecorder *metrics.Recorder + ownedFinalizers []string } -// MustMakeMetrics creates a new Metrics with a new metrics.Recorder, and the Metrics.Scheme set to that of the given -// mgr. -// It attempts to register the metrics collectors in the controller-runtime metrics registry, which panics upon the -// first registration that causes an error. Which usually happens if you try to initialise a Metrics value twice for -// your controller. -func MustMakeMetrics(mgr ctrl.Manager) Metrics { - metricsRecorder := metrics.NewRecorder() - crtlmetrics.Registry.MustRegister(metricsRecorder.Collectors()...) - +// NewMetrics creates a new Metrics with the given metrics.Recorder, and the Metrics.Scheme set to that of the given +// mgr, along with an optional set of owned finalizers which is used to determine when an object is being deleted. +func NewMetrics(mgr ctrl.Manager, recorder *metrics.Recorder, finalizers ...string) Metrics { return Metrics{ Scheme: mgr.GetScheme(), - MetricsRecorder: metricsRecorder, + MetricsRecorder: recorder, + ownedFinalizers: finalizers, } } +// IsDelete returns if the object is deleted by checking for deletion timestamp +// and owned finalizers in the object. +func (m Metrics) IsDelete(obj conditions.Getter) bool { + for _, f := range m.ownedFinalizers { + if controllerutil.ContainsFinalizer(obj, f) { + return false + } + } + return !obj.GetDeletionTimestamp().IsZero() +} + // RecordDuration records the duration of a reconcile attempt for the given obj based on the given startTime. func (m Metrics) RecordDuration(ctx context.Context, obj conditions.Getter, startTime time.Time) { if m.MetricsRecorder != nil { @@ -75,6 +92,10 @@ func (m Metrics) RecordDuration(ctx context.Context, obj conditions.Getter, star logr.FromContextOrDiscard(ctx).Error(err, "unable to get object reference to record duration") return } + if m.IsDelete(obj) { + m.MetricsRecorder.DeleteDuration(*ref) + return + } m.MetricsRecorder.RecordDuration(*ref, startTime) } } @@ -87,22 +108,38 @@ func (m Metrics) RecordSuspend(ctx context.Context, obj conditions.Getter, suspe logr.FromContextOrDiscard(ctx).Error(err, "unable to get object reference to record suspend") return } + if m.IsDelete(obj) { + m.MetricsRecorder.DeleteSuspend(*ref) + return + } m.MetricsRecorder.RecordSuspend(*ref, suspend) } } // RecordReadiness records the meta.ReadyCondition status for the given obj. func (m Metrics) RecordReadiness(ctx context.Context, obj conditions.Getter) { + if m.IsDelete(obj) { + m.DeleteCondition(ctx, obj, meta.ReadyCondition) + return + } m.RecordCondition(ctx, obj, meta.ReadyCondition) } // RecordReconciling records the meta.ReconcilingCondition status for the given obj. func (m Metrics) RecordReconciling(ctx context.Context, obj conditions.Getter) { + if m.IsDelete(obj) { + m.DeleteCondition(ctx, obj, meta.ReconcilingCondition) + return + } m.RecordCondition(ctx, obj, meta.ReconcilingCondition) } // RecordStalled records the meta.StalledCondition status for the given obj. func (m Metrics) RecordStalled(ctx context.Context, obj conditions.Getter) { + if m.IsDelete(obj) { + m.DeleteCondition(ctx, obj, meta.StalledCondition) + return + } m.RecordCondition(ctx, obj, meta.StalledCondition) } @@ -120,5 +157,19 @@ func (m Metrics) RecordCondition(ctx context.Context, obj conditions.Getter, con if rc == nil { rc = conditions.UnknownCondition(conditionType, "", "") } - m.MetricsRecorder.RecordCondition(*ref, *rc, !obj.GetDeletionTimestamp().IsZero()) + m.MetricsRecorder.RecordCondition(*ref, *rc) +} + +// DeleteCondition deletes the condition metrics of the given conditionType for +// the given object. +func (m Metrics) DeleteCondition(ctx context.Context, obj conditions.Getter, conditionType string) { + if m.MetricsRecorder == nil { + return + } + ref, err := reference.GetReference(m.Scheme, obj) + if err != nil { + logr.FromContextOrDiscard(ctx).Error(err, "unable to get object reference to delete condition metric") + return + } + m.MetricsRecorder.DeleteCondition(*ref, conditionType) } diff --git a/runtime/controller/metrics_test.go b/runtime/controller/metrics_test.go new file mode 100644 index 000000000..50096d65b --- /dev/null +++ b/runtime/controller/metrics_test.go @@ -0,0 +1,61 @@ +/* +Copyright 2023 The Flux authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "testing" + "time" + + . "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/fluxcd/pkg/runtime/conditions/testdata" +) + +func TestMetrics_IsDelete(t *testing.T) { + testFinalizers := []string{"finalizers.fluxcd.io", "finalizers.foo.bar"} + timenow := metav1.NewTime(time.Now()) + + tests := []struct { + name string + finalizers []string + deleteTimestamp *metav1.Time + ownedFinalizers []string + want bool + }{ + {"equal finalizers, no delete timestamp", testFinalizers, nil, testFinalizers, false}, + {"partial finalizers, no delete timestamp", []string{"finalizers.fluxcd.io"}, nil, testFinalizers, false}, + {"unknown finalizers, no delete timestamp", []string{"foo"}, nil, testFinalizers, false}, + {"unknown finalizers, delete timestamp", []string{"foo"}, &timenow, testFinalizers, true}, + {"no finalizers, no delete timestamp", []string{}, nil, testFinalizers, false}, + {"no owned finalizers, no delete timestamp", []string{"foo"}, nil, nil, false}, + {"no finalizers, delete timestamp", []string{}, &timenow, testFinalizers, true}, + {"no finalizers, no delete timestamp", nil, nil, nil, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + metrics := Metrics{ownedFinalizers: tt.ownedFinalizers} + obj := &testdata.Fake{} + obj.SetFinalizers(tt.finalizers) + obj.SetDeletionTimestamp(tt.deleteTimestamp) + g.Expect(metrics.IsDelete(obj)).To(Equal(tt.want)) + }) + } +} diff --git a/runtime/metrics/recorder.go b/runtime/metrics/recorder.go index 88ead2fe6..e339a69cb 100644 --- a/runtime/metrics/recorder.go +++ b/runtime/metrics/recorder.go @@ -25,10 +25,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -const ( - ConditionDeleted = "Deleted" -) - // Recorder is a struct for recording GitOps Toolkit metrics for a controller. // // Use NewRecorder to initialise it with properly configured metric names. @@ -77,19 +73,20 @@ func (r *Recorder) Collectors() []prometheus.Collector { } // RecordCondition records the condition as given for the ref. -func (r *Recorder) RecordCondition(ref corev1.ObjectReference, condition metav1.Condition, deleted bool) { - for _, status := range []string{string(metav1.ConditionTrue), string(metav1.ConditionFalse), string(metav1.ConditionUnknown), ConditionDeleted} { +func (r *Recorder) RecordCondition(ref corev1.ObjectReference, condition metav1.Condition) { + for _, status := range []metav1.ConditionStatus{metav1.ConditionTrue, metav1.ConditionFalse, metav1.ConditionUnknown} { var value float64 - if deleted { - if status == ConditionDeleted { - value = 1 - } - } else { - if status == string(condition.Status) { - value = 1 - } + if status == condition.Status { + value = 1 } - r.conditionGauge.WithLabelValues(ref.Kind, ref.Name, ref.Namespace, condition.Type, status).Set(value) + r.conditionGauge.WithLabelValues(ref.Kind, ref.Name, ref.Namespace, condition.Type, string(status)).Set(value) + } +} + +// DeleteCondition deletes the condition metrics for the ref. +func (r *Recorder) DeleteCondition(ref corev1.ObjectReference, conditionType string) { + for _, status := range []metav1.ConditionStatus{metav1.ConditionTrue, metav1.ConditionFalse, metav1.ConditionUnknown} { + r.conditionGauge.DeleteLabelValues(ref.Kind, ref.Name, ref.Namespace, conditionType, string(status)) } } @@ -102,7 +99,17 @@ func (r *Recorder) RecordSuspend(ref corev1.ObjectReference, suspend bool) { r.suspendGauge.WithLabelValues(ref.Kind, ref.Name, ref.Namespace).Set(value) } +// DeleteSuspend deletes the suspend metric for the ref. +func (r *Recorder) DeleteSuspend(ref corev1.ObjectReference) { + r.suspendGauge.DeleteLabelValues(ref.Kind, ref.Name, ref.Namespace) +} + // RecordDuration records the duration since start for the given ref. func (r *Recorder) RecordDuration(ref corev1.ObjectReference, start time.Time) { r.durationHistogram.WithLabelValues(ref.Kind, ref.Name, ref.Namespace).Observe(time.Since(start).Seconds()) } + +// DeleteDuration deletes the duration metric for the ref. +func (r *Recorder) DeleteDuration(ref corev1.ObjectReference) { + r.durationHistogram.DeleteLabelValues(ref.Kind, ref.Name, ref.Namespace) +} diff --git a/runtime/metrics/recorder_test.go b/runtime/metrics/recorder_test.go index a96927f28..b6c04d39f 100644 --- a/runtime/metrics/recorder_test.go +++ b/runtime/metrics/recorder_test.go @@ -44,15 +44,13 @@ func TestRecorder_RecordCondition(t *testing.T) { Status: metav1.ConditionTrue, } - rec.RecordCondition(ref, cond, false) + rec.RecordCondition(ref, cond) metricFamilies, err := reg.Gather() - if err != nil { - require.NoError(t, err) - } + require.NoError(t, err) require.Equal(t, len(metricFamilies), 1) - require.Equal(t, len(metricFamilies[0].Metric), 4) + require.Equal(t, len(metricFamilies[0].Metric), 3) var conditionTrueValue float64 for _, m := range metricFamilies[0].Metric { @@ -69,6 +67,13 @@ func TestRecorder_RecordCondition(t *testing.T) { } require.Equal(t, conditionTrueValue, float64(1)) + + // Delete metrics. + rec.DeleteCondition(ref, cond.Type) + + metricFamilies, err = reg.Gather() + require.NoError(t, err) + require.Equal(t, len(metricFamilies), 0) } func TestRecorder_RecordDuration(t *testing.T) { @@ -86,9 +91,7 @@ func TestRecorder_RecordDuration(t *testing.T) { rec.RecordDuration(ref, reconcileStart) metricFamilies, err := reg.Gather() - if err != nil { - require.NoError(t, err) - } + require.NoError(t, err) require.Equal(t, len(metricFamilies), 1) require.Equal(t, len(metricFamilies[0].Metric), 1) @@ -110,4 +113,53 @@ func TestRecorder_RecordDuration(t *testing.T) { t.Errorf("expected namespace label to be %s, got %s", ref.Namespace, *pair.Value) } } + + // Delete metrics. + rec.DeleteDuration(ref) + + metricFamilies, err = reg.Gather() + require.NoError(t, err) + require.Equal(t, len(metricFamilies), 0) +} + +func TestRecorder_RecordSuspend(t *testing.T) { + rec := NewRecorder() + reg := prometheus.NewRegistry() + reg.MustRegister(rec.suspendGauge) + + ref := corev1.ObjectReference{ + Kind: "GitRepository", + Namespace: "default", + Name: "test", + } + + rec.RecordSuspend(ref, true) + + metricFamilies, err := reg.Gather() + require.NoError(t, err) + + require.Equal(t, len(metricFamilies), 1) + require.Equal(t, len(metricFamilies[0].Metric), 1) + + value := *metricFamilies[0].Metric[0].GetGauge().Value + require.EqualValues(t, value, 1, "expected gauge value") + + for _, pair := range metricFamilies[0].Metric[0].GetLabel() { + if *pair.Name == "kind" { + require.Equal(t, *pair.Value, ref.Kind, "unexpected kind") + } + if *pair.Name == "name" { + require.Equal(t, *pair.Value, ref.Name, "unexpected name") + } + if *pair.Name == "namespace" { + require.Equal(t, *pair.Value, ref.Namespace, "unexpected namespace") + } + } + + // Delete metrics. + rec.DeleteSuspend(ref) + + metricFamilies, err = reg.Gather() + require.NoError(t, err) + require.Equal(t, len(metricFamilies), 0) }