From e06edbad79aeaff067a9fc3efd3d2b4909caaa7f Mon Sep 17 00:00:00 2001 From: ka3de Date: Thu, 29 Feb 2024 14:42:44 +0100 Subject: [PATCH] Fix: telemetry region label (#638) Telemetry metrics should reference the check region, therefore use the region ID extracted from the global check ID. --- internal/scraper/scraper.go | 1 - internal/telemetry/telemeter.go | 18 +++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/internal/scraper/scraper.go b/internal/scraper/scraper.go index 79b36e674..19b7d04c8 100644 --- a/internal/scraper/scraper.go +++ b/internal/scraper/scraper.go @@ -248,7 +248,6 @@ func (s *Scraper) Run(ctx context.Context) { s.telemeter.AddExecution(telemetry.Execution{ LocalTenantID: s.check.TenantId, RegionID: int32(s.check.RegionId), - Region: s.probe.Region, CheckClass: s.check.Class(), Duration: end.Sub(start), }) diff --git a/internal/telemetry/telemeter.go b/internal/telemetry/telemeter.go index 77331f77f..1bece9dd5 100644 --- a/internal/telemetry/telemeter.go +++ b/internal/telemetry/telemeter.go @@ -2,6 +2,7 @@ package telemetry import ( "context" + "strconv" "sync" "time" @@ -40,7 +41,6 @@ type metrics struct { type Execution struct { LocalTenantID int64 RegionID int32 - Region string CheckClass sm.CheckClass Duration time.Duration } @@ -89,11 +89,11 @@ func (t *Telemeter) AddExecution(e Execution) { // If we do not have a pusher for this region, create it l := t.logger.With(). Str("component", "region-pusher"). - Str("agent_instance", t.instance). - Str("region", e.Region). + Str("agentInstance", t.instance). + Int32("regionId", e.RegionID). Logger() labels := prom.Labels{ - "region": e.Region, + "region_id": strconv.FormatInt(int64(e.RegionID), 10), } m := RegionMetrics{ t.metrics.pushRequestsActive.With(labels), @@ -118,7 +118,7 @@ func (t *Telemeter) registerMetrics(registerer prom.Registerer) { Name: "push_requests_active", Help: "Active push telemetry requests", ConstLabels: prom.Labels{"agent_instance": t.instance}, - }, []string{"region"}) + }, []string{"region_id"}) t.metrics.pushRequestsDuration = prom.NewHistogramVec(prom.HistogramOpts{ Namespace: "sm_agent", Subsystem: "telemetry", @@ -126,21 +126,21 @@ func (t *Telemeter) registerMetrics(registerer prom.Registerer) { Help: "Duration of push telemetry requests", Buckets: prom.ExponentialBucketsRange(0.01, 2.0, 10), ConstLabels: prom.Labels{"agent_instance": t.instance}, - }, []string{"region"}) + }, []string{"region_id"}) t.metrics.pushRequestsTotal = prom.NewCounterVec(prom.CounterOpts{ Namespace: "sm_agent", Subsystem: "telemetry", Name: "push_requests_total", Help: "Total count of push telemetry requests", ConstLabels: prom.Labels{"agent_instance": t.instance}, - }, []string{"region"}) + }, []string{"region_id"}) t.metrics.pushRequestsError = prom.NewCounterVec(prom.CounterOpts{ Namespace: "sm_agent", Subsystem: "telemetry", Name: "push_requests_errors_total", Help: "Total count of errored push telemetry requests", ConstLabels: prom.Labels{"agent_instance": t.instance}, - }, []string{"region"}) + }, []string{"region_id"}) t.metrics.addExecutionDuration = prom.NewHistogramVec(prom.HistogramOpts{ Namespace: "sm_agent", @@ -152,7 +152,7 @@ func (t *Telemeter) registerMetrics(registerer prom.Registerer) { NativeHistogramMaxBucketNumber: 100, NativeHistogramMinResetDuration: time.Hour, ConstLabels: prom.Labels{"agent_instance": t.instance}, - }, []string{"region"}) + }, []string{"region_id"}) registerer.MustRegister(t.metrics.pushRequestsActive) registerer.MustRegister(t.metrics.pushRequestsDuration)