From 67bc304ccbddb79959356ae4abbd26d63b7b465a Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Sat, 3 Aug 2024 12:17:51 -0700 Subject: [PATCH 1/6] only increment thanos_rule_evaluation_with_warnings_total metric for non PromQL warnings Signed-off-by: Ben Ye --- cmd/thanos/rule.go | 34 +++++++++++++++++++----- cmd/thanos/rule_test.go | 59 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 6 deletions(-) diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 09c928fb6e..91081be1d4 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -40,6 +40,7 @@ import ( "github.com/prometheus/prometheus/tsdb" "github.com/prometheus/prometheus/tsdb/agent" "github.com/prometheus/prometheus/tsdb/wlog" + "github.com/prometheus/prometheus/util/annotations" "github.com/thanos-io/objstore" "github.com/thanos-io/objstore/client" @@ -80,6 +81,11 @@ import ( const dnsSDResolver = "miekgdns" +var ( + promQLInfo = annotations.PromQLInfo.Error() + promQLWarning = annotations.PromQLWarning.Error() +) + type ruleConfig struct { http httpConfig grpc grpcConfig @@ -292,7 +298,7 @@ func newRuleMetrics(reg *prometheus.Registry) *RuleMetrics { m.ruleEvalWarnings = factory.NewCounterVec( prometheus.CounterOpts{ Name: "thanos_rule_evaluation_with_warnings_total", - Help: "The total number of rule evaluation that were successful but had warnings which can indicate partial error.", + Help: "The total number of rule evaluation that were successful but had non PromQL warnings which can indicate partial error.", }, []string{"strategy"}, ) m.ruleEvalWarnings.WithLabelValues(strings.ToLower(storepb.PartialResponseStrategy_ABORT.String())) @@ -939,6 +945,8 @@ func queryFuncCreator( level.Error(logger).Log("err", err, "query", qs) continue } + + warns = filterOutPromQLWarnings(warns, logger, qs) if len(warns) > 0 { ruleEvalWarnings.WithLabelValues(strings.ToLower(partialResponseStrategy.String())).Inc() // TODO(bwplotka): Propagate those to UI, probably requires changing rule manager code ): @@ -970,12 +978,13 @@ func queryFuncCreator( continue } - if len(result.Warnings) > 0 { + warnings := make([]string, 0, len(result.Warnings)) + for _, warn := range result.Warnings { + warnings = append(warnings, warn.Error()) + } + warnings = filterOutPromQLWarnings(warnings, logger, qs) + if len(warnings) > 0 { ruleEvalWarnings.WithLabelValues(strings.ToLower(partialResponseStrategy.String())).Inc() - warnings := make([]string, 0, len(result.Warnings)) - for _, w := range result.Warnings { - warnings = append(warnings, w.Error()) - } level.Warn(logger).Log("warnings", strings.Join(warnings, ", "), "query", qs) } @@ -1081,3 +1090,16 @@ func validateTemplate(tmplStr string) error { } return nil } + +// Filter out PromQL related warnings from warning response and keep store related warnings only. +func filterOutPromQLWarnings(warns []string, logger log.Logger, query string) []string { + storeWarnings := make([]string, 0, len(warns)) + for _, warn := range warns { + if strings.Contains(warn, promQLInfo) || strings.Contains(warn, promQLWarning) { + level.Warn(logger).Log("warning", warn, "query", query) + continue + } + storeWarnings = append(storeWarnings, warn) + } + return storeWarnings +} diff --git a/cmd/thanos/rule_test.go b/cmd/thanos/rule_test.go index 097b66e65b..550a9f6066 100644 --- a/cmd/thanos/rule_test.go +++ b/cmd/thanos/rule_test.go @@ -4,6 +4,9 @@ package main import ( + "github.com/go-kit/log" + "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/util/annotations" "testing" "github.com/efficientgo/core/testutil" @@ -110,3 +113,59 @@ func Test_tableLinkForExpression(t *testing.T) { testutil.Equals(t, resStr, td.expectStr) } } + +func TestFilterOutPromQLWarnings(t *testing.T) { + logger := log.NewNopLogger() + query := "foo" + expr, err := parser.ParseExpr(`rate(prometheus_build_info[5m])`) + testutil.Ok(t, err) + possibleCounterInfo := annotations.NewPossibleNonCounterInfo("foo", expr.PositionRange()) + badBucketLabelWarning := annotations.NewBadBucketLabelWarning("foo", "0.99", expr.PositionRange()) + for _, tc := range []struct { + name string + warnings []string + expected []string + }{ + { + name: "nil warning", + expected: make([]string, 0), + }, + { + name: "empty warning", + warnings: make([]string, 0), + expected: make([]string, 0), + }, + { + name: "no PromQL warning", + warnings: []string{ + "some_warning_message", + }, + expected: []string{ + "some_warning_message", + }, + }, + { + name: "PromQL warning", + warnings: []string{ + possibleCounterInfo.Error(), + }, + expected: make([]string, 0), + }, + { + name: "filter out all PromQL warnings", + warnings: []string{ + possibleCounterInfo.Error(), + badBucketLabelWarning.Error(), + "some_warning_message", + }, + expected: []string{ + "some_warning_message", + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + output := filterOutPromQLWarnings(tc.warnings, logger, query) + testutil.Equals(t, tc.expected, output) + }) + } +} From 909251b9eb9a2620f9dc07e920d28c4b74bb1a07 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Sat, 3 Aug 2024 12:21:00 -0700 Subject: [PATCH 2/6] update changelog Signed-off-by: Ben Ye --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index adfa54b03e..a12bcd6616 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Fixed * [#7511](https://github.com/thanos-io/thanos/pull/7511) Query Frontend: fix doubled gzip compression for response body. +* [#7592](https://github.com/thanos-io/thanos/pull/7592) Ruler: Only increment `thanos_rule_evaluation_with_warnings_total` metric for non PromQL warnings. ### Added From 74e4ef01fd7abf9792eb8b23794903907f6276cf Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Sat, 3 Aug 2024 12:23:00 -0700 Subject: [PATCH 3/6] lint Signed-off-by: Ben Ye --- cmd/thanos/rule_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/thanos/rule_test.go b/cmd/thanos/rule_test.go index 550a9f6066..54a5c880f2 100644 --- a/cmd/thanos/rule_test.go +++ b/cmd/thanos/rule_test.go @@ -4,12 +4,12 @@ package main import ( - "github.com/go-kit/log" - "github.com/prometheus/prometheus/promql/parser" - "github.com/prometheus/prometheus/util/annotations" "testing" "github.com/efficientgo/core/testutil" + "github.com/go-kit/log" + "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/util/annotations" ) func Test_parseFlagLabels(t *testing.T) { From 31c70e2ee02be0c9aaf4393129f8d79e95761cda Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Sat, 3 Aug 2024 12:40:51 -0700 Subject: [PATCH 4/6] lint Signed-off-by: Ben Ye --- cmd/thanos/rule_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmd/thanos/rule_test.go b/cmd/thanos/rule_test.go index 54a5c880f2..28d1421fa8 100644 --- a/cmd/thanos/rule_test.go +++ b/cmd/thanos/rule_test.go @@ -8,8 +8,9 @@ import ( "github.com/efficientgo/core/testutil" "github.com/go-kit/log" - "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/util/annotations" + + "github.com/thanos-io/thanos/pkg/extpromql" ) func Test_parseFlagLabels(t *testing.T) { @@ -117,7 +118,7 @@ func Test_tableLinkForExpression(t *testing.T) { func TestFilterOutPromQLWarnings(t *testing.T) { logger := log.NewNopLogger() query := "foo" - expr, err := parser.ParseExpr(`rate(prometheus_build_info[5m])`) + expr, err := extpromql.ParseExpr(`rate(prometheus_build_info[5m])`) testutil.Ok(t, err) possibleCounterInfo := annotations.NewPossibleNonCounterInfo("foo", expr.PositionRange()) badBucketLabelWarning := annotations.NewBadBucketLabelWarning("foo", "0.99", expr.PositionRange()) From fcb56764e690d0e21ae4f572851e39b061d61b01 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Sat, 3 Aug 2024 13:41:37 -0700 Subject: [PATCH 5/6] address comment Signed-off-by: Ben Ye --- cmd/thanos/rule.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 91081be1d4..551ecbabdd 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -40,8 +40,6 @@ import ( "github.com/prometheus/prometheus/tsdb" "github.com/prometheus/prometheus/tsdb/agent" "github.com/prometheus/prometheus/tsdb/wlog" - "github.com/prometheus/prometheus/util/annotations" - "github.com/thanos-io/objstore" "github.com/thanos-io/objstore/client" objstoretracing "github.com/thanos-io/objstore/tracing/opentracing" @@ -55,6 +53,7 @@ import ( "github.com/thanos-io/thanos/pkg/component" "github.com/thanos-io/thanos/pkg/discovery/dns" "github.com/thanos-io/thanos/pkg/errutil" + "github.com/thanos-io/thanos/pkg/extannotations" "github.com/thanos-io/thanos/pkg/extgrpc" "github.com/thanos-io/thanos/pkg/extkingpin" "github.com/thanos-io/thanos/pkg/extprom" @@ -81,11 +80,6 @@ import ( const dnsSDResolver = "miekgdns" -var ( - promQLInfo = annotations.PromQLInfo.Error() - promQLWarning = annotations.PromQLWarning.Error() -) - type ruleConfig struct { http httpConfig grpc grpcConfig @@ -1095,7 +1089,7 @@ func validateTemplate(tmplStr string) error { func filterOutPromQLWarnings(warns []string, logger log.Logger, query string) []string { storeWarnings := make([]string, 0, len(warns)) for _, warn := range warns { - if strings.Contains(warn, promQLInfo) || strings.Contains(warn, promQLWarning) { + if extannotations.IsPromQLAnnotation(warn) { level.Warn(logger).Log("warning", warn, "query", query) continue } From 8eb5d1877d23bbc07dd88217af20ef3b902d460d Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Sat, 3 Aug 2024 13:52:15 -0700 Subject: [PATCH 6/6] lint Signed-off-by: Ben Ye --- cmd/thanos/rule.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 551ecbabdd..6c8fbea6e9 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -40,11 +40,12 @@ import ( "github.com/prometheus/prometheus/tsdb" "github.com/prometheus/prometheus/tsdb/agent" "github.com/prometheus/prometheus/tsdb/wlog" + "gopkg.in/yaml.v2" + "github.com/thanos-io/objstore" "github.com/thanos-io/objstore/client" objstoretracing "github.com/thanos-io/objstore/tracing/opentracing" "github.com/thanos-io/promql-engine/execution/parse" - "gopkg.in/yaml.v2" "github.com/thanos-io/thanos/pkg/alert" v1 "github.com/thanos-io/thanos/pkg/api/rule"