grafana · krajorama · Nov 27, 2024 · Nov 26, 2024 · Nov 26, 2024 · Nov 26, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -69,6 +69,7 @@
 * [ENHANCEMENT] Distributor: Initialize ha_tracker cache before ha_tracker and distributor reach running state and begin serving writes. #9826 #9976
 * [ENHANCEMENT] Ingester: `-ingest-storage.kafka.max-buffered-bytes` to limit the memory for buffered records when using concurrent fetching. #9892
 * [ENHANCEMENT] Querier: improve performance and memory consumption of queries that select many series. #9914
+* [ENHANCEMENT] Distributor: allow a different higher limit for info series (series ending in `_info`) label count. #10028
 * [BUGFIX] Fix issue where functions such as `rate()` over native histograms could return incorrect values if a float stale marker was present in the selected range. #9508
 * [BUGFIX] Fix issue where negation of native histograms (eg. `-some_native_histogram_series`) did nothing. #9508
 * [BUGFIX] Fix issue where `metric might not be a counter, name does not end in _total/_sum/_count/_bucket` annotation would be emitted even if `rate` or `increase` did not have enough samples to compute a result. #9508

@@ -3795,6 +3795,16 @@
           "fieldFlag": "validation.max-label-names-per-series",
           "fieldType": "int"
         },
+        {
+          "kind": "field",
+          "name": "max_label_names_per_info_series",
+          "required": false,
+          "desc": "Maximum number of label names per info series.",
+          "fieldValue": null,
+          "fieldDefaultValue": 80,
+          "fieldFlag": "validation.max-label-names-per-info-series",
+          "fieldType": "int"
+        },
         {
           "kind": "field",
           "name": "max_metadata_length",

@@ -3307,6 +3307,8 @@ Usage of ./cmd/mimir/mimir:
     	Controls how far into the future incoming samples and exemplars are accepted compared to the wall clock. Any sample or exemplar will be rejected if its timestamp is greater than '(now + creation_grace_period)'. This configuration is enforced in the distributor and ingester. (default 10m)
   -validation.enforce-metadata-metric-name
     	Enforce every metadata has a metric name. (default true)
+  -validation.max-label-names-per-info-series int
+    	Maximum number of label names per info series. (default 80)
   -validation.max-label-names-per-series int
     	Maximum number of label names per series. (default 30)
   -validation.max-length-label-name int

@@ -853,6 +853,8 @@ Usage of ./cmd/mimir/mimir:
     	Enable anonymous usage reporting. (default true)
   -usage-stats.installation-mode string
     	Installation mode. Supported values: custom, helm, jsonnet. (default "custom")
+  -validation.max-label-names-per-info-series int
+    	Maximum number of label names per info series. (default 80)
   -validation.max-label-names-per-series int
     	Maximum number of label names per series. (default 30)
   -validation.max-length-label-name int

@@ -3208,6 +3208,10 @@ The `limits` block configures default and per-tenant limits imposed by component
 # CLI flag: -validation.max-label-names-per-series
 [max_label_names_per_series: <int> | default = 30]
 
+# Maximum number of label names per info series.
+# CLI flag: -validation.max-label-names-per-info-series
+[max_label_names_per_info_series: <int> | default = 80]
+
 # Maximum length accepted for metric metadata. Metadata refers to Metric Name,
 # HELP and UNIT. Longer metadata is dropped except for HELP which is truncated.
 # CLI flag: -validation.max-metadata-length

@@ -1729,6 +1729,16 @@ The limit protects the system’s stability from potential abuse or mistakes. To
 Invalid series are skipped during the ingestion, and valid series within the same request are ingested.
 {{< /admonition >}}
 
+### err-mimir-max-label-names-per-info-series
+
+This non-critical error occurs when Mimir receives a write request that contains an info series with a number of labels that exceed the configured limit.
+An info series is a series where the metric name ends in `_info`.
+The limit protects the system’s stability from potential abuse or mistakes. To configure the limit on a per-tenant basis, use the `-validation.max-label-names-per-info-series` option.
-The limit protects the system’s stability from potential abuse or mistakes. To configure the limit on a per-tenant basis, use the `-validation.max-label-names-per-info-series` option.
+The limit protects the system’s stability from potential abuse or mistakes. To configure the limit on a per-tenant basis, use the `validation.max_label_names_per_info_series` option.
-The limit protects the system’s stability from potential abuse or mistakes. To configure the limit on a per-tenant basis, use the `-validation.max-label-names-per-info-series` option.
+The limit protects the system’s stability from potential abuse or mistakes. To configure the limit on a per-tenant basis, use the `validation.max_label_names_per_info_series` option.
+
+{{< admonition type="note" >}}
+Invalid series are skipped during the ingestion, and valid series within the same request are ingested.
+{{< /admonition >}}
+
 ### err-mimir-max-native-histogram-buckets
 
 This non-critical error occurs when Mimir receives a write request that contains a sample that is a native histogram that has too many observation buckets.

@@ -33,6 +33,7 @@ var (
 	reasonMissingMetricName            = globalerror.MissingMetricName.LabelValue()
 	reasonInvalidMetricName            = globalerror.InvalidMetricName.LabelValue()
 	reasonMaxLabelNamesPerSeries       = globalerror.MaxLabelNamesPerSeries.LabelValue()
+	reasonMaxLabelNamesPerInfoSeries   = globalerror.MaxLabelNamesPerInfoSeries.LabelValue()
 	reasonInvalidLabel                 = globalerror.SeriesInvalidLabel.LabelValue()
 	reasonInvalidLabelValue            = globalerror.SeriesInvalidLabelValue.LabelValue()
 	reasonLabelNameTooLong             = globalerror.SeriesLabelNameTooLong.LabelValue()
@@ -74,10 +75,16 @@ var (
 	invalidLabelMsgFormat      = globalerror.SeriesInvalidLabel.Message("received a series with an invalid label: '%.200s' series: '%.200s'")
 	invalidLabelValueMsgFormat = globalerror.SeriesInvalidLabelValue.Message("received a series with invalid value in label '%.200s': '%.200s' metric: '%.200s'")
 	duplicateLabelMsgFormat    = globalerror.SeriesWithDuplicateLabelNames.Message("received a series with duplicate label name, label: '%.200s' series: '%.200s'")
-	tooManyLabelsMsgFormat     = globalerror.MaxLabelNamesPerSeries.MessageWithPerTenantLimitConfig(
+
+	tooManyLabelsMsgFormat = globalerror.MaxLabelNamesPerSeries.MessageWithPerTenantLimitConfig(
 		"received a series whose number of labels exceeds the limit (actual: %d, limit: %d) series: '%.200s%s'",
 		validation.MaxLabelNamesPerSeriesFlag,
 	)
+	tooManyInfoLabelsMsgFormat = globalerror.MaxLabelNamesPerSeries.MessageWithPerTenantLimitConfig(
+		"received an info series whose number of labels exceeds the limit (actual: %d, limit: %d) series: '%.200s%s'",
+		validation.MaxLabelNamesPerSeriesFlag,
+	)
+
 	noMetricNameMsgFormat                 = globalerror.MissingMetricName.Message("received series has no metric name")
 	invalidMetricNameMsgFormat            = globalerror.InvalidMetricName.Message("received a series with invalid metric name: '%.200s'")
 	maxNativeHistogramBucketsMsgFormat    = globalerror.MaxNativeHistogramBuckets.Message("received a native histogram sample with too many buckets, timestamp: %d series: %s, buckets: %d, limit: %d")
@@ -126,6 +133,7 @@ type sampleValidationMetrics struct {
 	missingMetricName            *prometheus.CounterVec
 	invalidMetricName            *prometheus.CounterVec
 	maxLabelNamesPerSeries       *prometheus.CounterVec
+	maxLabelNamesPerInfoSeries   *prometheus.CounterVec
 	invalidLabel                 *prometheus.CounterVec
 	invalidLabelValue            *prometheus.CounterVec
 	labelNameTooLong             *prometheus.CounterVec
@@ -142,6 +150,7 @@ func (m *sampleValidationMetrics) deleteUserMetrics(userID string) {
 	m.missingMetricName.DeletePartialMatch(filter)
 	m.invalidMetricName.DeletePartialMatch(filter)
 	m.maxLabelNamesPerSeries.DeletePartialMatch(filter)
+	m.maxLabelNamesPerInfoSeries.DeletePartialMatch(filter)
 	m.invalidLabel.DeletePartialMatch(filter)
 	m.invalidLabelValue.DeletePartialMatch(filter)
 	m.labelNameTooLong.DeletePartialMatch(filter)
@@ -157,6 +166,7 @@ func (m *sampleValidationMetrics) deleteUserMetricsForGroup(userID, group string
 	m.missingMetricName.DeleteLabelValues(userID, group)
 	m.invalidMetricName.DeleteLabelValues(userID, group)
 	m.maxLabelNamesPerSeries.DeleteLabelValues(userID, group)
+	m.maxLabelNamesPerInfoSeries.DeleteLabelValues(userID, group)
 	m.invalidLabel.DeleteLabelValues(userID, group)
 	m.invalidLabelValue.DeleteLabelValues(userID, group)
 	m.labelNameTooLong.DeleteLabelValues(userID, group)
@@ -173,6 +183,7 @@ func newSampleValidationMetrics(r prometheus.Registerer) *sampleValidationMetric
 		missingMetricName:            validation.DiscardedSamplesCounter(r, reasonMissingMetricName),
 		invalidMetricName:            validation.DiscardedSamplesCounter(r, reasonInvalidMetricName),
 		maxLabelNamesPerSeries:       validation.DiscardedSamplesCounter(r, reasonMaxLabelNamesPerSeries),
+		maxLabelNamesPerInfoSeries:   validation.DiscardedSamplesCounter(r, reasonMaxLabelNamesPerInfoSeries),
 		invalidLabel:                 validation.DiscardedSamplesCounter(r, reasonInvalidLabel),
 		invalidLabelValue:            validation.DiscardedSamplesCounter(r, reasonInvalidLabelValue),
 		labelNameTooLong:             validation.DiscardedSamplesCounter(r, reasonLabelNameTooLong),
@@ -349,6 +360,7 @@ func validateExemplarTimestamp(m *exemplarValidationMetrics, userID string, minT
 // labelValidationConfig helps with getting required config to validate labels.
 type labelValidationConfig interface {
 	MaxLabelNamesPerSeries(userID string) int
+	MaxLabelNamesPerInfoSeries(userID string) int
 	MaxLabelNameLength(userID string) int
 	MaxLabelValueLength(userID string) int
 }
@@ -387,9 +399,17 @@ func validateLabels(m *sampleValidationMetrics, cfg labelValidationConfig, userI
 	}
 
 	if !skipLabelCountValidation && len(ls) > cfg.MaxLabelNamesPerSeries(userID) {
-		m.maxLabelNamesPerSeries.WithLabelValues(userID, group).Inc()
-		metric, ellipsis := getMetricAndEllipsis(ls)
-		return fmt.Errorf(tooManyLabelsMsgFormat, len(ls), cfg.MaxLabelNamesPerSeries(userID), metric, ellipsis)
+		if strings.HasSuffix(unsafeMetricName, "_info") {
+			if len(ls) > cfg.MaxLabelNamesPerInfoSeries(userID) {
+				m.maxLabelNamesPerInfoSeries.WithLabelValues(userID, group).Inc()
+				metric, ellipsis := getMetricAndEllipsis(ls)
+				return fmt.Errorf(tooManyInfoLabelsMsgFormat, len(ls), cfg.MaxLabelNamesPerInfoSeries(userID), metric, ellipsis)
+			}
+		} else {
+			m.maxLabelNamesPerSeries.WithLabelValues(userID, group).Inc()
+			metric, ellipsis := getMetricAndEllipsis(ls)
+			return fmt.Errorf(tooManyLabelsMsgFormat, len(ls), cfg.MaxLabelNamesPerSeries(userID), metric, ellipsis)
+		}
 	}
 
 	maxLabelNameLength := cfg.MaxLabelNameLength(userID)

@@ -24,15 +24,20 @@ import (
 )
 
 type validateLabelsCfg struct {
-	maxLabelNamesPerSeries int
-	maxLabelNameLength     int
-	maxLabelValueLength    int
+	maxLabelNamesPerSeries     int
+	maxLabelNamesPerInfoSeries int
+	maxLabelNameLength         int
+	maxLabelValueLength        int
 }
 
 func (v validateLabelsCfg) MaxLabelNamesPerSeries(_ string) int {
 	return v.maxLabelNamesPerSeries
 }
 
+func (v validateLabelsCfg) MaxLabelNamesPerInfoSeries(_ string) int {
+	return v.maxLabelNamesPerInfoSeries
+}
+
 func (v validateLabelsCfg) MaxLabelNameLength(_ string) int {
 	return v.maxLabelNameLength
 }
@@ -64,6 +69,7 @@ func TestValidateLabels(t *testing.T) {
 	cfg.maxLabelValueLength = 25
 	cfg.maxLabelNameLength = 25
 	cfg.maxLabelNamesPerSeries = 2
+	cfg.maxLabelNamesPerInfoSeries = 3
 
 	for _, c := range []struct {
 		metric                   model.Metric
@@ -157,6 +163,31 @@ func TestValidateLabels(t *testing.T) {
 				)...,
 			),
 		},
+		{
+			// *_info metrics have higher label limits.
+			metric:                   map[model.LabelName]model.LabelValue{model.MetricNameLabel: "foo_info", "bar": "baz", "blip": "blop"},
+			skipLabelNameValidation:  false,
+			skipLabelCountValidation: false,
+			err:                      nil,
+		},
+		{
+			// *_info metrics have higher label limits.
+			metric:                   map[model.LabelName]model.LabelValue{model.MetricNameLabel: "foo_info", "bar": "baz", "blip": "blop", "blap": "blup"},
+			skipLabelNameValidation:  false,
+			skipLabelCountValidation: false,
+			err: fmt.Errorf(
+				tooManyInfoLabelsMsgFormat,
+				tooManyLabelsArgs(
+					[]mimirpb.LabelAdapter{
+						{Name: model.MetricNameLabel, Value: "foo_info"},
+						{Name: "bar", Value: "baz"},
+						{Name: "blip", Value: "blop"},
+						{Name: "blap", Value: "blup"},
+					},
+					3,
+				)...,
+			),
+		},
 		{
 			metric:                   map[model.LabelName]model.LabelValue{model.MetricNameLabel: "foo", "bar": "baz", "blip": "blop"},
 			skipLabelNameValidation:  false,
@@ -206,6 +237,7 @@ func TestValidateLabels(t *testing.T) {
 			cortex_discarded_samples_total{group="custom label",reason="label_value_invalid",user="testUser"} 1
 			cortex_discarded_samples_total{group="custom label",reason="label_value_too_long",user="testUser"} 1
 			cortex_discarded_samples_total{group="custom label",reason="max_label_names_per_series",user="testUser"} 1
+			cortex_discarded_samples_total{group="custom label",reason="max_label_names_per_info_series",user="testUser"} 1
 			cortex_discarded_samples_total{group="custom label",reason="metric_name_invalid",user="testUser"} 2
 			cortex_discarded_samples_total{group="custom label",reason="missing_metric_name",user="testUser"} 1
 			cortex_discarded_samples_total{group="custom label",reason="random reason",user="different user"} 1

@@ -17,6 +17,7 @@ const (
 	MissingMetricName                     ID = "missing-metric-name"
 	InvalidMetricName                     ID = "metric-name-invalid"
 	MaxLabelNamesPerSeries                ID = "max-label-names-per-series"
+	MaxLabelNamesPerInfoSeries            ID = "max-label-names-per-info-series"
 	MaxNativeHistogramBuckets             ID = "max-native-histogram-buckets"
 	NotReducibleNativeHistogram           ID = "not-reducible-native-histogram"
 	InvalidSchemaNativeHistogram          ID = "invalid-native-histogram-schema"

@@ -39,6 +39,7 @@ const (
 	MaxEstimatedChunksPerQueryMultiplierFlag  = "querier.max-estimated-fetched-chunks-per-query-multiplier"
 	MaxEstimatedMemoryConsumptionPerQueryFlag = "querier.max-estimated-memory-consumption-per-query"
 	MaxLabelNamesPerSeriesFlag                = "validation.max-label-names-per-series"
+	MaxLabelNamesPerInfoSeriesFlag            = "validation.max-label-names-per-info-series"
 	MaxLabelNameLengthFlag                    = "validation.max-length-label-name"
 	MaxLabelValueLengthFlag                   = "validation.max-length-label-value"
 	MaxMetadataLengthFlag                     = "validation.max-metadata-length"
@@ -113,6 +114,7 @@ type Limits struct {
 	MaxLabelNameLength                          int                 `yaml:"max_label_name_length" json:"max_label_name_length"`
 	MaxLabelValueLength                         int                 `yaml:"max_label_value_length" json:"max_label_value_length"`
 	MaxLabelNamesPerSeries                      int                 `yaml:"max_label_names_per_series" json:"max_label_names_per_series"`
+	MaxLabelNamesPerInfoSeries                  int                 `yaml:"max_label_names_per_info_series" json:"max_label_names_per_info_series"`
 	MaxMetadataLength                           int                 `yaml:"max_metadata_length" json:"max_metadata_length"`
 	MaxNativeHistogramBuckets                   int                 `yaml:"max_native_histogram_buckets" json:"max_native_histogram_buckets"`
 	MaxExemplarsPerSeriesPerRequest             int                 `yaml:"max_exemplars_per_series_per_request" json:"max_exemplars_per_series_per_request" category:"experimental"`
@@ -264,6 +266,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
 	f.IntVar(&l.MaxLabelNameLength, MaxLabelNameLengthFlag, 1024, "Maximum length accepted for label names")
 	f.IntVar(&l.MaxLabelValueLength, MaxLabelValueLengthFlag, 2048, "Maximum length accepted for label value. This setting also applies to the metric name")
 	f.IntVar(&l.MaxLabelNamesPerSeries, MaxLabelNamesPerSeriesFlag, 30, "Maximum number of label names per series.")
+	f.IntVar(&l.MaxLabelNamesPerInfoSeries, MaxLabelNamesPerInfoSeriesFlag, 80, "Maximum number of label names per info series.")
 	f.IntVar(&l.MaxMetadataLength, MaxMetadataLengthFlag, 1024, "Maximum length accepted for metric metadata. Metadata refers to Metric Name, HELP and UNIT. Longer metadata is dropped except for HELP which is truncated.")
 	f.IntVar(&l.MaxNativeHistogramBuckets, maxNativeHistogramBucketsFlag, 0, "Maximum number of buckets per native histogram sample. 0 to disable the limit.")
 	f.IntVar(&l.MaxExemplarsPerSeriesPerRequest, "distributor.max-exemplars-per-series-per-request", 0, "Maximum number of exemplars per series per request. 0 to disable limit in request. The exceeding exemplars are dropped.")
@@ -596,6 +599,11 @@ func (o *Overrides) MaxLabelNamesPerSeries(userID string) int {
 	return o.getOverridesForUser(userID).MaxLabelNamesPerSeries
 }
 
+// MaxLabelNamesPerInfoSeries returns maximum number of label/value pairs for info timeseries.
+func (o *Overrides) MaxLabelNamesPerInfoSeries(userID string) int {
+	return o.getOverridesForUser(userID).MaxLabelNamesPerInfoSeries
+}
+
 // MaxMetadataLength returns maximum length metadata can be. Metadata refers
 // to the Metric Name, HELP and UNIT.
 func (o *Overrides) MaxMetadataLength(userID string) int {