Skip to content

Commit

Permalink
MQE: Add support for histogram_avg (#10046)
Browse files Browse the repository at this point in the history
  • Loading branch information
jhesketh authored Nov 29, 2024
1 parent 6ece172 commit b73cba2
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 42 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
* [CHANGE] Querier: The `.` pattern in regular expressions in PromQL matches newline characters. With this change regular expressions like `.*` match strings that include `\n`. To maintain the old behaviour, you will have to change regular expressions by replacing all `.` patterns with `[^\n]`, e.g. `foo[^\n]*`. This upgrades PromQL compatibility from Prometheus 2.0 to 3.0. #9844
* [CHANGE] Querier: Lookback and range selectors are left open and right closed (previously left closed and right closed). This change affects queries when the evaluation time perfectly aligns with the sample timestamps. For example assume querying a timeseries with evenly spaced samples exactly 1 minute apart. Previously, a range query with `5m` would usually return 5 samples, or 6 samples if the query evaluation aligns perfectly with a scrape. Now, queries like this will always return 5 samples. This upgrades PromQL compatibility from Prometheus 2.0 to 3.0. #9844
* [CHANGE] Querier: promql(native histograms): Introduce exponential interpolation. #9844
* [FEATURE] Querier: add experimental streaming PromQL engine, enabled with `-querier.query-engine=mimir`. #9367 #9368 #9398 #9399 #9403 #9417 #9418 #9419 #9420 #9482 #9504 #9505 #9507 #9518 #9531 #9532 #9533 #9553 #9558 #9588 #9589 #9639 #9641 #9642 #9651 #9664 #9681 #9717 #9719 #9724 #9874 #9929 #9998 #10007 #10010
* [FEATURE] Querier: add experimental streaming PromQL engine, enabled with `-querier.query-engine=mimir`. #9367 #9368 #9398 #9399 #9403 #9417 #9418 #9419 #9420 #9482 #9504 #9505 #9507 #9518 #9531 #9532 #9533 #9553 #9558 #9588 #9589 #9639 #9641 #9642 #9651 #9664 #9681 #9717 #9719 #9724 #9874 #9929 #9998 #10007 #10010 #10046
* [FEATURE] Distributor: Add support for `lz4` OTLP compression. #9763
* [FEATURE] Query-frontend: added experimental configuration options `query-frontend.cache-errors` and `query-frontend.results-cache-ttl-for-errors` to allow non-transient responses to be cached. When set to `true` error responses from hitting limits or bad data are cached for a short TTL. #9028
* [FEATURE] Query-frontend: add middleware to control access to specific PromQL experimental functions on a per-tenant basis. #9798
Expand Down
3 changes: 3 additions & 0 deletions pkg/streamingpromql/engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2662,8 +2662,11 @@ func TestCompareVariousMixedMetricsFunctions(t *testing.T) {

for _, labels := range labelCombinations {
labelRegex := strings.Join(labels, "|")
expressions = append(expressions, fmt.Sprintf(`histogram_avg(series{label=~"(%s)"})`, labelRegex))
expressions = append(expressions, fmt.Sprintf(`histogram_count(series{label=~"(%s)"})`, labelRegex))
expressions = append(expressions, fmt.Sprintf(`histogram_quantile(0.8, series{label=~"(%s)"})`, labelRegex))
expressions = append(expressions, fmt.Sprintf(`histogram_quantile(scalar(series{label="i"}), series{label=~"(%s)"})`, labelRegex))
expressions = append(expressions, fmt.Sprintf(`histogram_sum(series{label=~"(%s)"})`, labelRegex))
}

// We skip comparing the annotation results as Prometheus does not output any series name
Expand Down
2 changes: 1 addition & 1 deletion pkg/streamingpromql/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,6 @@ func HistogramQuantileOperatorFactory() InstantVectorFunctionOperatorFactory {
// These functions return an instant-vector.
var instantVectorFunctionOperatorFactories = map[string]InstantVectorFunctionOperatorFactory{
// Please keep this list sorted alphabetically.

"abs": InstantVectorTransformationFunctionOperatorFactory("abs", functions.Abs),
"acos": InstantVectorTransformationFunctionOperatorFactory("acos", functions.Acos),
"acosh": InstantVectorTransformationFunctionOperatorFactory("acosh", functions.Acosh),
Expand All @@ -334,6 +333,7 @@ var instantVectorFunctionOperatorFactories = map[string]InstantVectorFunctionOpe
"deriv": FunctionOverRangeVectorOperatorFactory("deriv", functions.Deriv),
"exp": InstantVectorTransformationFunctionOperatorFactory("exp", functions.Exp),
"floor": InstantVectorTransformationFunctionOperatorFactory("floor", functions.Floor),
"histogram_avg": InstantVectorTransformationFunctionOperatorFactory("histogram_avg", functions.HistogramAvg),
"histogram_count": InstantVectorTransformationFunctionOperatorFactory("histogram_count", functions.HistogramCount),
"histogram_quantile": HistogramQuantileOperatorFactory(),
"histogram_sum": InstantVectorTransformationFunctionOperatorFactory("histogram_sum", functions.HistogramSum),
Expand Down
22 changes: 22 additions & 0 deletions pkg/streamingpromql/operators/functions/native_histograms.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,28 @@ import (
"github.com/grafana/mimir/pkg/streamingpromql/types"
)

func HistogramAvg(seriesData types.InstantVectorSeriesData, _ []types.ScalarData, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) {
floats, err := types.FPointSlicePool.Get(len(seriesData.Histograms), memoryConsumptionTracker)
if err != nil {
return types.InstantVectorSeriesData{}, err
}

data := types.InstantVectorSeriesData{
Floats: floats,
}

for _, histogram := range seriesData.Histograms {
data.Floats = append(data.Floats, promql.FPoint{
T: histogram.T,
F: histogram.H.Sum / histogram.H.Count,
})
}

types.PutInstantVectorSeriesData(seriesData, memoryConsumptionTracker)

return data, nil
}

func HistogramCount(seriesData types.InstantVectorSeriesData, _ []types.ScalarData, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, error) {
floats, err := types.FPointSlicePool.Get(len(seriesData.Histograms), memoryConsumptionTracker)
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions pkg/streamingpromql/testdata/ours/functions.test
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,9 @@ eval_fail instant at 5m exp({__name__=~"float_metric_.*"})
eval_fail instant at 5m floor({__name__=~"float_metric_.*"})
expected_fail_message vector cannot contain metrics with the same labelset

eval_fail instant at 5m histogram_avg({__name__=~"histogram_metric_.*"})
expected_fail_message vector cannot contain metrics with the same labelset

eval_fail instant at 5m histogram_count({__name__=~"histogram_metric_.*"})
expected_fail_message vector cannot contain metrics with the same labelset

Expand Down
18 changes: 18 additions & 0 deletions pkg/streamingpromql/testdata/ours/native_histograms.test
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ eval instant at 5m single_histogram
eval range from 0 to 5m step 1m single_histogram
{__name__="single_histogram"} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:20 count:7 buckets:[9 10 1]}}

# histogram_avg is sum / count.
eval range from 0 to 5m step 1m histogram_avg(single_histogram)
{} 1.25 1.25 1.25 1.25 1.25 2.857142857142857

# histogram_count extracts the count property from the histogram.
eval range from 0 to 5m step 1m histogram_count(single_histogram)
{} 4 4 4 4 4 7
Expand All @@ -30,6 +34,9 @@ load 1m
eval range from 0 to 5m step 1m mixed_metric
{__name__="mixed_metric"} 1 2 3 {{count:4 sum:5 buckets:[1 2 1]}} {{count:6 sum:8 buckets:[1 4 1]}} {{count:6 sum:8 buckets:[1 4 1]}}

eval instant at 3m histogram_avg(mixed_metric)
{} 1.25

eval instant at 3m histogram_count(mixed_metric)
{} 4

Expand All @@ -42,6 +49,9 @@ eval range from 0 to 5m step 1m histogram_count(mixed_metric)
eval range from 0 to 5m step 1m histogram_sum(mixed_metric)
{} _ _ _ 5 8 8

# histogram_avg ignores any float values
eval instant at 2m histogram_avg(mixed_metric)

# histogram_count ignores any float values
eval instant at 2m histogram_count(mixed_metric)

Expand All @@ -61,6 +71,11 @@ eval instant at 0 route
route{path="two"} {{schema:0 sum:10 count:20 buckets:[9 10 1]}}
route{path="three"} {{schema:0 sum:12 count:10 buckets:[3 2 5]}}

eval instant at 0 histogram_avg(route)
{path="one"} 1.25
{path="two"} 0.5
{path="three"} 1.2

eval instant at 0 histogram_count(route)
{path="one"} 4
{path="two"} 20
Expand Down Expand Up @@ -94,6 +109,9 @@ load 1m
eval range from 0 to 8m step 1m mixed_metric
{__name__="mixed_metric"} 1 2 3 3 3 4 5 {{schema:0 sum:18 count:10 buckets:[3 4 3]}} {{schema:0 sum:18 count:10 buckets:[3 4 3]}}

eval range from 0 to 8m step 1m histogram_avg(mixed_metric)
{} _ _ _ _ _ _ _ 1.8 1.8

eval range from 0 to 8m step 1m histogram_count(mixed_metric)
{} _ _ _ _ _ _ _ 10 10

Expand Down
7 changes: 3 additions & 4 deletions pkg/streamingpromql/testdata/upstream/histograms.test
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,9 @@ eval instant at 50m testhistogram3_sum
testhistogram3_sum{start="negative"} 80

# Test histogram_avg. This has no classic equivalent.
# Unsupported by streaming engine.
# eval instant at 50m histogram_avg(testhistogram3)
# {start="positive"} 3
# {start="negative"} 4
eval instant at 50m histogram_avg(testhistogram3)
{start="positive"} 3
{start="negative"} 4

# Test histogram_stddev. This has no classic equivalent.
# Unsupported by streaming engine.
Expand Down
60 changes: 24 additions & 36 deletions pkg/streamingpromql/testdata/upstream/native_histograms.test
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@ eval instant at 1m histogram_count(empty_histogram)
eval instant at 1m histogram_sum(empty_histogram)
{} 0

# Unsupported by streaming engine.
# eval instant at 1m histogram_avg(empty_histogram)
# {} NaN
eval instant at 1m histogram_avg(empty_histogram)
{} NaN

# Unsupported by streaming engine.
# eval instant at 1m histogram_fraction(-Inf, +Inf, empty_histogram)
Expand All @@ -43,9 +42,8 @@ eval instant at 1m histogram_sum(single_histogram)
{} 5

# histogram_avg calculates the average from sum and count properties.
# Unsupported by streaming engine.
# eval instant at 1m histogram_avg(single_histogram)
# {} 1.25
eval instant at 1m histogram_avg(single_histogram)
{} 1.25

# We expect half of the values to fall in the range 1 < x <= 2.
# Unsupported by streaming engine.
Expand Down Expand Up @@ -76,9 +74,8 @@ eval instant at 5m histogram_count(multi_histogram)
eval instant at 5m histogram_sum(multi_histogram)
{} 5

# Unsupported by streaming engine.
# eval instant at 5m histogram_avg(multi_histogram)
# {} 1.25
eval instant at 5m histogram_avg(multi_histogram)
{} 1.25

# Unsupported by streaming engine.
# eval instant at 5m histogram_fraction(1, 2, multi_histogram)
Expand All @@ -96,9 +93,8 @@ eval instant at 50m histogram_count(multi_histogram)
eval instant at 50m histogram_sum(multi_histogram)
{} 5

# Unsupported by streaming engine.
# eval instant at 50m histogram_avg(multi_histogram)
# {} 1.25
eval instant at 50m histogram_avg(multi_histogram)
{} 1.25

# Unsupported by streaming engine.
# eval instant at 50m histogram_fraction(1, 2, multi_histogram)
Expand All @@ -122,9 +118,8 @@ eval instant at 5m histogram_count(incr_histogram)
eval instant at 5m histogram_sum(incr_histogram)
{} 6

# Unsupported by streaming engine.
# eval instant at 5m histogram_avg(incr_histogram)
# {} 1.2
eval instant at 5m histogram_avg(incr_histogram)
{} 1.2

# We expect 3/5ths of the values to fall in the range 1 < x <= 2.
# Unsupported by streaming engine.
Expand All @@ -145,9 +140,8 @@ eval instant at 50m histogram_count(incr_histogram)
eval instant at 50m histogram_sum(incr_histogram)
{} 24

# Unsupported by streaming engine.
# eval instant at 50m histogram_avg(incr_histogram)
# {} 1.7142857142857142
eval instant at 50m histogram_avg(incr_histogram)
{} 1.7142857142857142

# We expect 12/14ths of the values to fall in the range 1 < x <= 2.
# Unsupported by streaming engine.
Expand Down Expand Up @@ -186,9 +180,8 @@ eval instant at 5m histogram_count(low_res_histogram)
eval instant at 5m histogram_sum(low_res_histogram)
{} 8

# Unsupported by streaming engine.
# eval instant at 5m histogram_avg(low_res_histogram)
# {} 1.6
eval instant at 5m histogram_avg(low_res_histogram)
{} 1.6

# We expect all values to fall into the lower-resolution bucket with the range 1 < x <= 4.
# Unsupported by streaming engine.
Expand All @@ -208,9 +201,8 @@ eval instant at 1m histogram_count(single_zero_histogram)
eval instant at 1m histogram_sum(single_zero_histogram)
{} 0.25

# Unsupported by streaming engine.
# eval instant at 1m histogram_avg(single_zero_histogram)
# {} 0.25
eval instant at 1m histogram_avg(single_zero_histogram)
{} 0.25

# When only the zero bucket is populated, or there are negative buckets, the distribution is assumed to be equally
# distributed around zero; i.e. that there are an equal number of positive and negative observations. Therefore the
Expand All @@ -235,9 +227,8 @@ eval instant at 1m histogram_count(negative_histogram)
eval instant at 1m histogram_sum(negative_histogram)
{} -5

# Unsupported by streaming engine.
# eval instant at 1m histogram_avg(negative_histogram)
# {} -1.25
eval instant at 1m histogram_avg(negative_histogram)
{} -1.25

# We expect half of the values to fall in the range -2 < x <= -1.
# Unsupported by streaming engine.
Expand All @@ -261,9 +252,8 @@ eval instant at 5m histogram_count(two_samples_histogram)
eval instant at 5m histogram_sum(two_samples_histogram)
{} -4

# Unsupported by streaming engine.
# eval instant at 5m histogram_avg(two_samples_histogram)
# {} -1
eval instant at 5m histogram_avg(two_samples_histogram)
{} -1

# Unsupported by streaming engine.
# eval instant at 5m histogram_fraction(-2, -1, two_samples_histogram)
Expand All @@ -285,9 +275,8 @@ eval instant at 5m histogram_count(balanced_histogram)
eval instant at 5m histogram_sum(balanced_histogram)
{} 0

# Unsupported by streaming engine.
# eval instant at 5m histogram_avg(balanced_histogram)
# {} 0
eval instant at 5m histogram_avg(balanced_histogram)
{} 0

# Unsupported by streaming engine.
# eval instant at 5m histogram_fraction(0, 4, balanced_histogram)
Expand Down Expand Up @@ -1202,9 +1191,8 @@ eval instant at 5m rate(const_histogram[5m])

# Zero buckets mean no observations, thus the denominator in the average is 0
# leading to 0/0, which is NaN.
# Unsupported by streaming engine.
# eval instant at 5m histogram_avg(rate(const_histogram[5m]))
# {} NaN
eval instant at 5m histogram_avg(rate(const_histogram[5m]))
{} NaN

# Zero buckets mean no observations, so count is 0.
eval instant at 5m histogram_count(rate(const_histogram[5m]))
Expand Down

0 comments on commit b73cba2

Please sign in to comment.