Skip to content

Commit 30df31e

Browse files
authored
feat: Tune Patterns query drain instance (#13137)
1 parent 6e119aa commit 30df31e

File tree

4 files changed

+74
-17
lines changed

4 files changed

+74
-17
lines changed

pkg/pattern/drain/drain.go

+1 -1
Original file line number | Diff line number | Diff line change
@@ -220,7 +220,7 @@ func (d *Drain) train(tokens []string, stringer func([]string) string, ts int64)
220220

221221
func (d *Drain) TrainPattern(content string, samples []*logproto.PatternSample) *LogCluster {
222222
tokens := deduplicatePlaceholders(d.tokenizer.Tokenize(content), d.config.ParamString)
223-
matchCluster := d.treeSearch(d.rootNode, tokens, d.config.SimTh, false)
223+
matchCluster := d.treeSearch(d.rootNode, tokens, d.config.SimTh, true)
224224
// Match no existing log cluster
225225
if matchCluster == nil {
226226
d.clustersCounter++

pkg/pattern/ingester_querier.go

+16 -8
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,8 @@ type IngesterQuerier struct {
2525

2626
ringClient *RingClient
2727

28-
registerer prometheus.Registerer
28+
registerer prometheus.Registerer
29+
ingesterQuerierMetrics *ingesterQuerierMetrics
2930
}
3031

3132
func NewIngesterQuerier(
@@ -36,10 +37,11 @@ func NewIngesterQuerier(
3637
logger log.Logger,
3738
) (*IngesterQuerier, error) {
3839
return &IngesterQuerier{
39-
logger: log.With(logger, "component", "pattern-ingester-querier"),
40-
ringClient: ringClient,
41-
cfg: cfg,
42-
registerer: prometheus.WrapRegistererWithPrefix(metricsNamespace+"_", registerer),
40+
logger: log.With(logger, "component", "pattern-ingester-querier"),
41+
ringClient: ringClient,
42+
cfg: cfg,
43+
registerer: prometheus.WrapRegistererWithPrefix(metricsNamespace+"_", registerer),
44+
ingesterQuerierMetrics: newIngesterQuerierMetrics(registerer, metricsNamespace),
4345
}, nil
4446
}
4547

@@ -63,11 +65,15 @@ func (q *IngesterQuerier) Patterns(ctx context.Context, req *logproto.QueryPatte
6365
if err != nil {
6466
return nil, err
6567
}
66-
return prunePatterns(resp, minClusterSize), nil
68+
return prunePatterns(resp, minClusterSize, q.ingesterQuerierMetrics), nil
6769
}
6870

69-
func prunePatterns(resp *logproto.QueryPatternsResponse, minClusterSize int) *logproto.QueryPatternsResponse {
70-
d := drain.New(drain.DefaultConfig(), nil)
71+
func prunePatterns(resp *logproto.QueryPatternsResponse, minClusterSize int, metrics *ingesterQuerierMetrics) *logproto.QueryPatternsResponse {
72+
pruneConfig := drain.DefaultConfig()
73+
pruneConfig.SimTh = 1.0 // Merge & de-dup patterns but don't modify them
74+
75+
patternsBefore := len(resp.Series)
76+
d := drain.New(pruneConfig, nil)
7177
for _, p := range resp.Series {
7278
d.TrainPattern(p.Pattern, p.Samples)
7379
}
@@ -86,6 +92,8 @@ func prunePatterns(resp *logproto.QueryPatternsResponse, minClusterSize int) *lo
8692
Samples: cluster.Samples(),
8793
})
8894
}
95+
metrics.patternsPrunedTotal.Add(float64(patternsBefore - len(resp.Series)))
96+
metrics.patternsRetainedTotal.Add(float64(len(resp.Series)))
8997
return resp
9098
}
9199

pkg/pattern/ingester_querier_test.go

+35 -8
Original file line number | Diff line number | Diff line change
@@ -5,14 +5,15 @@ import (
55
"os"
66
"testing"
77

8+
"github.com/prometheus/client_golang/prometheus"
89
"github.com/stretchr/testify/require"
910
"golang.org/x/exp/slices"
1011

1112
"github.com/grafana/loki/v3/pkg/logproto"
1213
)
1314

1415
func Test_prunePatterns(t *testing.T) {
15-
file, err := os.Open(`testdata/patterns.txt`)
16+
file, err := os.Open("testdata/patterns.txt")
1617
require.NoError(t, err)
1718
defer file.Close()
1819

@@ -24,15 +25,40 @@ func Test_prunePatterns(t *testing.T) {
2425
})
2526
}
2627
require.NoError(t, scanner.Err())
27-
prunePatterns(resp, 0)
28+
29+
startingPatterns := len(resp.Series)
30+
prunePatterns(resp, 0, newIngesterQuerierMetrics(prometheus.DefaultRegisterer, `test`))
2831

2932
expectedPatterns := []string{
30-
`<_> caller=aggregator.go:139 level=info msg="received kafka message" topic=cortex-dev-01-aggregations partition=<_>`,
31-
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" <_> partitionID=<_> +0000 UTC, <_>`,
32-
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" result="user=9960, partitionID=<_> +0000 UTC, <_>`,
33-
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" result="user=9960, partitionID=<_> sampleTimestamp=2024-04-03 <_> +0000 UTC, <_>`,
34-
`<_> caller=offset_committer.go:174 level=info msg="partition offset committer committed offset" topic=cortex-dev-01-aggregations partition=<_> +0000 UTC" <_> +0000 UTC" <_> currentBuckets="unsupported value type"`,
35-
`<_> caller=offset_committer.go:174 level=info msg="partition offset committer committed offset" topic=cortex-dev-01-aggregations partition=<_> handledMessageTime="2024-04-03 <_> +0000 UTC" <_> +0000 UTC" <_> currentBuckets="unsupported value type"`,
33+
`<_> caller=aggregator.go:139 level=info msg="received kafka message" topic=cortex-dev-01-aggregations partition=0 <_>`,
34+
`<_> caller=aggregator.go:139 level=info msg="received kafka message" topic=cortex-dev-01-aggregations partition=1 <_>`,
35+
`<_> caller=aggregator.go:139 level=info msg="received kafka message" topic=cortex-dev-01-aggregations partition=2 <_>`,
36+
`<_> caller=aggregator.go:139 level=info msg="received kafka message" topic=cortex-dev-01-aggregations partition=3 <_>`,
37+
`<_> caller=aggregator.go:139 level=info msg="received kafka message" topic=cortex-dev-01-aggregations partition=4 <_>`,
38+
`<_> caller=aggregator.go:139 level=info msg="received kafka message" topic=cortex-dev-01-aggregations partition=5 <_>`,
39+
`<_> caller=aggregator.go:139 level=info msg="received kafka message" topic=cortex-dev-01-aggregations partition=6 <_>`,
40+
`<_> caller=aggregator.go:139 level=info msg="received kafka message" topic=cortex-dev-01-aggregations partition=7 <_>`,
41+
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" <_> partitionID=0, <_> +0000 UTC, <_>`,
42+
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" <_> partitionID=7, <_> +0000 UTC, <_>`,
43+
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" result="user=9960, partitionID=0, <_> +0000 UTC, <_>`,
44+
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" result="user=9960, partitionID=1, <_> +0000 UTC, <_>`,
45+
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" result="user=9960, partitionID=2, <_> +0000 UTC, <_>`,
46+
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" result="user=9960, partitionID=3, <_> +0000 UTC, <_>`,
47+
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" result="user=9960, partitionID=3, <_> sampleTimestamp=2024-04-03 <_> +0000 UTC, <_>`,
48+
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" result="user=9960, partitionID=4, <_> +0000 UTC, <_>`,
49+
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" result="user=9960, partitionID=4, <_> sampleTimestamp=2024-04-03 <_> +0000 UTC, <_>`,
50+
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" result="user=9960, partitionID=5, <_> +0000 UTC, <_>`,
51+
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" result="user=9960, partitionID=5, <_> sampleTimestamp=2024-04-03 <_> +0000 UTC, <_>`,
52+
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" result="user=9960, partitionID=6, <_> +0000 UTC, <_>`,
53+
`<_> caller=batcher.go:155 level=info msg="batcher:processing aggregation result" result="user=9960, partitionID=7, <_> +0000 UTC, <_>`,
54+
`<_> caller=offset_committer.go:174 level=info msg="partition offset committer committed offset" topic=cortex-dev-01-aggregations partition=0 <_> +0000 UTC" <_> +0000 UTC" <_> currentBuckets="unsupported value type"`,
55+
`<_> caller=offset_committer.go:174 level=info msg="partition offset committer committed offset" topic=cortex-dev-01-aggregations partition=1 <_> +0000 UTC" <_> +0000 UTC" <_> currentBuckets="unsupported value type"`,
56+
`<_> caller=offset_committer.go:174 level=info msg="partition offset committer committed offset" topic=cortex-dev-01-aggregations partition=2 <_> +0000 UTC" <_> +0000 UTC" <_> currentBuckets="unsupported value type"`,
57+
`<_> caller=offset_committer.go:174 level=info msg="partition offset committer committed offset" topic=cortex-dev-01-aggregations partition=3 handledMessageTime="2024-04-03 <_> +0000 UTC" <_> +0000 UTC" <_> currentBuckets="unsupported value type"`,
58+
`<_> caller=offset_committer.go:174 level=info msg="partition offset committer committed offset" topic=cortex-dev-01-aggregations partition=4 handledMessageTime="2024-04-03 <_> +0000 UTC" <_> +0000 UTC" <_> currentBuckets="unsupported value type"`,
59+
`<_> caller=offset_committer.go:174 level=info msg="partition offset committer committed offset" topic=cortex-dev-01-aggregations partition=5 handledMessageTime="2024-04-03 <_> +0000 UTC" <_> +0000 UTC" <_> currentBuckets="unsupported value type"`,
60+
`<_> caller=offset_committer.go:174 level=info msg="partition offset committer committed offset" topic=cortex-dev-01-aggregations partition=6 <_> +0000 UTC" <_> +0000 UTC" <_> currentBuckets="unsupported value type"`,
61+
`<_> caller=offset_committer.go:174 level=info msg="partition offset committer committed offset" topic=cortex-dev-01-aggregations partition=7 <_> +0000 UTC" <_> +0000 UTC" <_> currentBuckets="unsupported value type"`,
3662
`<_> caller=wrapper.go:48 level=info component=distributor msg="sample remote write" eventType=bi <_>`,
3763
}
3864

@@ -43,4 +69,5 @@ func Test_prunePatterns(t *testing.T) {
4369
slices.Sort(patterns)
4470

4571
require.Equal(t, expectedPatterns, patterns)
72+
require.Less(t, len(patterns), startingPatterns, `prunePatterns should remove duplicates`)
4673
}

pkg/pattern/metrics.go

+22
Original file line number | Diff line number | Diff line change
@@ -33,3 +33,25 @@ func newIngesterMetrics(r prometheus.Registerer, metricsNamespace string) *inges
3333
}),
3434
}
3535
}
36+
37+
type ingesterQuerierMetrics struct {
38+
patternsPrunedTotal prometheus.Counter
39+
patternsRetainedTotal prometheus.Counter
40+
}
41+
42+
func newIngesterQuerierMetrics(r prometheus.Registerer, metricsNamespace string) *ingesterQuerierMetrics {
43+
return &ingesterQuerierMetrics{
44+
patternsPrunedTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
45+
Namespace: metricsNamespace,
46+
Subsystem: "pattern_ingester",
47+
Name: "query_pruned_total",
48+
Help: "The total number of patterns removed at query time by the pruning Drain instance",
49+
}),
50+
patternsRetainedTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
51+
Namespace: metricsNamespace,
52+
Subsystem: "pattern_ingester",
53+
Name: "query_retained_total",
54+
Help: "The total number of patterns retained at query time by the pruning Drain instance",
55+
}),
56+
}
57+
}

0 commit comments

Comments
 (0)