Skip to content

Commit 7c86e65

Browse files
authored
fix: Fixes pattern pruning stability (#13429)
1 parent 2affa48 commit 7c86e65

File tree

3 files changed

+147
-220
lines changed

3 files changed

+147
-220
lines changed

pkg/pattern/ingester_querier.go

+72-44
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,27 @@ import (
55
"errors"
66
"math"
77
"net/http"
8+
"sort"
89

910
"github.com/go-kit/log"
1011
"github.com/go-kit/log/level"
1112
"github.com/grafana/dskit/httpgrpc"
1213
"github.com/grafana/dskit/ring"
1314
"github.com/prometheus/client_golang/prometheus"
15+
"golang.org/x/sync/errgroup"
1416

1517
"github.com/grafana/loki/v3/pkg/logproto"
1618
"github.com/grafana/loki/v3/pkg/logql/syntax"
17-
"github.com/grafana/loki/v3/pkg/pattern/drain"
1819

1920
loki_iter "github.com/grafana/loki/v3/pkg/iter"
2021
pattern_iter "github.com/grafana/loki/v3/pkg/pattern/iter"
2122
)
2223

2324
// TODO(kolesnikovae): parametrise QueryPatternsRequest
24-
const (
	// minClusterSize is a pattern-size threshold.
	// NOTE(review): presumably the value passed to prunePatterns as its
	// minClusterSize argument — confirm at the call site.
	minClusterSize = 30
	// maxPatterns is the upper bound on the number of pattern series that
	// prunePatterns leaves in a response.
	maxPatterns = 300
)

// ErrParseQuery describes a query that is not supported: only unfiltered
// bytes_over_time and count_over_time queries are accepted.
var ErrParseQuery = errors.New("only bytes_over_time and count_over_time queries without filters are supported")
2731

@@ -132,36 +136,63 @@ func (q *IngesterQuerier) querySample(ctx context.Context, req *logproto.QuerySa
132136
return iterators, nil
133137
}
134138

135-
func prunePatterns(resp *logproto.QueryPatternsResponse, minClusterSize int, metrics *ingesterQuerierMetrics) *logproto.QueryPatternsResponse {
136-
pruneConfig := drain.DefaultConfig()
137-
pruneConfig.SimTh = 1.0 // Merge & de-dup patterns but don't modify them
138-
139+
func prunePatterns(resp *logproto.QueryPatternsResponse, minClusterSize int64, metrics *ingesterQuerierMetrics) *logproto.QueryPatternsResponse {
139140
patternsBefore := len(resp.Series)
140-
d := drain.New(pruneConfig, "", nil)
141-
for _, p := range resp.Series {
142-
d.TrainPattern(p.GetPattern(), p.Samples)
141+
total := make([]int64, len(resp.Series))
142+
143+
for i, p := range resp.Series {
144+
for _, s := range p.Samples {
145+
total[i] += s.Value
146+
}
143147
}
144148

145-
resp.Series = resp.Series[:0]
146-
for _, cluster := range d.Clusters() {
147-
if cluster.Size < minClusterSize {
148-
continue
149+
// Create a slice of structs to keep Series and total together
150+
type SeriesWithTotal struct {
151+
Series *logproto.PatternSeries
152+
Total int64
153+
}
154+
155+
seriesWithTotals := make([]SeriesWithTotal, len(resp.Series))
156+
for i := range resp.Series {
157+
seriesWithTotals[i] = SeriesWithTotal{
158+
Series: resp.Series[i],
159+
Total: total[i],
149160
}
150-
pattern := d.PatternString(cluster)
151-
if pattern == "" {
152-
continue
161+
}
162+
163+
// Sort the slice of structs by the Total field
164+
sort.Slice(seriesWithTotals, func(i, j int) bool {
165+
return seriesWithTotals[i].Total > seriesWithTotals[j].Total
166+
})
167+
168+
// Initialize a variable to keep track of the position for valid series
169+
pos := 0
170+
171+
// Iterate over the seriesWithTotals
172+
for i := range seriesWithTotals {
173+
if seriesWithTotals[i].Total >= minClusterSize {
174+
// Place the valid series at the current position
175+
resp.Series[pos] = seriesWithTotals[i].Series
176+
pos++
153177
}
154-
resp.Series = append(resp.Series,
155-
logproto.NewPatternSeries(pattern, cluster.Samples()))
156178
}
179+
180+
// Slice the resp.Series to include only the valid series
181+
resp.Series = resp.Series[:pos]
182+
183+
if len(resp.Series) > maxPatterns {
184+
resp.Series = resp.Series[:maxPatterns]
185+
}
186+
157187
metrics.patternsPrunedTotal.Add(float64(patternsBefore - len(resp.Series)))
158188
metrics.patternsRetainedTotal.Add(float64(len(resp.Series)))
189+
159190
return resp
160191
}
161192

162193
// forAllIngesters runs f, in parallel, for all healthy ingesters in the ring
163194
func (q *IngesterQuerier) forAllIngesters(ctx context.Context, f func(context.Context, logproto.PatternClient) (interface{}, error)) ([]ResponseFromIngesters, error) {
164-
replicationSet, err := q.ringClient.Ring().GetReplicationSetForOperation(ring.Read)
195+
replicationSet, err := q.ringClient.Ring().GetAllHealthy(ring.Read)
165196
if err != nil {
166197
return nil, err
167198
}
@@ -174,32 +205,29 @@ type ResponseFromIngesters struct {
174205
response interface{}
175206
}
176207

177-
// forGivenIngesters runs f, in parallel, for given ingesters
178208
func (q *IngesterQuerier) forGivenIngesters(ctx context.Context, replicationSet ring.ReplicationSet, f func(context.Context, logproto.PatternClient) (interface{}, error)) ([]ResponseFromIngesters, error) {
179-
cfg := ring.DoUntilQuorumConfig{
180-
// Nothing here
181-
}
182-
results, err := ring.DoUntilQuorum(ctx, replicationSet, cfg, func(ctx context.Context, ingester *ring.InstanceDesc) (ResponseFromIngesters, error) {
183-
client, err := q.ringClient.Pool().GetClientFor(ingester.Addr)
184-
if err != nil {
185-
return ResponseFromIngesters{addr: ingester.Addr}, err
186-
}
187-
188-
resp, err := f(ctx, client.(logproto.PatternClient))
189-
if err != nil {
190-
return ResponseFromIngesters{addr: ingester.Addr}, err
191-
}
192-
193-
return ResponseFromIngesters{ingester.Addr, resp}, nil
194-
}, func(ResponseFromIngesters) {
195-
// Nothing to do
196-
})
197-
if err != nil {
209+
g, ctx := errgroup.WithContext(ctx)
210+
responses := make([]ResponseFromIngesters, len(replicationSet.Instances))
211+
212+
for i, ingester := range replicationSet.Instances {
213+
ingester := ingester
214+
i := i
215+
g.Go(func() error {
216+
client, err := q.ringClient.Pool().GetClientFor(ingester.Addr)
217+
if err != nil {
218+
return err
219+
}
220+
221+
resp, err := f(ctx, client.(logproto.PatternClient))
222+
if err != nil {
223+
return err
224+
}
225+
responses[i] = ResponseFromIngesters{addr: ingester.Addr, response: resp}
226+
return nil
227+
})
228+
}
229+
if err := g.Wait(); err != nil {
198230
return nil, err
199231
}
200-
201-
responses := make([]ResponseFromIngesters, 0, len(results))
202-
responses = append(responses, results...)
203-
204-
return responses, err
232+
return responses, nil
205233
}

0 commit comments

Comments
 (0)