From 5a0c48153e2a013b991831ed0aafebb8423b171d Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Wed, 6 Nov 2024 20:31:33 +0100 Subject: [PATCH] Fix regression in bloom gateway that caused nothing to be filtered Pull request #14661 added the series labels to the FilterChunkRefs request in order to be able to filter out full series that do not contain the filter key when using bloom filters. However, this chage incorrectly assumed that `Chunk.Metric` from the `GetChunks()` call contains the labels of the stream. This information is only populated once the chunk is fetched, though. Therefore the list of labels was empty, and no chunk refs were filtered. Signed-off-by: Christian Haudum --- pkg/indexgateway/gateway.go | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/pkg/indexgateway/gateway.go b/pkg/indexgateway/gateway.go index dfeb2935918d7..f535c7fce3776 100644 --- a/pkg/indexgateway/gateway.go +++ b/pkg/indexgateway/gateway.go @@ -209,6 +209,8 @@ func buildResponses(query seriesindex.Query, batch seriesindex.ReadBatchResult, func (g *Gateway) GetChunkRef(ctx context.Context, req *logproto.GetChunkRefRequest) (result *logproto.GetChunkRefResponse, err error) { logger := util_log.WithContext(ctx, g.log) + sp, ctx := opentracing.StartSpanFromContext(ctx, "indexgateway.GetChunkRef") + defer sp.Finish() instanceID, err := tenant.TenantID(ctx) if err != nil { @@ -225,16 +227,12 @@ func (g *Gateway) GetChunkRef(ctx context.Context, req *logproto.GetChunkRefRequ return nil, err } - series := make(map[uint64]labels.Labels) result = &logproto.GetChunkRefResponse{ Refs: make([]*logproto.ChunkRef, 0, len(chunks)), } for _, cs := range chunks { for i := range cs { result.Refs = append(result.Refs, &cs[i].ChunkRef) - if _, ok := series[cs[i].Fingerprint]; !ok { - series[cs[i].Fingerprint] = cs[i].Metric - } } } @@ -261,10 +259,22 @@ func (g *Gateway) GetChunkRef(ctx context.Context, req *logproto.GetChunkRefRequ return result, nil } - chunkRefs, used, err := g.bloomQuerier.FilterChunkRefs(ctx, instanceID, req.From, req.Through, series, result.Refs, req.Plan) + // Doing a "duplicate" index lookup is not ideal, + // however, modifying the GetChunkRef() response, which contains the logproto.ChunkRef is neither. + start := time.Now() + series, err := g.indexQuerier.GetSeries(ctx, instanceID, req.From, req.Through, matchers...) + seriesMap := make(map[uint64]labels.Labels, len(series)) + for _, s := range series { + seriesMap[s.Hash()] = s + } + sp.LogKV("msg", "indexQuerier.GetSeries", "duration", time.Since(start), "count", len(series)) + + start = time.Now() + chunkRefs, used, err := g.bloomQuerier.FilterChunkRefs(ctx, instanceID, req.From, req.Through, seriesMap, result.Refs, req.Plan) if err != nil { return nil, err } + sp.LogKV("msg", "bloomQuerier.FilterChunkRefs", "duration", time.Since(start)) result.Refs = chunkRefs level.Info(logger).Log("msg", "return filtered chunk refs", "unfiltered", initialChunkCount, "filtered", len(result.Refs), "used_blooms", used)