grafana · salvacorts · Oct 31, 2024 · Oct 30, 2024 · Oct 30, 2024 · Oct 30, 2024
@@ -44,9 +44,9 @@ func groupRefs(t *testing.T, chunkRefs []*logproto.ChunkRef) []*logproto.Grouped
 	t.Helper()
 	grouped := groupChunkRefs(nil, chunkRefs, nil)
 	// Put fake labels to the series
-	for i, g := range grouped {
+	for _, g := range grouped {
 		g.Labels = &logproto.IndexSeries{
-			Labels: logproto.FromLabelsToLabelAdapters(labels.FromStrings("foo", fmt.Sprintf("%d", i))),
+			Labels: logproto.FromLabelsToLabelAdapters(labels.FromStrings("foo", fmt.Sprintf("%d", g.Fingerprint))),
 		}
 	}
 
@@ -305,17 +305,17 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) {
 					{From: 1696248000000, Through: 1696251600000, Checksum: 2},
 					{From: 1696244400000, Through: 1696248000000, Checksum: 4},
 				}, Labels: &logproto.IndexSeries{
-					Labels: logproto.FromLabelsToLabelAdapters(labels.FromStrings("foo", "0")),
+					Labels: logproto.FromLabelsToLabelAdapters(labels.FromStrings("foo", "1000")),
 				}},
 				{Fingerprint: 2000, Tenant: tenantID, Refs: []*logproto.ShortRef{
 					{From: 1696255200000, Through: 1696258800000, Checksum: 3},
 				}, Labels: &logproto.IndexSeries{
-					Labels: logproto.FromLabelsToLabelAdapters(labels.FromStrings("foo", "1")),
+					Labels: logproto.FromLabelsToLabelAdapters(labels.FromStrings("foo", "2000")),
 				}},
 				{Fingerprint: 3000, Tenant: tenantID, Refs: []*logproto.ShortRef{
 					{From: 1696240800000, Through: 1696244400000, Checksum: 1},
 				}, Labels: &logproto.IndexSeries{
-					Labels: logproto.FromLabelsToLabelAdapters(labels.FromStrings("foo", "2")),
+					Labels: logproto.FromLabelsToLabelAdapters(labels.FromStrings("foo", "3000")),
 				}},
 			},
 		}, res)

@@ -72,6 +72,7 @@ func (e extractor) Extract(start, end int64, r resultscache.Response, _, _ int64
 		if len(refs) > 0 {
 			chunkRefs = append(chunkRefs, &logproto.GroupedChunkRefs{
 				Fingerprint: chunkRef.Fingerprint,
+				Labels:      chunkRef.Labels,
 				Tenant:      chunkRef.Tenant,
 				Refs:        refs,
 			})

@@ -322,6 +322,7 @@ func mergeSeries(input [][]*logproto.GroupedChunkRefs, buf []*logproto.GroupedCh
 			}
 			return &logproto.GroupedChunkRefs{
 				Fingerprint: a.Fingerprint,
+				Labels:      a.Labels,
 				Tenant:      a.Tenant,
 				Refs:        mergeChunkSets(a.Refs, b.Refs),
 			}

@@ -391,6 +391,7 @@ func (m *FilterChunkRefRequest) WithStartEndForCache(start, end time.Time) resul
 		if len(refs) > 0 {
 			chunkRefs = append(chunkRefs, &GroupedChunkRefs{
 				Fingerprint: chunkRef.Fingerprint,
+				Labels:      chunkRef.Labels,
 				Tenant:      chunkRef.Tenant,
 				Refs:        refs,
 			})

@@ -157,45 +157,40 @@ func (sm stringMatcherTest) Matches(series labels.Labels, bloom filter.Checker)
 	// 2. It should be possible to test for just the key
 
 	var (
-		combined = fmt.Sprintf("%s=%s", sm.matcher.Key, sm.matcher.Value)
-
-		rawKey      = unsafe.Slice(unsafe.StringData(sm.matcher.Key), len(sm.matcher.Key))
+		combined    = fmt.Sprintf("%s=%s", sm.matcher.Key, sm.matcher.Value)
 		rawCombined = unsafe.Slice(unsafe.StringData(combined), len(combined))
 	)
 
-	return sm.match(series, bloom, rawKey, rawCombined)
+	return sm.match(series, bloom, rawCombined)
 }
 
 func (sm stringMatcherTest) MatchesWithPrefixBuf(series labels.Labels, bloom filter.Checker, buf []byte, prefixLen int) bool {
 	var (
-		combined = fmt.Sprintf("%s=%s", sm.matcher.Key, sm.matcher.Value)
-
-		prefixedKey      = appendToBuf(buf, prefixLen, sm.matcher.Key)
+		combined         = fmt.Sprintf("%s=%s", sm.matcher.Key, sm.matcher.Value)
 		prefixedCombined = appendToBuf(buf, prefixLen, combined)
 	)
 
-	return sm.match(series, bloom, prefixedKey, prefixedCombined)
+	return sm.match(series, bloom, prefixedCombined)
 }
 
-func (sm stringMatcherTest) match(series labels.Labels, bloom filter.Checker, key []byte, combined []byte) bool {
-	// If the label is part of the series, we cannot check the bloom since
-	// the label is not structured metadata
-	if value := series.Get(sm.matcher.Key); value != "" {
-		// If the series label value is the same as the matcher value, we cannot filter out this chunk.
-		// Otherwise, we can filter out this chunk.
-		// E.g. `{env="prod"} | env="prod"` should not filter out the chunk.
-		// E.g. `{env="prod"} | env="dev"` should filter out the chunk.
-		// E.g. `{env="prod"} | env=""` should filter out the chunk.
-		return value == sm.matcher.Value
+// match returns true if the series matches the matcher or is in the bloom filter.
+// TODO(salvacorts): support filtering out chunks for labels overridden by structured metadata.
+// We'd need to pass a list of structured metadata fields, similarly to how we pass the series.
+// SEE: https://github.com/grafana/loki/pull/14661#discussion_r1824228343
+func (sm stringMatcherTest) match(series labels.Labels, bloom filter.Checker, combined []byte) bool {
+	// If we don't have the series labels, we cannot tell which labels come from the series, so we keep the chunk.
+	// Otherwise we may filter out chunks for queries like `{env="prod"} | env="prod"` if env=prod is not structured metadata.
+	if len(series) == 0 {
+		return true
 	}
 
-	// To this point we know the label is structured metadata so if the label name is not
-	// in the bloom, we can filter out the chunk.
-	if !bloom.Test(key) {
-		return false
-	}
+	// It's in the series if the key is set and has the same value.
+	// By checking val != "" we handle `{env="prod"} | user=""`.
+	val := series.Get(sm.matcher.Key)
+	inSeries := val != "" && val == sm.matcher.Value
 
-	return bloom.Test(combined)
+	inBloom := bloom.Test(combined)
+	return inSeries || inBloom
 }
 
 // appendToBuf is the equivalent of append(buf[:prefixLen], str). len(buf) must

@@ -21,10 +21,11 @@ func TestLabelMatchersToBloomTest(t *testing.T) {
 			tokenizer,
 			push.LabelAdapter{Name: "trace_id", Value: "exists_1"},
 			push.LabelAdapter{Name: "trace_id", Value: "exists_2"},
+			push.LabelAdapter{Name: "app", Value: "other"},
 		)
 	)
 
-	series := labels.FromStrings("app", "fake")
+	series := labels.FromStrings("env", "prod", "app", "fake")
 	tt := []struct {
 		name  string
 		query string
@@ -66,15 +67,33 @@ func TestLabelMatchersToBloomTest(t *testing.T) {
 			match: false,
 		},
 		{
-			name:  "filter series label with different value",
-			query: `{app="fake"} | app="noexist"`,
+			name:  "ignore label from series",
+			query: `{app="fake"} | env="prod"`,
+			match: true,
+		},
+		{
+			name:  "filter label from series",
+			query: `{app="fake"} | env="dev"`, // env is set to prod in the series
 			match: false,
 		},
+		// We cannot support this test case until we can forward a list of structured metadata fields.
+		// We cannot use the bloom to check whether the key is structured metadata because bloom filters are probabilistic.
+		// E.g. bloom.Test("env") may return true even if env is not structured metadata.
+		//{
+		//	name:  "filter label from series overridden by structured metadata",
+		//	query: `{app="fake"} | app="fake"`, // app is set to other in the structured metadata
+		//	match: false,
+		//},
 		{
-			name:  "ignore label from series",
-			query: `{app="fake"} | app="fake"`,
+			name:  "ignore label from series and structured metadata",
+			query: `{app="fake"} | app="other"`,
 			match: true,
 		},
+		{
+			name:  "filter series label with non-existing value",
+			query: `{app="fake"} | app="noexist"`,
+			match: false,
+		},
 		{
 			name:  "ignore label from series with empty value",
 			query: `{app="fake"} | app=""`,