Skip to content

Commit 04bc3a4

Browse files
authored
fix(blooms): ensure tokenizer cache is reset between series (grafana#13370)
1 parent a14755e commit 04bc3a4

File tree

2 files changed

+42
-0
lines changed

2 files changed

+42
-0
lines changed

pkg/storage/bloom/v1/bloom_tokenizer.go

+3
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,14 @@ func (bt *BloomTokenizer) newBloom() *Bloom {
9797
}
9898
}
9999

100+
// Populates a bloom filter(s) with the tokens from the given chunks.
101+
// Called once per series
100102
func (bt *BloomTokenizer) Populate(
101103
blooms SizedIterator[*Bloom],
102104
chks Iterator[ChunkRefWithIter],
103105
ch chan *BloomCreation,
104106
) {
107+
clear(bt.cache) // MUST always clear the cache before starting a new series
105108
var next bool
106109

107110
// All but the last bloom are considered full -- send back unaltered

pkg/storage/bloom/v1/bloom_tokenizer_test.go

+39
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,45 @@ func BenchmarkPopulateSeriesWithBloom(b *testing.B) {
288288
}
289289
}
290290

291+
func TestTokenizerClearsCacheBetweenPopulateCalls(t *testing.T) {
292+
bt := NewBloomTokenizer(DefaultNGramLength, DefaultNGramSkip, 0, NewMetrics(nil))
293+
line := "foobarbazz"
294+
var blooms []*Bloom
295+
296+
for i := 0; i < 2; i++ {
297+
ch := make(chan *BloomCreation)
298+
itr, err := chunkRefItrFromLines(line)
299+
require.NoError(t, err)
300+
go bt.Populate(
301+
NewEmptyIter[*Bloom](),
302+
NewSliceIter([]ChunkRefWithIter{
303+
{
304+
Ref: ChunkRef{},
305+
Itr: itr,
306+
},
307+
}),
308+
ch,
309+
)
310+
var ct int
311+
for created := range ch {
312+
blooms = append(blooms, created.Bloom)
313+
ct++
314+
}
315+
// ensure we created one bloom for each call
316+
require.Equal(t, 1, ct)
317+
318+
}
319+
320+
for _, bloom := range blooms {
321+
toks := bt.lineTokenizer.Tokens(line)
322+
for toks.Next() {
323+
token := toks.At()
324+
require.True(t, bloom.Test(token))
325+
}
326+
require.NoError(t, toks.Err())
327+
}
328+
}
329+
291330
func BenchmarkMapClear(b *testing.B) {
292331
bt := NewBloomTokenizer(DefaultNGramLength, DefaultNGramSkip, 0, metrics)
293332
for i := 0; i < b.N; i++ {

0 commit comments

Comments (0)