Commit 51c42e8

feat: Do not add empty blooms to offsets (#14577)

Parent: 5824e3d

This change stops the bloom builder from appending empty blooms (which arise from series with no structured metadata) to a block's bloom pages. Such series keep their entry in the block index but get no bloom offsets, the builder now writes a valid schema even when nothing was appended, and the fused querier counts these series as "empty" (filtering no chunks) rather than "missed".

13 files changed: +260 −32 lines

integration/bloom_building_test.go (+30 −17)

@@ -61,15 +61,7 @@ func TestBloomBuilding(t *testing.T) {
 	cliIngester.Now = now
 
 	// We now ingest some logs across many series.
-	series := make([]labels.Labels, 0, nSeries)
-	for i := 0; i < nSeries; i++ {
-		lbs := labels.FromStrings("job", fmt.Sprintf("job-%d", i))
-		series = append(series, lbs)
-
-		for j := 0; j < nLogsPerSeries; j++ {
-			require.NoError(t, cliDistributor.PushLogLine(fmt.Sprintf("log line %d", j), now, nil, lbs.Map()))
-		}
-	}
+	series := writeSeries(t, nSeries, nLogsPerSeries, cliDistributor, now, "job")
 
 	// restart ingester which should flush the chunks and index
 	require.NoError(t, tIngester.Restart())
@@ -124,14 +116,8 @@ func TestBloomBuilding(t *testing.T) {
 	checkSeriesInBlooms(t, now, tenantID, bloomStore, series)
 
 	// Push some more logs so TSDBs need to be updated.
-	for i := 0; i < nSeries; i++ {
-		lbs := labels.FromStrings("job", fmt.Sprintf("job-new-%d", i))
-		series = append(series, lbs)
-
-		for j := 0; j < nLogsPerSeries; j++ {
-			require.NoError(t, cliDistributor.PushLogLine(fmt.Sprintf("log line %d", j), now, nil, lbs.Map()))
-		}
-	}
+	newSeries := writeSeries(t, nSeries, nLogsPerSeries, cliDistributor, now, "job-new")
+	series = append(series, newSeries...)
 
 	// restart ingester which should flush the chunks and index
 	require.NoError(t, tIngester.Restart())
@@ -147,6 +133,33 @@ func TestBloomBuilding(t *testing.T) {
 	checkSeriesInBlooms(t, now, tenantID, bloomStore, series)
 }
 
+func writeSeries(t *testing.T, nSeries int, nLogsPerSeries int, cliDistributor *client.Client, now time.Time, seriesPrefix string) []labels.Labels {
+	series := make([]labels.Labels, 0, nSeries)
+	for i := 0; i < nSeries; i++ {
+		lbs := labels.FromStrings("job", fmt.Sprintf("%s-%d", seriesPrefix, i))
+		series = append(series, lbs)
+
+		for j := 0; j < nLogsPerSeries; j++ {
+			// Only write structured metadata for half of the series
+			var metadata map[string]string
+			if i%2 == 0 {
+				metadata = map[string]string{
+					"traceID": fmt.Sprintf("%d%d", i, j),
+					"user":    fmt.Sprintf("%d%d", i, j%10),
+				}
+			}
+
+			require.NoError(t, cliDistributor.PushLogLine(
+				fmt.Sprintf("log line %d", j),
+				now,
+				metadata,
+				lbs.Map(),
+			))
+		}
+	}
+	return series
+}
+
 func checkCompactionFinished(t *testing.T, cliCompactor *client.Client) {
 	checkForTimestampMetric(t, cliCompactor, "loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds")
 }
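The 50/50 structured-metadata split above is what exercises both bloom paths end to end: even-numbered series carry metadata and produce non-empty blooms, while odd-numbered series produce empty blooms that should land in the block index without offsets. A minimal sketch of that split, with `metadataFor` as a hypothetical stand-in for the branch inside `writeSeries`:

```go
package main

import "fmt"

// metadataFor mirrors the branch inside writeSeries (hypothetical helper,
// not part of the commit): even-numbered series get structured metadata,
// odd-numbered series get none and will therefore yield empty blooms.
func metadataFor(i, j int) map[string]string {
	if i%2 != 0 {
		return nil // no structured metadata -> empty bloom for this series
	}
	return map[string]string{
		"traceID": fmt.Sprintf("%d%d", i, j),
		"user":    fmt.Sprintf("%d%d", i, j%10),
	}
}

func main() {
	fmt.Println(metadataFor(0, 3)) // map[traceID:03 user:03]
	fmt.Println(metadataFor(1, 3)) // map[] (nil): exercises the empty-bloom path
}
```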

pkg/bloombuild/builder/spec.go (+5 −2)

@@ -137,7 +137,7 @@ func (s *SimpleBloomGenerator) Generate(ctx context.Context) *LazyBlockBuilderIt
 		)
 	}
 
-	return NewLazyBlockBuilderIterator(ctx, s.opts, s.metrics, s.populator(ctx), s.writerReaderFunc, series, s.blocksIter)
+	return NewLazyBlockBuilderIterator(ctx, s.opts, s.metrics, s.logger, s.populator(ctx), s.writerReaderFunc, series, s.blocksIter)
 }
 
 // LazyBlockBuilderIterator is a lazy iterator over blocks that builds
@@ -146,6 +146,7 @@ type LazyBlockBuilderIterator struct {
 	ctx context.Context
 	opts v1.BlockOptions
 	metrics *v1.Metrics
+	logger log.Logger
 	populate v1.BloomPopulatorFunc
 	writerReaderFunc func() (v1.BlockWriter, v1.BlockReader)
 	series iter.PeekIterator[*v1.Series]
@@ -160,6 +161,7 @@ func NewLazyBlockBuilderIterator(
 	ctx context.Context,
 	opts v1.BlockOptions,
 	metrics *v1.Metrics,
+	logger log.Logger,
 	populate v1.BloomPopulatorFunc,
 	writerReaderFunc func() (v1.BlockWriter, v1.BlockReader),
 	series iter.PeekIterator[*v1.Series],
@@ -169,6 +171,7 @@ func NewLazyBlockBuilderIterator(
 		ctx: ctx,
 		opts: opts,
 		metrics: metrics,
+		logger: logger,
 		populate: populate,
 		writerReaderFunc: writerReaderFunc,
 		series: series,
@@ -196,7 +199,7 @@ func (b *LazyBlockBuilderIterator) Next() bool {
 		return false
 	}
 
-	mergeBuilder := v1.NewMergeBuilder(b.blocks, b.series, b.populate, b.metrics)
+	mergeBuilder := v1.NewMergeBuilder(b.blocks, b.series, b.populate, b.metrics, b.logger)
 	writer, reader := b.writerReaderFunc()
 	blockBuilder, err := v1.NewBlockBuilder(b.opts, writer)
 	if err != nil {

pkg/storage/bloom/v1/bloom_builder.go (+12 −0)

@@ -28,6 +28,10 @@ func NewBloomBlockBuilder(opts BlockOptions, writer io.WriteCloser) *BloomBlockB
 	}
 }
 
+func (b *BloomBlockBuilder) UnflushedSize() int {
+	return b.scratch.Len() + b.page.UnflushedSize()
+}
+
 func (b *BloomBlockBuilder) Append(bloom *Bloom) (BloomOffset, error) {
 	if !b.writtenSchema {
 		if err := b.writeSchema(); err != nil {
@@ -68,6 +72,14 @@ func (b *BloomBlockBuilder) writeSchema() error {
 }
 
 func (b *BloomBlockBuilder) Close() (uint32, error) {
+	if !b.writtenSchema {
+		// We will get here only if we haven't appended any bloom filters to the block
+		// This would happen only if all series yielded empty blooms
+		if err := b.writeSchema(); err != nil {
+			return 0, errors.Wrap(err, "writing schema")
+		}
+	}
+
 	if b.page.Count() > 0 {
 		if err := b.flushPage(); err != nil {
 			return 0, errors.Wrap(err, "flushing final bloom page")
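Why Close() must write the schema even with zero appends: if every series yields an empty bloom, Append is never called, and before this change the block would be finalized without a schema header, producing an undecodable bloom file. A self-contained sketch of the pattern, using simplified stand-in types rather than the real Loki builder:

```go
package main

import (
	"bytes"
	"fmt"
)

// blockBuilder is a simplified stand-in for BloomBlockBuilder: it writes a
// schema header lazily, on the first Append.
type blockBuilder struct {
	buf           bytes.Buffer
	writtenSchema bool
}

func (b *blockBuilder) writeSchema() {
	b.buf.WriteString("SCHEMA|") // placeholder for the real header encoding
	b.writtenSchema = true
}

func (b *blockBuilder) Append(bloom string) {
	if !b.writtenSchema {
		b.writeSchema()
	}
	b.buf.WriteString(bloom)
}

// Close mirrors the fix: write the schema here too, so a block whose series
// all produced empty blooms is still a valid (if bloom-less) file.
func (b *blockBuilder) Close() []byte {
	if !b.writtenSchema {
		b.writeSchema()
	}
	return b.buf.Bytes()
}

func main() {
	var b blockBuilder
	fmt.Printf("%s\n", b.Close()) // SCHEMA| -- decodable even with no blooms
}
```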

pkg/storage/bloom/v1/builder.go (+15 −0)

@@ -5,6 +5,8 @@ import (
 	"hash"
 	"io"
 
+	"github.com/go-kit/log"
+	"github.com/go-kit/log/level"
 	"github.com/pkg/errors"
 
 	"github.com/grafana/loki/v3/pkg/compression"
@@ -112,6 +114,10 @@ func (w *PageWriter) Reset() {
 	w.n = 0
 }
 
+func (w *PageWriter) UnflushedSize() int {
+	return w.enc.Len()
+}
+
 func (w *PageWriter) SpaceFor(numBytes int) bool {
 	// if a single bloom exceeds the target size, still accept it
 	// otherwise only accept it if adding it would not exceed the target size
@@ -189,6 +195,7 @@ type MergeBuilder struct {
 	// Add chunks of a single series to a bloom
 	populate BloomPopulatorFunc
 	metrics *Metrics
+	logger log.Logger
 }
 
 type BloomPopulatorFunc func(series *Series, preExistingBlooms iter.SizedIterator[*Bloom], chunksToAdd ChunkRefs, ch chan *BloomCreation)
@@ -202,6 +209,7 @@ func NewMergeBuilder(
 	store iter.Iterator[*Series],
 	populate BloomPopulatorFunc,
 	metrics *Metrics,
+	logger log.Logger,
 ) *MergeBuilder {
 	// combinedSeriesIter handles series with fingerprint collisions:
 	// because blooms dont contain the label-set (only the fingerprint),
@@ -229,6 +237,7 @@ func NewMergeBuilder(
 		store: combinedSeriesIter,
 		populate: populate,
 		metrics: metrics,
+		logger: logger,
 	}
 }
 
@@ -306,6 +315,12 @@ func (mb *MergeBuilder) processNextSeries(
 		if creation.Err != nil {
 			return nil, info.sourceBytes, 0, false, false, errors.Wrap(creation.Err, "populating bloom")
 		}
+
+		if creation.Bloom.IsEmpty() {
+			level.Debug(mb.logger).Log("msg", "received empty bloom. Adding to index but skipping offsets", "fingerprint", nextInStore.Fingerprint)
+			continue
+		}
+
 		offset, err := builder.AddBloom(creation.Bloom)
 		if err != nil {
 			return nil, info.sourceBytes, 0, false, false, errors.Wrapf(
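The core of the change is the IsEmpty() check above: an empty bloom is never handed to AddBloom, so the series keeps its index entry but accumulates no bloom offsets. A rough sketch of the resulting invariant, using simplified stand-in types rather than the real v1 package:

```go
package main

import "fmt"

// bloom is a stand-in for v1.Bloom; IsEmpty mirrors the check the merge
// builder now performs before recording an offset.
type bloom struct{ keys []string }

func (b bloom) IsEmpty() bool { return len(b.keys) == 0 }

// offsetsFor mimics the per-series loop: only non-empty blooms get an
// offset, so a series whose blooms are all empty ends up with len == 0.
func offsetsFor(blooms []bloom, nextOffset *int) []int {
	var offsets []int
	for _, bl := range blooms {
		if bl.IsEmpty() {
			continue // series stays indexed, but no offset is recorded
		}
		offsets = append(offsets, *nextOffset)
		*nextOffset++
	}
	return offsets
}

func main() {
	next := 0
	withMeta := offsetsFor([]bloom{{keys: []string{"traceID"}}}, &next)
	withoutMeta := offsetsFor([]bloom{{}}, &next)
	fmt.Println(withMeta, len(withoutMeta)) // [0] 0
}
```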

pkg/storage/bloom/v1/builder_test.go (+6 −1)

@@ -6,6 +6,7 @@ import (
 	"sort"
 	"testing"
 
+	"github.com/go-kit/log"
 	"github.com/prometheus/common/model"
 	"github.com/stretchr/testify/require"
 
@@ -263,7 +264,7 @@ func TestMergeBuilder(t *testing.T) {
 	)
 
 	// Ensure that the merge builder combines all the blocks correctly
-	mergeBuilder := NewMergeBuilder(dedupedBlocks(blocks), storeItr, populate, NewMetrics(nil))
+	mergeBuilder := NewMergeBuilder(dedupedBlocks(blocks), storeItr, populate, NewMetrics(nil), log.NewNopLogger())
 	indexBuf := bytes.NewBuffer(nil)
 	bloomsBuf := bytes.NewBuffer(nil)
 	writer := NewMemoryBlockWriter(indexBuf, bloomsBuf)
@@ -350,6 +351,8 @@ func TestMergeBuilderFingerprintCollision(t *testing.T) {
 	// We're not testing the ability to extend a bloom in this test
 	pop := func(_ *Series, _ iter.SizedIterator[*Bloom], _ ChunkRefs, ch chan *BloomCreation) {
 		bloom := NewBloom()
+		// Add something to the bloom so it's not empty
+		bloom.Add([]byte("hello"))
 		stats := indexingInfo{
 			sourceBytes: int(bloom.Capacity()) / 8,
 			indexedFields: NewSetFromLiteral[Field]("__all__"),
@@ -367,6 +370,7 @@ func TestMergeBuilderFingerprintCollision(t *testing.T) {
 		iter.NewSliceIter(data),
 		pop,
 		NewMetrics(nil),
+		log.NewNopLogger(),
 	)
 
 	_, _, err = mergeBuilder.Build(builder)
@@ -539,6 +543,7 @@ func TestMergeBuilder_Roundtrip(t *testing.T) {
 		dedupedStore,
 		pop,
 		NewMetrics(nil),
+		log.NewNopLogger(),
 	)
 	builder, err := NewBlockBuilder(blockOpts, writer)
 	require.Nil(t, err)

pkg/storage/bloom/v1/fuse.go (+48 −5)

@@ -32,6 +32,8 @@ func NewBloomRecorder(ctx context.Context, id string) *BloomRecorder {
 		chunksSkipped: atomic.NewInt64(0),
 		seriesMissed: atomic.NewInt64(0),
 		chunksMissed: atomic.NewInt64(0),
+		seriesEmpty: atomic.NewInt64(0),
+		chunksEmpty: atomic.NewInt64(0),
 		chunksFiltered: atomic.NewInt64(0),
 	}
 }
@@ -45,6 +47,8 @@ type BloomRecorder struct {
 	seriesSkipped, chunksSkipped *atomic.Int64
 	// not found in bloom
 	seriesMissed, chunksMissed *atomic.Int64
+	// exists in block index but empty offsets
+	seriesEmpty, chunksEmpty *atomic.Int64
 	// filtered out
 	chunksFiltered *atomic.Int64
 }
@@ -56,6 +60,8 @@ func (r *BloomRecorder) Merge(other *BloomRecorder) {
 	r.chunksSkipped.Add(other.chunksSkipped.Load())
 	r.seriesMissed.Add(other.seriesMissed.Load())
 	r.chunksMissed.Add(other.chunksMissed.Load())
+	r.seriesEmpty.Add(other.seriesEmpty.Load())
+	r.chunksEmpty.Add(other.chunksEmpty.Load())
 	r.chunksFiltered.Add(other.chunksFiltered.Load())
 }
 
@@ -66,13 +72,15 @@ func (r *BloomRecorder) Report(logger log.Logger, metrics *Metrics) {
 		seriesFound = r.seriesFound.Load()
 		seriesSkipped = r.seriesSkipped.Load()
 		seriesMissed = r.seriesMissed.Load()
-		seriesRequested = seriesFound + seriesSkipped + seriesMissed
+		seriesEmpty = r.seriesEmpty.Load()
+		seriesRequested = seriesFound + seriesSkipped + seriesMissed + seriesEmpty
 
 		chunksFound = r.chunksFound.Load()
 		chunksSkipped = r.chunksSkipped.Load()
 		chunksMissed = r.chunksMissed.Load()
 		chunksFiltered = r.chunksFiltered.Load()
-		chunksRequested = chunksFound + chunksSkipped + chunksMissed
+		chunksEmpty = r.chunksEmpty.Load()
+		chunksRequested = chunksFound + chunksSkipped + chunksMissed + chunksEmpty
 	)
 	level.Debug(logger).Log(
 		"recorder_msg", "bloom search results",
@@ -82,37 +90,41 @@ func (r *BloomRecorder) Report(logger log.Logger, metrics *Metrics) {
 		"recorder_series_found", seriesFound,
 		"recorder_series_skipped", seriesSkipped,
 		"recorder_series_missed", seriesMissed,
+		"recorder_series_empty", seriesEmpty,
 
 		"recorder_chunks_requested", chunksRequested,
 		"recorder_chunks_found", chunksFound,
 		"recorder_chunks_skipped", chunksSkipped,
 		"recorder_chunks_missed", chunksMissed,
+		"recorder_chunks_empty", chunksEmpty,
 		"recorder_chunks_filtered", chunksFiltered,
 	)
 
 	if metrics != nil {
 		metrics.recorderSeries.WithLabelValues(recorderRequested).Add(float64(seriesRequested))
 		metrics.recorderSeries.WithLabelValues(recorderFound).Add(float64(seriesFound))
 		metrics.recorderSeries.WithLabelValues(recorderSkipped).Add(float64(seriesSkipped))
+		metrics.recorderSeries.WithLabelValues(recorderEmpty).Add(float64(seriesEmpty))
 		metrics.recorderSeries.WithLabelValues(recorderMissed).Add(float64(seriesMissed))
 
 		metrics.recorderChunks.WithLabelValues(recorderRequested).Add(float64(chunksRequested))
 		metrics.recorderChunks.WithLabelValues(recorderFound).Add(float64(chunksFound))
 		metrics.recorderChunks.WithLabelValues(recorderSkipped).Add(float64(chunksSkipped))
 		metrics.recorderChunks.WithLabelValues(recorderMissed).Add(float64(chunksMissed))
+		metrics.recorderChunks.WithLabelValues(recorderEmpty).Add(float64(chunksEmpty))
 		metrics.recorderChunks.WithLabelValues(recorderFiltered).Add(float64(chunksFiltered))
 	}
 }
 
-func (r *BloomRecorder) record(
-	seriesFound, chunksFound, seriesSkipped, chunksSkipped, seriesMissed, chunksMissed, chunksFiltered int,
-) {
+func (r *BloomRecorder) record(seriesFound, chunksFound, seriesSkipped, chunksSkipped, seriesMissed, chunksMissed, seriesEmpty, chunksEmpty, chunksFiltered int) {
 	r.seriesFound.Add(int64(seriesFound))
 	r.chunksFound.Add(int64(chunksFound))
 	r.seriesSkipped.Add(int64(seriesSkipped))
 	r.chunksSkipped.Add(int64(chunksSkipped))
 	r.seriesMissed.Add(int64(seriesMissed))
 	r.chunksMissed.Add(int64(chunksMissed))
+	r.seriesEmpty.Add(int64(seriesEmpty))
+	r.chunksEmpty.Add(int64(chunksEmpty))
 	r.chunksFiltered.Add(int64(chunksFiltered))
 }
 
@@ -170,6 +182,7 @@ func (fq *FusedQuerier) recordMissingFp(
 			0, 0, // found
 			0, 0, // skipped
 			1, len(input.Chks), // missed
+			0, 0, // empty
 			0, // chunks filtered
 		)
 	})
@@ -184,6 +197,22 @@ func (fq *FusedQuerier) recordSkippedFp(
 			0, 0, // found
 			1, len(input.Chks), // skipped
 			0, 0, // missed
+			0, 0, // empty
+			0, // chunks filtered
+		)
+	})
+}
+
+func (fq *FusedQuerier) recordEmptyFp(
+	batch []Request,
+	fp model.Fingerprint,
+) {
+	fq.noRemovals(batch, fp, func(input Request) {
+		input.Recorder.record(
+			0, 0, // found
+			0, 0, // skipped
+			0, 0, // missed
+			1, len(input.Chks), // empty
 			0, // chunks filtered
 		)
 	})
@@ -280,6 +309,19 @@ func (fq *FusedQuerier) runSeries(_ Schema, series *SeriesWithMeta, reqs []Reque
 		})
 	}
 
+	if len(series.Offsets) == 0 {
+		// We end up here for series with no structured metadata fields.
+		// While building blooms, these series would yield empty blooms.
+		// We add these series to the index of the block so we don't report them as missing,
+		// but we don't filter any chunks for them.
+		level.Debug(fq.logger).Log(
+			"msg", "series with empty offsets",
+			"fp", series.Fingerprint,
+		)
+		fq.recordEmptyFp(reqs, series.Fingerprint)
+		return
+	}
+
 	for i, offset := range series.Offsets {
 		skip := fq.bq.blooms.LoadOffset(offset)
 		if skip {
@@ -361,6 +403,7 @@ func (fq *FusedQuerier) runSeries(_ Schema, series *SeriesWithMeta, reqs []Reque
 			1, len(inputs[i].InBlooms), // found
 			0, 0, // skipped
 			0, len(inputs[i].Missing), // missed
+			0, 0, // empty
 			len(removals), // filtered
 		)
 		req.Response <- Output{
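On the read path, a series that appears in the block index with zero offsets is now a recognized state: it is counted as "empty" rather than "missed", and none of its chunks are filtered out. A condensed sketch of that branch, with simplified stand-ins for the real querier types:

```go
package main

import "fmt"

// series is a stand-in for SeriesWithMeta: present in the index, possibly
// with no bloom offsets.
type series struct {
	offsets []int
	chunks  []string
}

// filterChunks mirrors the new branch in runSeries: with no offsets there
// are no blooms to test against, so every chunk is kept and the series is
// recorded as "empty" instead of "missed".
func filterChunks(s series) (kept []string, outcome string) {
	if len(s.offsets) == 0 {
		return s.chunks, "empty"
	}
	// ...the real code loads each offset's bloom and tests chunks here...
	return s.chunks, "found"
}

func main() {
	kept, outcome := filterChunks(series{chunks: []string{"c1", "c2"}})
	fmt.Println(outcome, kept) // empty [c1 c2]
}
```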

pkg/storage/bloom/v1/index.go (+2 −1)

@@ -153,7 +153,8 @@ func aggregateHeaders(xs []SeriesHeader) SeriesHeader {
 	fromFp, _ := xs[0].Bounds.Bounds()
 	_, throughFP := xs[len(xs)-1].Bounds.Bounds()
 	res := SeriesHeader{
-		Bounds: NewBounds(fromFp, throughFP),
+		NumSeries: len(xs),
+		Bounds: NewBounds(fromFp, throughFP),
 	}
 
 	for i, x := range xs {
