Skip to content

Commit 41c5ee2

Browse files
authored
fix(regression): reverts #13039 to prevent use-after-free corruptions (#13162)
1 parent 9823f20 commit 41c5ee2

File tree

21 files changed

+98
-680
lines changed

21 files changed

+98
-680
lines changed

pkg/bloombuild/builder/spec_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ func dummyBloomGen(t *testing.T, opts v1.BlockOptions, store v1.Iterator[*v1.Ser
7474
for i, b := range blocks {
7575
bqs = append(bqs, &bloomshipper.CloseableBlockQuerier{
7676
BlockRef: refs[i],
77-
BlockQuerier: v1.NewBlockQuerier(b, &v1.SimpleHeapAllocator{}, v1.DefaultMaxPageSize),
77+
BlockQuerier: v1.NewBlockQuerier(b, false, v1.DefaultMaxPageSize),
7878
})
7979
}
8080

@@ -152,7 +152,7 @@ func TestSimpleBloomGenerator(t *testing.T) {
152152
expectedRefs := v1.PointerSlice(data)
153153
outputRefs := make([]*v1.SeriesWithBloom, 0, len(data))
154154
for _, block := range outputBlocks {
155-
bq := v1.NewBlockQuerier(block, &v1.SimpleHeapAllocator{}, v1.DefaultMaxPageSize)
155+
bq := v1.NewBlockQuerier(block, false, v1.DefaultMaxPageSize)
156156
for bq.Next() {
157157
outputRefs = append(outputRefs, bq.At())
158158
}

pkg/bloomcompactor/spec_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ func dummyBloomGen(t *testing.T, opts v1.BlockOptions, store v1.Iterator[*v1.Ser
7474
for i, b := range blocks {
7575
bqs = append(bqs, &bloomshipper.CloseableBlockQuerier{
7676
BlockRef: refs[i],
77-
BlockQuerier: v1.NewBlockQuerier(b, &v1.SimpleHeapAllocator{}, v1.DefaultMaxPageSize),
77+
BlockQuerier: v1.NewBlockQuerier(b, false, v1.DefaultMaxPageSize),
7878
})
7979
}
8080

@@ -152,7 +152,7 @@ func TestSimpleBloomGenerator(t *testing.T) {
152152
expectedRefs := v1.PointerSlice(data)
153153
outputRefs := make([]*v1.SeriesWithBloom, 0, len(data))
154154
for _, block := range outputBlocks {
155-
bq := v1.NewBlockQuerier(block, &v1.SimpleHeapAllocator{}, v1.DefaultMaxPageSize)
155+
bq := v1.NewBlockQuerier(block, false, v1.DefaultMaxPageSize)
156156
for bq.Next() {
157157
outputRefs = append(outputRefs, bq.At())
158158
}

pkg/bloomgateway/bloomgateway_test.go

+44
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,50 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) {
215215
}
216216
})
217217

218+
t.Run("request cancellation does not result in channel locking", func(t *testing.T) {
219+
now := mktime("2024-01-25 10:00")
220+
221+
// replace store implementation and re-initialize workers and sub-services
222+
refs, metas, queriers, data := createBlocks(t, tenantID, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff)
223+
mockStore := newMockBloomStore(queriers, metas)
224+
mockStore.delay = 2000 * time.Millisecond
225+
226+
reg := prometheus.NewRegistry()
227+
gw, err := New(cfg, mockStore, logger, reg)
228+
require.NoError(t, err)
229+
230+
err = services.StartAndAwaitRunning(context.Background(), gw)
231+
require.NoError(t, err)
232+
t.Cleanup(func() {
233+
err = services.StopAndAwaitTerminated(context.Background(), gw)
234+
require.NoError(t, err)
235+
})
236+
237+
chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 100)
238+
239+
// saturate workers
240+
// then send additional request
241+
for i := 0; i < gw.cfg.WorkerConcurrency+1; i++ {
242+
expr, err := syntax.ParseExpr(`{foo="bar"} |= "does not match"`)
243+
require.NoError(t, err)
244+
245+
req := &logproto.FilterChunkRefRequest{
246+
From: now.Add(-24 * time.Hour),
247+
Through: now,
248+
Refs: groupRefs(t, chunkRefs),
249+
Plan: plan.QueryPlan{AST: expr},
250+
Blocks: stringSlice(refs),
251+
}
252+
253+
ctx, cancelFn := context.WithTimeout(context.Background(), 500*time.Millisecond)
254+
ctx = user.InjectOrgID(ctx, tenantID)
255+
t.Cleanup(cancelFn)
256+
257+
res, err := gw.FilterChunkRefs(ctx, req)
258+
require.ErrorContainsf(t, err, context.DeadlineExceeded.Error(), "%+v", res)
259+
}
260+
})
261+
218262
t.Run("returns unfiltered chunk refs if no filters provided", func(t *testing.T) {
219263
now := mktime("2023-10-03 10:00")
220264

pkg/bloomgateway/util_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ func createBlocks(t *testing.T, tenant string, n int, from, through model.Time,
399399
// }
400400
// }
401401
querier := &bloomshipper.CloseableBlockQuerier{
402-
BlockQuerier: v1.NewBlockQuerier(block, &v1.SimpleHeapAllocator{}, v1.DefaultMaxPageSize),
402+
BlockQuerier: v1.NewBlockQuerier(block, false, v1.DefaultMaxPageSize),
403403
BlockRef: blockRef,
404404
}
405405
queriers = append(queriers, querier)

pkg/loki/modules.go

-15
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ import (
3535

3636
"github.com/grafana/loki/v3/pkg/bloomcompactor"
3737
"github.com/grafana/loki/v3/pkg/logqlmodel/stats"
38-
v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1"
3938
"github.com/grafana/loki/v3/pkg/storage/types"
4039

4140
"github.com/grafana/loki/v3/pkg/analytics"
@@ -80,7 +79,6 @@ import (
8079
"github.com/grafana/loki/v3/pkg/util/httpreq"
8180
"github.com/grafana/loki/v3/pkg/util/limiter"
8281
util_log "github.com/grafana/loki/v3/pkg/util/log"
83-
"github.com/grafana/loki/v3/pkg/util/mempool"
8482
"github.com/grafana/loki/v3/pkg/util/querylimits"
8583
lokiring "github.com/grafana/loki/v3/pkg/util/ring"
8684
serverutil "github.com/grafana/loki/v3/pkg/util/server"
@@ -732,19 +730,6 @@ func (t *Loki) initBloomStore() (services.Service, error) {
732730
reg := prometheus.DefaultRegisterer
733731
bsCfg := t.Cfg.StorageConfig.BloomShipperConfig
734732

735-
// Set global BloomPageAllocator variable
736-
switch bsCfg.MemoryManagement.BloomPageAllocationType {
737-
case "simple":
738-
bloomshipper.BloomPageAllocator = &v1.SimpleHeapAllocator{}
739-
case "dynamic":
740-
bloomshipper.BloomPageAllocator = v1.BloomPagePool
741-
case "fixed":
742-
bloomshipper.BloomPageAllocator = mempool.New("bloom-page-pool", bsCfg.MemoryManagement.BloomPageMemPoolBuckets, reg)
743-
default:
744-
// do nothing
745-
bloomshipper.BloomPageAllocator = nil
746-
}
747-
748733
var metasCache cache.Cache
749734
if t.Cfg.isTarget(IndexGateway) && cache.IsCacheConfigured(bsCfg.MetasCache) {
750735
metasCache, err = cache.New(bsCfg.MetasCache, reg, logger, stats.BloomMetasCache, constants.Loki)

pkg/storage/bloom/v1/block.go

+2-6
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,11 @@ type BlockQuerier struct {
117117
// will be returned to the pool for efficiency. This can only safely be used
118118
// when the underlying bloom bytes don't escape the decoder, i.e.
119119
// when loading blooms for querying (bloom-gw) but not for writing (bloom-compactor).
120-
func NewBlockQuerier(b *Block, alloc Allocator, maxPageSize int) *BlockQuerier {
120+
func NewBlockQuerier(b *Block, noCapture bool, maxPageSize int) *BlockQuerier {
121121
return &BlockQuerier{
122122
block: b,
123123
series: NewLazySeriesIter(b),
124-
blooms: NewLazyBloomIter(b, alloc, maxPageSize),
124+
blooms: NewLazyBloomIter(b, noCapture, maxPageSize),
125125
}
126126
}
127127

@@ -173,7 +173,3 @@ func (bq *BlockQuerier) Err() error {
173173

174174
return bq.blooms.Err()
175175
}
176-
177-
func (bq *BlockQuerier) Close() {
178-
bq.blooms.Close()
179-
}

pkg/storage/bloom/v1/bloom.go

+15-28
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ type Bloom struct {
2424

2525
func (b *Bloom) Encode(enc *encoding.Encbuf) error {
2626
// divide by 8 b/c bloom capacity is measured in bits, but we want bytes
27-
buf := bytes.NewBuffer(make([]byte, 0, int(b.Capacity()/8)))
27+
buf := bytes.NewBuffer(BloomPagePool.Get(int(b.Capacity() / 8)))
2828

2929
// TODO(owen-d): have encoder implement writer directly so we don't need
3030
// to indirect via a buffer
@@ -36,6 +36,7 @@ func (b *Bloom) Encode(enc *encoding.Encbuf) error {
3636
data := buf.Bytes()
3737
enc.PutUvarint(len(data)) // length of bloom filter
3838
enc.PutBytes(data)
39+
BloomPagePool.Put(data[:0]) // release to pool
3940
return nil
4041
}
4142

@@ -63,14 +64,11 @@ func (b *Bloom) Decode(dec *encoding.Decbuf) error {
6364
return nil
6465
}
6566

66-
func LazyDecodeBloomPage(r io.Reader, alloc Allocator, pool chunkenc.ReaderPool, page BloomPageHeader) (*BloomPageDecoder, error) {
67-
data, err := alloc.Get(page.Len)
68-
if err != nil {
69-
return nil, errors.Wrap(err, "allocating buffer")
70-
}
71-
defer alloc.Put(data)
67+
func LazyDecodeBloomPage(r io.Reader, pool chunkenc.ReaderPool, page BloomPageHeader) (*BloomPageDecoder, error) {
68+
data := BloomPagePool.Get(page.Len)[:page.Len]
69+
defer BloomPagePool.Put(data)
7270

73-
_, err = io.ReadFull(r, data)
71+
_, err := io.ReadFull(r, data)
7472
if err != nil {
7573
return nil, errors.Wrap(err, "reading bloom page")
7674
}
@@ -86,10 +84,7 @@ func LazyDecodeBloomPage(r io.Reader, alloc Allocator, pool chunkenc.ReaderPool,
8684
}
8785
defer pool.PutReader(decompressor)
8886

89-
b, err := alloc.Get(page.DecompressedLen)
90-
if err != nil {
91-
return nil, errors.Wrap(err, "allocating buffer")
92-
}
87+
b := BloomPagePool.Get(page.DecompressedLen)[:page.DecompressedLen]
9388

9489
if _, err = io.ReadFull(decompressor, b); err != nil {
9590
return nil, errors.Wrap(err, "decompressing bloom page")
@@ -101,18 +96,14 @@ func LazyDecodeBloomPage(r io.Reader, alloc Allocator, pool chunkenc.ReaderPool,
10196
}
10297

10398
// shortcut to skip allocations when we know the page is not compressed
104-
func LazyDecodeBloomPageNoCompression(r io.Reader, alloc Allocator, page BloomPageHeader) (*BloomPageDecoder, error) {
99+
func LazyDecodeBloomPageNoCompression(r io.Reader, page BloomPageHeader) (*BloomPageDecoder, error) {
105100
// data + checksum
106101
if page.Len != page.DecompressedLen+4 {
107102
return nil, errors.New("the Len and DecompressedLen of the page do not match")
108103
}
104+
data := BloomPagePool.Get(page.Len)[:page.Len]
109105

110-
data, err := alloc.Get(page.Len)
111-
if err != nil {
112-
return nil, errors.Wrap(err, "allocating buffer")
113-
}
114-
115-
_, err = io.ReadFull(r, data)
106+
_, err := io.ReadFull(r, data)
116107
if err != nil {
117108
return nil, errors.Wrap(err, "reading bloom page")
118109
}
@@ -167,16 +158,12 @@ type BloomPageDecoder struct {
167158
// This can only safely be used when the underlying bloom
168159
// bytes don't escape the decoder:
169160
// on reads in the bloom-gw but not in the bloom-compactor
170-
func (d *BloomPageDecoder) Relinquish(alloc Allocator) {
171-
if d == nil {
172-
return
173-
}
174-
161+
func (d *BloomPageDecoder) Relinquish() {
175162
data := d.data
176163
d.data = nil
177164

178165
if cap(data) > 0 {
179-
_ = alloc.Put(data)
166+
BloomPagePool.Put(data)
180167
}
181168
}
182169

@@ -290,7 +277,7 @@ func (b *BloomBlock) DecodeHeaders(r io.ReadSeeker) (uint32, error) {
290277
// BloomPageDecoder returns a decoder for the given page index.
291278
// It may skip the page if it's too large.
292279
// NB(owen-d): if `skip` is true, err _must_ be nil.
293-
func (b *BloomBlock) BloomPageDecoder(r io.ReadSeeker, alloc Allocator, pageIdx int, maxPageSize int, metrics *Metrics) (res *BloomPageDecoder, skip bool, err error) {
280+
func (b *BloomBlock) BloomPageDecoder(r io.ReadSeeker, pageIdx int, maxPageSize int, metrics *Metrics) (res *BloomPageDecoder, skip bool, err error) {
294281
if pageIdx < 0 || pageIdx >= len(b.pageHeaders) {
295282
metrics.pagesSkipped.WithLabelValues(pageTypeBloom, skipReasonOOB).Inc()
296283
metrics.bytesSkipped.WithLabelValues(pageTypeBloom, skipReasonOOB).Add(float64(b.pageHeaders[pageIdx].DecompressedLen))
@@ -313,9 +300,9 @@ func (b *BloomBlock) BloomPageDecoder(r io.ReadSeeker, alloc Allocator, pageIdx
313300
}
314301

315302
if b.schema.encoding == chunkenc.EncNone {
316-
res, err = LazyDecodeBloomPageNoCompression(r, alloc, page)
303+
res, err = LazyDecodeBloomPageNoCompression(r, page)
317304
} else {
318-
res, err = LazyDecodeBloomPage(r, alloc, b.schema.DecompressorPool(), page)
305+
res, err = LazyDecodeBloomPage(r, b.schema.DecompressorPool(), page)
319306
}
320307

321308
if err != nil {

pkg/storage/bloom/v1/bloom_querier.go

+15-15
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ type BloomQuerier interface {
77
}
88

99
type LazyBloomIter struct {
10+
usePool bool
11+
1012
b *Block
1113
m int // max page size in bytes
1214

13-
alloc Allocator
14-
1515
// state
1616
initialized bool
1717
err error
@@ -24,11 +24,11 @@ type LazyBloomIter struct {
2424
// will be returned to the pool for efficiency.
2525
// This can only safely be used when the underlying bloom
2626
// bytes don't escape the decoder.
27-
func NewLazyBloomIter(b *Block, alloc Allocator, maxSize int) *LazyBloomIter {
27+
func NewLazyBloomIter(b *Block, pool bool, maxSize int) *LazyBloomIter {
2828
return &LazyBloomIter{
29-
b: b,
30-
m: maxSize,
31-
alloc: alloc,
29+
usePool: pool,
30+
b: b,
31+
m: maxSize,
3232
}
3333
}
3434

@@ -53,14 +53,16 @@ func (it *LazyBloomIter) LoadOffset(offset BloomOffset) (skip bool) {
5353

5454
// drop the current page if it exists and
5555
// we're using the pool
56-
it.curPage.Relinquish(it.alloc)
56+
if it.curPage != nil && it.usePool {
57+
it.curPage.Relinquish()
58+
}
5759

5860
r, err := it.b.reader.Blooms()
5961
if err != nil {
6062
it.err = errors.Wrap(err, "getting blooms reader")
6163
return false
6264
}
63-
decoder, skip, err := it.b.blooms.BloomPageDecoder(r, it.alloc, offset.Page, it.m, it.b.metrics)
65+
decoder, skip, err := it.b.blooms.BloomPageDecoder(r, offset.Page, it.m, it.b.metrics)
6466
if err != nil {
6567
it.err = errors.Wrap(err, "loading bloom page")
6668
return false
@@ -104,7 +106,6 @@ func (it *LazyBloomIter) next() bool {
104106
var skip bool
105107
it.curPage, skip, err = it.b.blooms.BloomPageDecoder(
106108
r,
107-
it.alloc,
108109
it.curPageIndex,
109110
it.m,
110111
it.b.metrics,
@@ -129,8 +130,11 @@ func (it *LazyBloomIter) next() bool {
129130

130131
// we've exhausted the current page, progress to next
131132
it.curPageIndex++
132-
// drop the current page if it exists
133-
it.curPage.Relinquish(it.alloc)
133+
// drop the current page if it exists and
134+
// we're using the pool
135+
if it.usePool {
136+
it.curPage.Relinquish()
137+
}
134138
it.curPage = nil
135139
continue
136140
}
@@ -157,7 +161,3 @@ func (it *LazyBloomIter) Err() error {
157161
return nil
158162
}
159163
}
160-
161-
func (it *LazyBloomIter) Close() {
162-
it.curPage.Relinquish(it.alloc)
163-
}

pkg/storage/bloom/v1/builder_test.go

+6-6
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ func TestBlockBuilder_RoundTrip(t *testing.T) {
117117
}
118118

119119
block := NewBlock(tc.reader, NewMetrics(nil))
120-
querier := NewBlockQuerier(block, &SimpleHeapAllocator{}, DefaultMaxPageSize)
120+
querier := NewBlockQuerier(block, false, DefaultMaxPageSize)
121121

122122
err = block.LoadHeaders()
123123
require.Nil(t, err)
@@ -218,7 +218,7 @@ func TestMergeBuilder(t *testing.T) {
218218
itr := NewSliceIter[SeriesWithBloom](data[min:max])
219219
_, err = builder.BuildFrom(itr)
220220
require.Nil(t, err)
221-
blocks = append(blocks, NewPeekingIter[*SeriesWithBloom](NewBlockQuerier(NewBlock(reader, NewMetrics(nil)), &SimpleHeapAllocator{}, DefaultMaxPageSize)))
221+
blocks = append(blocks, NewPeekingIter[*SeriesWithBloom](NewBlockQuerier(NewBlock(reader, NewMetrics(nil)), false, DefaultMaxPageSize)))
222222
}
223223

224224
// We're not testing the ability to extend a bloom in this test
@@ -252,7 +252,7 @@ func TestMergeBuilder(t *testing.T) {
252252
require.Nil(t, err)
253253

254254
block := NewBlock(reader, NewMetrics(nil))
255-
querier := NewBlockQuerier(block, &SimpleHeapAllocator{}, DefaultMaxPageSize)
255+
querier := NewBlockQuerier(block, false, DefaultMaxPageSize)
256256

257257
EqualIterators[*SeriesWithBloom](
258258
t,
@@ -296,7 +296,7 @@ func TestBlockReset(t *testing.T) {
296296
_, err = builder.BuildFrom(itr)
297297
require.Nil(t, err)
298298
block := NewBlock(reader, NewMetrics(nil))
299-
querier := NewBlockQuerier(block, &SimpleHeapAllocator{}, DefaultMaxPageSize)
299+
querier := NewBlockQuerier(block, false, DefaultMaxPageSize)
300300

301301
rounds := make([][]model.Fingerprint, 2)
302302

@@ -362,7 +362,7 @@ func TestMergeBuilder_Roundtrip(t *testing.T) {
362362
_, err = builder.BuildFrom(itr)
363363
require.Nil(t, err)
364364
block := NewBlock(reader, NewMetrics(nil))
365-
querier := NewBlockQuerier(block, &SimpleHeapAllocator{}, DefaultMaxPageSize)
365+
querier := NewBlockQuerier(block, false, DefaultMaxPageSize)
366366

367367
// rather than use the block querier directly, collect it's data
368368
// so we can use it in a few places later
@@ -423,7 +423,7 @@ func TestMergeBuilder_Roundtrip(t *testing.T) {
423423

424424
// ensure the new block contains one copy of all the data
425425
// by comparing it against an iterator over the source data
426-
mergedBlockQuerier := NewBlockQuerier(NewBlock(reader, NewMetrics(nil)), &SimpleHeapAllocator{}, DefaultMaxPageSize)
426+
mergedBlockQuerier := NewBlockQuerier(NewBlock(reader, NewMetrics(nil)), false, DefaultMaxPageSize)
427427
sourceItr := NewSliceIter[*SeriesWithBloom](PointerSlice[SeriesWithBloom](xs))
428428

429429
EqualIterators[*SeriesWithBloom](

0 commit comments

Comments (0)