
Commit 9637790

chore: Move compression utilities into separate package (#14167)
Compression tooling has been part of the `chunkenc` (chunk encoding) package for legacy reasons. Since more components use it now, it is easier to keep it in a separate package. This also eliminates the confusion around "encoding", which has incorrectly been used synonymously with "compression" in the past.

---

Signed-off-by: Christian Haudum <[email protected]>
Co-authored-by: Robert Fratto <[email protected]>
1 parent ce2e6d5 · commit 9637790
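Across the 54 files below, the change at call sites is a mechanical import and package-qualifier swap. A minimal sketch of a consumer after the move (the "snappy" literal is illustrative; `ParseEncoding` and its behavior come from the code relocated by this commit):

package main

import (
	"fmt"

	// previously: "github.com/grafana/loki/v3/pkg/chunkenc"
	"github.com/grafana/loki/v3/pkg/compression"
)

func main() {
	// Same helper as before the move; only the import path and
	// package qualifier change.
	enc, err := compression.ParseEncoding("snappy")
	if err != nil {
		panic(err)
	}
	fmt.Println(enc) // "snappy"
}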


54 files changed (+776, -714 lines)

pkg/bloombuild/builder/builder.go (+2, -2)

@@ -21,7 +21,7 @@ import (
 
 	"github.com/grafana/loki/v3/pkg/bloombuild/common"
 	"github.com/grafana/loki/v3/pkg/bloombuild/protos"
-	"github.com/grafana/loki/v3/pkg/chunkenc"
+	"github.com/grafana/loki/v3/pkg/compression"
 	iter "github.com/grafana/loki/v3/pkg/iter/v2"
 	"github.com/grafana/loki/v3/pkg/storage"
 	v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1"
@@ -333,7 +333,7 @@ func (b *Builder) processTask(
 		return nil, fmt.Errorf("failed to get client: %w", err)
 	}
 
-	blockEnc, err := chunkenc.ParseEncoding(b.limits.BloomBlockEncoding(task.Tenant))
+	blockEnc, err := compression.ParseEncoding(b.limits.BloomBlockEncoding(task.Tenant))
 	if err != nil {
 		return nil, fmt.Errorf("failed to parse block encoding: %w", err)
 	}

pkg/bloombuild/builder/spec_test.go (+2, -2)

@@ -10,7 +10,7 @@ import (
 	"github.com/prometheus/common/model"
 	"github.com/stretchr/testify/require"
 
-	"github.com/grafana/loki/v3/pkg/chunkenc"
+	"github.com/grafana/loki/v3/pkg/compression"
 	v2 "github.com/grafana/loki/v3/pkg/iter/v2"
 	v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1"
 	"github.com/grafana/loki/v3/pkg/storage/stores/shipper/bloomshipper"
@@ -115,7 +115,7 @@ func dummyBloomGen(t *testing.T, opts v1.BlockOptions, store v2.Iterator[*v1.Ser
 
 func TestSimpleBloomGenerator(t *testing.T) {
 	const maxBlockSize = 100 << 20 // 100MB
-	for _, enc := range []chunkenc.Encoding{chunkenc.EncNone, chunkenc.EncGZIP, chunkenc.EncSnappy} {
+	for _, enc := range []compression.Encoding{compression.EncNone, compression.EncGZIP, compression.EncSnappy} {
 		for _, tc := range []struct {
 			desc                 string
 			fromSchema, toSchema v1.BlockOptions

pkg/bloombuild/common/tsdb.go (+2, -2)

@@ -13,7 +13,7 @@ import (
 	"github.com/prometheus/common/model"
 	"github.com/prometheus/prometheus/model/labels"
 
-	"github.com/grafana/loki/v3/pkg/chunkenc"
+	"github.com/grafana/loki/v3/pkg/compression"
 	iter "github.com/grafana/loki/v3/pkg/iter/v2"
 	baseStore "github.com/grafana/loki/v3/pkg/storage"
 	v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1"
@@ -102,7 +102,7 @@ func (b *BloomTSDBStore) LoadTSDB(
 	}
 	defer data.Close()
 
-	decompressorPool := chunkenc.GetReaderPool(chunkenc.EncGZIP)
+	decompressorPool := compression.GetReaderPool(compression.EncGZIP)
 	decompressor, err := decompressorPool.GetReader(data)
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to get decompressor")

pkg/bloombuild/planner/planner_test.go (+2, -2)

@@ -23,7 +23,7 @@ import (
 
 	"github.com/grafana/loki/v3/pkg/bloombuild/common"
 	"github.com/grafana/loki/v3/pkg/bloombuild/protos"
-	"github.com/grafana/loki/v3/pkg/chunkenc"
+	"github.com/grafana/loki/v3/pkg/compression"
 	iter "github.com/grafana/loki/v3/pkg/iter/v2"
 	"github.com/grafana/loki/v3/pkg/storage"
 	v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1"
@@ -188,7 +188,7 @@ func genBlock(ref bloomshipper.BlockRef) (bloomshipper.Block, error) {
 	writer := v1.NewMemoryBlockWriter(indexBuf, bloomsBuf)
 	reader := v1.NewByteReader(indexBuf, bloomsBuf)
 
-	blockOpts := v1.NewBlockOptions(chunkenc.EncNone, 4, 1, 0, 0)
+	blockOpts := v1.NewBlockOptions(compression.EncNone, 4, 1, 0, 0)
 
 	builder, err := v1.NewBlockBuilder(blockOpts, writer)
 	if err != nil {

pkg/chunkenc/dumb_chunk.go (+2, -1)

@@ -6,6 +6,7 @@ import (
 	"sort"
 	"time"
 
+	"github.com/grafana/loki/v3/pkg/compression"
 	"github.com/grafana/loki/v3/pkg/iter"
 	"github.com/grafana/loki/v3/pkg/logproto"
 	"github.com/grafana/loki/v3/pkg/logql/log"
@@ -69,7 +70,7 @@ func (c *dumbChunk) Utilization() float64 {
 	return float64(len(c.entries)) / float64(tmpNumEntries)
 }
 
-func (c *dumbChunk) Encoding() Encoding { return EncNone }
+func (c *dumbChunk) Encoding() compression.Encoding { return compression.EncNone }
 
 // Returns an iterator that goes from _most_ recent to _least_ recent (ie,
 // backwards).

pkg/chunkenc/interface.go (+2, -82)

@@ -5,9 +5,9 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"strings"
 	"time"
 
+	"github.com/grafana/loki/v3/pkg/compression"
 	"github.com/grafana/loki/v3/pkg/iter"
 	"github.com/grafana/loki/v3/pkg/logproto"
 	"github.com/grafana/loki/v3/pkg/logql/log"
@@ -48,86 +48,6 @@ func IsOutOfOrderErr(err error) bool {
 	return err == ErrOutOfOrder || IsErrTooFarBehind(err)
 }
 
-// Encoding is the identifier for a chunk encoding.
-type Encoding byte
-
-// The different available encodings.
-// Make sure to preserve the order, as these numeric values are written to the chunks!
-const (
-	EncNone Encoding = iota
-	EncGZIP
-	EncDumb
-	EncLZ4_64k
-	EncSnappy
-	EncLZ4_256k
-	EncLZ4_1M
-	EncLZ4_4M
-	EncFlate
-	EncZstd
-)
-
-var supportedEncoding = []Encoding{
-	EncNone,
-	EncGZIP,
-	EncLZ4_64k,
-	EncSnappy,
-	EncLZ4_256k,
-	EncLZ4_1M,
-	EncLZ4_4M,
-	EncFlate,
-	EncZstd,
-}
-
-func (e Encoding) String() string {
-	switch e {
-	case EncGZIP:
-		return "gzip"
-	case EncNone:
-		return "none"
-	case EncDumb:
-		return "dumb"
-	case EncLZ4_64k:
-		return "lz4-64k"
-	case EncLZ4_256k:
-		return "lz4-256k"
-	case EncLZ4_1M:
-		return "lz4-1M"
-	case EncLZ4_4M:
-		return "lz4"
-	case EncSnappy:
-		return "snappy"
-	case EncFlate:
-		return "flate"
-	case EncZstd:
-		return "zstd"
-	default:
-		return "unknown"
-	}
-}
-
-// ParseEncoding parses an chunk encoding (compression algorithm) by its name.
-func ParseEncoding(enc string) (Encoding, error) {
-	for _, e := range supportedEncoding {
-		if strings.EqualFold(e.String(), enc) {
-			return e, nil
-		}
-	}
-	return 0, fmt.Errorf("invalid encoding: %s, supported: %s", enc, SupportedEncoding())
-
-}
-
-// SupportedEncoding returns the list of supported Encoding.
-func SupportedEncoding() string {
-	var sb strings.Builder
-	for i := range supportedEncoding {
-		sb.WriteString(supportedEncoding[i].String())
-		if i != len(supportedEncoding)-1 {
-			sb.WriteString(", ")
-		}
-	}
-	return sb.String()
-}
-
 // Chunk is the interface for the compressed logs chunk format.
 type Chunk interface {
 	Bounds() (time.Time, time.Time)
@@ -148,7 +68,7 @@ type Chunk interface {
 	UncompressedSize() int
 	CompressedSize() int
 	Close() error
-	Encoding() Encoding
+	Encoding() compression.Encoding
 	Rebound(start, end time.Time, filter filter.Func) (Chunk, error)
 }

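Note the warning on the removed constant block: the iota values are persisted. Assuming the constants land in `pkg/compression` unchanged (which the +2/-82 pairing of this file suggests), a byte read back from a stored chunk still maps to the same algorithm. A small sketch with an illustrative raw byte:

package main

import (
	"fmt"

	"github.com/grafana/loki/v3/pkg/compression"
)

func main() {
	// Chunk format v2+ stores the block encoding as a single byte (see
	// the newByteChunk hunk in memchunk.go below), so the constant order
	// is part of the on-disk format and must survive the package move.
	raw := byte(4) // illustrative value, as if read from a stored chunk
	enc := compression.Encoding(raw)
	fmt.Println(enc) // "snappy": EncSnappy keeps numeric value 4
}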
pkg/chunkenc/interface_test.go

-23
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,6 @@ import (
77
"github.com/stretchr/testify/require"
88
)
99

10-
func TestParseEncoding(t *testing.T) {
11-
tests := []struct {
12-
enc string
13-
want Encoding
14-
wantErr bool
15-
}{
16-
{"gzip", EncGZIP, false},
17-
{"bad", 0, true},
18-
}
19-
for _, tt := range tests {
20-
t.Run(tt.enc, func(t *testing.T) {
21-
got, err := ParseEncoding(tt.enc)
22-
if (err != nil) != tt.wantErr {
23-
t.Errorf("ParseEncoding() error = %v, wantErr %v", err, tt.wantErr)
24-
return
25-
}
26-
if got != tt.want {
27-
t.Errorf("ParseEncoding() = %v, want %v", got, tt.want)
28-
}
29-
})
30-
}
31-
}
32-
3310
func TestIsOutOfOrderErr(t *testing.T) {
3411
now := time.Now()
3512

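The deleted test presumably travels with the relocated code rather than disappearing. A sketch of what it would look like retargeted at the new package (the file placement is an assumption; the body is the one removed above, with the package-local identifiers now resolving in `compression`):

package compression

import "testing"

func TestParseEncoding(t *testing.T) {
	tests := []struct {
		enc     string
		want    Encoding
		wantErr bool
	}{
		{"gzip", EncGZIP, false}, // known name round-trips
		{"bad", 0, true},         // unknown name yields an error
	}
	for _, tt := range tests {
		t.Run(tt.enc, func(t *testing.T) {
			got, err := ParseEncoding(tt.enc)
			if (err != nil) != tt.wantErr {
				t.Errorf("ParseEncoding() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if got != tt.want {
				t.Errorf("ParseEncoding() = %v, want %v", got, tt.want)
			}
		})
	}
}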
pkg/chunkenc/memchunk.go

+18-17
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/pkg/errors"
1717
"github.com/prometheus/prometheus/model/labels"
1818

19+
"github.com/grafana/loki/v3/pkg/compression"
1920
"github.com/grafana/loki/v3/pkg/iter"
2021
"github.com/grafana/loki/v3/pkg/logproto"
2122
"github.com/grafana/loki/v3/pkg/logql/log"
@@ -131,7 +132,7 @@ type MemChunk struct {
131132
head HeadBlock
132133

133134
format byte
134-
encoding Encoding
135+
encoding compression.Encoding
135136
headFmt HeadBlockFmt
136137

137138
// compressed size of chunk. Set when chunk is cut or while decoding chunk from storage.
@@ -196,7 +197,7 @@ func (hb *headBlock) Append(ts int64, line string, _ labels.Labels) (bool, error
196197
return false, nil
197198
}
198199

199-
func (hb *headBlock) Serialise(pool WriterPool) ([]byte, error) {
200+
func (hb *headBlock) Serialise(pool compression.WriterPool) ([]byte, error) {
200201
inBuf := serializeBytesBufferPool.Get().(*bytes.Buffer)
201202
defer func() {
202203
inBuf.Reset()
@@ -354,7 +355,7 @@ type entry struct {
354355
}
355356

356357
// NewMemChunk returns a new in-mem chunk.
357-
func NewMemChunk(chunkFormat byte, enc Encoding, head HeadBlockFmt, blockSize, targetSize int) *MemChunk {
358+
func NewMemChunk(chunkFormat byte, enc compression.Encoding, head HeadBlockFmt, blockSize, targetSize int) *MemChunk {
358359
return newMemChunkWithFormat(chunkFormat, enc, head, blockSize, targetSize)
359360
}
360361

@@ -369,7 +370,7 @@ func panicIfInvalidFormat(chunkFmt byte, head HeadBlockFmt) {
369370
}
370371

371372
// NewMemChunk returns a new in-mem chunk.
372-
func newMemChunkWithFormat(format byte, enc Encoding, head HeadBlockFmt, blockSize, targetSize int) *MemChunk {
373+
func newMemChunkWithFormat(format byte, enc compression.Encoding, head HeadBlockFmt, blockSize, targetSize int) *MemChunk {
373374
panicIfInvalidFormat(format, head)
374375

375376
symbolizer := newSymbolizer()
@@ -413,10 +414,10 @@ func newByteChunk(b []byte, blockSize, targetSize int, fromCheckpoint bool) (*Me
413414
bc.format = version
414415
switch version {
415416
case ChunkFormatV1:
416-
bc.encoding = EncGZIP
417+
bc.encoding = compression.EncGZIP
417418
case ChunkFormatV2, ChunkFormatV3, ChunkFormatV4:
418419
// format v2+ has a byte for block encoding.
419-
enc := Encoding(db.byte())
420+
enc := compression.Encoding(db.byte())
420421
if db.err() != nil {
421422
return nil, errors.Wrap(db.err(), "verifying encoding")
422423
}
@@ -535,7 +536,7 @@ func newByteChunk(b []byte, blockSize, targetSize int, fromCheckpoint bool) (*Me
535536
if fromCheckpoint {
536537
bc.symbolizer = symbolizerFromCheckpoint(lb)
537538
} else {
538-
symbolizer, err := symbolizerFromEnc(lb, GetReaderPool(bc.encoding))
539+
symbolizer, err := symbolizerFromEnc(lb, compression.GetReaderPool(bc.encoding))
539540
if err != nil {
540541
return nil, err
541542
}
@@ -653,7 +654,7 @@ func (c *MemChunk) writeTo(w io.Writer, forCheckpoint bool) (int64, error) {
653654
}
654655
} else {
655656
var err error
656-
n, crcHash, err = c.symbolizer.SerializeTo(w, GetWriterPool(c.encoding))
657+
n, crcHash, err = c.symbolizer.SerializeTo(w, compression.GetWriterPool(c.encoding))
657658
if err != nil {
658659
return offset, errors.Wrap(err, "write structured metadata")
659660
}
@@ -776,7 +777,7 @@ func MemchunkFromCheckpoint(chk, head []byte, desiredIfNotUnordered HeadBlockFmt
776777
}
777778

778779
// Encoding implements Chunk.
779-
func (c *MemChunk) Encoding() Encoding {
780+
func (c *MemChunk) Encoding() compression.Encoding {
780781
return c.encoding
781782
}
782783

@@ -941,7 +942,7 @@ func (c *MemChunk) cut() error {
941942
return nil
942943
}
943944

944-
b, err := c.head.Serialise(GetWriterPool(c.encoding))
945+
b, err := c.head.Serialise(compression.GetWriterPool(c.encoding))
945946
if err != nil {
946947
return err
947948
}
@@ -1172,7 +1173,7 @@ func (c *MemChunk) Rebound(start, end time.Time, filter filter.Func) (Chunk, err
11721173
// then allows us to bind a decoding context to a block when requested, but otherwise helps reduce the
11731174
// chances of chunk<>block encoding drift in the codebase as the latter is parameterized by the former.
11741175
type encBlock struct {
1175-
enc Encoding
1176+
enc compression.Encoding
11761177
format byte
11771178
symbolizer *symbolizer
11781179
block
@@ -1182,14 +1183,14 @@ func (b encBlock) Iterator(ctx context.Context, pipeline log.StreamPipeline) ite
11821183
if len(b.b) == 0 {
11831184
return iter.NoopEntryIterator
11841185
}
1185-
return newEntryIterator(ctx, GetReaderPool(b.enc), b.b, pipeline, b.format, b.symbolizer)
1186+
return newEntryIterator(ctx, compression.GetReaderPool(b.enc), b.b, pipeline, b.format, b.symbolizer)
11861187
}
11871188

11881189
func (b encBlock) SampleIterator(ctx context.Context, extractor log.StreamSampleExtractor) iter.SampleIterator {
11891190
if len(b.b) == 0 {
11901191
return iter.NoopSampleIterator
11911192
}
1192-
return newSampleIterator(ctx, GetReaderPool(b.enc), b.b, b.format, extractor, b.symbolizer)
1193+
return newSampleIterator(ctx, compression.GetReaderPool(b.enc), b.b, b.format, extractor, b.symbolizer)
11931194
}
11941195

11951196
func (b block) Offset() int {
@@ -1339,7 +1340,7 @@ type bufferedIterator struct {
13391340
stats *stats.Context
13401341

13411342
reader io.Reader
1342-
pool ReaderPool
1343+
pool compression.ReaderPool
13431344
symbolizer *symbolizer
13441345

13451346
err error
@@ -1358,7 +1359,7 @@ type bufferedIterator struct {
13581359
closed bool
13591360
}
13601361

1361-
func newBufferedIterator(ctx context.Context, pool ReaderPool, b []byte, format byte, symbolizer *symbolizer) *bufferedIterator {
1362+
func newBufferedIterator(ctx context.Context, pool compression.ReaderPool, b []byte, format byte, symbolizer *symbolizer) *bufferedIterator {
13621363
stats := stats.FromContext(ctx)
13631364
stats.AddCompressedBytes(int64(len(b)))
13641365
return &bufferedIterator{
@@ -1619,7 +1620,7 @@ func (si *bufferedIterator) close() {
16191620
si.origBytes = nil
16201621
}
16211622

1622-
func newEntryIterator(ctx context.Context, pool ReaderPool, b []byte, pipeline log.StreamPipeline, format byte, symbolizer *symbolizer) iter.EntryIterator {
1623+
func newEntryIterator(ctx context.Context, pool compression.ReaderPool, b []byte, pipeline log.StreamPipeline, format byte, symbolizer *symbolizer) iter.EntryIterator {
16231624
return &entryBufferedIterator{
16241625
bufferedIterator: newBufferedIterator(ctx, pool, b, format, symbolizer),
16251626
pipeline: pipeline,
@@ -1671,7 +1672,7 @@ func (e *entryBufferedIterator) Close() error {
16711672
return e.bufferedIterator.Close()
16721673
}
16731674

1674-
func newSampleIterator(ctx context.Context, pool ReaderPool, b []byte, format byte, extractor log.StreamSampleExtractor, symbolizer *symbolizer) iter.SampleIterator {
1675+
func newSampleIterator(ctx context.Context, pool compression.ReaderPool, b []byte, format byte, extractor log.StreamSampleExtractor, symbolizer *symbolizer) iter.SampleIterator {
16751676
return &sampleBufferedIterator{
16761677
bufferedIterator: newBufferedIterator(ctx, pool, b, format, symbolizer),
16771678
extractor: extractor,

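Taken together, the memchunk.go hunks show the compression pools now living in the new package. A round-trip sketch under the assumption that the pool interfaces kept their pre-move chunkenc shape (GetReaderPool, GetWriterPool, and GetReader appear in the diffs above; GetWriter, PutWriter, and PutReader do not and are assumed):

package main

import (
	"bytes"
	"fmt"
	"io"

	"github.com/grafana/loki/v3/pkg/compression"
)

func main() {
	var buf bytes.Buffer

	// Compress, as headBlock.Serialise does via GetWriterPool above.
	wpool := compression.GetWriterPool(compression.EncGZIP)
	w := wpool.GetWriter(&buf) // assumed io.WriteCloser, per the old chunkenc pools
	if _, err := w.Write([]byte("hello loki")); err != nil {
		panic(err)
	}
	if err := w.Close(); err != nil {
		panic(err)
	}
	wpool.PutWriter(w) // assumed: returns the writer to the pool

	// Decompress, mirroring the BloomTSDBStore.LoadTSDB hunk above.
	rpool := compression.GetReaderPool(compression.EncGZIP)
	r, err := rpool.GetReader(&buf)
	if err != nil {
		panic(err)
	}
	out, err := io.ReadAll(r)
	if err != nil {
		panic(err)
	}
	rpool.PutReader(r) // assumed: returns the reader to the pool
	fmt.Println(string(out)) // "hello loki"
}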