
Commit 70bfc98

grafanabot and kavirajk authored
[release-2.9.x] Pin chunk and index format to schema version. (#10333)
Backport bbfb13c from #10213

---

We pin all three `Chunk`, `HeadBlock` and `TSDB` versions to the `schema` version in the period config. The mapping (discussed with @owen-d and @sandeepsukhani) is:

- `v12` (current existing schema): ChunkFormatV3 (UnorderedHeadBlock) + TSDBv2
- `v13` (new schema): ChunkFormatV4 (UnorderedWithNonIndexedLabelsHeadBlockFmt) + TSDBv3

Note that the new schema `v13` supports the latest chunk and index formats.

**NOTES for Reviewer**

1. The general approach is to remove the idea of `index.LiveFormat`, `chunkenc.DefaultChunkFormat` and `chunkenc.DefaultHeadBlockFmt`. These variables were previously used to tie chunk and TSDB formats to specific Loki versions; this PR removes that coupling and pins the formats to the `schema` version instead, via two changes:
   1. The variables were replaced with explicit chunk and index formats within their own packages (and their tests).
   2. Wherever the variables were used outside their own packages (say by the ingester, compactor, etc.), the correct chunk and index versions are now extracted from the `schema` config.
2. Add two methods to `PeriodConfig`: (1) `ChunkFormat()`, returning the chunk and head block format tied to the schema, and (2) `TSDBFormat()`, returning the TSDB format tied to the schema.
3. Another idea I considered but didn't pursue was making `ChunkFormat` and `IndexFormat` distinct types (rather than the current `byte` and `int`, similar to the `HeadBlockFmt` type). I decided against it to keep the PR small and avoid complicating it with lots of changes.
4. Moved a couple of test cases from the `chunkenc` package to the `config` package, because they were actually testing methods on `SchemaConfig` and were creating cyclic dependencies.

Co-authored-by: Kaviraj Kanagaraj <[email protected]>
1 parent 9b7a634 commit 70bfc98
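
For illustration, the schema-to-format pinning described above could be expressed roughly as follows. This is a hedged sketch, not code from this commit: the helper name `chunkFormatForSchema` is hypothetical, and the real `PeriodConfig.ChunkFormat()` / `TSDBFormat()` methods live in the period config package with their own signatures and error handling.

package main

import (
	"fmt"

	"github.com/grafana/loki/pkg/chunkenc"
)

// chunkFormatForSchema mirrors the mapping the PR pins into the period config:
// schema v12 -> ChunkFormatV3 with the unordered head block,
// schema v13 -> ChunkFormatV4 with the head block that carries non-indexed labels.
func chunkFormatForSchema(schema string) (byte, chunkenc.HeadBlockFmt, error) {
	switch schema {
	case "v12":
		return chunkenc.ChunkFormatV3, chunkenc.UnorderedHeadBlockFmt, nil
	case "v13":
		return chunkenc.ChunkFormatV4, chunkenc.UnorderedWithNonIndexedLabelsHeadBlockFmt, nil
	default:
		return 0, 0, fmt.Errorf("chunk format not defined for schema %q", schema)
	}
}

func main() {
	chunkFmt, headFmt, err := chunkFormatForSchema("v13")
	if err != nil {
		panic(err)
	}
	fmt.Println("chunk format:", chunkFmt, "head block format:", headFmt)
}
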

44 files changed: +1907 −1395 lines

pkg/chunkenc/memchunk.go (+49 −35)

@@ -28,12 +28,10 @@ import (
 
 const (
 	_ byte = iota
-	chunkFormatV1
-	chunkFormatV2
-	chunkFormatV3
-	chunkFormatV4
-
-	DefaultChunkFormat = chunkFormatV4 // the currently used chunk format
+	ChunkFormatV1
+	ChunkFormatV2
+	ChunkFormatV3
+	ChunkFormatV4
 
 	blocksPerChunk = 10
 	maxLineLength  = 1024 * 1024 * 1024
@@ -84,10 +82,22 @@ const (
 	OrderedHeadBlockFmt
 	UnorderedHeadBlockFmt
 	UnorderedWithNonIndexedLabelsHeadBlockFmt
-
-	DefaultHeadBlockFmt = UnorderedWithNonIndexedLabelsHeadBlockFmt
 )
 
+// ChunkHeadFormatFor returns corresponding head block format for the given `chunkfmt`.
+func ChunkHeadFormatFor(chunkfmt byte) HeadBlockFmt {
+	if chunkfmt < ChunkFormatV3 {
+		return OrderedHeadBlockFmt
+	}
+
+	if chunkfmt == ChunkFormatV3 {
+		return UnorderedHeadBlockFmt
+	}
+
+	// return the latest head format for all chunkformat >v3
+	return UnorderedWithNonIndexedLabelsHeadBlockFmt
+}
+
 var magicNumber = uint32(0x12EE56A)
 
 // The table gets initialized with sync.Once but may still cause a race
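
A hypothetical caller (not part of this diff) can use the new helper to pick the head block format that matches a chunk format, instead of relying on the removed `DefaultHeadBlockFmt`:

package main

import (
	"fmt"

	"github.com/grafana/loki/pkg/chunkenc"
)

func main() {
	// ChunkFormatV3 maps to the unordered head block; ChunkFormatV4 maps to
	// the head block format that also stores non-indexed labels.
	fmt.Println(chunkenc.ChunkHeadFormatFor(chunkenc.ChunkFormatV3))
	fmt.Println(chunkenc.ChunkHeadFormatFor(chunkenc.ChunkFormatV4))
}
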
@@ -293,7 +303,7 @@ func (hb *headBlock) LoadBytes(b []byte) error {
 		return errors.Wrap(db.err(), "verifying headblock header")
 	}
 	switch version {
-	case chunkFormatV1, chunkFormatV2, chunkFormatV3, chunkFormatV4:
+	case ChunkFormatV1, ChunkFormatV2, ChunkFormatV3, ChunkFormatV4:
 	default:
 		return errors.Errorf("incompatible headBlock version (%v), only V1,V2,V3 is currently supported", version)
 	}
@@ -344,15 +354,16 @@ type entry struct {
 }
 
 // NewMemChunk returns a new in-mem chunk.
-func NewMemChunk(enc Encoding, head HeadBlockFmt, blockSize, targetSize int) *MemChunk {
-	return newMemChunkWithFormat(DefaultChunkFormat, enc, head, blockSize, targetSize)
+func NewMemChunk(chunkFormat byte, enc Encoding, head HeadBlockFmt, blockSize, targetSize int) *MemChunk {
+	return newMemChunkWithFormat(chunkFormat, enc, head, blockSize, targetSize)
 }
 
 func panicIfInvalidFormat(chunkFmt byte, head HeadBlockFmt) {
-	if chunkFmt == chunkFormatV2 && head != OrderedHeadBlockFmt {
+	if chunkFmt == ChunkFormatV2 && head != OrderedHeadBlockFmt {
 		panic("only OrderedHeadBlockFmt is supported for V2 chunks")
 	}
-	if chunkFmt == chunkFormatV4 && head != UnorderedWithNonIndexedLabelsHeadBlockFmt {
+	if chunkFmt == ChunkFormatV4 && head != UnorderedWithNonIndexedLabelsHeadBlockFmt {
+		fmt.Println("received head fmt", head.String())
 		panic("only UnorderedWithNonIndexedLabelsHeadBlockFmt is supported for V4 chunks")
 	}
 }
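
Since `NewMemChunk` now takes the chunk format explicitly, a caller would construct a chunk roughly like this. This is a hypothetical example: the block and target sizes are illustrative, and in Loki itself the format comes from the period config rather than being hard-coded.

package main

import (
	"github.com/grafana/loki/pkg/chunkenc"
)

func main() {
	// Pick a head block format that panicIfInvalidFormat accepts for the
	// chosen chunk format, instead of using the removed DefaultHeadBlockFmt.
	chunkFmt := chunkenc.ChunkFormatV4
	headFmt := chunkenc.ChunkHeadFormatFor(chunkFmt)

	// 256 KiB blocks and a 1.5 MiB target chunk size are illustrative values only.
	c := chunkenc.NewMemChunk(chunkFmt, chunkenc.EncGZIP, headFmt, 256*1024, 1536*1024)
	_ = c
}
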
@@ -401,9 +412,9 @@ func newByteChunk(b []byte, blockSize, targetSize int, fromCheckpoint bool) (*Me
 	}
 	bc.format = version
 	switch version {
-	case chunkFormatV1:
+	case ChunkFormatV1:
 		bc.encoding = EncGZIP
-	case chunkFormatV2, chunkFormatV3, chunkFormatV4:
+	case ChunkFormatV2, ChunkFormatV3, ChunkFormatV4:
 		// format v2+ has a byte for block encoding.
 		enc := Encoding(db.byte())
 		if db.err() != nil {
@@ -414,6 +425,9 @@ func newByteChunk(b []byte, blockSize, targetSize int, fromCheckpoint bool) (*Me
 		return nil, errors.Errorf("invalid version %d", version)
 	}
 
+	// Set the correct headblock format based on chunk format
+	bc.headFmt = ChunkHeadFormatFor(version)
+
 	// readSectionLenAndOffset reads len and offset for different sections within the chunk.
 	// Starting from chunk version 4, we have started writing offset and length of various sections within the chunk.
 	// These len and offset pairs would be stored together at the end of the chunk.
@@ -427,7 +441,7 @@ func newByteChunk(b []byte, blockSize, targetSize int, fromCheckpoint bool) (*Me
 
 	metasOffset := uint64(0)
 	metasLen := uint64(0)
-	if version >= chunkFormatV4 {
+	if version >= ChunkFormatV4 {
 		// version >= 4 starts writing length of sections after their offsets
 		metasLen, metasOffset = readSectionLenAndOffset(chunkMetasSectionIdx)
 	} else {
@@ -458,7 +472,7 @@ func newByteChunk(b []byte, blockSize, targetSize int, fromCheckpoint bool) (*Me
 
 		// Read offset and length.
 		blk.offset = db.uvarint()
-		if version >= chunkFormatV3 {
+		if version >= ChunkFormatV3 {
 			blk.uncompressedSize = db.uvarint()
 		}
 		l := db.uvarint()
@@ -481,7 +495,7 @@ func newByteChunk(b []byte, blockSize, targetSize int, fromCheckpoint bool) (*Me
 		}
 	}
 
-	if version >= chunkFormatV4 {
+	if version >= ChunkFormatV4 {
 		nonIndexedLabelsLen, nonIndexedLabelsOffset := readSectionLenAndOffset(chunkNonIndexedLabelsSectionIdx)
 		lb := b[nonIndexedLabelsOffset : nonIndexedLabelsOffset+nonIndexedLabelsLen] // non-indexed labels Offset + checksum
 		db = decbuf{b: lb}
@@ -526,7 +540,7 @@ func (c *MemChunk) Bytes() ([]byte, error) {
 func (c *MemChunk) BytesSize() int {
 	size := 4 // magic number
 	size++    // format
-	if c.format > chunkFormatV1 {
+	if c.format > ChunkFormatV1 {
 		size++ // chunk format v2+ has a byte for encoding.
 	}
 
@@ -538,7 +552,7 @@ func (c *MemChunk) BytesSize() int {
 		size += binary.MaxVarintLen64 // mint
 		size += binary.MaxVarintLen64 // maxt
 		size += binary.MaxVarintLen32 // offset
-		if c.format >= chunkFormatV3 {
+		if c.format >= ChunkFormatV3 {
 			size += binary.MaxVarintLen32 // uncompressed size
 		}
 		size += binary.MaxVarintLen32 // len(b)
@@ -550,7 +564,7 @@ func (c *MemChunk) BytesSize() int {
 	size += crc32.Size // metablock crc
 	size += 8          // metaoffset
 
-	if c.format >= chunkFormatV4 {
+	if c.format >= ChunkFormatV4 {
 		size += 8 // metablock length
 
 		size += c.symbolizer.CheckpointSize() // non-indexed labels block
@@ -586,7 +600,7 @@ func (c *MemChunk) writeTo(w io.Writer, forCheckpoint bool) (int64, error) {
 	// Write the header (magicNum + version).
 	eb.putBE32(magicNumber)
 	eb.putByte(c.format)
-	if c.format > chunkFormatV1 {
+	if c.format > ChunkFormatV1 {
 		// chunk format v2+ has a byte for encoding.
 		eb.putByte(byte(c.encoding))
 	}
@@ -599,7 +613,7 @@ func (c *MemChunk) writeTo(w io.Writer, forCheckpoint bool) (int64, error) {
 	nonIndexedLabelsOffset := offset
 	nonIndexedLabelsLen := 0
 
-	if c.format >= chunkFormatV4 {
+	if c.format >= ChunkFormatV4 {
 		var (
 			n       int
 			crcHash []byte
@@ -655,7 +669,7 @@ func (c *MemChunk) writeTo(w io.Writer, forCheckpoint bool) (int64, error) {
 		eb.putVarint64(b.mint)
 		eb.putVarint64(b.maxt)
 		eb.putUvarint(b.offset)
-		if c.format >= chunkFormatV3 {
+		if c.format >= ChunkFormatV3 {
 			eb.putUvarint(b.uncompressedSize)
 		}
 		eb.putUvarint(len(b.b))
@@ -669,7 +683,7 @@ func (c *MemChunk) writeTo(w io.Writer, forCheckpoint bool) (int64, error) {
 	}
 	offset += int64(n)
 
-	if c.format >= chunkFormatV4 {
+	if c.format >= ChunkFormatV4 {
 		// Write non-indexed labels offset and length
 		eb.reset()
 		eb.putBE64int(nonIndexedLabelsLen)
@@ -683,7 +697,7 @@ func (c *MemChunk) writeTo(w io.Writer, forCheckpoint bool) (int64, error) {
 
 	// Write the metasOffset.
 	eb.reset()
-	if c.format >= chunkFormatV4 {
+	if c.format >= ChunkFormatV4 {
 		eb.putBE64int(metasLen)
 	}
 	eb.putBE64int(int(metasOffset))
@@ -763,7 +777,7 @@ func (c *MemChunk) SpaceFor(e *logproto.Entry) bool {
 		// a great check, but it will guarantee we are always under the target size
 		newHBSize := c.head.UncompressedSize() + len(e.Line)
 		nonIndexedLabelsSize := 0
-		if c.format >= chunkFormatV4 {
+		if c.format >= ChunkFormatV4 {
 			newHBSize += metaLabelsLen(logproto.FromLabelAdaptersToLabels(e.NonIndexedLabels))
 			// non-indexed labels are compressed while serializing the chunk so we don't know what their size would be after compression.
 			// As adoption increases, their overall size can be non-trivial so we can't ignore them while calculating chunk size.
@@ -786,7 +800,7 @@ func (c *MemChunk) UncompressedSize() int {
 		size += b.uncompressedSize
 	}
 
-	if c.format >= chunkFormatV4 {
+	if c.format >= ChunkFormatV4 {
 		size += c.symbolizer.UncompressedSize()
 	}
 
@@ -802,7 +816,7 @@ func (c *MemChunk) CompressedSize() int {
 	size := 0
 	// Better to account for any uncompressed data than ignore it even though this isn't accurate.
 	size += c.head.UncompressedSize()
-	if c.format >= chunkFormatV4 {
+	if c.format >= ChunkFormatV4 {
 		size += c.symbolizer.UncompressedSize() // length of each symbol
 	}
 
@@ -829,7 +843,7 @@ func (c *MemChunk) Append(entry *logproto.Entry) error {
 		return ErrOutOfOrder
 	}
 
-	if c.format < chunkFormatV4 {
+	if c.format < ChunkFormatV4 {
 		entry.NonIndexedLabels = nil
 	}
 	if err := c.head.Append(entryTimestamp, entry.Line, logproto.FromLabelAdaptersToLabels(entry.NonIndexedLabels)); err != nil {
@@ -940,7 +954,7 @@ func (c *MemChunk) Iterator(ctx context.Context, mintT, maxtT time.Time, directi
 	mint, maxt := mintT.UnixNano(), maxtT.UnixNano()
 	blockItrs := make([]iter.EntryIterator, 0, len(c.blocks)+1)
 
-	if c.format >= chunkFormatV4 {
+	if c.format >= ChunkFormatV4 {
 		stats := stats.FromContext(ctx)
 		stats.AddCompressedBytes(int64(c.symbolizer.CompressedSize()))
 		decompressedSize := int64(c.symbolizer.DecompressedSize())
@@ -1025,7 +1039,7 @@ func (c *MemChunk) SampleIterator(ctx context.Context, from, through time.Time,
 	mint, maxt := from.UnixNano(), through.UnixNano()
 	its := make([]iter.SampleIterator, 0, len(c.blocks)+1)
 
-	if c.format >= chunkFormatV4 {
+	if c.format >= ChunkFormatV4 {
 		stats := stats.FromContext(ctx)
 		stats.AddCompressedBytes(int64(c.symbolizer.CompressedSize()))
 		decompressedSize := int64(c.symbolizer.DecompressedSize())
@@ -1095,12 +1109,12 @@ func (c *MemChunk) Rebound(start, end time.Time, filter filter.Func) (Chunk, err
 	// as close as possible, respect the block/target sizes specified. However,
 	// if the blockSize is not set, use reasonable defaults.
 	if c.blockSize > 0 {
-		newChunk = NewMemChunk(c.Encoding(), DefaultHeadBlockFmt, c.blockSize, c.targetSize)
+		newChunk = NewMemChunk(c.format, c.Encoding(), c.headFmt, c.blockSize, c.targetSize)
 	} else {
 		// Using defaultBlockSize for target block size.
 		// The alternative here could be going over all the blocks and using the size of the largest block as target block size but I(Sandeep) feel that it is not worth the complexity.
 		// For target chunk size I am using compressed size of original chunk since the newChunk should anyways be lower in size than that.
-		newChunk = NewMemChunk(c.Encoding(), DefaultHeadBlockFmt, defaultBlockSize, c.CompressedSize())
+		newChunk = NewMemChunk(c.format, c.Encoding(), c.headFmt, defaultBlockSize, c.CompressedSize())
 	}
 
 	for itr.Next() {
@@ -1423,7 +1437,7 @@ func (si *bufferedIterator) moveNext() (int64, []byte, labels.Labels, bool) {
 
 	decompressedBytes += int64(lineSize)
 
-	if si.format < chunkFormatV4 {
+	if si.format < ChunkFormatV4 {
 		si.stats.AddDecompressedBytes(decompressedBytes)
 		si.stats.AddDecompressedLines(1)
 		return ts, si.buf[:lineSize], nil, true
