From 3c461ddf06a9da022bd54031e51ffeeffb291951 Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Fri, 20 Oct 2023 11:34:59 +0800 Subject: [PATCH] util: Change the name of ListInDisk (#47777) ref pingcap/tidb#47733 --- docs/design/2021-08-18-charsets.md | 18 +++--- pkg/executor/aggregate/agg_hash_executor.go | 6 +- pkg/planner/cardinality/row_size.go | 14 ++-- pkg/planner/cardinality/row_size_test.go | 20 +++--- pkg/planner/core/task.go | 2 +- pkg/util/chunk/BUILD.bazel | 4 +- pkg/util/chunk/list.go | 2 +- pkg/util/chunk/row_container.go | 6 +- pkg/util/chunk/{disk.go => row_in_disk.go} | 59 ++++++++--------- .../{disk_test.go => row_in_disk_test.go} | 64 +++++++++---------- pkg/util/memory/tracker.go | 4 +- 11 files changed, 100 insertions(+), 99 deletions(-) rename pkg/util/chunk/{disk.go => row_in_disk.go} (85%) rename pkg/util/chunk/{disk_test.go => row_in_disk_test.go} (88%) diff --git a/docs/design/2021-08-18-charsets.md b/docs/design/2021-08-18-charsets.md index b048341f818e6..76a2f446a9777 100644 --- a/docs/design/2021-08-18-charsets.md +++ b/docs/design/2021-08-18-charsets.md @@ -65,7 +65,7 @@ After receiving the non-utf-8 character set request, this solution will convert - All ParseOneStmt/Parse usage needs to be checked - For SQL strings that need to be temporarily saved, you need to bring character set information. For example, BindSQL, View Select Stmt, etc. - For internally executed SQL statements, since they are already utf-8, they do not need to be processed. For example, the table creation statement in the perfschema package. - + ### Runtime - Add a repertoire field to collationInfo to facilitate automatic character set conversion in expressions, so that many errors like "illegal mix of collations" can be avoided. - The corresponding types of the Repertoire attribute are as follows: @@ -75,11 +75,11 @@ After receiving the non-utf-8 character set request, this solution will convert const ( // RepertoireASCII is pure ASCII and it’s Unicode range: U+0000..U+007F - RepertoireASCII Repertoire = 1 - // RepertoireExtended is Extended characters and it’s Unicode range: U+0080..U+FFFF - RepertoireExtended Repertoire = 1 << 1 - // RepertoireUnicode consists ASCII and EXTENDED, and it’s Unicode range: U+0000..U+FFFF - RepertoireUnicode Repertoire = ASCII | EXTENDED + RepertoireASCII Repertoire = 1 + // RepertoireExtended is Extended characters and it’s Unicode range: U+0080..U+FFFF + RepertoireExtended Repertoire = 1 << 1 + // RepertoireUnicode consists ASCII and EXTENDED, and it’s Unicode range: U+0000..U+FFFF + RepertoireUnicode Repertoire = ASCII | EXTENDED ) ``` @@ -90,7 +90,7 @@ After receiving the non-utf-8 character set request, this solution will convert ### Optimizer -- The statistics module may need special processing functions based on charset: AvgColSizeListInDisk, GetFixedLen, BucketToString and DecodedString, etc. +- The statistics module may need special processing functions based on charset: AvgColSizeDataInDiskByRows, GetFixedLen, BucketToString and DecodedString, etc. - Ranger module - The processing of prefix len needs to consider the charset. - Functions such as BuildFromPatternLike and checkLikeFunc may need to consider charset. @@ -123,7 +123,7 @@ Other behaviors that need to be dealt with: - Upgrade compatibility: - There may be compatibility issues when performing operations during the rolling upgrade. - The new version of the cluster is expected to have no compatibility issues when reading old data. 
-- Downgrade compatibility: +- Downgrade compatibility: - Downgrade is not compatible. The index key uses the table of gbk_bin/gbk_chinese_ci. The lower version of TiDB will have problems when decoding, and it needs to be transcoded before downgrading. #### Compatibility with MySQL @@ -189,7 +189,7 @@ Test the compatibility of some related features, such as SQL binding, SQL hints, - Version 4.0 and above test upgrade: - During the rolling upgrade process, gbk operation is not supported. - After the upgrade, normal gbk related operations are supported. - + ### Downgrade compatibility There will be incompatibility issues when downgrading tables that use gbk encoding. diff --git a/pkg/executor/aggregate/agg_hash_executor.go b/pkg/executor/aggregate/agg_hash_executor.go index 69d2faca949fa..9c4283dc6920b 100644 --- a/pkg/executor/aggregate/agg_hash_executor.go +++ b/pkg/executor/aggregate/agg_hash_executor.go @@ -127,8 +127,8 @@ type HashAggExec struct { stats *HashAggRuntimeStats // listInDisk is the chunks to store row values for spilled data. - // The HashAggExec may be set to `spill mode` multiple times, and all spilled data will be appended to ListInDisk. - listInDisk *chunk.ListInDisk + // The HashAggExec may be set to `spill mode` multiple times, and all spilled data will be appended to DataInDiskByRows. + listInDisk *chunk.DataInDiskByRows // numOfSpilledChks indicates the number of all the spilled chunks. numOfSpilledChks int // offsetOfSpilledChks indicates the offset of the chunk be read from the disk. @@ -240,7 +240,7 @@ func (e *HashAggExec) initForUnparallelExec() { e.offsetOfSpilledChks, e.numOfSpilledChks = 0, 0 e.executed, e.isChildDrained = false, false - e.listInDisk = chunk.NewListInDisk(exec.RetTypes(e.Children(0))) + e.listInDisk = chunk.NewDataInDiskByRows(exec.RetTypes(e.Children(0))) e.tmpChkForSpill = exec.TryNewCacheChunk(e.Children(0)) if vars := e.Ctx().GetSessionVars(); vars.TrackAggregateMemoryUsage && variable.EnableTmpStorageOnOOM.Load() { diff --git a/pkg/planner/cardinality/row_size.go b/pkg/planner/cardinality/row_size.go index be5bfdf01b7a2..e908e73c7d955 100644 --- a/pkg/planner/cardinality/row_size.go +++ b/pkg/planner/cardinality/row_size.go @@ -88,8 +88,8 @@ func GetAvgRowSize(ctx sessionctx.Context, coll *statistics.HistColl, cols []*ex return size + float64(len(cols)) } -// GetAvgRowSizeListInDisk computes average row size for given columns. -func GetAvgRowSizeListInDisk(coll *statistics.HistColl, cols []*expression.Column) (size float64) { +// GetAvgRowSizeDataInDiskByRows computes average row size for given columns. +func GetAvgRowSizeDataInDiskByRows(coll *statistics.HistColl, cols []*expression.Column) (size float64) { if coll.Pseudo || len(coll.Columns) == 0 || coll.RealtimeCount == 0 { for _, col := range cols { size += float64(chunk.EstimateTypeWidth(col.GetType())) @@ -103,10 +103,10 @@ func GetAvgRowSizeListInDisk(coll *statistics.HistColl, cols []*expression.Colum size += float64(chunk.EstimateTypeWidth(col.GetType())) continue } - size += AvgColSizeListInDisk(colHist, coll.RealtimeCount) + size += AvgColSizeDataInDiskByRows(colHist, coll.RealtimeCount) } } - // Add 8 byte for each column's size record. See `ListInDisk` for details. + // Add 8 byte for each column's size record. See `DataInDiskByRows` for details. 
return size + float64(8*len(cols)) } @@ -160,9 +160,9 @@ func AvgColSizeChunkFormat(c *statistics.Column, count int64) float64 { return math.Round((avgSize-math.Log2(avgSize))*100)/100 + 8 } -// AvgColSizeListInDisk is the average column size of the histogram. These sizes are derived -// from `chunk.ListInDisk` so we need to update them if those 2 functions are changed. -func AvgColSizeListInDisk(c *statistics.Column, count int64) float64 { +// AvgColSizeDataInDiskByRows is the average column size of the histogram. These sizes are derived +// from `chunk.DataInDiskByRows` so we need to update them if those 2 functions are changed. +func AvgColSizeDataInDiskByRows(c *statistics.Column, count int64) float64 { if count == 0 { return 0 } diff --git a/pkg/planner/cardinality/row_size_test.go b/pkg/planner/cardinality/row_size_test.go index 41d9404676c89..0659bfd1a92d3 100644 --- a/pkg/planner/cardinality/row_size_test.go +++ b/pkg/planner/cardinality/row_size_test.go @@ -40,21 +40,21 @@ func TestAvgColLen(t *testing.T) { tableInfo := tbl.Meta() statsTbl := do.StatsHandle().GetTableStats(tableInfo) require.Equal(t, 1.0, cardinality.AvgColSize(statsTbl.Columns[tableInfo.Columns[0].ID], statsTbl.RealtimeCount, false)) - require.Equal(t, 8.0, cardinality.AvgColSizeListInDisk(statsTbl.Columns[tableInfo.Columns[0].ID], statsTbl.RealtimeCount)) + require.Equal(t, 8.0, cardinality.AvgColSizeDataInDiskByRows(statsTbl.Columns[tableInfo.Columns[0].ID], statsTbl.RealtimeCount)) require.Equal(t, 8.0, cardinality.AvgColSizeChunkFormat(statsTbl.Columns[tableInfo.Columns[0].ID], statsTbl.RealtimeCount)) // The size of varchar type is LEN + BYTE, here is 1 + 7 = 8 require.Equal(t, 8.0, cardinality.AvgColSize(statsTbl.Columns[tableInfo.Columns[1].ID], statsTbl.RealtimeCount, false)) require.Equal(t, 8.0, cardinality.AvgColSize(statsTbl.Columns[tableInfo.Columns[2].ID], statsTbl.RealtimeCount, false)) require.Equal(t, 8.0, cardinality.AvgColSize(statsTbl.Columns[tableInfo.Columns[3].ID], statsTbl.RealtimeCount, false)) - require.Equal(t, 8.0-3, cardinality.AvgColSizeListInDisk(statsTbl.Columns[tableInfo.Columns[1].ID], statsTbl.RealtimeCount)) - require.Equal(t, float64(unsafe.Sizeof(float32(12.3))), cardinality.AvgColSizeListInDisk(statsTbl.Columns[tableInfo.Columns[2].ID], statsTbl.RealtimeCount)) - require.Equal(t, float64(unsafe.Sizeof(types.ZeroTime)), cardinality.AvgColSizeListInDisk(statsTbl.Columns[tableInfo.Columns[3].ID], statsTbl.RealtimeCount)) + require.Equal(t, 8.0-3, cardinality.AvgColSizeDataInDiskByRows(statsTbl.Columns[tableInfo.Columns[1].ID], statsTbl.RealtimeCount)) + require.Equal(t, float64(unsafe.Sizeof(float32(12.3))), cardinality.AvgColSizeDataInDiskByRows(statsTbl.Columns[tableInfo.Columns[2].ID], statsTbl.RealtimeCount)) + require.Equal(t, float64(unsafe.Sizeof(types.ZeroTime)), cardinality.AvgColSizeDataInDiskByRows(statsTbl.Columns[tableInfo.Columns[3].ID], statsTbl.RealtimeCount)) require.Equal(t, 8.0-3+8, cardinality.AvgColSizeChunkFormat(statsTbl.Columns[tableInfo.Columns[1].ID], statsTbl.RealtimeCount)) require.Equal(t, float64(unsafe.Sizeof(float32(12.3))), cardinality.AvgColSizeChunkFormat(statsTbl.Columns[tableInfo.Columns[2].ID], statsTbl.RealtimeCount)) require.Equal(t, float64(unsafe.Sizeof(types.ZeroTime)), cardinality.AvgColSizeChunkFormat(statsTbl.Columns[tableInfo.Columns[3].ID], statsTbl.RealtimeCount)) require.Equal(t, 8.0, cardinality.AvgColSizeChunkFormat(statsTbl.Columns[tableInfo.Columns[4].ID], statsTbl.RealtimeCount)) - require.Equal(t, 0.0, 
cardinality.AvgColSizeListInDisk(statsTbl.Columns[tableInfo.Columns[4].ID], statsTbl.RealtimeCount)) + require.Equal(t, 0.0, cardinality.AvgColSizeDataInDiskByRows(statsTbl.Columns[tableInfo.Columns[4].ID], statsTbl.RealtimeCount)) testKit.MustExec("insert into t values(132, '123456789112', 1232.3, '2018-03-07 19:17:29', NULL)") testKit.MustExec("analyze table t") statsTbl = do.StatsHandle().GetTableStats(tableInfo) @@ -62,14 +62,14 @@ func TestAvgColLen(t *testing.T) { require.Equal(t, 10.5, cardinality.AvgColSize(statsTbl.Columns[tableInfo.Columns[1].ID], statsTbl.RealtimeCount, false)) require.Equal(t, 8.0, cardinality.AvgColSize(statsTbl.Columns[tableInfo.Columns[2].ID], statsTbl.RealtimeCount, false)) require.Equal(t, 8.0, cardinality.AvgColSize(statsTbl.Columns[tableInfo.Columns[3].ID], statsTbl.RealtimeCount, false)) - require.Equal(t, 8.0, cardinality.AvgColSizeListInDisk(statsTbl.Columns[tableInfo.Columns[0].ID], statsTbl.RealtimeCount)) - require.Equal(t, math.Round((10.5-math.Log2(10.5))*100)/100, cardinality.AvgColSizeListInDisk(statsTbl.Columns[tableInfo.Columns[1].ID], statsTbl.RealtimeCount)) - require.Equal(t, float64(unsafe.Sizeof(float32(12.3))), cardinality.AvgColSizeListInDisk(statsTbl.Columns[tableInfo.Columns[2].ID], statsTbl.RealtimeCount)) - require.Equal(t, float64(unsafe.Sizeof(types.ZeroTime)), cardinality.AvgColSizeListInDisk(statsTbl.Columns[tableInfo.Columns[3].ID], statsTbl.RealtimeCount)) + require.Equal(t, 8.0, cardinality.AvgColSizeDataInDiskByRows(statsTbl.Columns[tableInfo.Columns[0].ID], statsTbl.RealtimeCount)) + require.Equal(t, math.Round((10.5-math.Log2(10.5))*100)/100, cardinality.AvgColSizeDataInDiskByRows(statsTbl.Columns[tableInfo.Columns[1].ID], statsTbl.RealtimeCount)) + require.Equal(t, float64(unsafe.Sizeof(float32(12.3))), cardinality.AvgColSizeDataInDiskByRows(statsTbl.Columns[tableInfo.Columns[2].ID], statsTbl.RealtimeCount)) + require.Equal(t, float64(unsafe.Sizeof(types.ZeroTime)), cardinality.AvgColSizeDataInDiskByRows(statsTbl.Columns[tableInfo.Columns[3].ID], statsTbl.RealtimeCount)) require.Equal(t, 8.0, cardinality.AvgColSizeChunkFormat(statsTbl.Columns[tableInfo.Columns[0].ID], statsTbl.RealtimeCount)) require.Equal(t, math.Round((10.5-math.Log2(10.5))*100)/100+8, cardinality.AvgColSizeChunkFormat(statsTbl.Columns[tableInfo.Columns[1].ID], statsTbl.RealtimeCount)) require.Equal(t, float64(unsafe.Sizeof(float32(12.3))), cardinality.AvgColSizeChunkFormat(statsTbl.Columns[tableInfo.Columns[2].ID], statsTbl.RealtimeCount)) require.Equal(t, float64(unsafe.Sizeof(types.ZeroTime)), cardinality.AvgColSizeChunkFormat(statsTbl.Columns[tableInfo.Columns[3].ID], statsTbl.RealtimeCount)) require.Equal(t, 8.0, cardinality.AvgColSizeChunkFormat(statsTbl.Columns[tableInfo.Columns[4].ID], statsTbl.RealtimeCount)) - require.Equal(t, 0.0, cardinality.AvgColSizeListInDisk(statsTbl.Columns[tableInfo.Columns[4].ID], statsTbl.RealtimeCount)) + require.Equal(t, 0.0, cardinality.AvgColSizeDataInDiskByRows(statsTbl.Columns[tableInfo.Columns[4].ID], statsTbl.RealtimeCount)) } diff --git a/pkg/planner/core/task.go b/pkg/planner/core/task.go index 07c9a92cdac13..ac61f483fca81 100644 --- a/pkg/planner/core/task.go +++ b/pkg/planner/core/task.go @@ -307,7 +307,7 @@ func (p *PhysicalIndexJoin) attach2Task(tasks ...task) task { // RowSize for cost model ver2 is simplified, always use this function to calculate row size. 
func getAvgRowSize(stats *property.StatsInfo, cols []*expression.Column) (size float64) { if stats.HistColl != nil { - size = cardinality.GetAvgRowSizeListInDisk(stats.HistColl, cols) + size = cardinality.GetAvgRowSizeDataInDiskByRows(stats.HistColl, cols) } else { // Estimate using just the type info. for _, col := range cols { diff --git a/pkg/util/chunk/BUILD.bazel b/pkg/util/chunk/BUILD.bazel index ee4d5eee5d8ec..e8ee18cba64ea 100644 --- a/pkg/util/chunk/BUILD.bazel +++ b/pkg/util/chunk/BUILD.bazel @@ -9,7 +9,6 @@ go_library( "codec.go", "column.go", "compare.go", - "disk.go", "iterator.go", "list.go", "mutrow.go", @@ -17,6 +16,7 @@ go_library( "row.go", "row_container.go", "row_container_reader.go", + "row_in_disk.go", ], importpath = "github.com/pingcap/tidb/pkg/util/chunk", visibility = ["//visibility:public"], @@ -48,13 +48,13 @@ go_test( "chunk_util_test.go", "codec_test.go", "column_test.go", - "disk_test.go", "iterator_test.go", "list_test.go", "main_test.go", "mutrow_test.go", "pool_test.go", "row_container_test.go", + "row_in_disk_test.go", ], embed = [":chunk"], flaky = True, diff --git a/pkg/util/chunk/list.go b/pkg/util/chunk/list.go index 53a52581e74cb..f8246850ecd6b 100644 --- a/pkg/util/chunk/list.go +++ b/pkg/util/chunk/list.go @@ -72,7 +72,7 @@ func (l *List) FieldTypes() []*types.FieldType { return l.fieldTypes } -// NumRowsOfChunk returns the number of rows of a chunk in the ListInDisk. +// NumRowsOfChunk returns the number of rows of a chunk in the DataInDiskByRows. func (l *List) NumRowsOfChunk(chkID int) int { return l.chunks[chkID].NumRows() } diff --git a/pkg/util/chunk/row_container.go b/pkg/util/chunk/row_container.go index 97c268fccecfa..324a2edbb686b 100644 --- a/pkg/util/chunk/row_container.go +++ b/pkg/util/chunk/row_container.go @@ -32,7 +32,7 @@ import ( type rowContainerRecord struct { inMemory *List - inDisk *ListInDisk + inDisk *DataInDiskByRows // spillError stores the error when spilling. spillError error } @@ -152,7 +152,7 @@ func (c *RowContainer) spillToDisk(preSpillError error) { var err error memory.QueryForceDisk.Add(1) n := c.m.records.inMemory.NumChunks() - c.m.records.inDisk = NewListInDisk(c.m.records.inMemory.FieldTypes()) + c.m.records.inDisk = NewDataInDiskByRows(c.m.records.inMemory.FieldTypes()) c.m.records.inDisk.diskTracker.AttachTo(c.diskTracker) defer func() { if r := recover(); r != nil { @@ -221,7 +221,7 @@ func (c *RowContainer) NumRow() int { return c.m.records.inMemory.Len() } -// NumRowsOfChunk returns the number of rows of a chunk in the ListInDisk. +// NumRowsOfChunk returns the number of rows of a chunk in the DataInDiskByRows. func (c *RowContainer) NumRowsOfChunk(chkID int) int { c.m.RLock() defer c.m.RUnlock() diff --git a/pkg/util/chunk/disk.go b/pkg/util/chunk/row_in_disk.go similarity index 85% rename from pkg/util/chunk/disk.go rename to pkg/util/chunk/row_in_disk.go index fa36de5658a31..3fb7d09878d25 100644 --- a/pkg/util/chunk/disk.go +++ b/pkg/util/chunk/row_in_disk.go @@ -30,8 +30,9 @@ import ( "github.com/pingcap/tidb/pkg/util/memory" ) -// ListInDisk represents a slice of chunks storing in temporary disk. -type ListInDisk struct { +// DataInDiskByRows represents some data stored in temporary disk. +// These data are stored in row format, so they can only be restored by rows. 
+type DataInDiskByRows struct { fieldTypes []*types.FieldType numRowsOfEachChunk []int rowNumOfEachChunkFirstRow []int @@ -97,46 +98,46 @@ func (l *diskFileReaderWriter) getWriter() io.Writer { return l.w } -var defaultChunkListInDiskPath = "chunk.ListInDisk" -var defaultChunkListInDiskOffsetPath = "chunk.ListInDiskOffset" +var defaultChunkDataInDiskByRowsPath = "chunk.DataInDiskByRows" +var defaultChunkDataInDiskByRowsOffsetPath = "chunk.DataInDiskByRowsOffset" -// NewListInDisk creates a new ListInDisk with field types. -func NewListInDisk(fieldTypes []*types.FieldType) *ListInDisk { - l := &ListInDisk{ +// NewDataInDiskByRows creates a new DataInDiskByRows with field types. +func NewDataInDiskByRows(fieldTypes []*types.FieldType) *DataInDiskByRows { + l := &DataInDiskByRows{ fieldTypes: fieldTypes, // TODO(fengliyuan): set the quota of disk usage. - diskTracker: disk.NewTracker(memory.LabelForChunkListInDisk, -1), + diskTracker: disk.NewTracker(memory.LabelForChunkDataInDiskByRows, -1), } return l } -func (l *ListInDisk) initDiskFile() (err error) { +func (l *DataInDiskByRows) initDiskFile() (err error) { err = disk.CheckAndInitTempDir() if err != nil { return } - err = l.dataFile.initWithFileName(defaultChunkListInDiskPath + strconv.Itoa(l.diskTracker.Label())) + err = l.dataFile.initWithFileName(defaultChunkDataInDiskByRowsPath + strconv.Itoa(l.diskTracker.Label())) if err != nil { return } - err = l.offsetFile.initWithFileName(defaultChunkListInDiskOffsetPath + strconv.Itoa(l.diskTracker.Label())) + err = l.offsetFile.initWithFileName(defaultChunkDataInDiskByRowsOffsetPath + strconv.Itoa(l.diskTracker.Label())) return } -// Len returns the number of rows in ListInDisk -func (l *ListInDisk) Len() int { +// Len returns the number of rows in DataInDiskByRows +func (l *DataInDiskByRows) Len() int { return l.totalNumRows } // GetDiskTracker returns the memory tracker of this List. -func (l *ListInDisk) GetDiskTracker() *disk.Tracker { +func (l *DataInDiskByRows) GetDiskTracker() *disk.Tracker { return l.diskTracker } -// Add adds a chunk to the ListInDisk. Caller must make sure the input chk +// Add adds a chunk to the DataInDiskByRows. Caller must make sure the input chk // is not empty and not used any more and has the same field types. // Warning: Do not use Add concurrently. -func (l *ListInDisk) Add(chk *Chunk) (err error) { +func (l *DataInDiskByRows) Add(chk *Chunk) (err error) { if chk.NumRows() == 0 { return errors2.New("chunk appended to List should have at least 1 row") } @@ -169,8 +170,8 @@ func (l *ListInDisk) Add(chk *Chunk) (err error) { return } -// GetChunk gets a Chunk from the ListInDisk by chkIdx. -func (l *ListInDisk) GetChunk(chkIdx int) (*Chunk, error) { +// GetChunk gets a Chunk from the DataInDiskByRows by chkIdx. +func (l *DataInDiskByRows) GetChunk(chkIdx int) (*Chunk, error) { chk := NewChunkWithCapacity(l.fieldTypes, l.NumRowsOfChunk(chkIdx)) chkSize := l.numRowsOfEachChunk[chkIdx] @@ -206,14 +207,14 @@ func (l *ListInDisk) GetChunk(chkIdx int) (*Chunk, error) { return chk, formatChErr } -// GetRow gets a Row from the ListInDisk by RowPtr. -func (l *ListInDisk) GetRow(ptr RowPtr) (row Row, err error) { +// GetRow gets a Row from the DataInDiskByRows by RowPtr. +func (l *DataInDiskByRows) GetRow(ptr RowPtr) (row Row, err error) { row, _, err = l.GetRowAndAppendToChunk(ptr, nil) return row, err } -// GetRowAndAppendToChunk gets a Row from the ListInDisk by RowPtr. Return the Row and the Ref Chunk. 
-func (l *ListInDisk) GetRowAndAppendToChunk(ptr RowPtr, chk *Chunk) (row Row, _ *Chunk, err error) { +// GetRowAndAppendToChunk gets a Row from the DataInDiskByRows by RowPtr. Return the Row and the Ref Chunk. +func (l *DataInDiskByRows) GetRowAndAppendToChunk(ptr RowPtr, chk *Chunk) (row Row, _ *Chunk, err error) { off, err := l.getOffset(ptr.ChkIdx, ptr.RowIdx) if err != nil { return @@ -228,7 +229,7 @@ func (l *ListInDisk) GetRowAndAppendToChunk(ptr RowPtr, chk *Chunk) (row Row, _ return row, chk, err } -func (l *ListInDisk) getOffset(chkIdx uint32, rowIdx uint32) (int64, error) { +func (l *DataInDiskByRows) getOffset(chkIdx uint32, rowIdx uint32) (int64, error) { offsetInOffsetFile := l.rowNumOfEachChunkFirstRow[chkIdx] + int(rowIdx) b := make([]byte, 8) reader := l.offsetFile.getSectionReader(int64(offsetInOffsetFile) * 8) @@ -242,18 +243,18 @@ func (l *ListInDisk) getOffset(chkIdx uint32, rowIdx uint32) (int64, error) { return bytesToI64Slice(b)[0], nil } -// NumRowsOfChunk returns the number of rows of a chunk in the ListInDisk. -func (l *ListInDisk) NumRowsOfChunk(chkID int) int { +// NumRowsOfChunk returns the number of rows of a chunk in the DataInDiskByRows. +func (l *DataInDiskByRows) NumRowsOfChunk(chkID int) int { return l.numRowsOfEachChunk[chkID] } -// NumChunks returns the number of chunks in the ListInDisk. -func (l *ListInDisk) NumChunks() int { +// NumChunks returns the number of chunks in the DataInDiskByRows. +func (l *DataInDiskByRows) NumChunks() int { return len(l.numRowsOfEachChunk) } // Close releases the disk resource. -func (l *ListInDisk) Close() error { +func (l *DataInDiskByRows) Close() error { if l.dataFile.disk != nil { l.diskTracker.Consume(-l.diskTracker.BytesConsumed()) terror.Call(l.dataFile.disk.Close) @@ -433,7 +434,7 @@ func (format *diskFormatRow) toRow(fields []*types.FieldType, chk *Chunk) (Row, } // ReaderWithCache helps to read data that has not be flushed to underlying layer. -// By using ReaderWithCache, user can still write data into ListInDisk even after reading. +// By using ReaderWithCache, user can still write data into DataInDiskByRows even after reading. 
type ReaderWithCache struct { r io.ReaderAt cacheOff int64 diff --git a/pkg/util/chunk/disk_test.go b/pkg/util/chunk/row_in_disk_test.go similarity index 88% rename from pkg/util/chunk/disk_test.go rename to pkg/util/chunk/row_in_disk_test.go index 677a964b257b5..933a30834a779 100644 --- a/pkg/util/chunk/disk_test.go +++ b/pkg/util/chunk/row_in_disk_test.go @@ -64,10 +64,10 @@ func initChunks(numChk, numRow int) ([]*Chunk, []*types.FieldType) { return chks, fields } -func TestListInDisk(t *testing.T) { +func TestDataInDiskByRows(t *testing.T) { numChk, numRow := 2, 2 chks, fields := initChunks(numChk, numRow) - l := NewListInDisk(fields) + l := NewDataInDiskByRows(fields) defer func() { err := l.Close() require.NoError(t, err) @@ -92,11 +92,11 @@ func TestListInDisk(t *testing.T) { } } -func BenchmarkListInDiskAdd(b *testing.B) { +func BenchmarkDataInDiskByRowsAdd(b *testing.B) { numChk, numRow := 1, 2 chks, fields := initChunks(numChk, numRow) chk := chks[0] - l := NewListInDisk(fields) + l := NewDataInDiskByRows(fields) defer l.Close() b.ResetTimer() @@ -108,10 +108,10 @@ func BenchmarkListInDiskAdd(b *testing.B) { } } -func BenchmarkListInDiskGetRow(b *testing.B) { +func BenchmarkDataInDiskByRowsGetRow(b *testing.B) { numChk, numRow := 10000, 2 chks, fields := initChunks(numChk, numRow) - l := NewListInDisk(fields) + l := NewDataInDiskByRows(fields) defer l.Close() for _, chk := range chks { err := l.Add(chk) @@ -140,7 +140,7 @@ func BenchmarkListInDiskGetRow(b *testing.B) { } type listInDiskWriteDisk struct { - ListInDisk + DataInDiskByRows } func (l *diskFileReaderWriter) flushForTest() (err error) { @@ -158,8 +158,8 @@ func (l *diskFileReaderWriter) flushForTest() (err error) { return nil } -func newListInDiskWriteDisk(fieldTypes []*types.FieldType) (*listInDiskWriteDisk, error) { - l := listInDiskWriteDisk{*NewListInDisk(fieldTypes)} +func newDataInDiskByRowsWriteDisk(fieldTypes []*types.FieldType) (*listInDiskWriteDisk, error) { + l := listInDiskWriteDisk{*NewDataInDiskByRows(fieldTypes)} disk, err := os.CreateTemp(config.GetGlobalConfig().TempStoragePath, strconv.Itoa(l.diskTracker.Label())) if err != nil { return nil, err @@ -214,12 +214,12 @@ func checkRow(t *testing.T, row1, row2 Row) { } } -func testListInDisk(t *testing.T, concurrency int) { +func testDataInDiskByRows(t *testing.T, concurrency int) { numChk, numRow := 10, 1000 chks, fields := initChunks(numChk, numRow) - lChecksum := NewListInDisk(fields) + lChecksum := NewDataInDiskByRows(fields) defer lChecksum.Close() - lDisk, err := newListInDiskWriteDisk(fields) + lDisk, err := newDataInDiskByRowsWriteDisk(fields) require.NoError(t, err) defer lDisk.Close() for _, chk := range chks { @@ -261,11 +261,11 @@ func testListInDisk(t *testing.T, concurrency int) { wg.Wait() } -func BenchmarkListInDisk_GetChunk(b *testing.B) { +func BenchmarkDataInDiskByRows_GetChunk(b *testing.B) { b.StopTimer() numChk, numRow := 10, 1000 chks, fields := initChunks(numChk, numRow) - l := NewListInDisk(fields) + l := NewDataInDiskByRows(fields) defer l.Close() for _, chk := range chks { _ = l.Add(chk) @@ -278,31 +278,31 @@ func BenchmarkListInDisk_GetChunk(b *testing.B) { } } -func TestListInDiskWithChecksum1(t *testing.T) { +func TestDataInDiskByRowsWithChecksum1(t *testing.T) { defer config.RestoreFunc()() config.UpdateGlobal(func(conf *config.Config) { conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodPlaintext }) - testListInDisk(t, 1) + testDataInDiskByRows(t, 1) } -func TestListInDiskWithChecksum2(t *testing.T) 
{ +func TestDataInDiskByRowsWithChecksum2(t *testing.T) { defer config.RestoreFunc()() config.UpdateGlobal(func(conf *config.Config) { conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodPlaintext }) - testListInDisk(t, 2) + testDataInDiskByRows(t, 2) } -func TestListInDiskWithChecksum8(t *testing.T) { +func TestDataInDiskByRowsWithChecksum8(t *testing.T) { defer config.RestoreFunc()() config.UpdateGlobal(func(conf *config.Config) { conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodPlaintext }) - testListInDisk(t, 8) + testDataInDiskByRows(t, 8) } -func TestListInDiskWithChecksumReaderWithCache(t *testing.T) { +func TestDataInDiskByRowsWithChecksumReaderWithCache(t *testing.T) { defer config.RestoreFunc()() config.UpdateGlobal(func(conf *config.Config) { conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodPlaintext @@ -310,7 +310,7 @@ func TestListInDiskWithChecksumReaderWithCache(t *testing.T) { testReaderWithCache(t) } -func TestListInDiskWithChecksumReaderWithCacheNoFlush(t *testing.T) { +func TestDataInDiskByRowsWithChecksumReaderWithCacheNoFlush(t *testing.T) { defer config.RestoreFunc()() config.UpdateGlobal(func(conf *config.Config) { conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodPlaintext @@ -318,31 +318,31 @@ func TestListInDiskWithChecksumReaderWithCacheNoFlush(t *testing.T) { testReaderWithCacheNoFlush(t) } -func TestListInDiskWithChecksumAndEncrypt1(t *testing.T) { +func TestDataInDiskByRowsWithChecksumAndEncrypt1(t *testing.T) { defer config.RestoreFunc()() config.UpdateGlobal(func(conf *config.Config) { conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodAES128CTR }) - testListInDisk(t, 1) + testDataInDiskByRows(t, 1) } -func TestListInDiskWithChecksumAndEncrypt2(t *testing.T) { +func TestDataInDiskByRowsWithChecksumAndEncrypt2(t *testing.T) { defer config.RestoreFunc()() config.UpdateGlobal(func(conf *config.Config) { conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodAES128CTR }) - testListInDisk(t, 2) + testDataInDiskByRows(t, 2) } -func TestListInDiskWithChecksumAndEncrypt8(t *testing.T) { +func TestDataInDiskByRowsWithChecksumAndEncrypt8(t *testing.T) { defer config.RestoreFunc()() config.UpdateGlobal(func(conf *config.Config) { conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodAES128CTR }) - testListInDisk(t, 8) + testDataInDiskByRows(t, 8) } -func TestListInDiskWithChecksumAndEncryptReaderWithCache(t *testing.T) { +func TestDataInDiskByRowsWithChecksumAndEncryptReaderWithCache(t *testing.T) { defer config.RestoreFunc()() config.UpdateGlobal(func(conf *config.Config) { conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodAES128CTR @@ -350,7 +350,7 @@ func TestListInDiskWithChecksumAndEncryptReaderWithCache(t *testing.T) { testReaderWithCache(t) } -func TestListInDiskWithChecksumAndEncryptReaderWithCacheNoFlush(t *testing.T) { +func TestDataInDiskByRowsWithChecksumAndEncryptReaderWithCacheNoFlush(t *testing.T) { defer config.RestoreFunc()() config.UpdateGlobal(func(conf *config.Config) { conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodAES128CTR @@ -383,7 +383,7 @@ func testReaderWithCache(t *testing.T) { field := []*types.FieldType{types.NewFieldType(mysql.TypeString)} chk := NewChunkWithCapacity(field, 1) chk.AppendString(0, buf.String()) - l := NewListInDisk(field) + l := NewDataInDiskByRows(field) err := l.Add(chk) 
require.NoError(t, err) @@ -453,7 +453,7 @@ func testReaderWithCacheNoFlush(t *testing.T) { field := []*types.FieldType{types.NewFieldType(mysql.TypeString)} chk := NewChunkWithCapacity(field, 1) chk.AppendString(0, testData) - l := NewListInDisk(field) + l := NewDataInDiskByRows(field) err := l.Add(chk) require.NoError(t, err) diff --git a/pkg/util/memory/tracker.go b/pkg/util/memory/tracker.go index a85c7185a2b0f..5b8d8cf101122 100644 --- a/pkg/util/memory/tracker.go +++ b/pkg/util/memory/tracker.go @@ -817,8 +817,8 @@ const ( LabelForChunkList int = -7 // LabelForGlobalSimpleLRUCache represents the label of the Global SimpleLRUCache LabelForGlobalSimpleLRUCache int = -8 - // LabelForChunkListInDisk represents the label of the chunk list in disk - LabelForChunkListInDisk int = -9 + // LabelForChunkDataInDiskByRows represents the label of the chunk list in disk + LabelForChunkDataInDiskByRows int = -9 // LabelForRowContainer represents the label of the row container LabelForRowContainer int = -10 // LabelForGlobalStorage represents the label of the Global Storage
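
For readers coming to this patch from the old name: `DataInDiskByRows` keeps `ListInDisk`'s behavior exactly. Chunks are serialized row by row into a temporary data file, and every row gets an 8-byte offset record in a companion offset file (plus an 8-byte size record per column inside each serialized row, which is the `8*len(cols)` term in `GetAvgRowSizeDataInDiskByRows`). Below is a minimal usage sketch of the renamed surface; it is not part of the patch, the `main` wrapper and sample schema are illustrative only, and it assumes the `pkg/util/chunk` package as modified here plus a writable default temp-storage path.

```go
package main

import (
	"fmt"

	"github.com/pingcap/tidb/pkg/parser/mysql"
	"github.com/pingcap/tidb/pkg/types"
	"github.com/pingcap/tidb/pkg/util/chunk"
)

func main() {
	// A two-column schema: (BIGINT, VARCHAR). Illustrative only.
	fields := []*types.FieldType{
		types.NewFieldType(mysql.TypeLonglong),
		types.NewFieldType(mysql.TypeVarchar),
	}

	// NewDataInDiskByRows is the renamed NewListInDisk; the backing temp
	// files are created on first use.
	l := chunk.NewDataInDiskByRows(fields)
	// Close removes the temp files and releases the disk-tracker quota.
	defer l.Close()

	// Build one in-memory chunk and spill it. Add requires a non-empty
	// chunk, and the caller must not reuse chk afterwards.
	chk := chunk.NewChunkWithCapacity(fields, 2)
	chk.AppendInt64(0, 1)
	chk.AppendString(1, "a")
	chk.AppendInt64(0, 2)
	chk.AppendString(1, "b")
	if err := l.Add(chk); err != nil {
		panic(err)
	}

	// Random access by RowPtr goes through the per-row 8-byte offset file.
	row, err := l.GetRow(chunk.RowPtr{ChkIdx: 0, RowIdx: 1})
	if err != nil {
		panic(err)
	}
	fmt.Println(row.GetInt64(0), row.GetString(1)) // 2 b

	// Whole chunks can be restored too, via l.GetChunk(0); totals are
	// reported by l.Len(), l.NumChunks() and l.NumRowsOfChunk(0).
}
```

The surface matches the old `ListInDisk` one-for-one, which is why the patch is a pure rename: every caller (`HashAggExec` spilling, `RowContainer`, the cardinality estimators) changes only the identifier it references.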