Skip to content
This repository has been archived by the owner on Jul 24, 2024. It is now read-only.

restore: merge small ranges #578

Merged
merged 25 commits into from
Dec 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
fa4d3a1
restore: merge small ranges
overvenus Nov 2, 2020
f1298e2
Merge branch 'master' into debug-optimize-backupmeta
overvenus Nov 23, 2020
e9c6795
address lints
overvenus Nov 23, 2020
57ecf92
fix panic on empty backup
overvenus Nov 23, 2020
9923efc
fix br_other test
overvenus Nov 23, 2020
f2a1d40
address comments
overvenus Nov 25, 2020
8998c1d
address comments
overvenus Nov 25, 2020
410de98
Merge branch 'master' of http://github.com/pingcap/br into debug-opti…
overvenus Nov 25, 2020
0f83629
fix tests
overvenus Nov 25, 2020
22d59a6
Merge branch 'master' into debug-optimize-backupmeta
overvenus Dec 1, 2020
5c3f29f
address lints
overvenus Dec 1, 2020
4ff72de
restore: do not modify file start key and end key
overvenus Dec 2, 2020
9d88692
fix divide by zero
overvenus Dec 3, 2020
6d1c5f5
empty rewrite rules for raw kv restore
overvenus Dec 7, 2020
a5955c3
Merge branch 'master' into debug-optimize-backupmeta
overvenus Dec 7, 2020
7218d0a
Merge branch 'master' into debug-optimize-backupmeta
overvenus Dec 8, 2020
da86c97
Merge branch 'master' into debug-optimize-backupmeta
overvenus Dec 11, 2020
4c28fab
Merge branch 'master' into debug-optimize-backupmeta
overvenus Dec 18, 2020
42800ae
Merge branch 'master' into debug-optimize-backupmeta
overvenus Dec 23, 2020
0e94162
address comment
overvenus Dec 23, 2020
8e19d4d
restore: do not merge different indexes
overvenus Dec 23, 2020
a241278
Merge branch 'debug-optimize-backupmeta' into debug-optimize-backupmeta
overvenus Dec 23, 2020
03bef3c
address lints
overvenus Dec 23, 2020
047d50a
Update pkg/restore/merge.go
overvenus Dec 24, 2020
6adfcee
fix build
overvenus Dec 24, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions cmd/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,18 @@ origin sha256 is %s`,

func newBackupMetaCommand() *cobra.Command {
command := &cobra.Command{
Use: "backupmeta",
Short: "check the backup meta",
Args: cobra.NoArgs,
Use: "backupmeta",
Short: "utilities of backupmeta",
SilenceUsage: false,
}
command.AddCommand(newBackupMetaValidateCommand())
return command
}

func newBackupMetaValidateCommand() *cobra.Command {
command := &cobra.Command{
Use: "validate",
Short: "validate key range and rewrite rules of backupmeta",
RunE: func(cmd *cobra.Command, _ []string) error {
ctx, cancel := context.WithCancel(GetDefaultContext())
defer cancel()
Expand Down Expand Up @@ -234,7 +243,6 @@ func newBackupMetaCommand() *cobra.Command {
},
}
command.Flags().Uint64("offset", 0, "the offset of table id alloctor")
command.Hidden = true
return command
}

Expand Down
4 changes: 3 additions & 1 deletion pkg/backup/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,9 @@ func appendRanges(tbl *model.TableInfo, tblID int64) ([]kv.KeyRange, error) {
return kvRanges, nil
}

// BuildBackupRangeAndSchema gets the range and schema of tables.
// BuildBackupRangeAndSchema gets KV range and schema of tables.
// KV ranges are separated by Table IDs.
// Also, KV ranges are separated by Index IDs in the same table.
func BuildBackupRangeAndSchema(
dom *domain.Domain,
storage kv.Storage,
Expand Down
3 changes: 0 additions & 3 deletions pkg/restore/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,9 +390,6 @@ func (rc *Client) createTable(
if rc.IsSkipCreateSQL() {
log.Info("skip create table and alter autoIncID", zap.Stringer("table", table.Info.Name))
} else {
// don't use rc.ctx here...
// remove the ctx field of Client would be a great work,
// we just take a small step here :<
err := db.CreateTable(ctx, table)
if err != nil {
return CreatedTable{}, errors.Trace(err)
Expand Down
155 changes: 155 additions & 0 deletions pkg/restore/merge.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package restore

import (
"strings"

"github.com/pingcap/errors"
kvproto "github.com/pingcap/kvproto/pkg/backup"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/tablecodec"

berrors "github.com/pingcap/br/pkg/errors"
"github.com/pingcap/br/pkg/rtree"
"github.com/pingcap/br/pkg/utils"
)

// Default thresholds exported for callers of MergeFileRanges.
const (
	// DefaultMergeRegionSizeBytes is the default region split size, 96MB.
	// See https://github.com/tikv/tikv/blob/v4.0.8/components/raftstore/src/coprocessor/config.rs#L35-L38
	DefaultMergeRegionSizeBytes uint64 = 96 * utils.MB

	// DefaultMergeRegionKeyCount is the default region key count, 960000.
	DefaultMergeRegionKeyCount uint64 = 960000
)

// Column family names, matched against a backup file's Cf field or, failing
// that, looked for as a substring of the file name.
const (
	writeCFName   = "write"
	defaultCFName = "default"
)

// MergeRangesStat holds statistics for the MergeRanges.
//
// The first group of fields describes the input files, the second the
// estimated regions before merging, and the third the ranges that remain
// after merging.
type MergeRangesStat struct {
	// TotalFiles is the number of backup files given to the merge.
	TotalFiles int
	// TotalWriteCFFile is the number of files counted as write CF files.
	TotalWriteCFFile int
	// TotalDefaultCFFile is the number of files counted as default CF files.
	TotalDefaultCFFile int

	// TotalRegions is the larger of TotalWriteCFFile and TotalDefaultCFFile.
	TotalRegions int
	// RegionKeysAvg is the total KV count divided by TotalRegions.
	RegionKeysAvg int
	// RegionBytesAvg is the total byte count divided by TotalRegions.
	RegionBytesAvg int

	// MergedRegions is the number of ranges left after merging.
	MergedRegions int
	// MergedRegionKeysAvg is the total KV count divided by MergedRegions.
	MergedRegionKeysAvg int
	// MergedRegionBytesAvg is the total byte count divided by MergedRegions.
	MergedRegionBytesAvg int
}

// MergeFileRanges returns ranges of the files are merged based on
// splitSizeBytes and splitKeyCount.
//
// By merging small ranges, it speeds up restoring a backup that contains many
// small ranges (regions) as it reduces split region and scatter region.
func MergeFileRanges(
files []*kvproto.File, splitSizeBytes, splitKeyCount uint64,
) ([]rtree.Range, *MergeRangesStat, error) {
if len(files) == 0 {
return []rtree.Range{}, &MergeRangesStat{}, nil
}
totalBytes := uint64(0)
totalKvs := uint64(0)
totalFiles := len(files)
writeCFFile := 0
defaultCFFile := 0

filesMap := make(map[string][]*kvproto.File)
for _, file := range files {
filesMap[string(file.StartKey)] = append(filesMap[string(file.StartKey)], file)

// We skips all default cf files because we don't range overlap.
if file.Cf == writeCFName || strings.Contains(file.GetName(), writeCFName) {
writeCFFile++
} else if file.Cf == defaultCFName || strings.Contains(file.GetName(), defaultCFName) {
defaultCFFile++
}
totalBytes += file.TotalBytes
totalKvs += file.TotalKvs
}
if writeCFFile == 0 && defaultCFFile == 0 {
return []rtree.Range{}, nil, errors.Annotatef(berrors.ErrRestoreInvalidBackup,
"unknown backup data from neither Wrtie CF nor Default CF")
}

// RawKV does not have data in write CF.
totalRegions := writeCFFile
if defaultCFFile > writeCFFile {
totalRegions = defaultCFFile
}

// Check if files are overlapped
rangeTree := rtree.NewRangeTree()
for key := range filesMap {
files := filesMap[key]
if out := rangeTree.InsertRange(rtree.Range{
StartKey: files[0].GetStartKey(),
EndKey: files[0].GetEndKey(),
Files: files,
}); out != nil {
return nil, nil, errors.Annotatef(berrors.ErrRestoreInvalidRange,
"duplicate range %s files %+v", out, files)
}
}

needMerge := func(left, right *rtree.Range) bool {
leftBytes, leftKeys := left.BytesAndKeys()
rightBytes, rightKeys := right.BytesAndKeys()
if rightBytes == 0 {
return true
}
if leftBytes+rightBytes > splitSizeBytes {
return false
}
if leftKeys+rightKeys > splitKeyCount {
return false
}
// Do not merge ranges in different tables.
if tablecodec.DecodeTableID(kv.Key(left.StartKey)) != tablecodec.DecodeTableID(kv.Key(right.StartKey)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we merge index range and row key range with same table id here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's ok as long as they belong to the same table.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we merge record key and index key into one file. we need to find the correct rewrite rule.

startKey, endKey, err = rewriteFileKeys(file, rewriteRules)

but default rewrite rule starts with record prefix
OldKeyPrefix: append(tablecodec.EncodeTablePrefix(oldTableID), recordPrefixSep...),

and with this rule sst_importer cannot rewrite index key correctly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, we need to separate ranges by index ID too.

return false
}
// Do not merge ranges in different indexes even if they are in the same
// table, as rewrite rule only supports rewriting one pattern.
// tableID, indexID, indexValues, err
_, indexID1, _, err1 := tablecodec.DecodeIndexKey(kv.Key(left.StartKey))
_, indexID2, _, err2 := tablecodec.DecodeIndexKey(kv.Key(right.StartKey))
// If both of them are index keys, ...
if err1 == nil && err2 == nil {
// Merge left and right if they are in the same index.
return indexID1 == indexID2
}
// Otherwise, merge if they are both record keys
return err1 != nil && err2 != nil
}
sortedRanges := rangeTree.GetSortedRanges()
for i := 1; i < len(sortedRanges); {
if !needMerge(&sortedRanges[i-1], &sortedRanges[i]) {
i++
continue
}
sortedRanges[i-1].EndKey = sortedRanges[i].EndKey
sortedRanges[i-1].Files = append(sortedRanges[i-1].Files, sortedRanges[i].Files...)
// TODO: this is slow when there are lots of ranges need to merge.
sortedRanges = append(sortedRanges[:i], sortedRanges[i+1:]...)
}

regionBytesAvg := totalBytes / uint64(totalRegions)
regionKeysAvg := totalKvs / uint64(totalRegions)
mergedRegionBytesAvg := totalBytes / uint64(len(sortedRanges))
mergedRegionKeysAvg := totalKvs / uint64(len(sortedRanges))

return sortedRanges, &MergeRangesStat{
TotalFiles: totalFiles,
TotalWriteCFFile: writeCFFile,
TotalDefaultCFFile: defaultCFFile,
TotalRegions: totalRegions,
RegionKeysAvg: int(regionKeysAvg),
RegionBytesAvg: int(regionBytesAvg),
MergedRegions: len(sortedRanges),
MergedRegionKeysAvg: int(mergedRegionKeysAvg),
MergedRegionBytesAvg: int(mergedRegionBytesAvg),
}, nil
}
Loading