This repository has been archived by the owner on Jul 24, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 101
restore: merge small ranges #578
Merged
ti-srebot
merged 25 commits into
pingcap:master
from
overvenus:debug-optimize-backupmeta
Dec 24, 2020
Merged
Changes from all commits
Commits
Show all changes
25 commits
Select commit
Hold shift + click to select a range
fa4d3a1
restore: merge small ranges
overvenus f1298e2
Merge branch 'master' into debug-optimize-backupmeta
overvenus e9c6795
address lints
overvenus 57ecf92
fix panic on empty backup
overvenus 9923efc
fix br_other test
overvenus f2a1d40
address comments
overvenus 8998c1d
address comments
overvenus 410de98
Merge branch 'master' of http://github.com/pingcap/br into debug-opti…
overvenus 0f83629
fix tests
overvenus 22d59a6
Merge branch 'master' into debug-optimize-backupmeta
overvenus 5c3f29f
address lints
overvenus 4ff72de
restore: do not modify file start key and end key
overvenus 9d88692
fix divide by zero
overvenus 6d1c5f5
empty rewrite rules for raw kv restore
overvenus a5955c3
Merge branch 'master' into debug-optimize-backupmeta
overvenus 7218d0a
Merge branch 'master' into debug-optimize-backupmeta
overvenus da86c97
Merge branch 'master' into debug-optimize-backupmeta
overvenus 4c28fab
Merge branch 'master' into debug-optimize-backupmeta
overvenus 42800ae
Merge branch 'master' into debug-optimize-backupmeta
overvenus 0e94162
address comment
overvenus 8e19d4d
restore: do not merge different indexes
overvenus a241278
Merge branch 'debug-optimize-backupmeta' into debug-optimize-backupmeta
overvenus 03bef3c
address lints
overvenus 047d50a
Update pkg/restore/merge.go
overvenus 6adfcee
fix build
overvenus File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. | ||
|
||
package restore | ||
|
||
import ( | ||
"strings" | ||
|
||
"github.com/pingcap/errors" | ||
kvproto "github.com/pingcap/kvproto/pkg/backup" | ||
"github.com/pingcap/tidb/kv" | ||
"github.com/pingcap/tidb/tablecodec" | ||
|
||
berrors "github.com/pingcap/br/pkg/errors" | ||
"github.com/pingcap/br/pkg/rtree" | ||
"github.com/pingcap/br/pkg/utils" | ||
) | ||
|
||
const ( | ||
// DefaultMergeRegionSizeBytes is the default region split size, 96MB. | ||
// See https://github.com/tikv/tikv/blob/v4.0.8/components/raftstore/src/coprocessor/config.rs#L35-L38 | ||
DefaultMergeRegionSizeBytes uint64 = 96 * utils.MB | ||
|
||
// DefaultMergeRegionKeyCount is the default region key count, 960000. | ||
DefaultMergeRegionKeyCount uint64 = 960000 | ||
|
||
writeCFName = "write" | ||
defaultCFName = "default" | ||
) | ||
|
||
// MergeRangesStat holds the statistics reported by MergeFileRanges.
type MergeRangesStat struct {
	// TotalFiles is the number of input files.
	TotalFiles int
	// TotalWriteCFFile is the number of input files counted as write CF.
	TotalWriteCFFile int
	// TotalDefaultCFFile is the number of input files counted as default CF.
	TotalDefaultCFFile int

	// TotalRegions is the larger of TotalWriteCFFile and TotalDefaultCFFile.
	TotalRegions int
	// RegionKeysAvg is the total key count divided by TotalRegions.
	RegionKeysAvg int
	// RegionBytesAvg is the total byte count divided by TotalRegions.
	RegionBytesAvg int

	// MergedRegions is the number of ranges left after merging.
	MergedRegions int
	// MergedRegionKeysAvg is the total key count divided by MergedRegions.
	MergedRegionKeysAvg int
	// MergedRegionBytesAvg is the total byte count divided by MergedRegions.
	MergedRegionBytesAvg int
}
|
||
// MergeFileRanges returns ranges of the files are merged based on | ||
// splitSizeBytes and splitKeyCount. | ||
// | ||
// By merging small ranges, it speeds up restoring a backup that contains many | ||
// small ranges (regions) as it reduces split region and scatter region. | ||
func MergeFileRanges( | ||
files []*kvproto.File, splitSizeBytes, splitKeyCount uint64, | ||
) ([]rtree.Range, *MergeRangesStat, error) { | ||
if len(files) == 0 { | ||
return []rtree.Range{}, &MergeRangesStat{}, nil | ||
} | ||
totalBytes := uint64(0) | ||
totalKvs := uint64(0) | ||
totalFiles := len(files) | ||
writeCFFile := 0 | ||
defaultCFFile := 0 | ||
|
||
filesMap := make(map[string][]*kvproto.File) | ||
for _, file := range files { | ||
filesMap[string(file.StartKey)] = append(filesMap[string(file.StartKey)], file) | ||
|
||
// We skips all default cf files because we don't range overlap. | ||
if file.Cf == writeCFName || strings.Contains(file.GetName(), writeCFName) { | ||
writeCFFile++ | ||
} else if file.Cf == defaultCFName || strings.Contains(file.GetName(), defaultCFName) { | ||
defaultCFFile++ | ||
} | ||
totalBytes += file.TotalBytes | ||
totalKvs += file.TotalKvs | ||
} | ||
if writeCFFile == 0 && defaultCFFile == 0 { | ||
return []rtree.Range{}, nil, errors.Annotatef(berrors.ErrRestoreInvalidBackup, | ||
"unknown backup data from neither Wrtie CF nor Default CF") | ||
} | ||
|
||
// RawKV does not have data in write CF. | ||
totalRegions := writeCFFile | ||
if defaultCFFile > writeCFFile { | ||
totalRegions = defaultCFFile | ||
} | ||
|
||
// Check if files are overlapped | ||
rangeTree := rtree.NewRangeTree() | ||
for key := range filesMap { | ||
files := filesMap[key] | ||
if out := rangeTree.InsertRange(rtree.Range{ | ||
StartKey: files[0].GetStartKey(), | ||
EndKey: files[0].GetEndKey(), | ||
Files: files, | ||
}); out != nil { | ||
return nil, nil, errors.Annotatef(berrors.ErrRestoreInvalidRange, | ||
"duplicate range %s files %+v", out, files) | ||
} | ||
} | ||
|
||
needMerge := func(left, right *rtree.Range) bool { | ||
leftBytes, leftKeys := left.BytesAndKeys() | ||
rightBytes, rightKeys := right.BytesAndKeys() | ||
if rightBytes == 0 { | ||
return true | ||
} | ||
if leftBytes+rightBytes > splitSizeBytes { | ||
return false | ||
} | ||
if leftKeys+rightKeys > splitKeyCount { | ||
return false | ||
} | ||
// Do not merge ranges in different tables. | ||
if tablecodec.DecodeTableID(kv.Key(left.StartKey)) != tablecodec.DecodeTableID(kv.Key(right.StartKey)) { | ||
return false | ||
} | ||
// Do not merge ranges in different indexes even if they are in the same | ||
// table, as rewrite rule only supports rewriting one pattern. | ||
// tableID, indexID, indexValues, err | ||
_, indexID1, _, err1 := tablecodec.DecodeIndexKey(kv.Key(left.StartKey)) | ||
_, indexID2, _, err2 := tablecodec.DecodeIndexKey(kv.Key(right.StartKey)) | ||
// If both of them are index keys, ... | ||
if err1 == nil && err2 == nil { | ||
// Merge left and right if they are in the same index. | ||
return indexID1 == indexID2 | ||
} | ||
// Otherwise, merge if they are both record keys | ||
return err1 != nil && err2 != nil | ||
} | ||
sortedRanges := rangeTree.GetSortedRanges() | ||
for i := 1; i < len(sortedRanges); { | ||
if !needMerge(&sortedRanges[i-1], &sortedRanges[i]) { | ||
i++ | ||
continue | ||
} | ||
sortedRanges[i-1].EndKey = sortedRanges[i].EndKey | ||
sortedRanges[i-1].Files = append(sortedRanges[i-1].Files, sortedRanges[i].Files...) | ||
// TODO: this is slow when there are lots of ranges need to merge. | ||
sortedRanges = append(sortedRanges[:i], sortedRanges[i+1:]...) | ||
} | ||
|
||
regionBytesAvg := totalBytes / uint64(totalRegions) | ||
regionKeysAvg := totalKvs / uint64(totalRegions) | ||
mergedRegionBytesAvg := totalBytes / uint64(len(sortedRanges)) | ||
mergedRegionKeysAvg := totalKvs / uint64(len(sortedRanges)) | ||
|
||
return sortedRanges, &MergeRangesStat{ | ||
TotalFiles: totalFiles, | ||
TotalWriteCFFile: writeCFFile, | ||
TotalDefaultCFFile: defaultCFFile, | ||
TotalRegions: totalRegions, | ||
RegionKeysAvg: int(regionKeysAvg), | ||
RegionBytesAvg: int(regionBytesAvg), | ||
MergedRegions: len(sortedRanges), | ||
MergedRegionKeysAvg: int(mergedRegionKeysAvg), | ||
MergedRegionBytesAvg: int(mergedRegionBytesAvg), | ||
}, nil | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we merge index range and row key range with same table id here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's ok as long as they belong to the same table.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we merge record keys and index keys into one file, we need to find the correct rewrite rule.
br/pkg/restore/import.go
Line 212 in c344d1e
but the default rewrite rule starts with the record prefix
br/pkg/restore/util.go
Line 73 in c344d1e
and with this rule sst_importer cannot rewrite index keys correctly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are right, we need to separate ranges by index ID too.