Skip to content
This repository has been archived by the owner on Jul 24, 2024. It is now read-only.

restore: merge small ranges #578

Merged
merged 25 commits into from
Dec 24, 2020
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
fa4d3a1
restore: merge small ranges
overvenus Nov 2, 2020
f1298e2
Merge branch 'master' into debug-optimize-backupmeta
overvenus Nov 23, 2020
e9c6795
address lints
overvenus Nov 23, 2020
57ecf92
fix panic on empty backup
overvenus Nov 23, 2020
9923efc
fix br_other test
overvenus Nov 23, 2020
f2a1d40
address comments
overvenus Nov 25, 2020
8998c1d
address comments
overvenus Nov 25, 2020
410de98
Merge branch 'master' of http://github.com/pingcap/br into debug-opti…
overvenus Nov 25, 2020
0f83629
fix tests
overvenus Nov 25, 2020
22d59a6
Merge branch 'master' into debug-optimize-backupmeta
overvenus Dec 1, 2020
5c3f29f
address lints
overvenus Dec 1, 2020
4ff72de
restore: do not modify file start key and end key
overvenus Dec 2, 2020
9d88692
fix divide by zero
overvenus Dec 3, 2020
6d1c5f5
empty rewrite rules for raw kv restore
overvenus Dec 7, 2020
a5955c3
Merge branch 'master' into debug-optimize-backupmeta
overvenus Dec 7, 2020
7218d0a
Merge branch 'master' into debug-optimize-backupmeta
overvenus Dec 8, 2020
da86c97
Merge branch 'master' into debug-optimize-backupmeta
overvenus Dec 11, 2020
4c28fab
Merge branch 'master' into debug-optimize-backupmeta
overvenus Dec 18, 2020
42800ae
Merge branch 'master' into debug-optimize-backupmeta
overvenus Dec 23, 2020
0e94162
address comment
overvenus Dec 23, 2020
8e19d4d
restore: do not merge different indexes
overvenus Dec 23, 2020
a241278
Merge branch 'debug-optimize-backupmeta' into debug-optimize-backupmeta
overvenus Dec 23, 2020
03bef3c
address lints
overvenus Dec 23, 2020
047d50a
Update pkg/restore/merge.go
overvenus Dec 24, 2020
6adfcee
fix build
overvenus Dec 24, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 68 additions & 4 deletions cmd/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"io/ioutil"
"path"
"reflect"

Expand Down Expand Up @@ -142,9 +143,73 @@ origin sha256 is %s`,

// newBackupMetaCommand builds the "backupmeta" parent command.
//
// The command does no work itself; it only groups the backupmeta utilities
// ("validate" and "merge") as subcommands.
func newBackupMetaCommand() *cobra.Command {
	command := &cobra.Command{
		Use:          "backupmeta",
		Short:        "utilities of backupmeta",
		SilenceUsage: false,
	}
	command.AddCommand(newBackupMetaValidateCommand())
	command.AddCommand(newBackupMetaMergeCommand())
	return command
}

// newBackupMetaMergeCommand builds the "backupmeta merge" subcommand.
//
// The subcommand reads the backupmeta described by the common task flags,
// merges its empty or small ranges with restore.MergeRanges, writes the
// resulting backupmeta (protobuf-encoded) to the path given by --output and
// prints the merge statistics.
func newBackupMetaMergeCommand() *cobra.Command {
	command := &cobra.Command{
		Use:   "merge",
		Short: "Merge empty or small ranges in backupmeta",
		RunE: func(cmd *cobra.Command, _ []string) error {
			ctx, cancel := context.WithCancel(GetDefaultContext())
			defer cancel()

			output, err := cmd.Flags().GetString("output")
			if err != nil {
				return err
			}

			var cfg task.Config
			if err = cfg.ParseFromFlags(cmd.Flags()); err != nil {
				return err
			}
			_, _, backupMeta, err := task.ReadBackupMeta(ctx, utils.MetaFile, &cfg)
			if err != nil {
				log.Error("read backupmeta failed", zap.Error(err))
				return err
			}
			// MergeRanges rewrites backupMeta in place, so the value marshaled
			// below is already the merged one.
			mergedRangesStats, err := restore.MergeRanges(backupMeta)
			if err != nil {
				return err
			}
			backupMetaData, err := proto.Marshal(backupMeta)
			if err != nil {
				return err
			}
			err = ioutil.WriteFile(output, backupMetaData, 0644) // nolint:gosec
			if err != nil {
				return err
			}

			cmd.Println("Reduce file done",
				"\nFiles: ", mergedRangesStats.TotalFiles,
				"\n Write CF: ", mergedRangesStats.TotalWriteCFFile,
				"\n Default CF: ", mergedRangesStats.TotalDefaultCFFile,
				"\nRegions: ", mergedRangesStats.TotalRegions,
				"\nKeys avg: ", mergedRangesStats.RegionKeysAvg,
				"\nBytes avg: ", mergedRangesStats.RegionBytesAvg, "(byte)",
				"\nMerged files: ", mergedRangesStats.MergedFiles,
				"\nMerged regions: ", mergedRangesStats.MergedRegions,
				"\nMerged keys avg: ", mergedRangesStats.MergedRegionKeysAvg,
				"\nMerged bytes avg: ", mergedRangesStats.MergedRegionBytesAvg, "(byte)",
			)
			return nil
		},
	}
	command.Flags().StringP("output", "o", "", "write reduced backupmeta to a given path")
	// The flag defaults to "", which can only fail at the final WriteFile,
	// after the whole backupmeta has been read and merged. Requiring it makes
	// the command fail up front with a clear message instead.
	// MarkFlagRequired only errors when the flag is undefined, and it is
	// defined on the line above, so the error is safe to ignore.
	_ = command.MarkFlagRequired("output")
	return command
}

func newBackupMetaValidateCommand() *cobra.Command {
command := &cobra.Command{
Use: "validate",
Short: "validate key range and rewrite rules of backupmeta",
RunE: func(cmd *cobra.Command, _ []string) error {
ctx, cancel := context.WithCancel(GetDefaultContext())
defer cancel()
Expand Down Expand Up @@ -233,7 +298,6 @@ func newBackupMetaCommand() *cobra.Command {
},
}
command.Flags().Uint64("offset", 0, "the offset of table id alloctor")
command.Hidden = true
return command
}

Expand Down
134 changes: 134 additions & 0 deletions pkg/restore/merge.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package restore

import (
"github.com/pingcap/errors"
kvproto "github.com/pingcap/kvproto/pkg/backup"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/tablecodec"

berrors "github.com/pingcap/br/pkg/errors"
"github.com/pingcap/br/pkg/rtree"
"github.com/pingcap/br/pkg/utils"
)

// MergeRangesStat summarizes one MergeRanges run: what the backup looked like
// before merging and what it looks like afterwards.
type MergeRangesStat struct {
	// --- Before merging ---

	// TotalFiles is the number of files in the backup.
	TotalFiles int
	// TotalWriteCFFile is the number of files whose column family is "write".
	TotalWriteCFFile int
	// TotalDefaultCFFile is the number of all remaining files.
	TotalDefaultCFFile int
	// TotalRegions is the region count; MergeRanges reports the number of
	// write CF files here.
	TotalRegions int
	// RegionKeysAvg is the total key count divided by TotalRegions.
	RegionKeysAvg int
	// RegionBytesAvg is the total byte count divided by TotalRegions.
	RegionBytesAvg int

	// --- After merging ---

	// MergedFiles is the number of files left in the backup meta.
	MergedFiles int
	// MergedRegions is the number of ranges left after merging.
	MergedRegions int
	// MergedRegionKeysAvg is the total key count divided by MergedRegions.
	MergedRegionKeysAvg int
	// MergedRegionBytesAvg is the total byte count divided by MergedRegions.
	MergedRegionBytesAvg int
}

// MergeRanges merges small ranges into a bigger one.
// It speeds up restoring a backup that contains many small ranges (regions),
// as it reduces split region and scatter region.
// Note: this function modify backupMeta in place.
func MergeRanges(backupMeta *kvproto.BackupMeta) (*MergeRangesStat, error) {
// Skip if the backup is empty.
if len(backupMeta.Files) == 0 {
return &MergeRangesStat{}, nil
}
totalBytes := uint64(0)
totalKvs := uint64(0)
totalFiles := len(backupMeta.Files)
writeCFFile := 0
defaultCFFile := 0
filesMap := make(map[string][]*kvproto.File)
for i := range backupMeta.Files {
file := backupMeta.Files[i]
filesMap[string(file.StartKey)] = append(filesMap[string(file.StartKey)], file)
if file.Cf == "write" {
writeCFFile++
} else {
defaultCFFile++
}
totalBytes += file.TotalBytes
totalKvs += file.TotalKvs
}
rangeTree := rtree.NewRangeTree()
// Check if files are overlapped
for key := range filesMap {
files := filesMap[key]
if out := rangeTree.InsertRange(rtree.Range{
StartKey: files[0].GetStartKey(),
EndKey: files[0].GetEndKey(),
Files: files,
}); out != nil {
return nil, errors.Annotatef(berrors.ErrRestoreInvalidRange,
"duplicate range %s files %+v", out, files)
}
}

needMerge := func(left, right *rtree.Range) bool {
// See https://github.com/tikv/tikv/blob/v4.0.8/components/raftstore/src/coprocessor/config.rs#L35-L38
const (
// splitSizeMB is the default region split size.
splitSizeMB uint64 = 96 * utils.MB
// splitKeys is the default region split key count.
splitKeys uint64 = 960000
)
leftBytes, leftKeys := left.BytesAndKeys()
rightBytes, rightKeys := right.BytesAndKeys()
if rightBytes == 0 {
return true
}
if leftBytes+rightBytes > splitSizeMB {
return false
}
if leftKeys+rightKeys > splitKeys {
return false
}
// Do not merge ranges in different tables.
if tablecodec.DecodeTableID(kv.Key(left.StartKey)) != tablecodec.DecodeTableID(kv.Key(right.StartKey)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we merge index range and row key range with same table id here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's ok as long as they belong to the same table.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we merge record key and index key into one file. we need to find the correct rewrite rule.

startKey, endKey, err = rewriteFileKeys(file, rewriteRules)

but default rewrite rule starts with record prefix
OldKeyPrefix: append(tablecodec.EncodeTablePrefix(oldTableID), recordPrefixSep...),

and with this rule sst_importer cannot rewrite index key correctly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, we need to separate ranges by index ID too.

return false
}
return true
}
sortedRanges := rangeTree.GetSortedRanges()
for i := 1; i < len(sortedRanges); {
if !needMerge(&sortedRanges[i-1], &sortedRanges[i]) {
i++
continue
}
sortedRanges[i-1].EndKey = sortedRanges[i].EndKey
startKey, endKey := sortedRanges[i-1].StartKey, sortedRanges[i-1].EndKey
sortedRanges[i-1].Files = append(sortedRanges[i-1].Files, sortedRanges[i].Files...)
for j := range sortedRanges[i-1].Files {
sortedRanges[i-1].Files[j].StartKey = startKey
sortedRanges[i-1].Files[j].EndKey = endKey
}
// TODO: this is slow when there are lots of ranges need to merge.
sortedRanges = append(sortedRanges[:i], sortedRanges[i+1:]...)
}

backupMeta.Files = backupMeta.Files[:0]
for i := range sortedRanges {
backupMeta.Files = append(backupMeta.Files, sortedRanges[i].Files...)
}

regionBytesAvg := totalBytes / uint64(writeCFFile)
regionKeysAvg := totalKvs / uint64(writeCFFile)
mergedRegionBytesAvg := totalBytes / uint64(len(sortedRanges))
mergedRegionKeysAvg := totalKvs / uint64(len(sortedRanges))

return &MergeRangesStat{
TotalFiles: totalFiles,
TotalWriteCFFile: writeCFFile,
TotalDefaultCFFile: defaultCFFile,
TotalRegions: writeCFFile,
RegionKeysAvg: int(regionKeysAvg),
RegionBytesAvg: int(regionBytesAvg),
MergedFiles: len(backupMeta.Files),
MergedRegions: len(sortedRanges),
MergedRegionKeysAvg: int(mergedRegionKeysAvg),
MergedRegionBytesAvg: int(mergedRegionBytesAvg),
}, nil
}
Loading