From dea291427631555246a02f22965cd096006ef535 Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Wed, 14 Jun 2023 19:29:29 +0800 Subject: [PATCH 01/16] hot region Signed-off-by: bufferflies <1045931706@qq.com> --- pkg/mock/mockcluster/mockcluster.go | 4 +- pkg/schedule/schedulers/hot_region.go | 79 ++++++++++++++++------ pkg/schedule/schedulers/hot_region_test.go | 50 +++++++++++++- pkg/schedule/schedulers/utils.go | 7 +- 4 files changed, 114 insertions(+), 26 deletions(-) diff --git a/pkg/mock/mockcluster/mockcluster.go b/pkg/mock/mockcluster/mockcluster.go index 5ff91567f66..765b6869205 100644 --- a/pkg/mock/mockcluster/mockcluster.go +++ b/pkg/mock/mockcluster/mockcluster.go @@ -828,8 +828,8 @@ func (mc *Cluster) MockRegionInfo(regionID uint64, leaderStoreID uint64, followerStoreIDs, learnerStoreIDs []uint64, epoch *metapb.RegionEpoch) *core.RegionInfo { region := &metapb.Region{ Id: regionID, - StartKey: []byte(fmt.Sprintf("%20d", regionID)), - EndKey: []byte(fmt.Sprintf("%20d", regionID+1)), + StartKey: []byte(fmt.Sprintf("%20d0", regionID)), + EndKey: []byte(fmt.Sprintf("%20d0", regionID+1)), RegionEpoch: epoch, } var leader *metapb.Peer diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index af8f5310f49..2f8a465d5d2 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -62,6 +62,7 @@ var ( hotSchedulerRegionBucketsNotHotCounter = schedulerCounter.WithLabelValues(HotRegionName, "region_buckets_not_hot") hotSchedulerSplitSuccessCounter = schedulerCounter.WithLabelValues(HotRegionName, "split_success") hotSchedulerNeedSplitBeforeScheduleCounter = schedulerCounter.WithLabelValues(HotRegionName, "need_split_before_move_peer") + hotSchedulerRegionIsTooHotCounter = schedulerCounter.WithLabelValues(HotRegionName, "region_is_too_hot") hotSchedulerMoveLeaderCounter = schedulerCounter.WithLabelValues(HotRegionName, moveLeader.String()) hotSchedulerMovePeerCounter = schedulerCounter.WithLabelValues(HotRegionName, movePeer.String()) @@ -159,21 +160,23 @@ func (h *baseHotScheduler) prepareForBalance(rw statistics.RWType, cluster sche. // It makes each dim rate or count become `weight` times to the origin value. 
func (h *baseHotScheduler) summaryPendingInfluence() { for id, p := range h.regionPendings { - from := h.stInfos[p.from] - to := h.stInfos[p.to] - maxZombieDur := p.maxZombieDuration - weight, needGC := calcPendingInfluence(p.op, maxZombieDur) - - if needGC { - delete(h.regionPendings, id) - continue - } + for _, from := range p.froms { + from := h.stInfos[from] + to := h.stInfos[p.to] + maxZombieDur := p.maxZombieDuration + weight, needGC := calcPendingInfluence(p.op, maxZombieDur) + + if needGC { + delete(h.regionPendings, id) + continue + } - if from != nil && weight > 0 { - from.AddInfluence(&p.origin, -weight) - } - if to != nil && weight > 0 { - to.AddInfluence(&p.origin, weight) + if from != nil && weight > 0 { + from.AddInfluence(&p.origin, -weight) + } + if to != nil && weight > 0 { + to.AddInfluence(&p.origin, weight) + } } } for storeID, info := range h.stInfos { @@ -294,7 +297,7 @@ func (h *hotScheduler) dispatch(typ statistics.RWType, cluster sche.ClusterInfor return nil } -func (h *hotScheduler) tryAddPendingInfluence(op *operator.Operator, srcStore, dstStore uint64, infl statistics.Influence, maxZombieDur time.Duration) bool { +func (h *hotScheduler) tryAddPendingInfluence(op *operator.Operator, srcStore []uint64, dstStore uint64, infl statistics.Influence, maxZombieDur time.Duration) bool { regionID := op.RegionID() _, ok := h.regionPendings[regionID] if ok { @@ -664,6 +667,17 @@ func (bs *balanceSolver) solve() []*operator.Operator { } } bs.cur.mainPeerStat = mainPeerStat + if regionNeedSplit(srcStore, mainPeerStat) { + hotSchedulerRegionIsTooHotCounter.Inc() + ops := bs.createSplitOperator([]*core.RegionInfo{bs.cur.region}) + if len(ops) > 0 { + bs.best = &solution{} + bs.ops = ops + bs.cur.calcPeersRate(bs.firstPriority, bs.secondPriority) + bs.best = bs.cur + return ops + } + } for _, dstStore := range bs.filterDstStores() { bs.cur.dstStore = dstStore @@ -723,7 +737,8 @@ func (bs *balanceSolver) tryAddPendingInfluence() bool { if bs.best == nil || len(bs.ops) == 0 { return false } - if bs.best.srcStore.IsTiFlash() != bs.best.dstStore.IsTiFlash() { + isSplit := bs.ops[0].Kind() == operator.OpSplit + if !isSplit && bs.best.srcStore.IsTiFlash() != bs.best.dstStore.IsTiFlash() { hotSchedulerNotSameEngineCounter.Inc() return false } @@ -731,16 +746,29 @@ func (bs *balanceSolver) tryAddPendingInfluence() bool { // TODO: Process operators atomically. 
// main peer - srcStoreID := bs.best.srcStore.GetID() - dstStoreID := bs.best.dstStore.GetID() + + srcStoreIDs := make([]uint64, 0) + dstStoreID := uint64(0) + if isSplit { + region := bs.GetRegion(bs.ops[0].RegionID()) + for id, _ := range region.GetStoreIDs() { + srcStoreIDs = append(srcStoreIDs, id) + } + } else { + srcStoreIDs = append(srcStoreIDs, bs.best.srcStore.GetID()) + dstStoreID = bs.best.dstStore.GetID() + } infl := bs.collectPendingInfluence(bs.best.mainPeerStat) - if !bs.sche.tryAddPendingInfluence(bs.ops[0], srcStoreID, dstStoreID, infl, maxZombieDur) { + if !bs.sche.tryAddPendingInfluence(bs.ops[0], srcStoreIDs, dstStoreID, infl, maxZombieDur) { return false } + if isSplit { + return true + } // revert peers if bs.best.revertPeerStat != nil && len(bs.ops) > 1 { infl := bs.collectPendingInfluence(bs.best.revertPeerStat) - if !bs.sche.tryAddPendingInfluence(bs.ops[1], dstStoreID, srcStoreID, infl, maxZombieDur) { + if !bs.sche.tryAddPendingInfluence(bs.ops[1], srcStoreIDs, dstStoreID, infl, maxZombieDur) { return false } } @@ -1529,6 +1557,10 @@ func (bs *balanceSolver) createSplitOperator(regions []*core.RegionInfo) []*oper for _, region := range regions { createFunc(region) } + // the split buckets's priority is highest + if len(operators) > 0 { + bs.cur.progressiveRank = -5 + } return operators } @@ -1789,3 +1821,10 @@ func dimToString(dim int) string { func prioritiesToDim(priorities []string) (firstPriority int, secondPriority int) { return stringToDim(priorities[0]), stringToDim(priorities[1]) } + +// regionNeedSplit returns true if any dim of the hot region is greater than the store threshold. +func regionNeedSplit(store *statistics.StoreLoadDetail, region *statistics.HotPeerStat) bool { + return slice.AnyOf(store.LoadPred.Current.Loads, func(i int) bool { + return region.Loads[i] > store.LoadPred.Current.Loads[i]*0.3 + }) +} diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index 3bb6df6b273..2c5f67bec8a 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -16,6 +16,7 @@ package schedulers import ( "encoding/hex" + "fmt" "math" "testing" "time" @@ -141,7 +142,7 @@ func checkGCPendingOpInfos(re *require.Assertions, enablePlacementRules bool) { op.Start() op.SetStatusReachTime(operator.CREATED, time.Now().Add(-5*statistics.StoreHeartBeatReportInterval*time.Second)) op.SetStatusReachTime(operator.STARTED, time.Now().Add((-5*statistics.StoreHeartBeatReportInterval+1)*time.Second)) - return newPendingInfluence(op, 2, 4, statistics.Influence{}, hb.conf.GetStoreStatZombieDuration()) + return newPendingInfluence(op, []uint64{2}, 4, statistics.Influence{}, hb.conf.GetStoreStatZombieDuration()) } justDoneOpInfluence := func(region *core.RegionInfo, ty opType) *pendingInfluence { infl := notDoneOpInfluence(region, ty) @@ -202,6 +203,53 @@ func TestHotWriteRegionScheduleByteRateOnly(t *testing.T) { checkHotWriteRegionScheduleByteRateOnly(re, true /* enable placement rules */) } +func TestSplitRegionInIfRegionIsTooHot(t *testing.T) { + re := require.New(t) + statistics.Denoising = false + cancel, _, tc, oc := prepareSchedulersTest() + defer cancel() + tc.SetHotRegionCacheHitsThreshold(1) + hb, err := CreateScheduler(statistics.Read.String(), oc, storage.NewStorageWithMemoryBackend(), nil) + re.NoError(err) + // the hot range is [a,c],[e,f] + b := &metapb.Buckets{ + RegionId: 1, + PeriodInMs: 1000, + Keys: [][]byte{[]byte(fmt.Sprintf("%21d", 11)), + []byte(fmt.Sprintf("%21d", 
12)), + []byte(fmt.Sprintf("%21d", 13)), + []byte(fmt.Sprintf("%21d", 14)), + []byte(fmt.Sprintf("%21d", 15)), + []byte(fmt.Sprintf("%21d", 16))}, + Stats: &metapb.BucketStats{ + ReadBytes: []uint64{10 * units.KiB, 10 * units.KiB, 0, 10 * units.KiB, 10 * units.KiB}, + ReadKeys: []uint64{256, 256, 0, 256, 256}, + ReadQps: []uint64{0, 0, 0, 0, 0}, + WriteBytes: []uint64{0, 0, 0, 0, 0}, + WriteQps: []uint64{0, 0, 0, 0, 0}, + WriteKeys: []uint64{0, 0, 0, 0, 0}, + }, + } + + task := buckets.NewCheckPeerTask(b) + re.True(tc.HotBucketCache.CheckAsync(task)) + time.Sleep(time.Millisecond * 10) + + tc.AddRegionStore(1, 3) + tc.AddRegionStore(2, 2) + tc.AddRegionStore(3, 2) + + tc.UpdateStorageReadBytes(1, 6*units.MiB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageReadBytes(2, 1*units.MiB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageReadBytes(3, 1*units.MiB*statistics.StoreHeartBeatReportInterval) + // Region 1, 2 and 3 are hot regions. + addRegionInfo(tc, statistics.Read, []testRegionInfo{ + {1, []uint64{1, 2, 3}, 4 * units.MiB, 0, 0}, + }) + ops, _ := hb.Schedule(tc, false) + re.Len(ops, 1) +} + func TestSplitBuckets(t *testing.T) { re := require.New(t) statistics.Denoising = false diff --git a/pkg/schedule/schedulers/utils.go b/pkg/schedule/schedulers/utils.go index 45c3ada135f..fa6384632b4 100644 --- a/pkg/schedule/schedulers/utils.go +++ b/pkg/schedule/schedulers/utils.go @@ -236,15 +236,16 @@ func getKeyRanges(args []string) ([]core.KeyRange, error) { type pendingInfluence struct { op *operator.Operator - from, to uint64 + froms []uint64 + to uint64 origin statistics.Influence maxZombieDuration time.Duration } -func newPendingInfluence(op *operator.Operator, from, to uint64, infl statistics.Influence, maxZombieDur time.Duration) *pendingInfluence { +func newPendingInfluence(op *operator.Operator, froms []uint64, to uint64, infl statistics.Influence, maxZombieDur time.Duration) *pendingInfluence { return &pendingInfluence{ op: op, - from: from, + froms: froms, to: to, origin: infl, maxZombieDuration: maxZombieDur, From f1676d4734ddc813107c9d4950d184b87b2019b0 Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Thu, 15 Jun 2023 18:27:07 +0800 Subject: [PATCH 02/16] add test for write Signed-off-by: bufferflies <1045931706@qq.com> --- pkg/schedule/schedulers/hot_region.go | 7 ++--- pkg/schedule/schedulers/hot_region_test.go | 36 ++++++++++++++-------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index 2f8a465d5d2..d10df0b6d5f 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -667,11 +667,10 @@ func (bs *balanceSolver) solve() []*operator.Operator { } } bs.cur.mainPeerStat = mainPeerStat - if regionNeedSplit(srcStore, mainPeerStat) { + if regionIsTooHot(srcStore, mainPeerStat) { hotSchedulerRegionIsTooHotCounter.Inc() ops := bs.createSplitOperator([]*core.RegionInfo{bs.cur.region}) if len(ops) > 0 { - bs.best = &solution{} bs.ops = ops bs.cur.calcPeersRate(bs.firstPriority, bs.secondPriority) bs.best = bs.cur @@ -1822,8 +1821,8 @@ func prioritiesToDim(priorities []string) (firstPriority int, secondPriority int return stringToDim(priorities[0]), stringToDim(priorities[1]) } -// regionNeedSplit returns true if any dim of the hot region is greater than the store threshold. 
-func regionNeedSplit(store *statistics.StoreLoadDetail, region *statistics.HotPeerStat) bool { +// regionIsTooHot returns true if any dim of the hot region is greater than the store threshold. +func regionIsTooHot(store *statistics.StoreLoadDetail, region *statistics.HotPeerStat) bool { return slice.AnyOf(store.LoadPred.Current.Loads, func(i int) bool { return region.Loads[i] > store.LoadPred.Current.Loads[i]*0.3 }) diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index 2c5f67bec8a..ccf51c0fc18 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -211,23 +211,18 @@ func TestSplitRegionInIfRegionIsTooHot(t *testing.T) { tc.SetHotRegionCacheHitsThreshold(1) hb, err := CreateScheduler(statistics.Read.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) - // the hot range is [a,c],[e,f] b := &metapb.Buckets{ RegionId: 1, PeriodInMs: 1000, Keys: [][]byte{[]byte(fmt.Sprintf("%21d", 11)), - []byte(fmt.Sprintf("%21d", 12)), - []byte(fmt.Sprintf("%21d", 13)), - []byte(fmt.Sprintf("%21d", 14)), - []byte(fmt.Sprintf("%21d", 15)), - []byte(fmt.Sprintf("%21d", 16))}, + []byte(fmt.Sprintf("%21d", 12))}, Stats: &metapb.BucketStats{ - ReadBytes: []uint64{10 * units.KiB, 10 * units.KiB, 0, 10 * units.KiB, 10 * units.KiB}, - ReadKeys: []uint64{256, 256, 0, 256, 256}, - ReadQps: []uint64{0, 0, 0, 0, 0}, - WriteBytes: []uint64{0, 0, 0, 0, 0}, - WriteQps: []uint64{0, 0, 0, 0, 0}, - WriteKeys: []uint64{0, 0, 0, 0, 0}, + ReadBytes: []uint64{10 * units.KiB}, + ReadKeys: []uint64{256}, + ReadQps: []uint64{0}, + WriteBytes: []uint64{0}, + WriteQps: []uint64{0}, + WriteKeys: []uint64{0}, }, } @@ -248,6 +243,23 @@ func TestSplitRegionInIfRegionIsTooHot(t *testing.T) { }) ops, _ := hb.Schedule(tc, false) re.Len(ops, 1) + re.Equal(operator.OpSplit, ops[0].Kind()) + ops, _ = hb.Schedule(tc, false) + re.Len(ops, 0) + + tc.UpdateStorageWrittenBytes(1, 6*units.MiB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenBytes(2, 1*units.MiB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenBytes(3, 1*units.MiB*statistics.StoreHeartBeatReportInterval) + // Region 1, 2 and 3 are hot regions. 
+ addRegionInfo(tc, statistics.Write, []testRegionInfo{ + {1, []uint64{1, 2, 3}, 4 * units.MiB, 0, 0}, + }) + hb, err = CreateScheduler(statistics.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) + ops, _ = hb.Schedule(tc, false) + re.Len(ops, 1) + re.Equal(operator.OpSplit, ops[0].Kind()) + ops, _ = hb.Schedule(tc, false) + re.Len(ops, 0) } func TestSplitBuckets(t *testing.T) { From 9fe27cf478c474b79f86021c9b66a4e0431f7eb7 Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Fri, 16 Jun 2023 10:32:51 +0800 Subject: [PATCH 03/16] extra const variable Signed-off-by: bufferflies <1045931706@qq.com> --- pkg/schedule/schedulers/hot_region.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index d10df0b6d5f..e6aaec86bf0 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -206,6 +206,7 @@ const ( minHotScheduleInterval = time.Second maxHotScheduleInterval = 20 * time.Second + regionTooHotThreshold = 0.3 ) var ( @@ -667,7 +668,7 @@ func (bs *balanceSolver) solve() []*operator.Operator { } } bs.cur.mainPeerStat = mainPeerStat - if regionIsTooHot(srcStore, mainPeerStat) { + if regionTooHot(srcStore, mainPeerStat) { hotSchedulerRegionIsTooHotCounter.Inc() ops := bs.createSplitOperator([]*core.RegionInfo{bs.cur.region}) if len(ops) > 0 { @@ -1821,9 +1822,9 @@ func prioritiesToDim(priorities []string) (firstPriority int, secondPriority int return stringToDim(priorities[0]), stringToDim(priorities[1]) } -// regionIsTooHot returns true if any dim of the hot region is greater than the store threshold. -func regionIsTooHot(store *statistics.StoreLoadDetail, region *statistics.HotPeerStat) bool { +// regionTooHot returns true if any dim of the hot region is greater than the store threshold. 
+func regionTooHot(store *statistics.StoreLoadDetail, region *statistics.HotPeerStat) bool { return slice.AnyOf(store.LoadPred.Current.Loads, func(i int) bool { - return region.Loads[i] > store.LoadPred.Current.Loads[i]*0.3 + return region.Loads[i] > store.LoadPred.Current.Loads[i]*regionTooHotThreshold }) } From 25b01453c757c90685d192350f3f77311d47c3aa Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Mon, 19 Jun 2023 11:00:36 +0800 Subject: [PATCH 04/16] lint Signed-off-by: bufferflies <1045931706@qq.com> --- pkg/schedule/schedulers/hot_region.go | 2 +- pkg/schedule/schedulers/hot_region_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index e6aaec86bf0..61ef0db5d3f 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -751,7 +751,7 @@ func (bs *balanceSolver) tryAddPendingInfluence() bool { dstStoreID := uint64(0) if isSplit { region := bs.GetRegion(bs.ops[0].RegionID()) - for id, _ := range region.GetStoreIDs() { + for id := range region.GetStoreIDs() { srcStoreIDs = append(srcStoreIDs, id) } } else { diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index ccf51c0fc18..eb971eed1f1 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -203,7 +203,7 @@ func TestHotWriteRegionScheduleByteRateOnly(t *testing.T) { checkHotWriteRegionScheduleByteRateOnly(re, true /* enable placement rules */) } -func TestSplitRegionInIfRegionIsTooHot(t *testing.T) { +func TestSplitIfRegionTooHot(t *testing.T) { re := require.New(t) statistics.Denoising = false cancel, _, tc, oc := prepareSchedulersTest() @@ -254,7 +254,7 @@ func TestSplitRegionInIfRegionIsTooHot(t *testing.T) { addRegionInfo(tc, statistics.Write, []testRegionInfo{ {1, []uint64{1, 2, 3}, 4 * units.MiB, 0, 0}, }) - hb, err = CreateScheduler(statistics.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) + hb, _ = CreateScheduler(statistics.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) ops, _ = hb.Schedule(tc, false) re.Len(ops, 1) re.Equal(operator.OpSplit, ops[0].Kind()) From 311bb195d89660b413395ec6e2274e8c50940817 Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Mon, 26 Jun 2023 11:09:42 +0800 Subject: [PATCH 05/16] pass ut Signed-off-by: bufferflies <1045931706@qq.com> --- pkg/schedule/schedulers/hot_region.go | 2 +- pkg/schedule/schedulers/hot_region_test.go | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index 61ef0db5d3f..eebe4667efe 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -1557,7 +1557,7 @@ func (bs *balanceSolver) createSplitOperator(regions []*core.RegionInfo) []*oper for _, region := range regions { createFunc(region) } - // the split buckets's priority is highest + // the split bucket's priority is highest if len(operators) > 0 { bs.cur.progressiveRank = -5 } diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index eb971eed1f1..2d43177fc2e 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -271,6 +271,7 @@ func TestSplitBuckets(t *testing.T) { hb, err := CreateScheduler(statistics.Read.String(), oc, storage.NewStorageWithMemoryBackend(), nil) 
re.NoError(err) solve := newBalanceSolver(hb.(*hotScheduler), tc, statistics.Read, transferLeader) + solve.cur = &solution{} region := core.NewTestRegionInfo(1, 1, []byte(""), []byte("")) // the hot range is [a,c],[e,f] From 4933372729af699898a9172e80acf0fea313fa63 Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Tue, 4 Jul 2023 13:36:15 +0800 Subject: [PATCH 06/16] skip if only one bucket hot Signed-off-by: bufferflies <1045931706@qq.com> --- pkg/schedule/schedulers/hot_region.go | 22 ++++++++++++++-------- pkg/schedule/schedulers/hot_region_test.go | 21 ++++++++++++--------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index e659379e406..14b1b81512c 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -58,11 +58,12 @@ var ( hotSchedulerSnapshotSenderLimitCounter = schedulerCounter.WithLabelValues(HotRegionName, "snapshot_sender_limit") // counter related with the split region - hotSchedulerNotFoundSplitKeysCounter = schedulerCounter.WithLabelValues(HotRegionName, "not_found_split_keys") - hotSchedulerRegionBucketsNotHotCounter = schedulerCounter.WithLabelValues(HotRegionName, "region_buckets_not_hot") - hotSchedulerSplitSuccessCounter = schedulerCounter.WithLabelValues(HotRegionName, "split_success") - hotSchedulerNeedSplitBeforeScheduleCounter = schedulerCounter.WithLabelValues(HotRegionName, "need_split_before_move_peer") - hotSchedulerRegionIsTooHotCounter = schedulerCounter.WithLabelValues(HotRegionName, "region_is_too_hot") + hotSchedulerNotFoundSplitKeysCounter = schedulerCounter.WithLabelValues(HotRegionName, "not_found_split_keys") + hotSchedulerRegionBucketsNotHotCounter = schedulerCounter.WithLabelValues(HotRegionName, "region_buckets_not_hot") + hotSchedulerRegionBucketsSingleHotSpotCounter = schedulerCounter.WithLabelValues(HotRegionName, "region_buckets_single_hot_spot") + hotSchedulerSplitSuccessCounter = schedulerCounter.WithLabelValues(HotRegionName, "split_success") + hotSchedulerNeedSplitBeforeScheduleCounter = schedulerCounter.WithLabelValues(HotRegionName, "need_split_before_move_peer") + hotSchedulerRegionIsTooHotCounter = schedulerCounter.WithLabelValues(HotRegionName, "region_is_too_hot") hotSchedulerMoveLeaderCounter = schedulerCounter.WithLabelValues(HotRegionName, moveLeader.String()) hotSchedulerMovePeerCounter = schedulerCounter.WithLabelValues(HotRegionName, movePeer.String()) @@ -670,7 +671,7 @@ func (bs *balanceSolver) solve() []*operator.Operator { bs.cur.mainPeerStat = mainPeerStat if regionTooHot(srcStore, mainPeerStat) { hotSchedulerRegionIsTooHotCounter.Inc() - ops := bs.createSplitOperator([]*core.RegionInfo{bs.cur.region}) + ops := bs.createSplitOperator([]*core.RegionInfo{bs.cur.region}, true) if len(ops) > 0 { bs.ops = ops bs.cur.calcPeersRate(bs.firstPriority, bs.secondPriority) @@ -1463,7 +1464,7 @@ func (bs *balanceSolver) buildOperators() (ops []*operator.Operator) { } } if len(splitRegions) > 0 { - return bs.createSplitOperator(splitRegions) + return bs.createSplitOperator(splitRegions, false) } srcStoreID := bs.cur.srcStore.GetID() @@ -1503,7 +1504,7 @@ func (bs *balanceSolver) buildOperators() (ops []*operator.Operator) { } // createSplitOperator creates split operators for the given regions. 
-func (bs *balanceSolver) createSplitOperator(regions []*core.RegionInfo) []*operator.Operator { +func (bs *balanceSolver) createSplitOperator(regions []*core.RegionInfo, isTooHot bool) []*operator.Operator { if len(regions) == 0 { return nil } @@ -1520,6 +1521,11 @@ func (bs *balanceSolver) createSplitOperator(regions []*core.RegionInfo) []*oper hotSchedulerRegionBucketsNotHotCounter.Inc() return } + // skip if only one hot buckets exists on this region. + if len(stats) <= 1 && isTooHot { + hotSchedulerRegionBucketsSingleHotSpotCounter.Inc() + return + } startKey, endKey := region.GetStartKey(), region.GetEndKey() splitKey := make([][]byte, 0) for _, stat := range stats { diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index ef5408afd70..5c5cf7e9fa4 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -214,15 +214,18 @@ func TestSplitIfRegionTooHot(t *testing.T) { b := &metapb.Buckets{ RegionId: 1, PeriodInMs: 1000, - Keys: [][]byte{[]byte(fmt.Sprintf("%21d", 11)), - []byte(fmt.Sprintf("%21d", 12))}, + Keys: [][]byte{ + []byte(fmt.Sprintf("%21d", 11)), + []byte(fmt.Sprintf("%21d", 12)), + []byte(fmt.Sprintf("%21d", 13)), + }, Stats: &metapb.BucketStats{ - ReadBytes: []uint64{10 * units.KiB}, - ReadKeys: []uint64{256}, - ReadQps: []uint64{0}, - WriteBytes: []uint64{0}, - WriteQps: []uint64{0}, - WriteKeys: []uint64{0}, + ReadBytes: []uint64{10 * units.KiB, 11 * units.KiB}, + ReadKeys: []uint64{256, 256}, + ReadQps: []uint64{0, 0}, + WriteBytes: []uint64{0, 0}, + WriteQps: []uint64{0, 0}, + WriteKeys: []uint64{0, 0}, }, } @@ -292,7 +295,7 @@ func TestSplitBuckets(t *testing.T) { task := buckets.NewCheckPeerTask(b) re.True(tc.HotBucketCache.CheckAsync(task)) time.Sleep(time.Millisecond * 10) - ops := solve.createSplitOperator([]*core.RegionInfo{region}) + ops := solve.createSplitOperator([]*core.RegionInfo{region}, false) re.Equal(1, len(ops)) op := ops[0] re.Equal(splitBucket, op.Desc()) From 85c242f25590e31a1ad4d34bdd1be8ca89bde400 Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Wed, 5 Jul 2023 11:50:30 +0800 Subject: [PATCH 07/16] default MaxMovableHotPeerSize Signed-off-by: bufferflies <1045931706@qq.com> --- server/config/config.go | 4 ++++ server/config/persist_options.go | 6 +----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/server/config/config.go b/server/config/config.go index dc16f941059..31777cefa21 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -865,6 +865,10 @@ func (c *ScheduleConfig) adjust(meta *configutil.ConfigMetaData, reloading bool) configutil.AdjustUint64(&c.HotRegionsReservedDays, defaultHotRegionsReservedDays) } + if !meta.IsDefined("max-movable-hot-peer-size") { + configutil.AdjustInt64(&c.MaxMovableHotPeerSize, defaultMaxMovableHotPeerSize) + } + if !meta.IsDefined("slow-store-evicting-affected-store-ratio-threshold") { configutil.AdjustFloat64(&c.SlowStoreEvictingAffectedStoreRatioThreshold, defaultSlowStoreEvictingAffectedStoreRatioThreshold) } diff --git a/server/config/persist_options.go b/server/config/persist_options.go index 37cd8a8fcc8..fc1c9ae6894 100644 --- a/server/config/persist_options.go +++ b/server/config/persist_options.go @@ -624,11 +624,7 @@ func (o *PersistOptions) IsLocationReplacementEnabled() bool { // GetMaxMovableHotPeerSize returns the max movable hot peer size. 
func (o *PersistOptions) GetMaxMovableHotPeerSize() int64 { - size := o.GetScheduleConfig().MaxMovableHotPeerSize - if size <= 0 { - size = defaultMaxMovableHotPeerSize - } - return size + return o.GetScheduleConfig().MaxMovableHotPeerSize } // IsDebugMetricsEnabled returns if debug metrics is enabled. From ce8df89367698e56c6fca0f379c1723c5d528ff1 Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Wed, 5 Jul 2023 14:26:55 +0800 Subject: [PATCH 08/16] rename too hot and add comment for function Signed-off-by: bufferflies <1045931706@qq.com> --- pkg/schedule/schedulers/hot_region.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index 14b1b81512c..618b5ad619d 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -63,7 +63,7 @@ var ( hotSchedulerRegionBucketsSingleHotSpotCounter = schedulerCounter.WithLabelValues(HotRegionName, "region_buckets_single_hot_spot") hotSchedulerSplitSuccessCounter = schedulerCounter.WithLabelValues(HotRegionName, "split_success") hotSchedulerNeedSplitBeforeScheduleCounter = schedulerCounter.WithLabelValues(HotRegionName, "need_split_before_move_peer") - hotSchedulerRegionIsTooHotCounter = schedulerCounter.WithLabelValues(HotRegionName, "region_is_too_hot") + hotSchedulerRegionTooHotNeedSplitCounter = schedulerCounter.WithLabelValues(HotRegionName, "region_is_too_hot_need_split") hotSchedulerMoveLeaderCounter = schedulerCounter.WithLabelValues(HotRegionName, moveLeader.String()) hotSchedulerMovePeerCounter = schedulerCounter.WithLabelValues(HotRegionName, movePeer.String()) @@ -669,9 +669,9 @@ func (bs *balanceSolver) solve() []*operator.Operator { } } bs.cur.mainPeerStat = mainPeerStat - if regionTooHot(srcStore, mainPeerStat) { - hotSchedulerRegionIsTooHotCounter.Inc() - ops := bs.createSplitOperator([]*core.RegionInfo{bs.cur.region}, true) + if tooHotNeedSplit(srcStore, mainPeerStat) { + hotSchedulerRegionTooHotNeedSplitCounter.Inc() + ops := bs.createSplitOperator([]*core.RegionInfo{bs.cur.region}, true /*too hot need to split*/) if len(ops) > 0 { bs.ops = ops bs.cur.calcPeersRate(bs.firstPriority, bs.secondPriority) @@ -1464,7 +1464,7 @@ func (bs *balanceSolver) buildOperators() (ops []*operator.Operator) { } } if len(splitRegions) > 0 { - return bs.createSplitOperator(splitRegions, false) + return bs.createSplitOperator(splitRegions, false /* region is too big need split before move */) } srcStoreID := bs.cur.srcStore.GetID() @@ -1504,6 +1504,7 @@ func (bs *balanceSolver) buildOperators() (ops []*operator.Operator) { } // createSplitOperator creates split operators for the given regions. +// isTooHot true indicates that the region is too hot and needs split. func (bs *balanceSolver) createSplitOperator(regions []*core.RegionInfo, isTooHot bool) []*operator.Operator { if len(regions) == 0 { return nil @@ -1828,8 +1829,8 @@ func prioritiesToDim(priorities []string) (firstPriority int, secondPriority int return stringToDim(priorities[0]), stringToDim(priorities[1]) } -// regionTooHot returns true if any dim of the hot region is greater than the store threshold. -func regionTooHot(store *statistics.StoreLoadDetail, region *statistics.HotPeerStat) bool { +// tooHotNeedSplit returns true if any dim of the hot region is greater than the store threshold. 
+func tooHotNeedSplit(store *statistics.StoreLoadDetail, region *statistics.HotPeerStat) bool { return slice.AnyOf(store.LoadPred.Current.Loads, func(i int) bool { return region.Loads[i] > store.LoadPred.Current.Loads[i]*regionTooHotThreshold }) From a3faee381055aac79920ac96f8e88117687f8d48 Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Wed, 5 Jul 2023 17:07:44 +0800 Subject: [PATCH 09/16] add config for split thresholds Signed-off-by: bufferflies <1045931706@qq.com> --- pkg/schedule/schedulers/hot_region.go | 8 ++++---- pkg/schedule/schedulers/hot_region_config.go | 13 +++++++++++++ pkg/schedule/schedulers/hot_region_test.go | 8 ++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index 618b5ad619d..88ab3afdd94 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -207,7 +207,6 @@ const ( minHotScheduleInterval = time.Second maxHotScheduleInterval = 20 * time.Second - regionTooHotThreshold = 0.3 ) var ( @@ -656,6 +655,7 @@ func (bs *balanceSolver) solve() []*operator.Operator { } } snapshotFilter := filter.NewSnapshotSendFilter(bs.GetStores(), constant.Medium) + splitThresholds := bs.sche.conf.getSplitThresholds() for _, srcStore := range bs.filterSrcStores() { bs.cur.srcStore = srcStore srcStoreID := srcStore.GetID() @@ -669,7 +669,7 @@ func (bs *balanceSolver) solve() []*operator.Operator { } } bs.cur.mainPeerStat = mainPeerStat - if tooHotNeedSplit(srcStore, mainPeerStat) { + if tooHotNeedSplit(srcStore, mainPeerStat, splitThresholds) { hotSchedulerRegionTooHotNeedSplitCounter.Inc() ops := bs.createSplitOperator([]*core.RegionInfo{bs.cur.region}, true /*too hot need to split*/) if len(ops) > 0 { @@ -1830,8 +1830,8 @@ func prioritiesToDim(priorities []string) (firstPriority int, secondPriority int } // tooHotNeedSplit returns true if any dim of the hot region is greater than the store threshold. -func tooHotNeedSplit(store *statistics.StoreLoadDetail, region *statistics.HotPeerStat) bool { +func tooHotNeedSplit(store *statistics.StoreLoadDetail, region *statistics.HotPeerStat, splitThresholds float64) bool { return slice.AnyOf(store.LoadPred.Current.Loads, func(i int) bool { - return region.Loads[i] > store.LoadPred.Current.Loads[i]*regionTooHotThreshold + return region.Loads[i] > store.LoadPred.Current.Loads[i]*splitThresholds }) } diff --git a/pkg/schedule/schedulers/hot_region_config.go b/pkg/schedule/schedulers/hot_region_config.go index c50f18d7c68..313ffdae029 100644 --- a/pkg/schedule/schedulers/hot_region_config.go +++ b/pkg/schedule/schedulers/hot_region_config.go @@ -75,6 +75,7 @@ func initHotRegionScheduleConfig() *hotRegionSchedulerConfig { EnableForTiFlash: true, RankFormulaVersion: "v2", ForbidRWType: "none", + SplitThresholds: 0.5, } cfg.applyPrioritiesConfig(defaultPrioritiesConfig) return cfg @@ -102,6 +103,7 @@ func (conf *hotRegionSchedulerConfig) getValidConf() *hotRegionSchedulerConfig { EnableForTiFlash: conf.EnableForTiFlash, RankFormulaVersion: conf.getRankFormulaVersionLocked(), ForbidRWType: conf.getForbidRWTypeLocked(), + SplitThresholds: conf.SplitThresholds, } } @@ -143,6 +145,8 @@ type hotRegionSchedulerConfig struct { RankFormulaVersion string `json:"rank-formula-version"` // forbid read or write scheduler, only for test ForbidRWType string `json:"forbid-rw-type,omitempty"` + // SplitThresholds is the threshold to split hot region if the flow of on hot region exceeds it. 
+ SplitThresholds float64 `json:"split-thresholds"` } func (conf *hotRegionSchedulerConfig) EncodeConfig() ([]byte, error) { @@ -316,6 +320,12 @@ func (conf *hotRegionSchedulerConfig) IsForbidRWType(rw statistics.RWType) bool return rw.String() == conf.ForbidRWType } +func (conf *hotRegionSchedulerConfig) getSplitThresholds() float64 { + conf.RLock() + defer conf.RUnlock() + return conf.SplitThresholds +} + func (conf *hotRegionSchedulerConfig) getForbidRWTypeLocked() string { switch conf.ForbidRWType { case statistics.Read.String(), statistics.Write.String(): @@ -377,6 +387,9 @@ func (conf *hotRegionSchedulerConfig) valid() error { conf.ForbidRWType != "none" && conf.ForbidRWType != "" { return errs.ErrSchedulerConfig.FastGenByArgs("invalid forbid-rw-type") } + if conf.SplitThresholds < 0.2 || conf.SplitThresholds > 1.0 { + return errs.ErrSchedulerConfig.FastGenByArgs("invalid split-thresholds, should be in range [0.2, 1.0]") + } return nil } diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index 1a4856a961d..d5a389b9a78 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -2478,6 +2478,14 @@ func TestConfigValidation(t *testing.T) { hc.ForbidRWType = "test" err = hc.valid() re.Error(err) + + hc.SplitThresholds = 0 + err = hc.valid() + re.Error(err) + + hc.SplitThresholds = 1.1 + err = hc.valid() + re.Error(err) } type maxZombieDurTestCase struct { From 6d8cdc7c532fd2c8c24faed69ab9c4a424941cf7 Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Wed, 5 Jul 2023 17:24:15 +0800 Subject: [PATCH 10/16] pass unit Signed-off-by: bufferflies <1045931706@qq.com> --- tests/pdctl/scheduler/scheduler_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/pdctl/scheduler/scheduler_test.go b/tests/pdctl/scheduler/scheduler_test.go index db5ea2efe72..60f6ea9224d 100644 --- a/tests/pdctl/scheduler/scheduler_test.go +++ b/tests/pdctl/scheduler/scheduler_test.go @@ -340,6 +340,7 @@ func TestScheduler(t *testing.T) { "strict-picking-store": "true", "enable-for-tiflash": "true", "rank-formula-version": "v2", + "split-thresholds": 0.5, } var conf map[string]interface{} mustExec([]string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "list"}, &conf) From a6006b15b58af741a4539b694d84a6853fe8c325 Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Thu, 6 Jul 2023 15:15:46 +0800 Subject: [PATCH 11/16] avoid panic Signed-off-by: bufferflies <1045931706@qq.com> --- pkg/schedule/schedulers/hot_region.go | 5 +++-- pkg/schedule/schedulers/hot_region_v2.go | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index 88ab3afdd94..733ba4e6d9b 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -218,7 +218,8 @@ var ( // as it implies that this dimension is sufficiently uniform. 
stddevThreshold = 0.1 - splitBucket = "split-hot-region" + splitBucket = "split-hot-region" + splitProgressiveRank = int64(-5) ) type hotScheduler struct { @@ -1566,7 +1567,7 @@ func (bs *balanceSolver) createSplitOperator(regions []*core.RegionInfo, isTooHo } // the split bucket's priority is highest if len(operators) > 0 { - bs.cur.progressiveRank = -5 + bs.cur.progressiveRank = splitProgressiveRank } return operators } diff --git a/pkg/schedule/schedulers/hot_region_v2.go b/pkg/schedule/schedulers/hot_region_v2.go index 2b7e96af0fb..ed950f91656 100644 --- a/pkg/schedule/schedulers/hot_region_v2.go +++ b/pkg/schedule/schedulers/hot_region_v2.go @@ -292,7 +292,7 @@ func (bs *balanceSolver) getScoreByPriorities(dim int, rs *rankV2Ratios) int { // betterThan checks if `bs.cur` is a better solution than `old`. func (bs *balanceSolver) betterThanV2(old *solution) bool { - if old == nil { + if old == nil || bs.cur.progressiveRank >= splitProgressiveRank { return true } if bs.cur.progressiveRank != old.progressiveRank { From 5f25ce5d33cfb379dd0f3c1900854959bc1a7583 Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Thu, 6 Jul 2023 16:16:14 +0800 Subject: [PATCH 12/16] logic error Signed-off-by: bufferflies <1045931706@qq.com> --- pkg/schedule/schedulers/hot_region.go | 2 +- pkg/schedule/schedulers/hot_region_v2.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index 733ba4e6d9b..ee56401160c 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -1273,7 +1273,7 @@ func (bs *balanceSolver) getMinRate(dim int) float64 { // betterThan checks if `bs.cur` is a better solution than `old`. func (bs *balanceSolver) betterThanV1(old *solution) bool { - if old == nil { + if old == nil || bs.cur.progressiveRank <= splitProgressiveRank { return true } if bs.cur.progressiveRank != old.progressiveRank { diff --git a/pkg/schedule/schedulers/hot_region_v2.go b/pkg/schedule/schedulers/hot_region_v2.go index ed950f91656..7b296446de2 100644 --- a/pkg/schedule/schedulers/hot_region_v2.go +++ b/pkg/schedule/schedulers/hot_region_v2.go @@ -292,7 +292,7 @@ func (bs *balanceSolver) getScoreByPriorities(dim int, rs *rankV2Ratios) int { // betterThan checks if `bs.cur` is a better solution than `old`. 
func (bs *balanceSolver) betterThanV2(old *solution) bool { - if old == nil || bs.cur.progressiveRank >= splitProgressiveRank { + if old == nil || bs.cur.progressiveRank <= splitProgressiveRank { return true } if bs.cur.progressiveRank != old.progressiveRank { From db9247d1776837462e1b8ba75ce4f570d525cecf Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Mon, 10 Jul 2023 15:38:47 +0800 Subject: [PATCH 13/16] make config more reasonable Signed-off-by: bufferflies <1045931706@qq.com> --- pkg/schedule/schedulers/hot_region.go | 10 ++++++++-- pkg/schedule/schedulers/hot_region_config.go | 6 +++--- pkg/schedule/schedulers/hot_region_test.go | 10 +++++++++- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index ee56401160c..7984aa15698 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -670,7 +670,7 @@ func (bs *balanceSolver) solve() []*operator.Operator { } } bs.cur.mainPeerStat = mainPeerStat - if tooHotNeedSplit(srcStore, mainPeerStat, splitThresholds) { + if tooHotNeedSplit(srcStore, mainPeerStat, splitThresholds) && bs.GetStoreConfig().IsEnableRegionBucket() { hotSchedulerRegionTooHotNeedSplitCounter.Inc() ops := bs.createSplitOperator([]*core.RegionInfo{bs.cur.region}, true /*too hot need to split*/) if len(ops) > 0 { @@ -1541,7 +1541,13 @@ func (bs *balanceSolver) createSplitOperator(regions []*core.RegionInfo, isTooHo // Otherwise, we should append the current start key and end key. // E.g. [a, b), [c, d) -> [a, b), [c, d) split keys is [a,b,c,d] if bytes.Equal(stat.StartKey, splitKey[len(splitKey)-1]) { - splitKey[len(splitKey)-1] = stat.EndKey + // If the region is too hot, we should split all the buckets to balance the load. + // otherwise, we should split the buckets that are too hot. 
+ if isTooHot { + splitKey = append(splitKey, stat.EndKey) + } else { + splitKey[len(splitKey)-1] = stat.EndKey + } } else { splitKey = append(splitKey, stat.StartKey, stat.EndKey) } diff --git a/pkg/schedule/schedulers/hot_region_config.go b/pkg/schedule/schedulers/hot_region_config.go index 313ffdae029..ffbe805e0bf 100644 --- a/pkg/schedule/schedulers/hot_region_config.go +++ b/pkg/schedule/schedulers/hot_region_config.go @@ -75,7 +75,7 @@ func initHotRegionScheduleConfig() *hotRegionSchedulerConfig { EnableForTiFlash: true, RankFormulaVersion: "v2", ForbidRWType: "none", - SplitThresholds: 0.5, + SplitThresholds: 0.2, } cfg.applyPrioritiesConfig(defaultPrioritiesConfig) return cfg @@ -387,8 +387,8 @@ func (conf *hotRegionSchedulerConfig) valid() error { conf.ForbidRWType != "none" && conf.ForbidRWType != "" { return errs.ErrSchedulerConfig.FastGenByArgs("invalid forbid-rw-type") } - if conf.SplitThresholds < 0.2 || conf.SplitThresholds > 1.0 { - return errs.ErrSchedulerConfig.FastGenByArgs("invalid split-thresholds, should be in range [0.2, 1.0]") + if conf.SplitThresholds < 0.01 || conf.SplitThresholds > 1.0 { + return errs.ErrSchedulerConfig.FastGenByArgs("invalid split-thresholds, should be in range [0.01, 1.0]") } return nil } diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index d5a389b9a78..40b6e60a261 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -302,7 +302,15 @@ func TestSplitBuckets(t *testing.T) { expectKeys := [][]byte{[]byte("a"), []byte("c"), []byte("d"), []byte("f")} expectOp, err := operator.CreateSplitRegionOperator(splitBucket, region, operator.OpSplit, pdpb.CheckPolicy_USEKEY, expectKeys) re.NoError(err) - expectOp.GetCreateTime() + re.Equal(expectOp.Brief(), op.Brief()) + re.Equal(expectOp.GetAdditionalInfo(), op.GetAdditionalInfo()) + + ops = solve.createSplitOperator([]*core.RegionInfo{region}, true) + re.Equal(1, len(ops)) + op = ops[0] + re.Equal(splitBucket, op.Desc()) + expectKeys = [][]byte{[]byte("a"), []byte("b"), []byte("c"), []byte("d"), []byte("e"), []byte("f")} + expectOp, err = operator.CreateSplitRegionOperator(splitBucket, region, operator.OpSplit, pdpb.CheckPolicy_USEKEY, expectKeys) re.Equal(expectOp.Brief(), op.Brief()) re.Equal(expectOp.GetAdditionalInfo(), op.GetAdditionalInfo()) } From 36286450accac62370b483513cdf011021d367ee Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Mon, 10 Jul 2023 15:51:53 +0800 Subject: [PATCH 14/16] pass ut Signed-off-by: bufferflies <1045931706@qq.com> --- pkg/schedule/schedulers/hot_region_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index 40b6e60a261..657342d57b7 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -311,6 +311,7 @@ func TestSplitBuckets(t *testing.T) { re.Equal(splitBucket, op.Desc()) expectKeys = [][]byte{[]byte("a"), []byte("b"), []byte("c"), []byte("d"), []byte("e"), []byte("f")} expectOp, err = operator.CreateSplitRegionOperator(splitBucket, region, operator.OpSplit, pdpb.CheckPolicy_USEKEY, expectKeys) + re.Nil(err) re.Equal(expectOp.Brief(), op.Brief()) re.Equal(expectOp.GetAdditionalInfo(), op.GetAdditionalInfo()) } From 6310efe92c8d8f52b26fafbd904afb9981c1172a Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Mon, 10 Jul 2023 15:51:53 +0800 Subject: [PATCH 15/16] pass ut Signed-off-by: 
bufferflies <1045931706@qq.com> --- pkg/schedule/schedulers/hot_region_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index 40b6e60a261..825b19cd19f 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -311,6 +311,7 @@ func TestSplitBuckets(t *testing.T) { re.Equal(splitBucket, op.Desc()) expectKeys = [][]byte{[]byte("a"), []byte("b"), []byte("c"), []byte("d"), []byte("e"), []byte("f")} expectOp, err = operator.CreateSplitRegionOperator(splitBucket, region, operator.OpSplit, pdpb.CheckPolicy_USEKEY, expectKeys) + re.NoError(err) re.Equal(expectOp.Brief(), op.Brief()) re.Equal(expectOp.GetAdditionalInfo(), op.GetAdditionalInfo()) } From 1305bd337976a2442145066e22a6afc4c5badd97 Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Mon, 10 Jul 2023 17:46:49 +0800 Subject: [PATCH 16/16] pass ut Signed-off-by: bufferflies <1045931706@qq.com> --- pkg/schedule/schedulers/hot_region_test.go | 1 + tests/pdctl/scheduler/scheduler_test.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index 825b19cd19f..b61471ca79f 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -244,6 +244,7 @@ func TestSplitIfRegionTooHot(t *testing.T) { addRegionInfo(tc, statistics.Read, []testRegionInfo{ {1, []uint64{1, 2, 3}, 4 * units.MiB, 0, 0}, }) + tc.GetStoreConfig().SetRegionBucketEnabled(true) ops, _ := hb.Schedule(tc, false) re.Len(ops, 1) re.Equal(operator.OpSplit, ops[0].Kind()) diff --git a/tests/pdctl/scheduler/scheduler_test.go b/tests/pdctl/scheduler/scheduler_test.go index 719afad6c8f..5be5cf02e77 100644 --- a/tests/pdctl/scheduler/scheduler_test.go +++ b/tests/pdctl/scheduler/scheduler_test.go @@ -339,7 +339,7 @@ func TestScheduler(t *testing.T) { "strict-picking-store": "true", "enable-for-tiflash": "true", "rank-formula-version": "v2", - "split-thresholds": 0.5, + "split-thresholds": 0.2, } var conf map[string]interface{} mustExec([]string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "list"}, &conf)
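
---
Series summary (cover-note style): taken together, these patches make the hot-region scheduler split a region instead of moving it when a single peer is too hot for a transfer to help. Before searching destination stores, `solve()` checks whether any load dimension of the candidate hot peer exceeds a configurable fraction (`split-thresholds`, default 0.2, valid range [0.01, 1.0]) of the source store's current load in that dimension, and region buckets must be enabled. A split solution is pinned at splitProgressiveRank (-5) so that betterThanV1/betterThanV2 always keep it, and its pending influence is recorded against every store holding a peer of the region, which is why pendingInfluence.from became the slice froms.

The following is a minimal standalone sketch of the split predicate only. The stand-in types and the example numbers are illustrative, not the real API; the actual code operates on statistics.StoreLoadDetail and statistics.HotPeerStat via slice.AnyOf.

package main

import "fmt"

// Simplified stand-ins: only the per-dimension loads (byte rate, key rate, QPS)
// matter for the split decision.
type storeLoads struct{ Loads []float64 } // store's current load per dimension
type peerLoads struct{ Loads []float64 }  // one hot peer's load per dimension

// tooHotNeedSplit mirrors the predicate added by the series: the peer should be
// split when ANY dimension exceeds splitThresholds * the store's load there.
func tooHotNeedSplit(store storeLoads, peer peerLoads, splitThresholds float64) bool {
	for i := range store.Loads {
		if peer.Loads[i] > store.Loads[i]*splitThresholds {
			return true
		}
	}
	return false
}

func main() {
	store := storeLoads{Loads: []float64{100, 100, 100}}
	peer := peerLoads{Loads: []float64{30, 5, 5}} // 30% of the store's byte rate
	// With the default split-thresholds of 0.2 this peer is too hot:
	// the scheduler tries a bucket split before any move or leader transfer.
	fmt.Println(tooHotNeedSplit(store, peer, 0.2)) // true
}

When the split is taken, createSplitOperator reuses the hot-bucket boundaries: on the too-hot path adjacent hot buckets are split apart individually (every bucket boundary becomes a split key) so the load can actually spread, while the pre-move "region too big" path merges contiguous hot buckets into one span; a region whose buckets expose only a single hot spot is skipped, since splitting it would not divide the load.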