From e6c2318b4f0e8145cc4b37a737e678934405a254 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 7 Mar 2024 14:33:36 +0800 Subject: [PATCH 1/3] This is an automated cherry-pick of #7878 close tikv/pd#7877 Signed-off-by: ti-chi-bot --- pkg/schedule/schedulers/grant_hot_region.go | 3 +- pkg/schedule/schedulers/hot_region.go | 55 +- pkg/schedule/schedulers/hot_region_config.go | 33 + pkg/schedule/schedulers/hot_region_test.go | 79 +- pkg/schedule/schedulers/hot_region_v2_test.go | 9 +- pkg/schedule/schedulers/shuffle_hot_region.go | 6 + pkg/statistics/store_collection_test.go | 4 +- pkg/statistics/store_load.go | 75 +- pkg/statistics/store_load_test.go | 20 +- tests/server/api/scheduler_test.go | 2 + .../pd-ctl/tests/scheduler/scheduler_test.go | 742 ++++++++++++++++++ 11 files changed, 986 insertions(+), 42 deletions(-) create mode 100644 tools/pd-ctl/tests/scheduler/scheduler_test.go diff --git a/pkg/schedule/schedulers/grant_hot_region.go b/pkg/schedule/schedulers/grant_hot_region.go index 5a68da069b8..771e1bbafe2 100644 --- a/pkg/schedule/schedulers/grant_hot_region.go +++ b/pkg/schedule/schedulers/grant_hot_region.go @@ -129,7 +129,8 @@ type grantHotRegionScheduler struct { // newGrantHotRegionScheduler creates an admin scheduler that transfers hot region peer to fixed store and hot region leader to one store. func newGrantHotRegionScheduler(opController *operator.Controller, conf *grantHotRegionSchedulerConfig) *grantHotRegionScheduler { - base := newBaseHotScheduler(opController) + base := newBaseHotScheduler(opController, + statistics.DefaultHistorySampleDuration, statistics.DefaultHistorySampleInterval) handler := newGrantHotRegionHandler(conf) ret := &grantHotRegionScheduler{ baseHotScheduler: base, diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index 4806180e450..3169ece2672 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -127,13 +127,13 @@ type baseHotScheduler struct { updateWriteTime time.Time } -func newBaseHotScheduler(opController *operator.Controller) *baseHotScheduler { +func newBaseHotScheduler(opController *operator.Controller, sampleDuration time.Duration, sampleInterval time.Duration) *baseHotScheduler { base := NewBaseScheduler(opController) ret := &baseHotScheduler{ BaseScheduler: base, types: []utils.RWType{utils.Write, utils.Read}, regionPendings: make(map[uint64]*pendingInfluence), - stHistoryLoads: statistics.NewStoreHistoryLoads(utils.DimLen), + stHistoryLoads: statistics.NewStoreHistoryLoads(utils.DimLen, sampleDuration, sampleInterval), r: rand.New(rand.NewSource(time.Now().UnixNano())), } for ty := resourceType(0); ty < resourceTypeLen; ty++ { @@ -180,6 +180,10 @@ func (h *baseHotScheduler) prepareForBalance(rw utils.RWType, cluster sche.Sched } } +func (h *baseHotScheduler) updateHistoryLoadConfig(sampleDuration, sampleInterval time.Duration) { + h.stHistoryLoads = h.stHistoryLoads.UpdateConfig(sampleDuration, sampleInterval) +} + // summaryPendingInfluence calculate the summary of pending Influence for each store // and clean the region from regionInfluence if they have ended operator. // It makes each dim rate or count become `weight` times to the origin value. 
@@ -233,7 +237,8 @@ type hotScheduler struct { } func newHotScheduler(opController *operator.Controller, conf *hotRegionSchedulerConfig) *hotScheduler { - base := newBaseHotScheduler(opController) + base := newBaseHotScheduler(opController, + conf.GetHistorySampleDuration(), conf.GetHistorySampleInterval()) ret := &hotScheduler{ name: HotRegionName, baseHotScheduler: base, @@ -257,6 +262,49 @@ func (h *hotScheduler) EncodeConfig() ([]byte, error) { return h.conf.EncodeConfig() } +<<<<<<< HEAD +======= +func (h *hotScheduler) ReloadConfig() error { + h.conf.Lock() + defer h.conf.Unlock() + cfgData, err := h.conf.storage.LoadSchedulerConfig(h.GetName()) + if err != nil { + return err + } + if len(cfgData) == 0 { + return nil + } + newCfg := &hotRegionSchedulerConfig{} + if err := DecodeConfig([]byte(cfgData), newCfg); err != nil { + return err + } + h.conf.MinHotByteRate = newCfg.MinHotByteRate + h.conf.MinHotKeyRate = newCfg.MinHotKeyRate + h.conf.MinHotQueryRate = newCfg.MinHotQueryRate + h.conf.MaxZombieRounds = newCfg.MaxZombieRounds + h.conf.MaxPeerNum = newCfg.MaxPeerNum + h.conf.ByteRateRankStepRatio = newCfg.ByteRateRankStepRatio + h.conf.KeyRateRankStepRatio = newCfg.KeyRateRankStepRatio + h.conf.QueryRateRankStepRatio = newCfg.QueryRateRankStepRatio + h.conf.CountRankStepRatio = newCfg.CountRankStepRatio + h.conf.GreatDecRatio = newCfg.GreatDecRatio + h.conf.MinorDecRatio = newCfg.MinorDecRatio + h.conf.SrcToleranceRatio = newCfg.SrcToleranceRatio + h.conf.DstToleranceRatio = newCfg.DstToleranceRatio + h.conf.WriteLeaderPriorities = newCfg.WriteLeaderPriorities + h.conf.WritePeerPriorities = newCfg.WritePeerPriorities + h.conf.ReadPriorities = newCfg.ReadPriorities + h.conf.StrictPickingStore = newCfg.StrictPickingStore + h.conf.EnableForTiFlash = newCfg.EnableForTiFlash + h.conf.RankFormulaVersion = newCfg.RankFormulaVersion + h.conf.ForbidRWType = newCfg.ForbidRWType + h.conf.SplitThresholds = newCfg.SplitThresholds + h.conf.HistorySampleDuration = newCfg.HistorySampleDuration + h.conf.HistorySampleInterval = newCfg.HistorySampleInterval + return nil +} + +>>>>>>> bbd3bdb56 (scheduler: make history-sample-interval and history-sample-duration configurable (#7878)) func (h *hotScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { h.conf.ServeHTTP(w, r) } @@ -286,6 +334,7 @@ func (h *hotScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]* func (h *hotScheduler) dispatch(typ utils.RWType, cluster sche.SchedulerCluster) []*operator.Operator { h.Lock() defer h.Unlock() + h.updateHistoryLoadConfig(h.conf.GetHistorySampleDuration(), h.conf.GetHistorySampleInterval()) h.prepareForBalance(typ, cluster) // it can not move earlier to support to use api and metrics. 
 	if h.conf.IsForbidRWType(typ) {
diff --git a/pkg/schedule/schedulers/hot_region_config.go b/pkg/schedule/schedulers/hot_region_config.go
index 2ff78748f02..97a4c7315e5 100644
--- a/pkg/schedule/schedulers/hot_region_config.go
+++ b/pkg/schedule/schedulers/hot_region_config.go
@@ -26,10 +26,12 @@ import (
 	"github.com/tikv/pd/pkg/errs"
 	sche "github.com/tikv/pd/pkg/schedule/core"
 	"github.com/tikv/pd/pkg/slice"
+	"github.com/tikv/pd/pkg/statistics"
 	"github.com/tikv/pd/pkg/statistics/utils"
 	"github.com/tikv/pd/pkg/storage/endpoint"
 	"github.com/tikv/pd/pkg/utils/reflectutil"
 	"github.com/tikv/pd/pkg/utils/syncutil"
+	"github.com/tikv/pd/pkg/utils/typeutil"
 	"github.com/tikv/pd/pkg/versioninfo"
 	"github.com/unrolled/render"
 	"go.uber.org/zap"
@@ -76,6 +78,8 @@ func initHotRegionScheduleConfig() *hotRegionSchedulerConfig {
 		RankFormulaVersion:    "v2",
 		ForbidRWType:          "none",
 		SplitThresholds:       0.2,
+		HistorySampleDuration: typeutil.NewDuration(statistics.DefaultHistorySampleDuration),
+		HistorySampleInterval: typeutil.NewDuration(statistics.DefaultHistorySampleInterval),
 	}
 	cfg.applyPrioritiesConfig(defaultPrioritiesConfig)
 	return cfg
@@ -104,6 +108,8 @@ func (conf *hotRegionSchedulerConfig) getValidConf() *hotRegionSchedulerConfig {
 		RankFormulaVersion:    conf.getRankFormulaVersionLocked(),
 		ForbidRWType:          conf.getForbidRWTypeLocked(),
 		SplitThresholds:       conf.SplitThresholds,
+		HistorySampleDuration: conf.HistorySampleDuration,
+		HistorySampleInterval: conf.HistorySampleInterval,
 	}
 }
@@ -147,6 +153,9 @@ type hotRegionSchedulerConfig struct {
 	ForbidRWType string `json:"forbid-rw-type,omitempty"`
 	// SplitThresholds is the threshold to split hot region if the first priority flow of on hot region exceeds it.
 	SplitThresholds float64 `json:"split-thresholds"`
+
+	HistorySampleDuration typeutil.Duration `json:"history-sample-duration"`
+	HistorySampleInterval typeutil.Duration `json:"history-sample-interval"`
 }
 
 func (conf *hotRegionSchedulerConfig) EncodeConfig() ([]byte, error) {
@@ -305,6 +314,30 @@ func (conf *hotRegionSchedulerConfig) GetRankFormulaVersion() string {
 	return conf.getRankFormulaVersionLocked()
 }
 
+func (conf *hotRegionSchedulerConfig) GetHistorySampleDuration() time.Duration {
+	conf.RLock()
+	defer conf.RUnlock()
+	return conf.HistorySampleDuration.Duration
+}
+
+func (conf *hotRegionSchedulerConfig) GetHistorySampleInterval() time.Duration {
+	conf.RLock()
+	defer conf.RUnlock()
+	return conf.HistorySampleInterval.Duration
+}
+
+func (conf *hotRegionSchedulerConfig) SetHistorySampleDuration(d time.Duration) {
+	conf.Lock()
+	defer conf.Unlock()
+	conf.HistorySampleDuration = typeutil.NewDuration(d)
+}
+
+func (conf *hotRegionSchedulerConfig) SetHistorySampleInterval(d time.Duration) {
+	conf.Lock()
+	defer conf.Unlock()
+	conf.HistorySampleInterval = typeutil.NewDuration(d)
+}
+
 func (conf *hotRegionSchedulerConfig) getRankFormulaVersionLocked() string {
 	switch conf.RankFormulaVersion {
 	case "v2":
diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go
index d8f9bbc532c..6bd70537b45 100644
--- a/pkg/schedule/schedulers/hot_region_test.go
+++ b/pkg/schedule/schedulers/hot_region_test.go
@@ -40,6 +40,8 @@ import (
 )
 
 func init() {
+	// TODO: remove this global variable in the future.
+	// And use a function to create hot scheduler for test.
schedulePeerPr = 1.0 RegisterScheduler(utils.Write.String(), func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { cfg := initHotRegionScheduleConfig() @@ -199,7 +201,6 @@ func newTestRegion(id uint64) *core.RegionInfo { func TestHotWriteRegionScheduleByteRateOnly(t *testing.T) { re := require.New(t) statistics.Denoising = false - statistics.HistorySampleDuration = 0 statisticsInterval = 0 checkHotWriteRegionScheduleByteRateOnly(re, false /* disable placement rules */) checkHotWriteRegionScheduleByteRateOnly(re, true /* enable placement rules */) @@ -393,6 +394,64 @@ func TestSplitBucketsByLoad(t *testing.T) { } } +<<<<<<< HEAD +======= +func checkHotWriteRegionPlacement(re *require.Assertions, enablePlacementRules bool) { + cancel, _, tc, oc := prepareSchedulersTest() + defer cancel() + tc.SetEnableUseJointConsensus(true) + tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.ConfChangeV2)) + tc.SetEnablePlacementRules(enablePlacementRules) + labels := []string{"zone", "host"} + tc.SetMaxReplicasWithLabel(enablePlacementRules, 3, labels...) + hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) + re.NoError(err) + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + tc.SetHotRegionCacheHitsThreshold(0) + + tc.AddLabelsStore(1, 2, map[string]string{"zone": "z1", "host": "h1"}) + tc.AddLabelsStore(2, 2, map[string]string{"zone": "z1", "host": "h2"}) + tc.AddLabelsStore(3, 2, map[string]string{"zone": "z2", "host": "h3"}) + tc.AddLabelsStore(4, 2, map[string]string{"zone": "z2", "host": "h4"}) + tc.AddLabelsStore(5, 2, map[string]string{"zone": "z2", "host": "h5"}) + tc.AddLabelsStore(6, 2, map[string]string{"zone": "z2", "host": "h6"}) + tc.RuleManager.SetRule(&placement.Rule{ + GroupID: "pd", ID: "leader", Role: placement.Leader, Count: 1, LabelConstraints: []placement.LabelConstraint{{Key: "zone", Op: "in", Values: []string{"z1"}}}, + }) + tc.RuleManager.SetRule(&placement.Rule{ + GroupID: "pd", ID: "voter", Role: placement.Follower, Count: 2, LabelConstraints: []placement.LabelConstraint{{Key: "zone", Op: "in", Values: []string{"z2"}}}, + }) + tc.RuleManager.DeleteRule("pd", "default") + + tc.UpdateStorageWrittenBytes(1, 10*units.MiB*utils.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenBytes(2, 0) + tc.UpdateStorageWrittenBytes(3, 6*units.MiB*utils.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenBytes(4, 3*units.MiB*utils.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenBytes(5, 3*units.MiB*utils.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenBytes(6, 6*units.MiB*utils.StoreHeartBeatReportInterval) + + // Region 1, 2 and 3 are hot regions. 
+ addRegionInfo(tc, utils.Write, []testRegionInfo{ + {1, []uint64{1, 3, 5}, 512 * units.KiB, 0, 0}, + {2, []uint64{1, 4, 6}, 512 * units.KiB, 0, 0}, + {3, []uint64{1, 3, 6}, 512 * units.KiB, 0, 0}, + }) + ops, _ := hb.Schedule(tc, false) + re.NotEmpty(ops) + re.NotContains(ops[0].Step(1).String(), "transfer leader") + clearPendingInfluence(hb.(*hotScheduler)) + + tc.RuleManager.SetRule(&placement.Rule{ + GroupID: "pd", ID: "voter", Role: placement.Voter, Count: 2, LabelConstraints: []placement.LabelConstraint{{Key: "zone", Op: "in", Values: []string{"z2"}}}, + }) + tc.RuleManager.DeleteRule("pd", "follower") + ops, _ = hb.Schedule(tc, false) + re.NotEmpty(ops) + // TODO: fix the test + // re.NotContains(ops[0].Step(1).String(), "transfer leader") +} + +>>>>>>> bbd3bdb56 (scheduler: make history-sample-interval and history-sample-duration configurable (#7878)) func checkHotWriteRegionScheduleByteRateOnly(re *require.Assertions, enablePlacementRules bool) { cancel, opt, tc, oc := prepareSchedulersTest() defer cancel() @@ -402,6 +461,7 @@ func checkHotWriteRegionScheduleByteRateOnly(re *require.Assertions, enablePlace tc.SetMaxReplicasWithLabel(enablePlacementRules, 3, labels...) hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) tc.SetHotRegionCacheHitsThreshold(0) // Add stores 1, 2, 3, 4, 5, 6 with region counts 3, 2, 2, 2, 0, 0. @@ -606,6 +666,7 @@ func TestHotWriteRegionScheduleByteRateOnlyWithTiFlash(t *testing.T) { sche, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb := sche.(*hotScheduler) + hb.conf.SetHistorySampleDuration(0) // Add TiKV stores 1, 2, 3, 4, 5, 6, 7 (Down) with region counts 3, 3, 2, 2, 0, 0, 0. // Add TiFlash stores 8, 9, 10, 11 with region counts 3, 1, 1, 0. 
@@ -797,6 +858,7 @@ func TestHotWriteRegionScheduleWithQuery(t *testing.T) { hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.QueryPriority, utils.BytePriority} + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) @@ -834,6 +896,7 @@ func TestHotWriteRegionScheduleWithKeyRate(t *testing.T) { hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.KeyPriority, utils.BytePriority} hb.(*hotScheduler).conf.RankFormulaVersion = "v1" + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) tc.SetHotRegionCacheHitsThreshold(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) @@ -972,6 +1035,7 @@ func TestHotWriteRegionScheduleWithLeader(t *testing.T) { defer cancel() hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.KeyPriority, utils.BytePriority} + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) re.NoError(err) tc.SetHotRegionCacheHitsThreshold(0) @@ -1040,6 +1104,7 @@ func checkHotWriteRegionScheduleWithPendingInfluence(re *require.Assertions, dim re.NoError(err) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.KeyPriority, utils.BytePriority} hb.(*hotScheduler).conf.RankFormulaVersion = "v1" + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) old := pendingAmpFactor pendingAmpFactor = 0.0 defer func() { @@ -1127,6 +1192,7 @@ func TestHotWriteRegionScheduleWithRuleEnabled(t *testing.T) { hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.KeyPriority, utils.BytePriority} + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) tc.SetHotRegionCacheHitsThreshold(0) key, err := hex.DecodeString("") @@ -1210,6 +1276,7 @@ func TestHotReadRegionScheduleByteRateOnly(t *testing.T) { re.NoError(err) hb := scheduler.(*hotScheduler) hb.conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} + hb.conf.SetHistorySampleDuration(0) tc.SetHotRegionCacheHitsThreshold(0) // Add stores 1, 2, 3, 4, 5 with region counts 3, 2, 2, 2, 0. 
@@ -1334,6 +1401,7 @@ func TestHotReadRegionScheduleWithQuery(t *testing.T) { hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.RankFormulaVersion = "v1" + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) @@ -1370,6 +1438,7 @@ func TestHotReadRegionScheduleWithKeyRate(t *testing.T) { hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) @@ -1434,6 +1503,7 @@ func checkHotReadRegionScheduleWithPendingInfluence(re *require.Assertions, dim hb.(*hotScheduler).conf.MinorDecRatio = 1 hb.(*hotScheduler).conf.DstToleranceRatio = 1 hb.(*hotScheduler).conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) old := pendingAmpFactor pendingAmpFactor = 0.0 defer func() { @@ -2011,7 +2081,6 @@ func checkSortResult(re *require.Assertions, regions []uint64, hotPeers []*stati func TestInfluenceByRWType(t *testing.T) { re := require.New(t) - statistics.HistorySampleDuration = 0 originValue := schedulePeerPr defer func() { schedulePeerPr = originValue @@ -2025,6 +2094,7 @@ func TestInfluenceByRWType(t *testing.T) { re.NoError(err) hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) tc.SetHotRegionCacheHitsThreshold(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) @@ -2148,6 +2218,7 @@ func TestHotScheduleWithPriority(t *testing.T) { re.NoError(err) hb.(*hotScheduler).conf.SetDstToleranceRatio(1.05) hb.(*hotScheduler).conf.SetSrcToleranceRatio(1.05) + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) // skip stddev check origin := stddevThreshold stddevThreshold = -1.0 @@ -2196,6 +2267,7 @@ func TestHotScheduleWithPriority(t *testing.T) { addRegionInfo(tc, utils.Read, []testRegionInfo{ {1, []uint64{1, 2, 3}, 2 * units.MiB, 2 * units.MiB, 0}, }) + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) hb.(*hotScheduler).conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} ops, _ = hb.Schedule(tc, false) re.Len(ops, 1) @@ -2209,6 +2281,7 @@ func TestHotScheduleWithPriority(t *testing.T) { hb, err = CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.KeyPriority, utils.BytePriority} hb.(*hotScheduler).conf.RankFormulaVersion = "v1" + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) re.NoError(err) // assert loose store picking @@ -2263,6 +2336,7 @@ func TestHotScheduleWithStddev(t *testing.T) { tc.AddRegionStore(4, 20) tc.AddRegionStore(5, 20) hb.(*hotScheduler).conf.StrictPickingStore = false + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) // skip uniform cluster tc.UpdateStorageWrittenStats(1, 5*units.MiB*utils.StoreHeartBeatReportInterval, 5*units.MiB*utils.StoreHeartBeatReportInterval) @@ -2314,6 +2388,7 @@ func TestHotWriteLeaderScheduleWithPriority(t *testing.T) { re.NoError(err) hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) 
tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.SetHotRegionCacheHitsThreshold(0) diff --git a/pkg/schedule/schedulers/hot_region_v2_test.go b/pkg/schedule/schedulers/hot_region_v2_test.go index d11ac44dde9..7d2b0592ef7 100644 --- a/pkg/schedule/schedulers/hot_region_v2_test.go +++ b/pkg/schedule/schedulers/hot_region_v2_test.go @@ -35,13 +35,13 @@ func TestHotWriteRegionScheduleWithRevertRegionsDimSecond(t *testing.T) { defer cancel() statistics.Denoising = false statisticsInterval = 0 - statistics.HistorySampleDuration = 0 sche, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil, nil) re.NoError(err) hb := sche.(*hotScheduler) hb.conf.SetDstToleranceRatio(0.0) hb.conf.SetSrcToleranceRatio(0.0) hb.conf.SetRankFormulaVersion("v1") + hb.conf.SetHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) @@ -104,6 +104,7 @@ func TestHotWriteRegionScheduleWithRevertRegionsDimFirst(t *testing.T) { hb.conf.SetDstToleranceRatio(0.0) hb.conf.SetSrcToleranceRatio(0.0) hb.conf.SetRankFormulaVersion("v1") + hb.conf.SetHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) @@ -149,7 +150,6 @@ func TestHotWriteRegionScheduleWithRevertRegionsDimFirstOnly(t *testing.T) { re := require.New(t) statistics.Denoising = false statisticsInterval = 0 - statistics.HistorySampleDuration = 0 cancel, _, tc, oc := prepareSchedulersTest() defer cancel() @@ -159,6 +159,7 @@ func TestHotWriteRegionScheduleWithRevertRegionsDimFirstOnly(t *testing.T) { hb.conf.SetDstToleranceRatio(0.0) hb.conf.SetSrcToleranceRatio(0.0) hb.conf.SetRankFormulaVersion("v1") + hb.conf.SetHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) @@ -213,7 +214,6 @@ func TestHotReadRegionScheduleWithRevertRegionsDimSecond(t *testing.T) { re := require.New(t) statistics.Denoising = false statisticsInterval = 0 - statistics.HistorySampleDuration = 0 cancel, _, tc, oc := prepareSchedulersTest() defer cancel() @@ -223,6 +223,7 @@ func TestHotReadRegionScheduleWithRevertRegionsDimSecond(t *testing.T) { hb.conf.SetDstToleranceRatio(0.0) hb.conf.SetSrcToleranceRatio(0.0) hb.conf.SetRankFormulaVersion("v1") + hb.conf.SetHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) @@ -276,7 +277,6 @@ func TestSkipUniformStore(t *testing.T) { re := require.New(t) statistics.Denoising = false statisticsInterval = 0 - statistics.HistorySampleDuration = 0 cancel, _, tc, oc := prepareSchedulersTest() defer cancel() @@ -286,6 +286,7 @@ func TestSkipUniformStore(t *testing.T) { hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.SetRankFormulaVersion("v2") hb.(*hotScheduler).conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) diff --git a/pkg/schedule/schedulers/shuffle_hot_region.go b/pkg/schedule/schedulers/shuffle_hot_region.go index d5264b90428..405df3745c8 100644 --- a/pkg/schedule/schedulers/shuffle_hot_region.go +++ b/pkg/schedule/schedulers/shuffle_hot_region.go @@ -57,7 +57,13 @@ type 
shuffleHotRegionScheduler struct { // newShuffleHotRegionScheduler creates an admin scheduler that random balance hot regions func newShuffleHotRegionScheduler(opController *operator.Controller, conf *shuffleHotRegionSchedulerConfig) Scheduler { +<<<<<<< HEAD base := newBaseHotScheduler(opController) +======= + base := newBaseHotScheduler(opController, + statistics.DefaultHistorySampleDuration, statistics.DefaultHistorySampleInterval) + handler := newShuffleHotRegionHandler(conf) +>>>>>>> bbd3bdb56 (scheduler: make history-sample-interval and history-sample-duration configurable (#7878)) ret := &shuffleHotRegionScheduler{ baseHotScheduler: base, conf: conf, diff --git a/pkg/statistics/store_collection_test.go b/pkg/statistics/store_collection_test.go index 054e55a9fda..02e6350ffa4 100644 --- a/pkg/statistics/store_collection_test.go +++ b/pkg/statistics/store_collection_test.go @@ -98,7 +98,7 @@ func TestSummaryStoreInfos(t *testing.T) { rw := utils.Read kind := constant.LeaderKind collector := newTikvCollector() - storeHistoryLoad := NewStoreHistoryLoads(utils.DimLen) + storeHistoryLoad := NewStoreHistoryLoads(utils.DimLen, DefaultHistorySampleDuration, DefaultHistorySampleInterval) storeInfos := make(map[uint64]*StoreSummaryInfo) storeLoads := make(map[uint64][]float64) for _, storeID := range []int{1, 3} { @@ -130,7 +130,7 @@ func TestSummaryStoreInfos(t *testing.T) { } // case 2: put many elements into history load - historySampleInterval = 0 + storeHistoryLoad.sampleDuration = 0 for i := 1; i < 10; i++ { details = summaryStoresLoadByEngine(storeInfos, storeLoads, storeHistoryLoad, nil, rw, kind, collector) expect := []float64{2, 4, 10} diff --git a/pkg/statistics/store_load.go b/pkg/statistics/store_load.go index 79417b65b7e..c468024e3d6 100644 --- a/pkg/statistics/store_load.go +++ b/pkg/statistics/store_load.go @@ -245,24 +245,29 @@ func MaxLoad(a, b *StoreLoad) *StoreLoad { } } -var ( - // historySampleInterval is the sampling interval for history load. - historySampleInterval = 30 * time.Second - // HistorySampleDuration is the duration for saving history load. - HistorySampleDuration = 5 * time.Minute - defaultSize = 10 +const ( + // DefaultHistorySampleInterval is the sampling interval for history load. + DefaultHistorySampleInterval = 30 * time.Second + // DefaultHistorySampleDuration is the duration for saving history load. + DefaultHistorySampleDuration = 5 * time.Minute ) // StoreHistoryLoads records the history load of a store. type StoreHistoryLoads struct { // loads[read/write][leader/follower]-->[store id]-->history load - loads [utils.RWTypeLen][constant.ResourceKindLen]map[uint64]*storeHistoryLoad - dim int + loads [utils.RWTypeLen][constant.ResourceKindLen]map[uint64]*storeHistoryLoad + dim int + sampleInterval time.Duration + sampleDuration time.Duration } // NewStoreHistoryLoads creates a StoreHistoryLoads. -func NewStoreHistoryLoads(dim int) *StoreHistoryLoads { - st := StoreHistoryLoads{dim: dim} +func NewStoreHistoryLoads(dim int, sampleDuration time.Duration, sampleInterval time.Duration) *StoreHistoryLoads { + st := StoreHistoryLoads{ + dim: dim, + sampleDuration: sampleDuration, + sampleInterval: sampleInterval, + } for i := utils.RWType(0); i < utils.RWTypeLen; i++ { for j := constant.ResourceKind(0); j < constant.ResourceKindLen; j++ { st.loads[i][j] = make(map[uint64]*storeHistoryLoad) @@ -272,20 +277,24 @@ func NewStoreHistoryLoads(dim int) *StoreHistoryLoads { } // Add adds the store load to the history. 
-func (s *StoreHistoryLoads) Add(storeID uint64, rwTp utils.RWType, kind constant.ResourceKind, loads []float64) {
+func (s *StoreHistoryLoads) Add(storeID uint64, rwTp utils.RWType, kind constant.ResourceKind, pointLoad []float64) {
 	load, ok := s.loads[rwTp][kind][storeID]
 	if !ok {
-		size := defaultSize
-		if historySampleInterval != 0 {
-			size = int(HistorySampleDuration / historySampleInterval)
+		size := int(DefaultHistorySampleDuration / DefaultHistorySampleInterval)
+		if s.sampleInterval != 0 {
+			size = int(s.sampleDuration / s.sampleInterval)
+		}
+		if s.sampleDuration == 0 {
+			size = 0
 		}
-		load = newStoreHistoryLoad(size, s.dim)
+		load = newStoreHistoryLoad(size, s.dim, s.sampleInterval)
 		s.loads[rwTp][kind][storeID] = load
 	}
-	load.add(loads)
+	load.add(pointLoad)
 }
 
 // Get returns the store loads from the history, not one time point.
+// In other words, the result is [dim][time].
 func (s *StoreHistoryLoads) Get(storeID uint64, rwTp utils.RWType, kind constant.ResourceKind) [][]float64 {
 	load, ok := s.loads[rwTp][kind][storeID]
 	if !ok {
@@ -294,36 +303,46 @@ func (s *StoreHistoryLoads) Get(storeID uint64, rwTp utils.RWType, kind constant
 	return load.get()
 }
 
+// UpdateConfig updates the sample duration and interval.
+func (s *StoreHistoryLoads) UpdateConfig(sampleDuration time.Duration, sampleInterval time.Duration) *StoreHistoryLoads {
+	if s.sampleDuration == sampleDuration && s.sampleInterval == sampleInterval {
+		return s
+	}
+	return NewStoreHistoryLoads(s.dim, sampleDuration, sampleInterval)
+}
+
 type storeHistoryLoad struct {
 	update time.Time
 	// loads is a circular buffer.
 	// [dim] --> [1,2,3...]
-	loads [][]float64
-	size  int
-	count int
+	loads          [][]float64
+	size           int
+	count          int
+	sampleInterval time.Duration
 }
 
-func newStoreHistoryLoad(size int, dim int) *storeHistoryLoad {
+func newStoreHistoryLoad(size int, dimLen int, sampleInterval time.Duration) *storeHistoryLoad {
 	return &storeHistoryLoad{
-		loads: make([][]float64, dim),
-		size:  size,
+		loads:          make([][]float64, dimLen),
+		size:           size,
+		sampleInterval: sampleInterval,
 	}
 }
 
 // add adds the store load to the history.
 // eg. add([1,2,3]) --> [][]float64{{1}, {2}, {3}}
-func (s *storeHistoryLoad) add(loads []float64) {
+func (s *storeHistoryLoad) add(pointLoad []float64) {
 	// reject if the loads length is not equal to the dimension.
- if time.Since(s.update) < historySampleInterval || s.size == 0 || len(loads) != len(s.loads) { + if time.Since(s.update) < s.sampleInterval || s.size == 0 || len(pointLoad) != len(s.loads) { return } if s.count == 0 { - for i := range s.loads { - s.loads[i] = make([]float64, s.size) + for dim := range s.loads { + s.loads[dim] = make([]float64, s.size) } } - for i, v := range loads { - s.loads[i][s.count%s.size] = v + for dim, v := range pointLoad { + s.loads[dim][s.count%s.size] = v } s.count++ s.update = time.Now() diff --git a/pkg/statistics/store_load_test.go b/pkg/statistics/store_load_test.go index 67f2dff9cf9..67c9e53482f 100644 --- a/pkg/statistics/store_load_test.go +++ b/pkg/statistics/store_load_test.go @@ -16,6 +16,7 @@ package statistics import ( "testing" + "time" "github.com/stretchr/testify/require" "github.com/tikv/pd/pkg/core/constant" @@ -24,8 +25,7 @@ import ( func TestHistoryLoads(t *testing.T) { re := require.New(t) - historySampleInterval = 0 - historyLoads := NewStoreHistoryLoads(utils.DimLen) + historyLoads := NewStoreHistoryLoads(utils.DimLen, DefaultHistorySampleDuration, 0) loads := []float64{1.0, 2.0, 3.0} rwTp := utils.Read kind := constant.LeaderKind @@ -43,4 +43,20 @@ func TestHistoryLoads(t *testing.T) { expectLoads[utils.QueryDim][i] = 3.0 } re.EqualValues(expectLoads, historyLoads.Get(1, rwTp, kind)) + + historyLoads = NewStoreHistoryLoads(utils.DimLen, time.Millisecond, time.Millisecond) + historyLoads.Add(1, rwTp, kind, loads) + re.Len(historyLoads.Get(1, rwTp, kind)[0], 1) + + historyLoads = NewStoreHistoryLoads(utils.DimLen, time.Millisecond, time.Second) + historyLoads.Add(1, rwTp, kind, loads) + re.Empty(historyLoads.Get(1, rwTp, kind)[0]) + + historyLoads = NewStoreHistoryLoads(utils.DimLen, 0, time.Second) + historyLoads.Add(1, rwTp, kind, loads) + re.Empty(historyLoads.Get(1, rwTp, kind)[0]) + + historyLoads = NewStoreHistoryLoads(utils.DimLen, 0, 0) + historyLoads.Add(1, rwTp, kind, loads) + re.Empty(historyLoads.Get(1, rwTp, kind)[0]) } diff --git a/tests/server/api/scheduler_test.go b/tests/server/api/scheduler_test.go index 95c4d936a8c..ab2e91f23dd 100644 --- a/tests/server/api/scheduler_test.go +++ b/tests/server/api/scheduler_test.go @@ -216,6 +216,8 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { "write-peer-priorities": []interface{}{"byte", "key"}, "enable-for-tiflash": "true", "strict-picking-store": "true", + "history-sample-duration": "5m0s", + "history-sample-interval": "30s", } re.Equal(len(expectMap), len(resp), "expect %v, got %v", expectMap, resp) for key := range expectMap { diff --git a/tools/pd-ctl/tests/scheduler/scheduler_test.go b/tools/pd-ctl/tests/scheduler/scheduler_test.go new file mode 100644 index 00000000000..b5a2128752b --- /dev/null +++ b/tools/pd-ctl/tests/scheduler/scheduler_test.go @@ -0,0 +1,742 @@ +// Copyright 2019 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package scheduler_test + +import ( + "encoding/json" + "fmt" + "reflect" + "strings" + "testing" + "time" + + "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/spf13/cobra" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/core" + sc "github.com/tikv/pd/pkg/schedule/config" + "github.com/tikv/pd/pkg/slice" + "github.com/tikv/pd/pkg/utils/testutil" + "github.com/tikv/pd/pkg/versioninfo" + pdTests "github.com/tikv/pd/tests" + ctl "github.com/tikv/pd/tools/pd-ctl/pdctl" + "github.com/tikv/pd/tools/pd-ctl/tests" +) + +type schedulerTestSuite struct { + suite.Suite + env *pdTests.SchedulingTestEnvironment + defaultSchedulers []string +} + +func TestSchedulerTestSuite(t *testing.T) { + suite.Run(t, new(schedulerTestSuite)) +} + +func (suite *schedulerTestSuite) SetupSuite() { + re := suite.Require() + re.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/skipStoreConfigSync", `return(true)`)) + suite.defaultSchedulers = []string{ + "balance-leader-scheduler", + "balance-region-scheduler", + "balance-hot-region-scheduler", + "evict-slow-store-scheduler", + } +} + +func (suite *schedulerTestSuite) SetupTest() { + // use a new environment to avoid affecting other tests + suite.env = pdTests.NewSchedulingTestEnvironment(suite.T()) +} + +func (suite *schedulerTestSuite) TearDownSuite() { + re := suite.Require() + suite.env.Cleanup() + re.NoError(failpoint.Disable("github.com/tikv/pd/server/cluster/skipStoreConfigSync")) +} + +func (suite *schedulerTestSuite) TearDownTest() { + cleanFunc := func(cluster *pdTests.TestCluster) { + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + var currentSchedulers []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, ¤tSchedulers) + for _, scheduler := range suite.defaultSchedulers { + if slice.NoneOf(currentSchedulers, func(i int) bool { + return currentSchedulers[i] == scheduler + }) { + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", scheduler}, nil) + re.Contains(echo, "Success!") + } + } + for _, scheduler := range currentSchedulers { + if slice.NoneOf(suite.defaultSchedulers, func(i int) bool { + return suite.defaultSchedulers[i] == scheduler + }) { + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", scheduler}, nil) + re.Contains(echo, "Success!") + } + } + } + suite.env.RunFuncInTwoModes(cleanFunc) + suite.env.Cleanup() +} + +func (suite *schedulerTestSuite) TestScheduler() { + suite.env.RunTestInTwoModes(suite.checkScheduler) +} + +func (suite *schedulerTestSuite) checkScheduler(cluster *pdTests.TestCluster) { + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 3, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 4, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + + mustUsage := func(args []string) { + output, err := tests.ExecuteCommand(cmd, args...) 
+ re.NoError(err) + re.Contains(string(output), "Usage") + } + + checkSchedulerCommand := func(args []string, expected map[string]bool) { + if args != nil { + echo := mustExec(re, cmd, args, nil) + re.Contains(echo, "Success!") + } + testutil.Eventually(re, func() bool { + var schedulers []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, &schedulers) + if len(schedulers) != len(expected) { + return false + } + for _, scheduler := range schedulers { + if _, ok := expected[scheduler]; !ok { + return false + } + } + return true + }) + } + + checkSchedulerConfigCommand := func(expectedConfig map[string]any, schedulerName string) { + testutil.Eventually(re, func() bool { + configInfo := make(map[string]any) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName}, &configInfo) + return reflect.DeepEqual(expectedConfig, configInfo) + }) + } + + leaderServer := cluster.GetLeaderServer() + for _, store := range stores { + pdTests.MustPutStore(re, cluster, store) + } + + // note: because pdqsort is a unstable sort algorithm, set ApproximateSize for this region. + pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10)) + + // scheduler show command + expected := map[string]bool{ + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(nil, expected) + + // scheduler delete command + args := []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(args, expected) + + // avoid the influence of the scheduler order + schedulers := []string{"evict-leader-scheduler", "grant-leader-scheduler", "evict-leader-scheduler", "grant-leader-scheduler"} + + checkStorePause := func(changedStores []uint64, schedulerName string) { + status := func() string { + switch schedulerName { + case "evict-leader-scheduler": + return "paused" + case "grant-leader-scheduler": + return "resumed" + default: + re.Fail(fmt.Sprintf("unknown scheduler %s", schedulerName)) + return "" + } + }() + for _, store := range stores { + isStorePaused := !cluster.GetLeaderServer().GetRaftCluster().GetStore(store.GetId()).AllowLeaderTransfer() + if slice.AnyOf(changedStores, func(i int) bool { + return store.GetId() == changedStores[i] + }) { + re.True(isStorePaused, + fmt.Sprintf("store %d should be %s with %s", store.GetId(), status, schedulerName)) + } else { + re.False(isStorePaused, + fmt.Sprintf("store %d should not be %s with %s", store.GetId(), status, schedulerName)) + } + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + re.Equal(isStorePaused, !sche.GetCluster().GetStore(store.GetId()).AllowLeaderTransfer()) + } + } + } + + for idx := range schedulers { + checkStorePause([]uint64{}, schedulers[idx]) + // scheduler add command + args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "2"} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(args, expected) + + // scheduler config show command + expectedConfig := make(map[string]any) + expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}} + 
checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) + checkStorePause([]uint64{2}, schedulers[idx]) + + // scheduler config update command + args = []string{"-u", pdAddr, "scheduler", "config", schedulers[idx], "add-store", "3"} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, + } + + // check update success + checkSchedulerCommand(args, expected) + expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}, "3": []any{map[string]any{"end-key": "", "start-key": ""}}} + checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) + checkStorePause([]uint64{2, 3}, schedulers[idx]) + + // scheduler delete command + args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx]} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(args, expected) + checkStorePause([]uint64{}, schedulers[idx]) + + // scheduler add command + args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "2"} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(args, expected) + checkStorePause([]uint64{2}, schedulers[idx]) + + // scheduler add command twice + args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "4"} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(args, expected) + + // check add success + expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}, "4": []any{map[string]any{"end-key": "", "start-key": ""}}} + checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) + checkStorePause([]uint64{2, 4}, schedulers[idx]) + + // scheduler remove command [old] + args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx] + "-4"} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(args, expected) + + // check remove success + expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}} + checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) + checkStorePause([]uint64{2}, schedulers[idx]) + + // scheduler remove command, when remove the last store, it should remove whole scheduler + args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx] + "-2"} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(args, expected) + checkStorePause([]uint64{}, schedulers[idx]) + } + + // test shuffle region config + checkSchedulerCommand([]string{"-u", pdAddr, "scheduler", "add", "shuffle-region-scheduler"}, map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "shuffle-region-scheduler": true, + "evict-slow-store-scheduler": true, + }) + var roles []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "show-roles"}, &roles) + 
re.Equal([]string{"leader", "follower", "learner"}, roles) + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "set-roles", "learner"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "show-roles"}, &roles) + return reflect.DeepEqual([]string{"learner"}, roles) + }) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler"}, &roles) + re.Equal([]string{"learner"}, roles) + + // test grant hot region scheduler config + checkSchedulerCommand([]string{"-u", pdAddr, "scheduler", "add", "grant-hot-region-scheduler", "1", "1,2,3"}, map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "shuffle-region-scheduler": true, + "grant-hot-region-scheduler": true, + "evict-slow-store-scheduler": true, + }) + var conf3 map[string]any + expected3 := map[string]any{ + "store-id": []any{float64(1), float64(2), float64(3)}, + "store-leader-id": float64(1), + } + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) + re.Equal(expected3, conf3) + + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler", "set", "2", "1,2,3"}, nil) + re.Contains(echo, "Success!") + expected3["store-leader-id"] = float64(2) + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) + return reflect.DeepEqual(expected3, conf3) + }) + + // test remove and add scheduler + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-region-scheduler"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) + re.NotContains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-region-scheduler"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "1"}, nil) + re.Contains(echo, "Success! The scheduler is created.") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "2"}, nil) + re.Contains(echo, "Success! The scheduler has been applied to the store.") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-2"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil) + re.Contains(echo, "404") + testutil.Eventually(re, func() bool { // wait for removed scheduler to be synced to scheduling server. 
+ echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "evict-leader-scheduler"}, nil) + return strings.Contains(echo, "[404] scheduler not found") + }) + + // test hot region config + expected1 := map[string]any{ + "min-hot-byte-rate": float64(100), + "min-hot-key-rate": float64(10), + "min-hot-query-rate": float64(10), + "max-zombie-rounds": float64(3), + "max-peer-number": float64(1000), + "byte-rate-rank-step-ratio": 0.05, + "key-rate-rank-step-ratio": 0.05, + "query-rate-rank-step-ratio": 0.05, + "count-rank-step-ratio": 0.01, + "great-dec-ratio": 0.95, + "minor-dec-ratio": 0.99, + "src-tolerance-ratio": 1.05, + "dst-tolerance-ratio": 1.05, + "read-priorities": []any{"byte", "key"}, + "write-leader-priorities": []any{"key", "byte"}, + "write-peer-priorities": []any{"byte", "key"}, + "strict-picking-store": "true", + "enable-for-tiflash": "true", + "rank-formula-version": "v2", + "split-thresholds": 0.2, + "history-sample-duration": "5m0s", + "history-sample-interval": "30s", + } + checkHotSchedulerConfig := func(expect map[string]any) { + testutil.Eventually(re, func() bool { + var conf1 map[string]any + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + return reflect.DeepEqual(expect, conf1) + }) + } + + var conf map[string]any + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "list"}, &conf) + re.Equal(expected1, conf) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "show"}, &conf) + re.Equal(expected1, conf) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "src-tolerance-ratio", "1.02"}, nil) + re.Contains(echo, "Success!") + expected1["src-tolerance-ratio"] = 1.02 + checkHotSchedulerConfig(expected1) + + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,key"}, nil) + re.Contains(echo, "Success!") + expected1["read-priorities"] = []any{"byte", "key"} + checkHotSchedulerConfig(expected1) + + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,byte"}, nil) + re.Contains(echo, "Success!") + expected1["read-priorities"] = []any{"key", "byte"} + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "foo,bar"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", ""}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,key"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,byte"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", 
"config", "balance-hot-region-scheduler", "set", "read-priorities", "key,key,byte"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + + // write-priorities is divided into write-leader-priorities and write-peer-priorities + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-priorities", "key,byte"}, nil) + re.Contains(echo, "Failed!") + re.Contains(echo, "Config item is not found.") + checkHotSchedulerConfig(expected1) + + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v0"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + expected1["rank-formula-version"] = "v2" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v2"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + expected1["rank-formula-version"] = "v1" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v1"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + expected1["forbid-rw-type"] = "read" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "forbid-rw-type", "read"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + expected1["history-sample-duration"] = "1m0s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-duration", "1m"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + expected1["history-sample-interval"] = "1s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-interval", "1s"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + expected1["history-sample-duration"] = "0s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-duration", "0s"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + expected1["history-sample-interval"] = "0s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-interval", "0s"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + // test compatibility + re.Equal("2.0.0", leaderServer.GetClusterVersion().String()) + for _, store := range stores { + version := versioninfo.HotScheduleWithQuery + store.Version = versioninfo.MinSupportedVersion(version).String() + store.LastHeartbeat = time.Now().UnixNano() + pdTests.MustPutStore(re, cluster, store) + } + re.Equal("5.2.0", leaderServer.GetClusterVersion().String()) + // After upgrading, we can use query. 
+ expected1["write-leader-priorities"] = []any{"query", "byte"} + checkHotSchedulerConfig(expected1) + // cannot set qps as write-peer-priorities + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-peer-priorities", "query,byte"}, nil) + re.Contains(echo, "query is not allowed to be set in priorities for write-peer-priorities") + checkHotSchedulerConfig(expected1) + + // test remove and add + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-hot-region-scheduler"}, nil) + re.Contains(echo, "Success") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-hot-region-scheduler"}, nil) + re.Contains(echo, "Success") + + // test balance leader config + conf = make(map[string]any) + conf1 := make(map[string]any) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "show"}, &conf) + re.Equal(4., conf["batch"]) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "set", "batch", "3"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler"}, &conf1) + return conf1["batch"] == 3. + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-leader-scheduler"}, nil) + re.NotContains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-leader-scheduler"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-leader-scheduler"}, nil) + re.Contains(echo, "404") + re.Contains(echo, "PD:scheduler:ErrSchedulerNotFound]scheduler not found") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler"}, nil) + re.Contains(echo, "404") + re.Contains(echo, "scheduler not found") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-leader-scheduler"}, nil) + re.Contains(echo, "Success!") + + // test evict-slow-store && evict-slow-trend schedulers config + evictSlownessSchedulers := []string{"evict-slow-store-scheduler", "evict-slow-trend-scheduler"} + for _, schedulerName := range evictSlownessSchedulers { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", schedulerName}, nil) + if strings.Contains(echo, "Success!") { + re.Contains(echo, "Success!") + } else { + re.Contains(echo, "scheduler existed") + } + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return strings.Contains(echo, schedulerName) + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "set", "recovery-duration", "100"}, nil) + re.Contains(echo, "Success! Config updated.") + conf = make(map[string]any) + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "show"}, &conf) + return conf["recovery-duration"] == 100. 
+ }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", schedulerName}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return !strings.Contains(echo, schedulerName) + }) + } + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-slow-store-scheduler"}, nil) + re.Contains(echo, "Success!") + + // test shuffle hot region scheduler + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "shuffle-hot-region-scheduler"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return strings.Contains(echo, "shuffle-hot-region-scheduler") + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-hot-region-scheduler", "set", "limit", "127"}, nil) + re.Contains(echo, "Success!") + conf = make(map[string]any) + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-hot-region-scheduler", "show"}, &conf) + return conf["limit"] == 127. + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "shuffle-hot-region-scheduler"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return !strings.Contains(echo, "shuffle-hot-region-scheduler") + }) + + // test show scheduler with paused and disabled status. + checkSchedulerWithStatusCommand := func(status string, expected []string) { + testutil.Eventually(re, func() bool { + var schedulers []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show", "--status", status}, &schedulers) + return reflect.DeepEqual(expected, schedulers) + }) + } + + // test scatter range scheduler + for _, name := range []string{ + "test", "test#", "?test", + /* TODO: to handle case like "tes&t", we need to modify the server's JSON render to unescape the HTML characters */ + } { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "scatter-range", "--format=raw", "a", "b", name}, nil) + re.Contains(echo, "Success!") + schedulerName := fmt.Sprintf("scatter-range-%s", name) + // test show scheduler + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return strings.Contains(echo, schedulerName) + }) + // test remove scheduler + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", schedulerName}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return !strings.Contains(echo, schedulerName) + }) + } + + mustUsage([]string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler"}) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) + re.Contains(echo, "Success!") + checkSchedulerWithStatusCommand("paused", []string{ + "balance-leader-scheduler", + }) + result := make(map[string]any) + testutil.Eventually(re, func() bool { + mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "describe", "balance-leader-scheduler"}, &result) + return len(result) != 0 && result["status"] == "paused" && result["summary"] == "" + }, testutil.WithWaitFor(30*time.Second)) + + mustUsage([]string{"-u", pdAddr, "scheduler", "resume", 
"balance-leader-scheduler", "60"}) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler"}, nil) + re.Contains(echo, "Success!") + checkSchedulerWithStatusCommand("paused", []string{}) + + // set label scheduler to disabled manually. + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "label-scheduler"}, nil) + re.Contains(echo, "Success!") + cfg := leaderServer.GetServer().GetScheduleConfig() + origin := cfg.Schedulers + cfg.Schedulers = sc.SchedulerConfigs{{Type: "label", Disable: true}} + err := leaderServer.GetServer().SetScheduleConfig(*cfg) + re.NoError(err) + checkSchedulerWithStatusCommand("disabled", []string{"label-scheduler"}) + // reset Schedulers in ScheduleConfig + cfg.Schedulers = origin + err = leaderServer.GetServer().SetScheduleConfig(*cfg) + re.NoError(err) + checkSchedulerWithStatusCommand("disabled", []string{}) +} + +func (suite *schedulerTestSuite) TestSchedulerDiagnostic() { + suite.env.RunTestInTwoModes(suite.checkSchedulerDiagnostic) +} + +func (suite *schedulerTestSuite) checkSchedulerDiagnostic(cluster *pdTests.TestCluster) { + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + checkSchedulerDescribeCommand := func(schedulerName, expectedStatus, expectedSummary string) { + result := make(map[string]any) + testutil.Eventually(re, func() bool { + mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "describe", schedulerName}, &result) + return len(result) != 0 && expectedStatus == result["status"] && expectedSummary == result["summary"] + }, testutil.WithTickInterval(50*time.Millisecond)) + } + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 3, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 4, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + for _, store := range stores { + pdTests.MustPutStore(re, cluster, store) + } + + // note: because pdqsort is an unstable sort algorithm, set ApproximateSize for this region. + pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10)) + + echo := mustExec(re, cmd, []string{"-u", pdAddr, "config", "set", "enable-diagnostic", "true"}, nil) + re.Contains(echo, "Success!") + checkSchedulerDescribeCommand("balance-region-scheduler", "pending", "1 store(s) RegionNotMatchRule; ") + + // scheduler delete command + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) + re.Contains(echo, "Success!") + checkSchedulerDescribeCommand("balance-region-scheduler", "disabled", "") + + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler"}, nil) + re.Contains(echo, "Success!") + checkSchedulerDescribeCommand("balance-leader-scheduler", "normal", "") +} + +func mustExec(re *require.Assertions, cmd *cobra.Command, args []string, v any) string { + output, err := tests.ExecuteCommand(cmd, args...) 
+ re.NoError(err) + if v == nil { + return string(output) + } + re.NoError(json.Unmarshal(output, v), string(output)) + return "" +} + +func mightExec(re *require.Assertions, cmd *cobra.Command, args []string, v any) { + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + if v == nil { + return + } + json.Unmarshal(output, v) +} From ed125533af29f60bc127ffc79b639b5dae062533 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 7 Mar 2024 14:25:21 +0800 Subject: [PATCH 2/3] fix conflict Signed-off-by: lhy1024 --- pkg/schedule/schedulers/hot_region.go | 3 - pkg/schedule/schedulers/hot_region_test.go | 58 ------------------- pkg/schedule/schedulers/shuffle_hot_region.go | 5 -- 3 files changed, 66 deletions(-) diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index 3169ece2672..2a38ef399c8 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -262,8 +262,6 @@ func (h *hotScheduler) EncodeConfig() ([]byte, error) { return h.conf.EncodeConfig() } -<<<<<<< HEAD -======= func (h *hotScheduler) ReloadConfig() error { h.conf.Lock() defer h.conf.Unlock() @@ -304,7 +302,6 @@ func (h *hotScheduler) ReloadConfig() error { return nil } ->>>>>>> bbd3bdb56 (scheduler: make history-sample-interval and history-sample-duration configurable (#7878)) func (h *hotScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { h.conf.ServeHTTP(w, r) } diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index 6bd70537b45..d6337453a30 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -394,64 +394,6 @@ func TestSplitBucketsByLoad(t *testing.T) { } } -<<<<<<< HEAD -======= -func checkHotWriteRegionPlacement(re *require.Assertions, enablePlacementRules bool) { - cancel, _, tc, oc := prepareSchedulersTest() - defer cancel() - tc.SetEnableUseJointConsensus(true) - tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.ConfChangeV2)) - tc.SetEnablePlacementRules(enablePlacementRules) - labels := []string{"zone", "host"} - tc.SetMaxReplicasWithLabel(enablePlacementRules, 3, labels...) 
- hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) - re.NoError(err) - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) - tc.SetHotRegionCacheHitsThreshold(0) - - tc.AddLabelsStore(1, 2, map[string]string{"zone": "z1", "host": "h1"}) - tc.AddLabelsStore(2, 2, map[string]string{"zone": "z1", "host": "h2"}) - tc.AddLabelsStore(3, 2, map[string]string{"zone": "z2", "host": "h3"}) - tc.AddLabelsStore(4, 2, map[string]string{"zone": "z2", "host": "h4"}) - tc.AddLabelsStore(5, 2, map[string]string{"zone": "z2", "host": "h5"}) - tc.AddLabelsStore(6, 2, map[string]string{"zone": "z2", "host": "h6"}) - tc.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", ID: "leader", Role: placement.Leader, Count: 1, LabelConstraints: []placement.LabelConstraint{{Key: "zone", Op: "in", Values: []string{"z1"}}}, - }) - tc.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", ID: "voter", Role: placement.Follower, Count: 2, LabelConstraints: []placement.LabelConstraint{{Key: "zone", Op: "in", Values: []string{"z2"}}}, - }) - tc.RuleManager.DeleteRule("pd", "default") - - tc.UpdateStorageWrittenBytes(1, 10*units.MiB*utils.StoreHeartBeatReportInterval) - tc.UpdateStorageWrittenBytes(2, 0) - tc.UpdateStorageWrittenBytes(3, 6*units.MiB*utils.StoreHeartBeatReportInterval) - tc.UpdateStorageWrittenBytes(4, 3*units.MiB*utils.StoreHeartBeatReportInterval) - tc.UpdateStorageWrittenBytes(5, 3*units.MiB*utils.StoreHeartBeatReportInterval) - tc.UpdateStorageWrittenBytes(6, 6*units.MiB*utils.StoreHeartBeatReportInterval) - - // Region 1, 2 and 3 are hot regions. - addRegionInfo(tc, utils.Write, []testRegionInfo{ - {1, []uint64{1, 3, 5}, 512 * units.KiB, 0, 0}, - {2, []uint64{1, 4, 6}, 512 * units.KiB, 0, 0}, - {3, []uint64{1, 3, 6}, 512 * units.KiB, 0, 0}, - }) - ops, _ := hb.Schedule(tc, false) - re.NotEmpty(ops) - re.NotContains(ops[0].Step(1).String(), "transfer leader") - clearPendingInfluence(hb.(*hotScheduler)) - - tc.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", ID: "voter", Role: placement.Voter, Count: 2, LabelConstraints: []placement.LabelConstraint{{Key: "zone", Op: "in", Values: []string{"z2"}}}, - }) - tc.RuleManager.DeleteRule("pd", "follower") - ops, _ = hb.Schedule(tc, false) - re.NotEmpty(ops) - // TODO: fix the test - // re.NotContains(ops[0].Step(1).String(), "transfer leader") -} - ->>>>>>> bbd3bdb56 (scheduler: make history-sample-interval and history-sample-duration configurable (#7878)) func checkHotWriteRegionScheduleByteRateOnly(re *require.Assertions, enablePlacementRules bool) { cancel, opt, tc, oc := prepareSchedulersTest() defer cancel() diff --git a/pkg/schedule/schedulers/shuffle_hot_region.go b/pkg/schedule/schedulers/shuffle_hot_region.go index 405df3745c8..ac975eb4e39 100644 --- a/pkg/schedule/schedulers/shuffle_hot_region.go +++ b/pkg/schedule/schedulers/shuffle_hot_region.go @@ -57,13 +57,8 @@ type shuffleHotRegionScheduler struct { // newShuffleHotRegionScheduler creates an admin scheduler that random balance hot regions func newShuffleHotRegionScheduler(opController *operator.Controller, conf *shuffleHotRegionSchedulerConfig) Scheduler { -<<<<<<< HEAD - base := newBaseHotScheduler(opController) -======= base := newBaseHotScheduler(opController, statistics.DefaultHistorySampleDuration, statistics.DefaultHistorySampleInterval) - handler := newShuffleHotRegionHandler(conf) ->>>>>>> bbd3bdb56 (scheduler: make history-sample-interval and history-sample-duration configurable (#7878)) ret := &shuffleHotRegionScheduler{ baseHotScheduler: 
base, conf: conf, From 5268549d0e0637315889d8dc096e618d02eeaa04 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 7 Mar 2024 14:38:13 +0800 Subject: [PATCH 3/3] move test Signed-off-by: lhy1024 --- tests/pdctl/scheduler/scheduler_test.go | 26 + .../pd-ctl/tests/scheduler/scheduler_test.go | 742 ------------------ 2 files changed, 26 insertions(+), 742 deletions(-) delete mode 100644 tools/pd-ctl/tests/scheduler/scheduler_test.go diff --git a/tests/pdctl/scheduler/scheduler_test.go b/tests/pdctl/scheduler/scheduler_test.go index 27f5f5b5135..f7fc3dfc6a1 100644 --- a/tests/pdctl/scheduler/scheduler_test.go +++ b/tests/pdctl/scheduler/scheduler_test.go @@ -306,6 +306,8 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { "enable-for-tiflash": "true", "rank-formula-version": "v2", "split-thresholds": 0.2, + "history-sample-duration": "5m0s", + "history-sample-interval": "30s", } var conf map[string]interface{} mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "list"}, &conf) @@ -367,6 +369,30 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) re.Equal(expected1, conf1) + expected1["history-sample-duration"] = "1m0s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-duration", "1m"}, nil) + re.Contains(echo, "Success!") + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + + expected1["history-sample-interval"] = "1s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-interval", "1s"}, nil) + re.Contains(echo, "Success!") + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + + expected1["history-sample-duration"] = "0s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-duration", "0s"}, nil) + re.Contains(echo, "Success!") + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + + expected1["history-sample-interval"] = "0s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-interval", "0s"}, nil) + re.Contains(echo, "Success!") + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + // test compatibility re.Equal("2.0.0", leaderServer.GetClusterVersion().String()) for _, store := range stores { diff --git a/tools/pd-ctl/tests/scheduler/scheduler_test.go b/tools/pd-ctl/tests/scheduler/scheduler_test.go deleted file mode 100644 index b5a2128752b..00000000000 --- a/tools/pd-ctl/tests/scheduler/scheduler_test.go +++ /dev/null @@ -1,742 +0,0 @@ -// Copyright 2019 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package scheduler_test - -import ( - "encoding/json" - "fmt" - "reflect" - "strings" - "testing" - "time" - - "github.com/pingcap/failpoint" - "github.com/pingcap/kvproto/pkg/metapb" - "github.com/spf13/cobra" - "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" - "github.com/tikv/pd/pkg/core" - sc "github.com/tikv/pd/pkg/schedule/config" - "github.com/tikv/pd/pkg/slice" - "github.com/tikv/pd/pkg/utils/testutil" - "github.com/tikv/pd/pkg/versioninfo" - pdTests "github.com/tikv/pd/tests" - ctl "github.com/tikv/pd/tools/pd-ctl/pdctl" - "github.com/tikv/pd/tools/pd-ctl/tests" -) - -type schedulerTestSuite struct { - suite.Suite - env *pdTests.SchedulingTestEnvironment - defaultSchedulers []string -} - -func TestSchedulerTestSuite(t *testing.T) { - suite.Run(t, new(schedulerTestSuite)) -} - -func (suite *schedulerTestSuite) SetupSuite() { - re := suite.Require() - re.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/skipStoreConfigSync", `return(true)`)) - suite.defaultSchedulers = []string{ - "balance-leader-scheduler", - "balance-region-scheduler", - "balance-hot-region-scheduler", - "evict-slow-store-scheduler", - } -} - -func (suite *schedulerTestSuite) SetupTest() { - // use a new environment to avoid affecting other tests - suite.env = pdTests.NewSchedulingTestEnvironment(suite.T()) -} - -func (suite *schedulerTestSuite) TearDownSuite() { - re := suite.Require() - suite.env.Cleanup() - re.NoError(failpoint.Disable("github.com/tikv/pd/server/cluster/skipStoreConfigSync")) -} - -func (suite *schedulerTestSuite) TearDownTest() { - cleanFunc := func(cluster *pdTests.TestCluster) { - re := suite.Require() - pdAddr := cluster.GetConfig().GetClientURL() - cmd := ctl.GetRootCmd() - - var currentSchedulers []string - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, ¤tSchedulers) - for _, scheduler := range suite.defaultSchedulers { - if slice.NoneOf(currentSchedulers, func(i int) bool { - return currentSchedulers[i] == scheduler - }) { - echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", scheduler}, nil) - re.Contains(echo, "Success!") - } - } - for _, scheduler := range currentSchedulers { - if slice.NoneOf(suite.defaultSchedulers, func(i int) bool { - return suite.defaultSchedulers[i] == scheduler - }) { - echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", scheduler}, nil) - re.Contains(echo, "Success!") - } - } - } - suite.env.RunFuncInTwoModes(cleanFunc) - suite.env.Cleanup() -} - -func (suite *schedulerTestSuite) TestScheduler() { - suite.env.RunTestInTwoModes(suite.checkScheduler) -} - -func (suite *schedulerTestSuite) checkScheduler(cluster *pdTests.TestCluster) { - re := suite.Require() - pdAddr := cluster.GetConfig().GetClientURL() - cmd := ctl.GetRootCmd() - - stores := []*metapb.Store{ - { - Id: 1, - State: metapb.StoreState_Up, - LastHeartbeat: time.Now().UnixNano(), - }, - { - Id: 2, - State: metapb.StoreState_Up, - LastHeartbeat: time.Now().UnixNano(), - }, - { - Id: 3, - State: metapb.StoreState_Up, - LastHeartbeat: time.Now().UnixNano(), - }, - { - Id: 4, - State: 
metapb.StoreState_Up,
- LastHeartbeat: time.Now().UnixNano(),
- },
- }
-
- mustUsage := func(args []string) {
- output, err := tests.ExecuteCommand(cmd, args...)
- re.NoError(err)
- re.Contains(string(output), "Usage")
- }
-
- checkSchedulerCommand := func(args []string, expected map[string]bool) {
- if args != nil {
- echo := mustExec(re, cmd, args, nil)
- re.Contains(echo, "Success!")
- }
- testutil.Eventually(re, func() bool {
- var schedulers []string
- mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, &schedulers)
- if len(schedulers) != len(expected) {
- return false
- }
- for _, scheduler := range schedulers {
- if _, ok := expected[scheduler]; !ok {
- return false
- }
- }
- return true
- })
- }
-
- checkSchedulerConfigCommand := func(expectedConfig map[string]any, schedulerName string) {
- testutil.Eventually(re, func() bool {
- configInfo := make(map[string]any)
- mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName}, &configInfo)
- return reflect.DeepEqual(expectedConfig, configInfo)
- })
- }
-
- leaderServer := cluster.GetLeaderServer()
- for _, store := range stores {
- pdTests.MustPutStore(re, cluster, store)
- }
-
- // note: because pdqsort is an unstable sort algorithm, set ApproximateSize for this region.
- pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10))
-
- // scheduler show command
- expected := map[string]bool{
- "balance-region-scheduler": true,
- "balance-leader-scheduler": true,
- "balance-hot-region-scheduler": true,
- "evict-slow-store-scheduler": true,
- }
- checkSchedulerCommand(nil, expected)
-
- // scheduler delete command
- args := []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}
- expected = map[string]bool{
- "balance-leader-scheduler": true,
- "balance-hot-region-scheduler": true,
- "evict-slow-store-scheduler": true,
- }
- checkSchedulerCommand(args, expected)
-
- // avoid the influence of the scheduler order
- schedulers := []string{"evict-leader-scheduler", "grant-leader-scheduler", "evict-leader-scheduler", "grant-leader-scheduler"}
-
- checkStorePause := func(changedStores []uint64, schedulerName string) {
- status := func() string {
- switch schedulerName {
- case "evict-leader-scheduler":
- return "paused"
- case "grant-leader-scheduler":
- return "resumed"
- default:
- re.Fail(fmt.Sprintf("unknown scheduler %s", schedulerName))
- return ""
- }
- }()
- for _, store := range stores {
- isStorePaused := !cluster.GetLeaderServer().GetRaftCluster().GetStore(store.GetId()).AllowLeaderTransfer()
- if slice.AnyOf(changedStores, func(i int) bool {
- return store.GetId() == changedStores[i]
- }) {
- re.True(isStorePaused,
- fmt.Sprintf("store %d should be %s with %s", store.GetId(), status, schedulerName))
- } else {
- re.False(isStorePaused,
- fmt.Sprintf("store %d should not be %s with %s", store.GetId(), status, schedulerName))
- }
- if sche := cluster.GetSchedulingPrimaryServer(); sche != nil {
- re.Equal(isStorePaused, !sche.GetCluster().GetStore(store.GetId()).AllowLeaderTransfer())
- }
- }
- }
-
- for idx := range schedulers {
- checkStorePause([]uint64{}, schedulers[idx])
- // scheduler add command
- args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "2"}
- expected = map[string]bool{
- "balance-leader-scheduler": true,
- "balance-hot-region-scheduler": true,
- schedulers[idx]: true,
- "evict-slow-store-scheduler": true,
- }
- checkSchedulerCommand(args, expected)
-
- // scheduler config show command
- expectedConfig :=
make(map[string]any) - expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}} - checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) - checkStorePause([]uint64{2}, schedulers[idx]) - - // scheduler config update command - args = []string{"-u", pdAddr, "scheduler", "config", schedulers[idx], "add-store", "3"} - expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - schedulers[idx]: true, - "evict-slow-store-scheduler": true, - } - - // check update success - checkSchedulerCommand(args, expected) - expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}, "3": []any{map[string]any{"end-key": "", "start-key": ""}}} - checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) - checkStorePause([]uint64{2, 3}, schedulers[idx]) - - // scheduler delete command - args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx]} - expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "evict-slow-store-scheduler": true, - } - checkSchedulerCommand(args, expected) - checkStorePause([]uint64{}, schedulers[idx]) - - // scheduler add command - args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "2"} - expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - schedulers[idx]: true, - "evict-slow-store-scheduler": true, - } - checkSchedulerCommand(args, expected) - checkStorePause([]uint64{2}, schedulers[idx]) - - // scheduler add command twice - args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "4"} - expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - schedulers[idx]: true, - "evict-slow-store-scheduler": true, - } - checkSchedulerCommand(args, expected) - - // check add success - expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}, "4": []any{map[string]any{"end-key": "", "start-key": ""}}} - checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) - checkStorePause([]uint64{2, 4}, schedulers[idx]) - - // scheduler remove command [old] - args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx] + "-4"} - expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - schedulers[idx]: true, - "evict-slow-store-scheduler": true, - } - checkSchedulerCommand(args, expected) - - // check remove success - expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}} - checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) - checkStorePause([]uint64{2}, schedulers[idx]) - - // scheduler remove command, when remove the last store, it should remove whole scheduler - args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx] + "-2"} - expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "evict-slow-store-scheduler": true, - } - checkSchedulerCommand(args, expected) - checkStorePause([]uint64{}, schedulers[idx]) - } - - // test shuffle region config - checkSchedulerCommand([]string{"-u", pdAddr, "scheduler", "add", "shuffle-region-scheduler"}, map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "shuffle-region-scheduler": true, - "evict-slow-store-scheduler": true, - }) - var roles 
[]string - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "show-roles"}, &roles) - re.Equal([]string{"leader", "follower", "learner"}, roles) - echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "set-roles", "learner"}, nil) - re.Contains(echo, "Success!") - testutil.Eventually(re, func() bool { - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "show-roles"}, &roles) - return reflect.DeepEqual([]string{"learner"}, roles) - }) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler"}, &roles) - re.Equal([]string{"learner"}, roles) - - // test grant hot region scheduler config - checkSchedulerCommand([]string{"-u", pdAddr, "scheduler", "add", "grant-hot-region-scheduler", "1", "1,2,3"}, map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "shuffle-region-scheduler": true, - "grant-hot-region-scheduler": true, - "evict-slow-store-scheduler": true, - }) - var conf3 map[string]any - expected3 := map[string]any{ - "store-id": []any{float64(1), float64(2), float64(3)}, - "store-leader-id": float64(1), - } - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) - re.Equal(expected3, conf3) - - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler", "set", "2", "1,2,3"}, nil) - re.Contains(echo, "Success!") - expected3["store-leader-id"] = float64(2) - testutil.Eventually(re, func() bool { - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) - return reflect.DeepEqual(expected3, conf3) - }) - - // test remove and add scheduler - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-region-scheduler"}, nil) - re.Contains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) - re.Contains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) - re.NotContains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-region-scheduler"}, nil) - re.Contains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "1"}, nil) - re.Contains(echo, "Success! The scheduler is created.") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "2"}, nil) - re.Contains(echo, "Success! The scheduler has been applied to the store.") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil) - re.Contains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-2"}, nil) - re.Contains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil) - re.Contains(echo, "404") - testutil.Eventually(re, func() bool { // wait for removed scheduler to be synced to scheduling server. 
- echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "evict-leader-scheduler"}, nil) - return strings.Contains(echo, "[404] scheduler not found") - }) - - // test hot region config - expected1 := map[string]any{ - "min-hot-byte-rate": float64(100), - "min-hot-key-rate": float64(10), - "min-hot-query-rate": float64(10), - "max-zombie-rounds": float64(3), - "max-peer-number": float64(1000), - "byte-rate-rank-step-ratio": 0.05, - "key-rate-rank-step-ratio": 0.05, - "query-rate-rank-step-ratio": 0.05, - "count-rank-step-ratio": 0.01, - "great-dec-ratio": 0.95, - "minor-dec-ratio": 0.99, - "src-tolerance-ratio": 1.05, - "dst-tolerance-ratio": 1.05, - "read-priorities": []any{"byte", "key"}, - "write-leader-priorities": []any{"key", "byte"}, - "write-peer-priorities": []any{"byte", "key"}, - "strict-picking-store": "true", - "enable-for-tiflash": "true", - "rank-formula-version": "v2", - "split-thresholds": 0.2, - "history-sample-duration": "5m0s", - "history-sample-interval": "30s", - } - checkHotSchedulerConfig := func(expect map[string]any) { - testutil.Eventually(re, func() bool { - var conf1 map[string]any - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - return reflect.DeepEqual(expect, conf1) - }) - } - - var conf map[string]any - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "list"}, &conf) - re.Equal(expected1, conf) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "show"}, &conf) - re.Equal(expected1, conf) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "src-tolerance-ratio", "1.02"}, nil) - re.Contains(echo, "Success!") - expected1["src-tolerance-ratio"] = 1.02 - checkHotSchedulerConfig(expected1) - - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,key"}, nil) - re.Contains(echo, "Success!") - expected1["read-priorities"] = []any{"byte", "key"} - checkHotSchedulerConfig(expected1) - - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key"}, nil) - re.Contains(echo, "Failed!") - checkHotSchedulerConfig(expected1) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,byte"}, nil) - re.Contains(echo, "Success!") - expected1["read-priorities"] = []any{"key", "byte"} - checkHotSchedulerConfig(expected1) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "foo,bar"}, nil) - re.Contains(echo, "Failed!") - checkHotSchedulerConfig(expected1) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", ""}, nil) - re.Contains(echo, "Failed!") - checkHotSchedulerConfig(expected1) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,key"}, nil) - re.Contains(echo, "Failed!") - checkHotSchedulerConfig(expected1) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,byte"}, nil) - re.Contains(echo, "Failed!") - checkHotSchedulerConfig(expected1) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", 
"config", "balance-hot-region-scheduler", "set", "read-priorities", "key,key,byte"}, nil) - re.Contains(echo, "Failed!") - checkHotSchedulerConfig(expected1) - - // write-priorities is divided into write-leader-priorities and write-peer-priorities - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-priorities", "key,byte"}, nil) - re.Contains(echo, "Failed!") - re.Contains(echo, "Config item is not found.") - checkHotSchedulerConfig(expected1) - - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v0"}, nil) - re.Contains(echo, "Failed!") - checkHotSchedulerConfig(expected1) - expected1["rank-formula-version"] = "v2" - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v2"}, nil) - re.Contains(echo, "Success!") - checkHotSchedulerConfig(expected1) - expected1["rank-formula-version"] = "v1" - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v1"}, nil) - re.Contains(echo, "Success!") - checkHotSchedulerConfig(expected1) - - expected1["forbid-rw-type"] = "read" - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "forbid-rw-type", "read"}, nil) - re.Contains(echo, "Success!") - checkHotSchedulerConfig(expected1) - - expected1["history-sample-duration"] = "1m0s" - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-duration", "1m"}, nil) - re.Contains(echo, "Success!") - checkHotSchedulerConfig(expected1) - - expected1["history-sample-interval"] = "1s" - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-interval", "1s"}, nil) - re.Contains(echo, "Success!") - checkHotSchedulerConfig(expected1) - - expected1["history-sample-duration"] = "0s" - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-duration", "0s"}, nil) - re.Contains(echo, "Success!") - checkHotSchedulerConfig(expected1) - - expected1["history-sample-interval"] = "0s" - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-interval", "0s"}, nil) - re.Contains(echo, "Success!") - checkHotSchedulerConfig(expected1) - - // test compatibility - re.Equal("2.0.0", leaderServer.GetClusterVersion().String()) - for _, store := range stores { - version := versioninfo.HotScheduleWithQuery - store.Version = versioninfo.MinSupportedVersion(version).String() - store.LastHeartbeat = time.Now().UnixNano() - pdTests.MustPutStore(re, cluster, store) - } - re.Equal("5.2.0", leaderServer.GetClusterVersion().String()) - // After upgrading, we can use query. 
- expected1["write-leader-priorities"] = []any{"query", "byte"} - checkHotSchedulerConfig(expected1) - // cannot set qps as write-peer-priorities - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-peer-priorities", "query,byte"}, nil) - re.Contains(echo, "query is not allowed to be set in priorities for write-peer-priorities") - checkHotSchedulerConfig(expected1) - - // test remove and add - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-hot-region-scheduler"}, nil) - re.Contains(echo, "Success") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-hot-region-scheduler"}, nil) - re.Contains(echo, "Success") - - // test balance leader config - conf = make(map[string]any) - conf1 := make(map[string]any) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "show"}, &conf) - re.Equal(4., conf["batch"]) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "set", "batch", "3"}, nil) - re.Contains(echo, "Success!") - testutil.Eventually(re, func() bool { - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler"}, &conf1) - return conf1["batch"] == 3. - }) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-leader-scheduler"}, nil) - re.NotContains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-leader-scheduler"}, nil) - re.Contains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-leader-scheduler"}, nil) - re.Contains(echo, "404") - re.Contains(echo, "PD:scheduler:ErrSchedulerNotFound]scheduler not found") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler"}, nil) - re.Contains(echo, "404") - re.Contains(echo, "scheduler not found") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-leader-scheduler"}, nil) - re.Contains(echo, "Success!") - - // test evict-slow-store && evict-slow-trend schedulers config - evictSlownessSchedulers := []string{"evict-slow-store-scheduler", "evict-slow-trend-scheduler"} - for _, schedulerName := range evictSlownessSchedulers { - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", schedulerName}, nil) - if strings.Contains(echo, "Success!") { - re.Contains(echo, "Success!") - } else { - re.Contains(echo, "scheduler existed") - } - testutil.Eventually(re, func() bool { - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) - return strings.Contains(echo, schedulerName) - }) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "set", "recovery-duration", "100"}, nil) - re.Contains(echo, "Success! Config updated.") - conf = make(map[string]any) - testutil.Eventually(re, func() bool { - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "show"}, &conf) - return conf["recovery-duration"] == 100. 
- }) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", schedulerName}, nil) - re.Contains(echo, "Success!") - testutil.Eventually(re, func() bool { - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) - return !strings.Contains(echo, schedulerName) - }) - } - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-slow-store-scheduler"}, nil) - re.Contains(echo, "Success!") - - // test shuffle hot region scheduler - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "shuffle-hot-region-scheduler"}, nil) - re.Contains(echo, "Success!") - testutil.Eventually(re, func() bool { - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) - return strings.Contains(echo, "shuffle-hot-region-scheduler") - }) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-hot-region-scheduler", "set", "limit", "127"}, nil) - re.Contains(echo, "Success!") - conf = make(map[string]any) - testutil.Eventually(re, func() bool { - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-hot-region-scheduler", "show"}, &conf) - return conf["limit"] == 127. - }) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "shuffle-hot-region-scheduler"}, nil) - re.Contains(echo, "Success!") - testutil.Eventually(re, func() bool { - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) - return !strings.Contains(echo, "shuffle-hot-region-scheduler") - }) - - // test show scheduler with paused and disabled status. - checkSchedulerWithStatusCommand := func(status string, expected []string) { - testutil.Eventually(re, func() bool { - var schedulers []string - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show", "--status", status}, &schedulers) - return reflect.DeepEqual(expected, schedulers) - }) - } - - // test scatter range scheduler - for _, name := range []string{ - "test", "test#", "?test", - /* TODO: to handle case like "tes&t", we need to modify the server's JSON render to unescape the HTML characters */ - } { - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "scatter-range", "--format=raw", "a", "b", name}, nil) - re.Contains(echo, "Success!") - schedulerName := fmt.Sprintf("scatter-range-%s", name) - // test show scheduler - testutil.Eventually(re, func() bool { - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) - return strings.Contains(echo, schedulerName) - }) - // test remove scheduler - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", schedulerName}, nil) - re.Contains(echo, "Success!") - testutil.Eventually(re, func() bool { - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) - return !strings.Contains(echo, schedulerName) - }) - } - - mustUsage([]string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler"}) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) - re.Contains(echo, "Success!") - checkSchedulerWithStatusCommand("paused", []string{ - "balance-leader-scheduler", - }) - result := make(map[string]any) - testutil.Eventually(re, func() bool { - mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "describe", "balance-leader-scheduler"}, &result) - return len(result) != 0 && result["status"] == "paused" && result["summary"] == "" - }, testutil.WithWaitFor(30*time.Second)) - - mustUsage([]string{"-u", pdAddr, "scheduler", "resume", 
"balance-leader-scheduler", "60"}) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler"}, nil) - re.Contains(echo, "Success!") - checkSchedulerWithStatusCommand("paused", []string{}) - - // set label scheduler to disabled manually. - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "label-scheduler"}, nil) - re.Contains(echo, "Success!") - cfg := leaderServer.GetServer().GetScheduleConfig() - origin := cfg.Schedulers - cfg.Schedulers = sc.SchedulerConfigs{{Type: "label", Disable: true}} - err := leaderServer.GetServer().SetScheduleConfig(*cfg) - re.NoError(err) - checkSchedulerWithStatusCommand("disabled", []string{"label-scheduler"}) - // reset Schedulers in ScheduleConfig - cfg.Schedulers = origin - err = leaderServer.GetServer().SetScheduleConfig(*cfg) - re.NoError(err) - checkSchedulerWithStatusCommand("disabled", []string{}) -} - -func (suite *schedulerTestSuite) TestSchedulerDiagnostic() { - suite.env.RunTestInTwoModes(suite.checkSchedulerDiagnostic) -} - -func (suite *schedulerTestSuite) checkSchedulerDiagnostic(cluster *pdTests.TestCluster) { - re := suite.Require() - pdAddr := cluster.GetConfig().GetClientURL() - cmd := ctl.GetRootCmd() - - checkSchedulerDescribeCommand := func(schedulerName, expectedStatus, expectedSummary string) { - result := make(map[string]any) - testutil.Eventually(re, func() bool { - mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "describe", schedulerName}, &result) - return len(result) != 0 && expectedStatus == result["status"] && expectedSummary == result["summary"] - }, testutil.WithTickInterval(50*time.Millisecond)) - } - - stores := []*metapb.Store{ - { - Id: 1, - State: metapb.StoreState_Up, - LastHeartbeat: time.Now().UnixNano(), - }, - { - Id: 2, - State: metapb.StoreState_Up, - LastHeartbeat: time.Now().UnixNano(), - }, - { - Id: 3, - State: metapb.StoreState_Up, - LastHeartbeat: time.Now().UnixNano(), - }, - { - Id: 4, - State: metapb.StoreState_Up, - LastHeartbeat: time.Now().UnixNano(), - }, - } - for _, store := range stores { - pdTests.MustPutStore(re, cluster, store) - } - - // note: because pdqsort is an unstable sort algorithm, set ApproximateSize for this region. - pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10)) - - echo := mustExec(re, cmd, []string{"-u", pdAddr, "config", "set", "enable-diagnostic", "true"}, nil) - re.Contains(echo, "Success!") - checkSchedulerDescribeCommand("balance-region-scheduler", "pending", "1 store(s) RegionNotMatchRule; ") - - // scheduler delete command - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) - re.Contains(echo, "Success!") - checkSchedulerDescribeCommand("balance-region-scheduler", "disabled", "") - - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) - re.Contains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler"}, nil) - re.Contains(echo, "Success!") - checkSchedulerDescribeCommand("balance-leader-scheduler", "normal", "") -} - -func mustExec(re *require.Assertions, cmd *cobra.Command, args []string, v any) string { - output, err := tests.ExecuteCommand(cmd, args...) 
- re.NoError(err) - if v == nil { - return string(output) - } - re.NoError(json.Unmarshal(output, v), string(output)) - return "" -} - -func mightExec(re *require.Assertions, cmd *cobra.Command, args []string, v any) { - output, err := tests.ExecuteCommand(cmd, args...) - re.NoError(err) - if v == nil { - return - } - json.Unmarshal(output, v) -}