From 21a333c435ffb7acfa0342beb2b9e7854f28c3dd Mon Sep 17 00:00:00 2001
From: buffer <1045931706@qq.com>
Date: Thu, 30 Mar 2023 18:30:55 +0800
Subject: [PATCH 01/16] filter: add new filter for send snapshot (#6146)

close tikv/pd#6145

1. Create a new filter to pick the store whose snapshot sender is available.
2. The store limit level is the same as the operator level; the level is defined in this [pr](https://github.com/tikv/pd/pull/5575).

Signed-off-by: bufferflies <1045931706@qq.com>
Co-authored-by: Ti Chi Robot
---
 pkg/core/store.go                             |  4 +--
 pkg/core/store_option.go                      |  9 ++++++
 pkg/schedule/checker/replica_strategy.go      | 15 ++++++++--
 pkg/schedule/filter/filters.go                | 12 ++++++--
 pkg/schedule/filter/filters_test.go           | 15 ++++++++++
 pkg/schedule/filter/region_filters.go         | 28 +++++++++++++++++++
 pkg/schedule/filter/status.go                 | 13 +++++----
 pkg/schedule/plan/status.go                   |  2 ++
 pkg/schedule/region_scatterer.go              |  2 +-
 pkg/schedule/schedulers/balance_leader.go     |  2 +-
 pkg/schedule/schedulers/balance_region.go     |  5 ++--
 pkg/schedule/schedulers/balance_witness.go    |  2 +-
 pkg/schedule/schedulers/evict_leader.go       |  2 +-
 pkg/schedule/schedulers/grant_hot_region.go   |  4 +--
 pkg/schedule/schedulers/hot_region.go         | 20 ++++++++-----
 pkg/schedule/schedulers/label.go              |  3 +-
 pkg/schedule/schedulers/random_merge.go       |  2 +-
 pkg/schedule/schedulers/shuffle_hot_region.go |  2 +-
 pkg/schedule/schedulers/shuffle_leader.go     |  2 +-
 pkg/schedule/schedulers/shuffle_region.go     |  2 +-
 .../schedulers/transfer_witness_leader.go     |  2 +-
 plugin/scheduler_example/evict_leader.go      |  2 +-
 22 files changed, 115 insertions(+), 35 deletions(-)

diff --git a/pkg/core/store.go b/pkg/core/store.go
index 18e857f5d08d..c1a6410b0bb7 100644
--- a/pkg/core/store.go
+++ b/pkg/core/store.go
@@ -137,10 +137,10 @@ func (s *StoreInfo) IsEvictedAsSlowTrend() bool {
 }
 
 // IsAvailable returns if the store bucket of limitation is available
-func (s *StoreInfo) IsAvailable(limitType storelimit.Type) bool {
+func (s *StoreInfo) IsAvailable(limitType storelimit.Type, level constant.PriorityLevel) bool {
 	s.mu.RLock()
 	defer s.mu.RUnlock()
-	return s.limiter.Available(storelimit.RegionInfluence[limitType], limitType, constant.Low)
+	return s.limiter.Available(storelimit.RegionInfluence[limitType], limitType, level)
 }
 
 // IsTiFlash returns true if the store is tiflash.
diff --git a/pkg/core/store_option.go b/pkg/core/store_option.go
index e663fb5d37e9..0cb9628ff982 100644
--- a/pkg/core/store_option.go
+++ b/pkg/core/store_option.go
@@ -259,6 +259,15 @@ func ResetStoreLimit(limitType storelimit.Type, ratePerSec ...float64) StoreCrea
 	}
 }
 
+// SetStoreLimit sets the limiter for a store; it may switch the store limit mode.
+func SetStoreLimit(limit storelimit.StoreLimit) StoreCreateOption {
+	return func(store *StoreInfo) {
+		store.mu.Lock()
+		defer store.mu.Unlock()
+		store.limiter = limit
+	}
+}
+
 // SetLastAwakenTime sets last awaken time for the store.
func SetLastAwakenTime(lastAwaken time.Time) StoreCreateOption { return func(store *StoreInfo) { diff --git a/pkg/schedule/checker/replica_strategy.go b/pkg/schedule/checker/replica_strategy.go index 8d3353dbe0cd..a8882c83a36f 100644 --- a/pkg/schedule/checker/replica_strategy.go +++ b/pkg/schedule/checker/replica_strategy.go @@ -17,6 +17,7 @@ package checker import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/core/constant" "github.com/tikv/pd/pkg/schedule" "github.com/tikv/pd/pkg/schedule/filter" "go.uber.org/zap" @@ -53,11 +54,15 @@ func (s *ReplicaStrategy) SelectStoreToAdd(coLocationStores []*core.StoreInfo, e // // The reason for it is to prevent the non-optimal replica placement due // to the short-term state, resulting in redundant scheduling. + level := constant.High + if s.fastFailover { + level = constant.Urgent + } filters := []filter.Filter{ filter.NewExcludedFilter(s.checkerName, nil, s.region.GetStoreIDs()), filter.NewStorageThresholdFilter(s.checkerName), filter.NewSpecialUseFilter(s.checkerName), - &filter.StoreStateFilter{ActionScope: s.checkerName, MoveRegion: true, AllowTemporaryStates: true}, + &filter.StoreStateFilter{ActionScope: s.checkerName, MoveRegion: true, AllowTemporaryStates: true, OperatorLevel: level}, } if len(s.locationLabels) > 0 && s.isolationLevel != "" { filters = append(filters, filter.NewIsolationFilter(s.checkerName, s.isolationLevel, s.locationLabels, coLocationStores)) @@ -70,7 +75,7 @@ func (s *ReplicaStrategy) SelectStoreToAdd(coLocationStores []*core.StoreInfo, e } isolationComparer := filter.IsolationComparer(s.locationLabels, coLocationStores) - strictStateFilter := &filter.StoreStateFilter{ActionScope: s.checkerName, MoveRegion: true, AllowFastFailover: s.fastFailover} + strictStateFilter := &filter.StoreStateFilter{ActionScope: s.checkerName, MoveRegion: true, AllowFastFailover: s.fastFailover, OperatorLevel: level} targetCandidate := filter.NewCandidates(s.cluster.GetStores()). FilterTarget(s.cluster.GetOpts(), nil, nil, filters...). KeepTheTopStores(isolationComparer, false) // greater isolation score is better @@ -123,8 +128,12 @@ func (s *ReplicaStrategy) swapStoreToFirst(stores []*core.StoreInfo, id uint64) // SelectStoreToRemove returns the best option to remove from the region. func (s *ReplicaStrategy) SelectStoreToRemove(coLocationStores []*core.StoreInfo) uint64 { isolationComparer := filter.IsolationComparer(s.locationLabels, coLocationStores) + level := constant.High + if s.fastFailover { + level = constant.Urgent + } source := filter.NewCandidates(coLocationStores). - FilterSource(s.cluster.GetOpts(), nil, nil, &filter.StoreStateFilter{ActionScope: replicaCheckerName, MoveRegion: true}). + FilterSource(s.cluster.GetOpts(), nil, nil, &filter.StoreStateFilter{ActionScope: replicaCheckerName, MoveRegion: true, OperatorLevel: level}). KeepTheTopStores(isolationComparer, true). 
 		PickTheTopStore(filter.RegionScoreComparer(s.cluster.GetOpts()), false)
 	if source == nil {
diff --git a/pkg/schedule/filter/filters.go b/pkg/schedule/filter/filters.go
index 9e239905c7b0..ca8d96a9d32d 100644
--- a/pkg/schedule/filter/filters.go
+++ b/pkg/schedule/filter/filters.go
@@ -20,6 +20,7 @@ import (
 	"github.com/pingcap/kvproto/pkg/metapb"
 	"github.com/pingcap/log"
 	"github.com/tikv/pd/pkg/core"
+	"github.com/tikv/pd/pkg/core/constant"
 	"github.com/tikv/pd/pkg/core/storelimit"
 	"github.com/tikv/pd/pkg/schedule/config"
 	"github.com/tikv/pd/pkg/schedule/placement"
@@ -325,6 +326,12 @@ type StoreStateFilter struct {
 	AllowFastFailover bool
 	// Set true if allows temporary states.
 	AllowTemporaryStates bool
+	// Set the priority level of the filter; it should be the same as the operator level.
+	// In the checker, the priority level can be higher than the operator level,
+	// so the operator controller should check it again using the actual operator level.
+	// If that check fails, the operator is put back into the waiting queue until the limit is available.
+	// The scheduler, however, should keep it the same as the operator level.
+	OperatorLevel constant.PriorityLevel
 	// Reason is used to distinguish the reason of store state filter
 	Reason filterType
 }
@@ -417,7 +424,7 @@ func (f *StoreStateFilter) isBusy(_ config.Config, store *core.StoreInfo) *plan.
 }
 
 func (f *StoreStateFilter) exceedRemoveLimit(_ config.Config, store *core.StoreInfo) *plan.Status {
-	if !f.AllowTemporaryStates && !store.IsAvailable(storelimit.RemovePeer) {
+	if !f.AllowTemporaryStates && !store.IsAvailable(storelimit.RemovePeer, f.OperatorLevel) {
 		f.Reason = storeStateExceedRemoveLimit
 		return statusStoreRemoveLimit
 	}
@@ -426,7 +433,7 @@ func (f *StoreStateFilter) exceedAddLimit(_ config.Config, store *core.StoreI
 }
 
 func (f *StoreStateFilter) exceedAddLimit(_ config.Config, store *core.StoreInfo) *plan.Status {
-	if !f.AllowTemporaryStates && !store.IsAvailable(storelimit.AddPeer) {
+	if !f.AllowTemporaryStates && !store.IsAvailable(storelimit.AddPeer, f.OperatorLevel) {
 		f.Reason = storeStateExceedAddLimit
 		return statusStoreAddLimit
 	}
@@ -531,6 +538,7 @@ func (f *StoreStateFilter) Source(conf config.Config, store *core.StoreInfo) (st
 			return
 		}
 	}
+	return statusOK
 }
diff --git a/pkg/schedule/filter/filters_test.go b/pkg/schedule/filter/filters_test.go
index 75b6b95e8193..bdf88177c420 100644
--- a/pkg/schedule/filter/filters_test.go
+++ b/pkg/schedule/filter/filters_test.go
@@ -23,6 +23,8 @@ import (
 	"github.com/pingcap/kvproto/pkg/pdpb"
 	"github.com/stretchr/testify/require"
 	"github.com/tikv/pd/pkg/core"
+	"github.com/tikv/pd/pkg/core/constant"
+	"github.com/tikv/pd/pkg/core/storelimit"
 	"github.com/tikv/pd/pkg/mock/mockcluster"
 	"github.com/tikv/pd/pkg/mock/mockconfig"
 	"github.com/tikv/pd/pkg/schedule/placement"
@@ -148,6 +150,19 @@ func TestRuleFitFilter(t *testing.T) {
 	re.False(leaderFilter.Target(testCluster.GetOpts(), testCluster.GetStore(6)).IsOK())
 }
 
+func TestSendStateFilter(t *testing.T) {
+	re := require.New(t)
+	store := core.NewStoreInfoWithLabel(1, map[string]string{}).Clone(core.SetStoreLimit(storelimit.NewSlidingWindows(1000)))
+	region := core.NewTestRegionInfo(1, 1, []byte(""), []byte(""))
+
+	snapshotFilter := NewSnapshotSendFilter([]*core.StoreInfo{store}, constant.Medium)
+	re.NotNil(SelectOneRegion([]*core.RegionInfo{region}, nil, snapshotFilter))
+	re.True(store.GetStoreLimit().Take(1000, storelimit.SendSnapshot, constant.Medium))
+	re.True(store.GetStoreLimit().Take(1000, storelimit.SendSnapshot, constant.Medium))
+	snapshotFilter = NewSnapshotSendFilter([]*core.StoreInfo{store}, constant.Medium)
+	re.Nil(SelectOneRegion([]*core.RegionInfo{region}, nil, snapshotFilter))
+}
+
 func TestStoreStateFilter(t *testing.T) {
 	re := require.New(t)
 	filters := []Filter{
diff --git a/pkg/schedule/filter/region_filters.go b/pkg/schedule/filter/region_filters.go
index 2a390d4d9c04..dce0aeaa8163 100644
--- a/pkg/schedule/filter/region_filters.go
+++ b/pkg/schedule/filter/region_filters.go
@@ -16,6 +16,8 @@ package filter
 
 import (
 	"github.com/tikv/pd/pkg/core"
+	"github.com/tikv/pd/pkg/core/constant"
+	"github.com/tikv/pd/pkg/core/storelimit"
 	"github.com/tikv/pd/pkg/schedule/placement"
 	"github.com/tikv/pd/pkg/schedule/plan"
 	"github.com/tikv/pd/pkg/slice"
@@ -164,3 +166,29 @@ func (f *regionWitnessFilter) Select(region *core.RegionInfo) *plan.Status {
 	}
 	return statusOK
 }
+
+// SnapshotSenderFilter filters out regions whose leader store has reached the snapshot-sending limit.
+type SnapshotSenderFilter struct {
+	senders map[uint64]struct{}
+}
+
+// NewSnapshotSendFilter creates a RegionFilter that selects regions whose leader store can still send snapshots.
+// The level should be set to the same priority level as the operator.
+func NewSnapshotSendFilter(stores []*core.StoreInfo, level constant.PriorityLevel) RegionFilter {
+	senders := make(map[uint64]struct{})
+	for _, store := range stores {
+		if store.IsAvailable(storelimit.SendSnapshot, level) && !store.IsBusy() {
+			senders[store.GetID()] = struct{}{}
+		}
+	}
+	return &SnapshotSenderFilter{senders: senders}
+}
+
+// Select returns ok if the region leader is in the senders.
+func (f *SnapshotSenderFilter) Select(region *core.RegionInfo) *plan.Status {
+	leaderStoreID := region.GetLeader().GetStoreId()
+	if _, ok := f.senders[leaderStoreID]; ok {
+		return statusOK
+	}
+	return statusRegionLeaderSendSnapshotThrottled
+}
diff --git a/pkg/schedule/filter/status.go b/pkg/schedule/filter/status.go
index 95a596cefa66..930c59e3ba87 100644
--- a/pkg/schedule/filter/status.go
+++ b/pkg/schedule/filter/status.go
@@ -43,10 +43,11 @@ var (
 	statusStoreNotMatchIsolation = plan.NewStatus(plan.StatusStoreNotMatchIsolation)
 
 	// region filter status
-	statusRegionPendingPeer   = plan.NewStatus(plan.StatusRegionUnhealthy)
-	statusRegionDownPeer      = plan.NewStatus(plan.StatusRegionUnhealthy)
-	statusRegionEmpty         = plan.NewStatus(plan.StatusRegionEmpty)
-	statusRegionNotMatchRule  = plan.NewStatus(plan.StatusRegionNotMatchRule)
-	statusRegionNotReplicated = plan.NewStatus(plan.StatusRegionNotReplicated)
-	statusRegionWitnessPeer   = plan.NewStatus(plan.StatusRegionNotMatchRule)
+	statusRegionPendingPeer                 = plan.NewStatus(plan.StatusRegionUnhealthy)
+	statusRegionDownPeer                    = plan.NewStatus(plan.StatusRegionUnhealthy)
+	statusRegionEmpty                       = plan.NewStatus(plan.StatusRegionEmpty)
+	statusRegionNotMatchRule                = plan.NewStatus(plan.StatusRegionNotMatchRule)
+	statusRegionNotReplicated               = plan.NewStatus(plan.StatusRegionNotReplicated)
+	statusRegionWitnessPeer                 = plan.NewStatus(plan.StatusRegionNotMatchRule)
+	statusRegionLeaderSendSnapshotThrottled = plan.NewStatus(plan.StatusRegionSendSnapshotThrottled)
 )
diff --git a/pkg/schedule/plan/status.go b/pkg/schedule/plan/status.go
index abd8c1256c5c..1170e92b32cb 100644
--- a/pkg/schedule/plan/status.go
+++ b/pkg/schedule/plan/status.go
@@ -92,6 +92,8 @@ const (
 	StatusNoTargetRegion
 	// StatusRegionLabelReject represents the plan conflicts with region label.
 	StatusRegionLabelReject
+	// StatusRegionSendSnapshotThrottled represents that the plan is throttled by the snapshot-sending limit.
+ StatusRegionSendSnapshotThrottled ) const ( diff --git a/pkg/schedule/region_scatterer.go b/pkg/schedule/region_scatterer.go index 877dc8b2af41..d5e53965ad42 100644 --- a/pkg/schedule/region_scatterer.go +++ b/pkg/schedule/region_scatterer.go @@ -161,7 +161,7 @@ type engineContext struct { func newEngineContext(ctx context.Context, filterFuncs ...filterFunc) engineContext { filterFuncs = append(filterFuncs, func() filter.Filter { - return &filter.StoreStateFilter{ActionScope: regionScatterName, MoveRegion: true, ScatterRegion: true} + return &filter.StoreStateFilter{ActionScope: regionScatterName, MoveRegion: true, ScatterRegion: true, OperatorLevel: constant.High} }) return engineContext{ filterFuncs: filterFuncs, diff --git a/pkg/schedule/schedulers/balance_leader.go b/pkg/schedule/schedulers/balance_leader.go index 71b923dcbaf0..6769a78d5029 100644 --- a/pkg/schedule/schedulers/balance_leader.go +++ b/pkg/schedule/schedulers/balance_leader.go @@ -185,7 +185,7 @@ func newBalanceLeaderScheduler(opController *schedule.OperatorController, conf * option(s) } s.filters = []filter.Filter{ - &filter.StoreStateFilter{ActionScope: s.GetName(), TransferLeader: true}, + &filter.StoreStateFilter{ActionScope: s.GetName(), TransferLeader: true, OperatorLevel: constant.High}, filter.NewSpecialUseFilter(s.GetName()), } return s diff --git a/pkg/schedule/schedulers/balance_region.go b/pkg/schedule/schedulers/balance_region.go index 95e051bc313d..8b014a42067f 100644 --- a/pkg/schedule/schedulers/balance_region.go +++ b/pkg/schedule/schedulers/balance_region.go @@ -80,7 +80,7 @@ func newBalanceRegionScheduler(opController *schedule.OperatorController, conf * setOption(scheduler) } scheduler.filters = []filter.Filter{ - &filter.StoreStateFilter{ActionScope: scheduler.GetName(), MoveRegion: true}, + &filter.StoreStateFilter{ActionScope: scheduler.GetName(), MoveRegion: true, OperatorLevel: constant.Medium}, filter.NewSpecialUseFilter(scheduler.GetName()), } return scheduler @@ -132,6 +132,7 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) balanceRegionScheduleCounter.Inc() stores := cluster.GetStores() opts := cluster.GetOpts() + snapshotFilter := filter.NewSnapshotSendFilter(stores, constant.Medium) faultTargets := filter.SelectUnavailableTargetStores(stores, s.filters, opts, collector, s.filterCounter) sourceStores := filter.SelectSourceStores(stores, s.filters, opts, collector, s.filterCounter) opInfluence := s.opController.GetOpInfluence(cluster) @@ -149,7 +150,7 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) pendingFilter := filter.NewRegionPendingFilter() downFilter := filter.NewRegionDownFilter() replicaFilter := filter.NewRegionReplicatedFilter(cluster) - baseRegionFilters := []filter.RegionFilter{downFilter, replicaFilter} + baseRegionFilters := []filter.RegionFilter{downFilter, replicaFilter, snapshotFilter} switch cluster.(type) { case *schedule.RangeCluster: // allow empty region to be scheduled in range cluster diff --git a/pkg/schedule/schedulers/balance_witness.go b/pkg/schedule/schedulers/balance_witness.go index 1db5149f419e..f481cbbc8b5f 100644 --- a/pkg/schedule/schedulers/balance_witness.go +++ b/pkg/schedule/schedulers/balance_witness.go @@ -170,7 +170,7 @@ func newBalanceWitnessScheduler(opController *schedule.OperatorController, conf option(s) } s.filters = []filter.Filter{ - &filter.StoreStateFilter{ActionScope: s.GetName(), MoveRegion: true}, + &filter.StoreStateFilter{ActionScope: s.GetName(), MoveRegion: 
true, OperatorLevel: constant.Medium}, filter.NewSpecialUseFilter(s.GetName()), } return s diff --git a/pkg/schedule/schedulers/evict_leader.go b/pkg/schedule/schedulers/evict_leader.go index 0db548bc93c3..55d4fdd405bf 100644 --- a/pkg/schedule/schedulers/evict_leader.go +++ b/pkg/schedule/schedulers/evict_leader.go @@ -303,7 +303,7 @@ func scheduleEvictLeaderOnce(name, typ string, cluster schedule.Cluster, conf ev filters = append(filters, filter.NewExcludedFilter(name, nil, unhealthyPeerStores)) } - filters = append(filters, &filter.StoreStateFilter{ActionScope: name, TransferLeader: true}) + filters = append(filters, &filter.StoreStateFilter{ActionScope: name, TransferLeader: true, OperatorLevel: constant.Urgent}) candidates := filter.NewCandidates(cluster.GetFollowerStores(region)). FilterTarget(cluster.GetOpts(), nil, nil, filters...) // Compatible with old TiKV transfer leader logic. diff --git a/pkg/schedule/schedulers/grant_hot_region.go b/pkg/schedule/schedulers/grant_hot_region.go index e04d91b1a9b4..093141bdc5ff 100644 --- a/pkg/schedule/schedulers/grant_hot_region.go +++ b/pkg/schedule/schedulers/grant_hot_region.go @@ -294,10 +294,10 @@ func (s *grantHotRegionScheduler) transfer(cluster schedule.Cluster, regionID ui destStoreIDs := make([]uint64, 0, len(s.conf.StoreIDs)) var candidate []uint64 if isLeader { - filters = append(filters, &filter.StoreStateFilter{ActionScope: s.GetName(), TransferLeader: true}) + filters = append(filters, &filter.StoreStateFilter{ActionScope: s.GetName(), TransferLeader: true, OperatorLevel: constant.High}) candidate = []uint64{s.conf.GetStoreLeaderID()} } else { - filters = append(filters, &filter.StoreStateFilter{ActionScope: s.GetName(), MoveRegion: true}, + filters = append(filters, &filter.StoreStateFilter{ActionScope: s.GetName(), MoveRegion: true, OperatorLevel: constant.High}, filter.NewExcludedFilter(s.GetName(), srcRegion.GetStoreIDs(), srcRegion.GetStoreIDs())) candidate = s.conf.StoreIDs } diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index 49e302cfd46c..ee50080f1a57 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -52,6 +52,7 @@ var ( hotSchedulerAbnormalReplicaCounter = schedulerCounter.WithLabelValues(HotRegionName, "abnormal_replica") hotSchedulerCreateOperatorFailedCounter = schedulerCounter.WithLabelValues(HotRegionName, "create_operator_failed") hotSchedulerNewOperatorCounter = schedulerCounter.WithLabelValues(HotRegionName, "new_operator") + hotSchedulerSnapshotSenderLimit = schedulerCounter.WithLabelValues(HotRegionName, "snapshot_sender_limit") hotSchedulerMoveLeaderCounter = schedulerCounter.WithLabelValues(HotRegionName, moveLeader.String()) hotSchedulerMovePeerCounter = schedulerCounter.WithLabelValues(HotRegionName, movePeer.String()) @@ -625,16 +626,21 @@ func (bs *balanceSolver) solve() []*operator.Operator { return region.GetStorePeer(srcStoreID) == nil } } - + snapshotFilter := filter.NewSnapshotSendFilter(bs.GetStores(), constant.Medium) for _, srcStore := range bs.filterSrcStores() { bs.cur.srcStore = srcStore srcStoreID := srcStore.GetID() for _, mainPeerStat := range bs.filterHotPeers(srcStore) { if bs.cur.region = bs.getRegion(mainPeerStat, srcStoreID); bs.cur.region == nil { continue - } else if bs.opTy == movePeer && bs.cur.region.GetApproximateSize() > bs.GetOpts().GetMaxMovableHotPeerSize() { - hotSchedulerNeedSplitBeforeScheduleCounter.Inc() - continue + } else if bs.opTy == movePeer { + if 
bs.cur.region.GetApproximateSize() > bs.GetOpts().GetMaxMovableHotPeerSize() { + hotSchedulerNeedSplitBeforeScheduleCounter.Inc() + continue + } else if !snapshotFilter.Select(bs.cur.region).IsOK() { + hotSchedulerSnapshotSenderLimit.Inc() + continue + } } bs.cur.mainPeerStat = mainPeerStat @@ -915,7 +921,7 @@ func (bs *balanceSolver) filterDstStores() map[uint64]*statistics.StoreLoadDetai return nil } filters = []filter.Filter{ - &filter.StoreStateFilter{ActionScope: bs.sche.GetName(), MoveRegion: true}, + &filter.StoreStateFilter{ActionScope: bs.sche.GetName(), MoveRegion: true, OperatorLevel: constant.High}, filter.NewExcludedFilter(bs.sche.GetName(), bs.cur.region.GetStoreIDs(), bs.cur.region.GetStoreIDs()), filter.NewSpecialUseFilter(bs.sche.GetName(), filter.SpecialUseHotRegion), filter.NewPlacementSafeguard(bs.sche.GetName(), bs.GetOpts(), bs.GetBasicCluster(), bs.GetRuleManager(), bs.cur.region, srcStore, nil), @@ -929,12 +935,12 @@ func (bs *balanceSolver) filterDstStores() map[uint64]*statistics.StoreLoadDetai return nil } filters = []filter.Filter{ - &filter.StoreStateFilter{ActionScope: bs.sche.GetName(), TransferLeader: true}, + &filter.StoreStateFilter{ActionScope: bs.sche.GetName(), TransferLeader: true, OperatorLevel: constant.High}, filter.NewSpecialUseFilter(bs.sche.GetName(), filter.SpecialUseHotRegion), } if bs.rwTy == statistics.Read { peers := bs.cur.region.GetPeers() - moveLeaderFilters := []filter.Filter{&filter.StoreStateFilter{ActionScope: bs.sche.GetName(), MoveRegion: true}} + moveLeaderFilters := []filter.Filter{&filter.StoreStateFilter{ActionScope: bs.sche.GetName(), MoveRegion: true, OperatorLevel: constant.High}} if leaderFilter := filter.NewPlacementLeaderSafeguard(bs.sche.GetName(), bs.GetOpts(), bs.GetBasicCluster(), bs.GetRuleManager(), bs.cur.region, srcStore, true /*allowMoveLeader*/); leaderFilter != nil { filters = append(filters, leaderFilter) } diff --git a/pkg/schedule/schedulers/label.go b/pkg/schedule/schedulers/label.go index a78e3fa35d84..e150e6530fcd 100644 --- a/pkg/schedule/schedulers/label.go +++ b/pkg/schedule/schedulers/label.go @@ -17,6 +17,7 @@ package schedulers import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/core/constant" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/schedule" "github.com/tikv/pd/pkg/schedule/config" @@ -109,7 +110,7 @@ func (s *labelScheduler) Schedule(cluster schedule.Cluster, dryRun bool) ([]*ope f := filter.NewExcludedFilter(s.GetName(), nil, excludeStores) target := filter.NewCandidates(cluster.GetFollowerStores(region)). - FilterTarget(cluster.GetOpts(), nil, nil, &filter.StoreStateFilter{ActionScope: LabelName, TransferLeader: true}, f). + FilterTarget(cluster.GetOpts(), nil, nil, &filter.StoreStateFilter{ActionScope: LabelName, TransferLeader: true, OperatorLevel: constant.Medium}, f). RandomPick() if target == nil { log.Debug("label scheduler no target found for region", zap.Uint64("region-id", region.GetID())) diff --git a/pkg/schedule/schedulers/random_merge.go b/pkg/schedule/schedulers/random_merge.go index be6aa40c20fc..f324dbc1599a 100644 --- a/pkg/schedule/schedulers/random_merge.go +++ b/pkg/schedule/schedulers/random_merge.go @@ -89,7 +89,7 @@ func (s *randomMergeScheduler) Schedule(cluster schedule.Cluster, dryRun bool) ( randomMergeCounter.Inc() store := filter.NewCandidates(cluster.GetStores()). - FilterSource(cluster.GetOpts(), nil, nil, &filter.StoreStateFilter{ActionScope: s.conf.Name, MoveRegion: true}). 
+ FilterSource(cluster.GetOpts(), nil, nil, &filter.StoreStateFilter{ActionScope: s.conf.Name, MoveRegion: true, OperatorLevel: constant.Low}). RandomPick() if store == nil { randomMergeNoSourceStoreCounter.Inc() diff --git a/pkg/schedule/schedulers/shuffle_hot_region.go b/pkg/schedule/schedulers/shuffle_hot_region.go index dfe4970bd6b0..1bb7179fa9c1 100644 --- a/pkg/schedule/schedulers/shuffle_hot_region.go +++ b/pkg/schedule/schedulers/shuffle_hot_region.go @@ -120,7 +120,7 @@ func (s *shuffleHotRegionScheduler) randomSchedule(cluster schedule.Cluster, loa } filters := []filter.Filter{ - &filter.StoreStateFilter{ActionScope: s.GetName(), MoveRegion: true}, + &filter.StoreStateFilter{ActionScope: s.GetName(), MoveRegion: true, OperatorLevel: constant.Low}, filter.NewExcludedFilter(s.GetName(), srcRegion.GetStoreIDs(), srcRegion.GetStoreIDs()), filter.NewPlacementSafeguard(s.GetName(), cluster.GetOpts(), cluster.GetBasicCluster(), cluster.GetRuleManager(), srcRegion, srcStore, nil), } diff --git a/pkg/schedule/schedulers/shuffle_leader.go b/pkg/schedule/schedulers/shuffle_leader.go index ff6d7debdd65..42b8dcd325d7 100644 --- a/pkg/schedule/schedulers/shuffle_leader.go +++ b/pkg/schedule/schedulers/shuffle_leader.go @@ -55,7 +55,7 @@ type shuffleLeaderScheduler struct { // between stores. func newShuffleLeaderScheduler(opController *schedule.OperatorController, conf *shuffleLeaderSchedulerConfig) schedule.Scheduler { filters := []filter.Filter{ - &filter.StoreStateFilter{ActionScope: conf.Name, TransferLeader: true}, + &filter.StoreStateFilter{ActionScope: conf.Name, TransferLeader: true, OperatorLevel: constant.Low}, filter.NewSpecialUseFilter(conf.Name), } base := NewBaseScheduler(opController) diff --git a/pkg/schedule/schedulers/shuffle_region.go b/pkg/schedule/schedulers/shuffle_region.go index d29147a3c024..3555dc151609 100644 --- a/pkg/schedule/schedulers/shuffle_region.go +++ b/pkg/schedule/schedulers/shuffle_region.go @@ -53,7 +53,7 @@ type shuffleRegionScheduler struct { // between stores. func newShuffleRegionScheduler(opController *schedule.OperatorController, conf *shuffleRegionSchedulerConfig) schedule.Scheduler { filters := []filter.Filter{ - &filter.StoreStateFilter{ActionScope: ShuffleRegionName, MoveRegion: true}, + &filter.StoreStateFilter{ActionScope: ShuffleRegionName, MoveRegion: true, OperatorLevel: constant.Low}, filter.NewSpecialUseFilter(ShuffleRegionName), } base := NewBaseScheduler(opController) diff --git a/pkg/schedule/schedulers/transfer_witness_leader.go b/pkg/schedule/schedulers/transfer_witness_leader.go index b14d0bc7d419..ada7a026e1bc 100644 --- a/pkg/schedule/schedulers/transfer_witness_leader.go +++ b/pkg/schedule/schedulers/transfer_witness_leader.go @@ -107,7 +107,7 @@ func (s *trasferWitnessLeaderScheduler) scheduleTransferWitnessLeader(name, typ for _, peer := range region.GetPendingPeers() { unhealthyPeerStores[peer.GetStoreId()] = struct{}{} } - filters = append(filters, filter.NewExcludedFilter(name, nil, unhealthyPeerStores), &filter.StoreStateFilter{ActionScope: name, TransferLeader: true}) + filters = append(filters, filter.NewExcludedFilter(name, nil, unhealthyPeerStores), &filter.StoreStateFilter{ActionScope: name, TransferLeader: true, OperatorLevel: constant.Urgent}) candidates := filter.NewCandidates(cluster.GetFollowerStores(region)).FilterTarget(cluster.GetOpts(), nil, nil, filters...) // Compatible with old TiKV transfer leader logic. 
target := candidates.RandomPick() diff --git a/plugin/scheduler_example/evict_leader.go b/plugin/scheduler_example/evict_leader.go index 5e7125563c9a..348f8ca971ff 100644 --- a/plugin/scheduler_example/evict_leader.go +++ b/plugin/scheduler_example/evict_leader.go @@ -226,7 +226,7 @@ func (s *evictLeaderScheduler) Schedule(cluster schedule.Cluster, dryRun bool) ( continue } target := filter.NewCandidates(cluster.GetFollowerStores(region)). - FilterTarget(cluster.GetOpts(), nil, nil, &filter.StoreStateFilter{ActionScope: EvictLeaderName, TransferLeader: true}). + FilterTarget(cluster.GetOpts(), nil, nil, &filter.StoreStateFilter{ActionScope: EvictLeaderName, TransferLeader: true, OperatorLevel: constant.Urgent}). RandomPick() if target == nil { continue From 1c288c441a26c24de7912b75416cbb03d9bcf6e9 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Mon, 3 Apr 2023 11:20:57 +0800 Subject: [PATCH 02/16] mcs: fix ctx in tso forward stream (#6255) close tikv/pd#6254 Signed-off-by: lhy1024 --- server/grpc_service.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/server/grpc_service.go b/server/grpc_service.go index 5f323b667b8d..42f85e643365 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -1766,7 +1766,7 @@ func (s *GrpcServer) getGlobalTSOFromTSOServer(ctx context.Context) (pdpb.Timest if !ok || forwardedHost == "" { return pdpb.Timestamp{}, ErrNotFoundTSOAddr } - forwardStream, err := s.getTSOForwardStream(ctx, forwardedHost) + forwardStream, err := s.getTSOForwardStream(forwardedHost) if err != nil { return pdpb.Timestamp{}, err } @@ -1780,12 +1780,13 @@ func (s *GrpcServer) getGlobalTSOFromTSOServer(ctx context.Context) (pdpb.Timest }) ts, err := forwardStream.Recv() if err != nil { + log.Error("get global tso from tso server failed", zap.Error(err)) return pdpb.Timestamp{}, err } return *ts.GetTimestamp(), nil } -func (s *GrpcServer) getTSOForwardStream(ctx context.Context, forwardedHost string) (tsopb.TSO_TsoClient, error) { +func (s *GrpcServer) getTSOForwardStream(forwardedHost string) (tsopb.TSO_TsoClient, error) { s.tsoClientPool.RLock() forwardStream, ok := s.tsoClientPool.clients[forwardedHost] s.tsoClientPool.RUnlock() @@ -1804,18 +1805,18 @@ func (s *GrpcServer) getTSOForwardStream(ctx context.Context, forwardedHost stri } // Now let's create the client connection and the forward stream - client, err := s.getDelegateClient(ctx, forwardedHost) + client, err := s.getDelegateClient(s.ctx, forwardedHost) if err != nil { return nil, err } done := make(chan struct{}) - ctx, cancel := context.WithTimeout(s.ctx, tsoutil.DefaultTSOProxyTimeout) + ctx, cancel := context.WithCancel(s.ctx) go checkStream(ctx, cancel, done) forwardStream, err = tsopb.NewTSOClient(client).Tso(ctx) + done <- struct{}{} if err != nil { return nil, err } - done <- struct{}{} s.tsoClientPool.clients[forwardedHost] = forwardStream return forwardStream, nil } From d083fd6937ef1ee45cf779e2e7e889e72f61be92 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Mon, 3 Apr 2023 15:08:57 +0800 Subject: [PATCH 03/16] *: remove cluster dependency from keyspace (#6249) ref tikv/pd#6231 Signed-off-by: Ryan Leung Co-authored-by: Ti Chi Robot --- {server => pkg}/keyspace/keyspace.go | 42 +++++++++++-------- {server => pkg}/keyspace/keyspace_test.go | 14 +++++-- .../keyspace/tso_keyspace_group.go | 0 .../keyspace/tso_keyspace_group_test.go | 0 {server => pkg}/keyspace/util.go | 0 {server => pkg}/keyspace/util_test.go | 0 server/api/region_label_test.go | 4 +- server/apiv2/handlers/keyspace.go 
| 2 +- server/config/config.go | 5 +++ server/keyspace_service.go | 2 +- server/server.go | 12 +++--- tests/cluster.go | 2 +- tests/integrations/client/keyspace_test.go | 2 +- tests/server/apiv2/handlers/keyspace_test.go | 2 +- tests/server/keyspace/keyspace_test.go | 2 +- 15 files changed, 54 insertions(+), 35 deletions(-) rename {server => pkg}/keyspace/keyspace.go (94%) rename {server => pkg}/keyspace/keyspace_test.go (96%) rename {server => pkg}/keyspace/tso_keyspace_group.go (100%) rename {server => pkg}/keyspace/tso_keyspace_group_test.go (100%) rename {server => pkg}/keyspace/util.go (100%) rename {server => pkg}/keyspace/util_test.go (100%) diff --git a/server/keyspace/keyspace.go b/pkg/keyspace/keyspace.go similarity index 94% rename from server/keyspace/keyspace.go rename to pkg/keyspace/keyspace.go index bf64f3e680b9..1d6fe227891e 100644 --- a/server/keyspace/keyspace.go +++ b/pkg/keyspace/keyspace.go @@ -23,12 +23,12 @@ import ( "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/id" + "github.com/tikv/pd/pkg/schedule" + "github.com/tikv/pd/pkg/schedule/labeler" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/storage/kv" "github.com/tikv/pd/pkg/utils/syncutil" - "github.com/tikv/pd/server/cluster" - "github.com/tikv/pd/server/config" "go.uber.org/zap" ) @@ -48,6 +48,11 @@ const ( regionLabelKey = "id" ) +// Config is the interface for keyspace config. +type Config interface { + GetPreAlloc() []string +} + // Manager manages keyspace related data. // It validates requests and provides concurrency control. type Manager struct { @@ -58,11 +63,11 @@ type Manager struct { // store is the storage for keyspace related information. store endpoint.KeyspaceStorage // rc is the raft cluster of the server. - rc *cluster.RaftCluster + cluster schedule.Cluster // ctx is the context of the manager, to be used in transaction. ctx context.Context // config is the configurations of the manager. - config config.KeyspaceConfig + config Config } // CreateKeyspaceRequest represents necessary arguments to create a keyspace. @@ -77,15 +82,15 @@ type CreateKeyspaceRequest struct { // NewKeyspaceManager creates a Manager of keyspace related data. func NewKeyspaceManager(store endpoint.KeyspaceStorage, - rc *cluster.RaftCluster, + cluster schedule.Cluster, idAllocator id.Allocator, - config config.KeyspaceConfig, + config Config, ) *Manager { return &Manager{ metaLock: syncutil.NewLockGroup(syncutil.WithHash(keyspaceIDHash)), idAllocator: idAllocator, store: store, - rc: rc, + cluster: cluster, ctx: context.TODO(), config: config, } @@ -113,7 +118,7 @@ func (manager *Manager) Bootstrap() error { } // Initialize pre-alloc keyspace. 
- preAlloc := manager.config.PreAlloc + preAlloc := manager.config.GetPreAlloc() for _, keyspaceName := range preAlloc { _, err = manager.CreateKeyspace(&CreateKeyspaceRequest{ Name: keyspaceName, @@ -207,18 +212,21 @@ func (manager *Manager) splitKeyspaceRegion(id uint32) error { }) keyspaceRule := makeLabelRule(id) - err := manager.rc.GetRegionLabeler().SetLabelRule(keyspaceRule) - if err != nil { - log.Warn("[keyspace] failed to add region label for keyspace", + if cl, ok := manager.cluster.(interface{ GetRegionLabeler() *labeler.RegionLabeler }); ok { + err := cl.GetRegionLabeler().SetLabelRule(keyspaceRule) + if err != nil { + log.Warn("[keyspace] failed to add region label for keyspace", + zap.Uint32("keyspaceID", id), + zap.Error(err), + ) + } + log.Info("[keyspace] added region label for keyspace", zap.Uint32("keyspaceID", id), - zap.Error(err), + zap.Any("LabelRule", keyspaceRule), ) + return nil } - log.Info("[keyspace] added region label for keyspace", - zap.Uint32("keyspaceID", id), - zap.Any("LabelRule", keyspaceRule), - ) - return nil + return errors.New("cluster does not support region label") } // LoadKeyspace returns the keyspace specified by name. diff --git a/server/keyspace/keyspace_test.go b/pkg/keyspace/keyspace_test.go similarity index 96% rename from server/keyspace/keyspace_test.go rename to pkg/keyspace/keyspace_test.go index d478736052e8..1fc7252bc6a2 100644 --- a/server/keyspace/keyspace_test.go +++ b/pkg/keyspace/keyspace_test.go @@ -29,7 +29,6 @@ import ( "github.com/tikv/pd/pkg/mock/mockid" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/storage/kv" - "github.com/tikv/pd/server/config" ) const ( @@ -47,18 +46,25 @@ func TestKeyspaceTestSuite(t *testing.T) { suite.Run(t, new(keyspaceTestSuite)) } +type mockConfig struct { + PreAlloc []string +} + +func (m *mockConfig) GetPreAlloc() []string { return m.PreAlloc } + func (suite *keyspaceTestSuite) SetupTest() { store := endpoint.NewStorageEndpoint(kv.NewMemoryKV(), nil) allocator := mockid.NewIDAllocator() - suite.manager = NewKeyspaceManager(store, nil, allocator, config.KeyspaceConfig{}) + suite.manager = NewKeyspaceManager(store, nil, allocator, &mockConfig{}) suite.NoError(suite.manager.Bootstrap()) } func (suite *keyspaceTestSuite) SetupSuite() { - suite.NoError(failpoint.Enable("github.com/tikv/pd/server/keyspace/skipSplitRegion", "return(true)")) + suite.NoError(failpoint.Enable("github.com/tikv/pd/pkg/keyspace/skipSplitRegion", "return(true)")) } + func (suite *keyspaceTestSuite) TearDownSuite() { - suite.NoError(failpoint.Disable("github.com/tikv/pd/server/keyspace/skipSplitRegion")) + suite.NoError(failpoint.Disable("github.com/tikv/pd/pkg/keyspace/skipSplitRegion")) } func makeCreateKeyspaceRequests(count int) []*CreateKeyspaceRequest { diff --git a/server/keyspace/tso_keyspace_group.go b/pkg/keyspace/tso_keyspace_group.go similarity index 100% rename from server/keyspace/tso_keyspace_group.go rename to pkg/keyspace/tso_keyspace_group.go diff --git a/server/keyspace/tso_keyspace_group_test.go b/pkg/keyspace/tso_keyspace_group_test.go similarity index 100% rename from server/keyspace/tso_keyspace_group_test.go rename to pkg/keyspace/tso_keyspace_group_test.go diff --git a/server/keyspace/util.go b/pkg/keyspace/util.go similarity index 100% rename from server/keyspace/util.go rename to pkg/keyspace/util.go diff --git a/server/keyspace/util_test.go b/pkg/keyspace/util_test.go similarity index 100% rename from server/keyspace/util_test.go rename to pkg/keyspace/util_test.go diff --git 
a/server/api/region_label_test.go b/server/api/region_label_test.go index e377249561ea..021ec7f1359e 100644 --- a/server/api/region_label_test.go +++ b/server/api/region_label_test.go @@ -47,13 +47,13 @@ func (suite *regionLabelTestSuite) SetupSuite() { addr := suite.svr.GetAddr() suite.urlPrefix = fmt.Sprintf("%s%s/api/v1/config/region-label/", addr, apiPrefix) - suite.NoError(failpoint.Enable("github.com/tikv/pd/server/keyspace/skipSplitRegion", "return(true)")) + suite.NoError(failpoint.Enable("github.com/tikv/pd/pkg/keyspace/skipSplitRegion", "return(true)")) mustBootstrapCluster(re, suite.svr) } func (suite *regionLabelTestSuite) TearDownSuite() { suite.cleanup() - suite.NoError(failpoint.Disable("github.com/tikv/pd/server/keyspace/skipSplitRegion")) + suite.NoError(failpoint.Disable("github.com/tikv/pd/pkg/keyspace/skipSplitRegion")) } func (suite *regionLabelTestSuite) TestGetSet() { diff --git a/server/apiv2/handlers/keyspace.go b/server/apiv2/handlers/keyspace.go index 1d607ab6017f..2ac5235831b8 100644 --- a/server/apiv2/handlers/keyspace.go +++ b/server/apiv2/handlers/keyspace.go @@ -25,9 +25,9 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/keyspace" "github.com/tikv/pd/server" "github.com/tikv/pd/server/apiv2/middlewares" - "github.com/tikv/pd/server/keyspace" ) // RegisterKeyspace register keyspace related handlers to router paths. diff --git a/server/config/config.go b/server/config/config.go index 2142ad7ce413..070d6163266b 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -1389,3 +1389,8 @@ type KeyspaceConfig struct { // PreAlloc contains the keyspace to be allocated during keyspace manager initialization. PreAlloc []string `toml:"pre-alloc" json:"pre-alloc"` } + +// GetPreAlloc returns the keyspace to be allocated during keyspace manager initialization. 
+func (c *KeyspaceConfig) GetPreAlloc() []string { + return c.PreAlloc +} diff --git a/server/keyspace_service.go b/server/keyspace_service.go index 5fb06c38bdbf..5255d7258155 100644 --- a/server/keyspace_service.go +++ b/server/keyspace_service.go @@ -22,8 +22,8 @@ import ( "github.com/gogo/protobuf/proto" "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/tikv/pd/pkg/keyspace" "github.com/tikv/pd/pkg/storage/endpoint" - "github.com/tikv/pd/server/keyspace" "go.etcd.io/etcd/clientv3" ) diff --git a/server/server.go b/server/server.go index 7586cc23fe28..28568b2fcf5c 100644 --- a/server/server.go +++ b/server/server.go @@ -46,6 +46,7 @@ import ( "github.com/tikv/pd/pkg/encryption" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/id" + "github.com/tikv/pd/pkg/keyspace" ms_server "github.com/tikv/pd/pkg/mcs/meta_storage/server" "github.com/tikv/pd/pkg/mcs/registry" rm_server "github.com/tikv/pd/pkg/mcs/resource_manager/server" @@ -73,7 +74,6 @@ import ( "github.com/tikv/pd/server/cluster" "github.com/tikv/pd/server/config" "github.com/tikv/pd/server/gc" - "github.com/tikv/pd/server/keyspace" syncer "github.com/tikv/pd/server/region_syncer" "go.etcd.io/etcd/clientv3" "go.etcd.io/etcd/embed" @@ -440,7 +440,7 @@ func (s *Server) startServer(ctx context.Context) error { Member: s.member.MemberValue(), Step: keyspace.AllocStep, }) - s.keyspaceManager = keyspace.NewKeyspaceManager(s.storage, s.cluster, keyspaceIDAllocator, s.cfg.Keyspace) + s.keyspaceManager = keyspace.NewKeyspaceManager(s.storage, s.cluster, keyspaceIDAllocator, &s.cfg.Keyspace) s.keyspaceGroupManager = keyspace.NewKeyspaceGroupManager(s.ctx, s.storage) s.hbStreams = hbstream.NewHeartbeatStreams(ctx, s.clusterID, s.cluster) // initial hot_region_storage in here. 
@@ -698,12 +698,12 @@ func (s *Server) bootstrapCluster(req *pdpb.BootstrapRequest) (*pdpb.BootstrapRe return nil, err } - if err = s.GetKeyspaceManager().Bootstrap(); err != nil { - log.Warn("bootstrap keyspace manager failed", errs.ZapError(err)) + if err := s.GetKeyspaceGroupManager().Bootstrap(); err != nil { + log.Warn("bootstrapping keyspace group manager failed", errs.ZapError(err)) } - if err = s.GetKeyspaceGroupManager().Bootstrap(); err != nil { - log.Warn("bootstrap keyspace group manager failed", errs.ZapError(err)) + if err = s.GetKeyspaceManager().Bootstrap(); err != nil { + log.Warn("bootstrapping keyspace manager failed", errs.ZapError(err)) } return &pdpb.BootstrapResponse{ diff --git a/tests/cluster.go b/tests/cluster.go index 1f27e72bcada..b0a5d5299988 100644 --- a/tests/cluster.go +++ b/tests/cluster.go @@ -32,6 +32,7 @@ import ( "github.com/tikv/pd/pkg/dashboard" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/id" + "github.com/tikv/pd/pkg/keyspace" "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/schedule/schedulers" "github.com/tikv/pd/pkg/swaggerserver" @@ -44,7 +45,6 @@ import ( "github.com/tikv/pd/server/cluster" "github.com/tikv/pd/server/config" "github.com/tikv/pd/server/join" - "github.com/tikv/pd/server/keyspace" "go.etcd.io/etcd/clientv3" ) diff --git a/tests/integrations/client/keyspace_test.go b/tests/integrations/client/keyspace_test.go index a9fb953cdf57..7cb35820bc6f 100644 --- a/tests/integrations/client/keyspace_test.go +++ b/tests/integrations/client/keyspace_test.go @@ -20,9 +20,9 @@ import ( "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/stretchr/testify/require" + "github.com/tikv/pd/pkg/keyspace" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/server" - "github.com/tikv/pd/server/keyspace" ) const ( diff --git a/tests/server/apiv2/handlers/keyspace_test.go b/tests/server/apiv2/handlers/keyspace_test.go index e13e13737d77..b9b9742b2dc9 100644 --- a/tests/server/apiv2/handlers/keyspace_test.go +++ b/tests/server/apiv2/handlers/keyspace_test.go @@ -26,9 +26,9 @@ import ( "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/keyspace" "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server/apiv2/handlers" - "github.com/tikv/pd/server/keyspace" "github.com/tikv/pd/tests" "go.uber.org/goleak" ) diff --git a/tests/server/keyspace/keyspace_test.go b/tests/server/keyspace/keyspace_test.go index 4f958d1f892e..e108879f3c47 100644 --- a/tests/server/keyspace/keyspace_test.go +++ b/tests/server/keyspace/keyspace_test.go @@ -27,10 +27,10 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/codec" + "github.com/tikv/pd/pkg/keyspace" "github.com/tikv/pd/pkg/schedule/labeler" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/server/config" - "github.com/tikv/pd/server/keyspace" "github.com/tikv/pd/tests" ) From 39314b10bd0b4d08b5ada210beb5f35534f952ff Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Mon, 3 Apr 2023 17:46:58 +0800 Subject: [PATCH 04/16] *: make code clear by rename `isServing` to `isRunning` (#6258) ref tikv/pd#4399 Signed-off-by: lhy1024 Co-authored-by: Ti Chi Robot --- pkg/mcs/resource_manager/server/server.go | 10 +++++----- pkg/mcs/tso/server/server.go | 12 ++++++------ server/server.go | 10 +++++----- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pkg/mcs/resource_manager/server/server.go b/pkg/mcs/resource_manager/server/server.go 
index 939bd37d4d1b..a2ce35edf564 100644 --- a/pkg/mcs/resource_manager/server/server.go +++ b/pkg/mcs/resource_manager/server/server.go @@ -52,8 +52,8 @@ import ( // Server is the resource manager server, and it implements bs.Server. type Server struct { - // Server state. 0 is not serving, 1 is serving. - isServing int64 + // Server state. 0 is not running, 1 is running. + isRunning int64 ctx context.Context serverLoopCtx context.Context @@ -196,7 +196,7 @@ func (s *Server) campaignLeader() { // Close closes the server. func (s *Server) Close() { - if !atomic.CompareAndSwapInt64(&s.isServing, 1, 0) { + if !atomic.CompareAndSwapInt64(&s.isRunning, 1, 0) { // server is already closed return } @@ -247,7 +247,7 @@ func (s *Server) IsServing() bool { // IsClosed checks if the server loop is closed func (s *Server) IsClosed() bool { - return s != nil && atomic.LoadInt64(&s.isServing) == 0 + return s != nil && atomic.LoadInt64(&s.isRunning) == 0 } // AddServiceReadyCallback adds callbacks when the server becomes the leader, if there is embedded etcd, or the primary otherwise. @@ -411,7 +411,7 @@ func (s *Server) startServer() (err error) { log.Error("failed to regiser the service", zap.String("service-name", utils.ResourceManagerServiceName), errs.ZapError(err)) return err } - atomic.StoreInt64(&s.isServing, 1) + atomic.StoreInt64(&s.isRunning, 1) return nil } diff --git a/pkg/mcs/tso/server/server.go b/pkg/mcs/tso/server/server.go index 5a9ae2e92b69..db8e8a239f2c 100644 --- a/pkg/mcs/tso/server/server.go +++ b/pkg/mcs/tso/server/server.go @@ -75,8 +75,8 @@ var _ tso.ElectionMember = (*member.Participant)(nil) type Server struct { diagnosticspb.DiagnosticsServer - // Server state. 0 is not serving, 1 is serving. - isServing int64 + // Server state. 0 is not running, 1 is running. + isRunning int64 // Server start timestamp startTimestamp int64 @@ -157,7 +157,7 @@ func (s *Server) Run() error { // Close closes the server. func (s *Server) Close() { - if !atomic.CompareAndSwapInt64(&s.isServing, 1, 0) { + if !atomic.CompareAndSwapInt64(&s.isRunning, 1, 0) { // server is already closed return } @@ -200,7 +200,7 @@ func (s *Server) AddStartCallback(callbacks ...func()) { // IsServing implements basicserver. It returns whether the server is the leader // if there is embedded etcd, or the primary otherwise. func (s *Server) IsServing() bool { - return atomic.LoadInt64(&s.isServing) == 1 && s.keyspaceGroupManager.GetElectionMember(mcsutils.DefaultKeySpaceGroupID).IsLeader() + return atomic.LoadInt64(&s.isRunning) == 1 && s.keyspaceGroupManager.GetElectionMember(mcsutils.DefaultKeySpaceGroupID).IsLeader() } // GetLeaderListenUrls gets service endpoints from the leader in election group. @@ -225,7 +225,7 @@ func (s *Server) ClusterID() uint64 { // IsClosed checks if the server loop is closed func (s *Server) IsClosed() bool { - return atomic.LoadInt64(&s.isServing) == 0 + return atomic.LoadInt64(&s.isRunning) == 0 } // GetTSOAllocatorManager returns the manager of TSO Allocator. @@ -460,7 +460,7 @@ func (s *Server) startServer() (err error) { return err } - atomic.StoreInt64(&s.isServing, 1) + atomic.StoreInt64(&s.isRunning, 1) return nil } diff --git a/server/server.go b/server/server.go index 28568b2fcf5c..23be7dba8266 100644 --- a/server/server.go +++ b/server/server.go @@ -121,8 +121,8 @@ var ( type Server struct { diagnosticspb.DiagnosticsServer - // Server state. - isServing int64 + // Server state. 0 is not running, 1 is running. 
+ isRunning int64 // Server start timestamp startTimestamp int64 @@ -456,7 +456,7 @@ func (s *Server) startServer(ctx context.Context) error { } // Server has started. - atomic.StoreInt64(&s.isServing, 1) + atomic.StoreInt64(&s.isRunning, 1) serverMaxProcs.Set(float64(runtime.GOMAXPROCS(0))) return nil } @@ -468,7 +468,7 @@ func (s *Server) AddCloseCallback(callbacks ...func()) { // Close closes the server. func (s *Server) Close() { - if !atomic.CompareAndSwapInt64(&s.isServing, 1, 0) { + if !atomic.CompareAndSwapInt64(&s.isRunning, 1, 0) { // server is already closed return } @@ -513,7 +513,7 @@ func (s *Server) Close() { // IsClosed checks whether server is closed or not. func (s *Server) IsClosed() bool { - return atomic.LoadInt64(&s.isServing) == 0 + return atomic.LoadInt64(&s.isRunning) == 0 } // Run runs the pd server. From f058d5e4401363ec6ba835bb12990881457c5b12 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Tue, 4 Apr 2023 11:44:58 +0800 Subject: [PATCH 05/16] *: upgrade aws sdk from v1 to v2 (#6260) close tikv/pd#6259 *: upgrade aws sdk from v1 to v2 Signed-off-by: nolouch Co-authored-by: Ti Chi Robot --- go.mod | 15 +++++- go.sum | 33 +++++++++--- pkg/encryption/kms.go | 88 +++++++++++++------------------- tests/integrations/client/go.mod | 15 +++++- tests/integrations/client/go.sum | 33 +++++++++--- tests/integrations/mcs/go.mod | 15 +++++- tests/integrations/mcs/go.sum | 33 +++++++++--- tests/integrations/tso/go.mod | 15 +++++- tests/integrations/tso/go.sum | 33 +++++++++--- 9 files changed, 195 insertions(+), 85 deletions(-) diff --git a/go.mod b/go.mod index f2a0b1f36ebc..4c4090694b94 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,10 @@ go 1.20 require ( github.com/AlekSi/gocov-xml v1.0.0 github.com/BurntSushi/toml v0.3.1 - github.com/aws/aws-sdk-go v1.35.3 + github.com/aws/aws-sdk-go-v2/config v1.18.19 + github.com/aws/aws-sdk-go-v2/credentials v1.13.18 + github.com/aws/aws-sdk-go-v2/service/kms v1.20.8 + github.com/aws/aws-sdk-go-v2/service/sts v1.18.7 github.com/axw/gocov v1.0.0 github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e @@ -63,6 +66,15 @@ require ( github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect github.com/VividCortex/mysqlerr v1.0.0 // indirect github.com/Xeoncross/go-aesctr-with-hmac v0.0.0-20200623134604-12b17a7ff502 // indirect + github.com/aws/aws-sdk-go-v2 v1.17.7 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.1 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.31 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.25 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.3.32 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.25 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.12.6 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.6 // indirect + github.com/aws/smithy-go v1.13.5 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bitly/go-simplejson v0.5.0 // indirect github.com/breeswish/gin-jwt/v2 v2.6.4-jwt-patch // indirect @@ -108,7 +120,6 @@ require ( github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect - github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/jonboulle/clockwork v0.2.2 // indirect github.com/joomcode/errorx v1.0.1 // indirect github.com/josharian/intern v1.0.0 // indirect diff --git a/go.sum b/go.sum index 
f39a24338721..6a35ba8ca078 100644 --- a/go.sum +++ b/go.sum @@ -32,8 +32,32 @@ github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kd github.com/appleboy/gofight/v2 v2.1.2 h1:VOy3jow4vIK8BRQJoC/I9muxyYlJ2yb9ht2hZoS3rf4= github.com/appleboy/gofight/v2 v2.1.2/go.mod h1:frW+U1QZEdDgixycTj4CygQ48yLTUhplt43+Wczp3rw= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= -github.com/aws/aws-sdk-go v1.35.3 h1:r0puXncSaAfRt7Btml2swUo74Kao+vKhO3VLjwDjK54= -github.com/aws/aws-sdk-go v1.35.3/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/+8rV9s48= +github.com/aws/aws-sdk-go-v2 v1.17.7 h1:CLSjnhJSTSogvqUGhIC6LqFKATMRexcxLZ0i/Nzk9Eg= +github.com/aws/aws-sdk-go-v2 v1.17.7/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= +github.com/aws/aws-sdk-go-v2/config v1.18.19 h1:AqFK6zFNtq4i1EYu+eC7lcKHYnZagMn6SW171la0bGw= +github.com/aws/aws-sdk-go-v2/config v1.18.19/go.mod h1:XvTmGMY8d52ougvakOv1RpiTLPz9dlG/OQHsKU/cMmY= +github.com/aws/aws-sdk-go-v2/credentials v1.13.18 h1:EQMdtHwz0ILTW1hoP+EwuWhwCG1hD6l3+RWFQABET4c= +github.com/aws/aws-sdk-go-v2/credentials v1.13.18/go.mod h1:vnwlwjIe+3XJPBYKu1et30ZPABG3VaXJYr8ryohpIyM= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.1 h1:gt57MN3liKiyGopcqgNzJb2+d9MJaKT/q1OksHNXVE4= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.1/go.mod h1:lfUx8puBRdM5lVVMQlwt2v+ofiG/X6Ms+dy0UkG/kXw= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.31 h1:sJLYcS+eZn5EeNINGHSCRAwUJMFVqklwkH36Vbyai7M= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.31/go.mod h1:QT0BqUvX1Bh2ABdTGnjqEjvjzrCfIniM9Sc8zn9Yndo= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.25 h1:1mnRASEKnkqsntcxHaysxwgVoUUp5dkiB+l3llKnqyg= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.25/go.mod h1:zBHOPwhBc3FlQjQJE/D3IfPWiWaQmT06Vq9aNukDo0k= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.32 h1:p5luUImdIqywn6JpQsW3tq5GNOxKmOnEpybzPx+d1lk= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.32/go.mod h1:XGhIBZDEgfqmFIugclZ6FU7v75nHhBDtzuB4xB/tEi4= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.25 h1:5LHn8JQ0qvjD9L9JhMtylnkcw7j05GDZqM9Oin6hpr0= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.25/go.mod h1:/95IA+0lMnzW6XzqYJRpjjsAbKEORVeO0anQqjd2CNU= +github.com/aws/aws-sdk-go-v2/service/kms v1.20.8 h1:R5f4VOFi3ScTe7TtePyxLqEhNqTJIAxL57MzrXFNs6I= +github.com/aws/aws-sdk-go-v2/service/kms v1.20.8/go.mod h1:OtP3pBOgmJM+acQyQcQXtQHets3yJoVuanCx2T5M7v4= +github.com/aws/aws-sdk-go-v2/service/sso v1.12.6 h1:5V7DWLBd7wTELVz5bPpwzYy/sikk0gsgZfj40X+l5OI= +github.com/aws/aws-sdk-go-v2/service/sso v1.12.6/go.mod h1:Y1VOmit/Fn6Tz1uFAeCO6Q7M2fmfXSCLeL5INVYsLuY= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.6 h1:B8cauxOH1W1v7rd8RdI/MWnoR4Ze0wIHWrb90qczxj4= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.6/go.mod h1:Lh/bc9XUf8CfOY6Jp5aIkQtN+j1mc+nExc+KXj9jx2s= +github.com/aws/aws-sdk-go-v2/service/sts v1.18.7 h1:bWNgNdRko2x6gqa0blfATqAZKZokPIeM1vfmQt2pnvM= +github.com/aws/aws-sdk-go-v2/service/sts v1.18.7/go.mod h1:JuTnSoeePXmMVe9G8NcjjwgOKEfZ4cOjMuT2IBT/2eI= +github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8= +github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= github.com/axw/gocov v1.0.0 h1:YsqYR66hUmilVr23tu8USgnJIJvnwh3n7j5zRn7x4LU= github.com/axw/gocov v1.0.0/go.mod h1:LvQpEYiwwIb2nYkXY2fDWhg9/AsYqkhmrCshjlUJECE= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= @@ 
-173,7 +197,6 @@ github.com/go-playground/validator/v10 v10.10.0 h1:I7mrTYv78z8k8VXa/qJlOlEXn/nBh github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos= github.com/go-resty/resty/v2 v2.6.0 h1:joIR5PNLM2EFqqESUjCMGXrWmXNHEU9CEiK813oKYS4= github.com/go-resty/resty/v2 v2.6.0/go.mod h1:PwvJS6hvaPkjtjNg9ph+VrSD92bi5Zq73w/BIH7cC3Q= -github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.7.0 h1:ueSltNNllEqE3qcWBTD0iQd3IpL/6U+mJxLkazJ7YPc= github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= @@ -228,6 +251,7 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -280,9 +304,7 @@ github.com/jinzhu/now v1.1.2/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/ github.com/jinzhu/now v1.1.4/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= -github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= -github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/joho/godotenv v1.4.0 h1:3l4+N6zfMWnkbPEXKng2o2/MR5mSwTrBih4ZEkkz1lg= github.com/joho/godotenv v1.4.0/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= @@ -652,7 +674,6 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR golang.org/x/net v0.0.0-20190611141213-3f473d35a33a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= diff --git a/pkg/encryption/kms.go b/pkg/encryption/kms.go index 95df9543c193..3e70b2deeb52 100644 --- a/pkg/encryption/kms.go +++ b/pkg/encryption/kms.go @@ -15,14 +15,13 @@ package encryption import ( + "context" "os" - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/aws/credentials" - 
"github.com/aws/aws-sdk-go/aws/credentials/stscreds" - "github.com/aws/aws-sdk-go/aws/session" - "github.com/aws/aws-sdk-go/service/kms" - "github.com/aws/aws-sdk-go/service/sts" + sdkconfig "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials/stscreds" + "github.com/aws/aws-sdk-go-v2/service/kms" + "github.com/aws/aws-sdk-go-v2/service/sts" "github.com/pingcap/kvproto/pkg/encryptionpb" "github.com/tikv/pd/pkg/errs" ) @@ -48,24 +47,40 @@ func newMasterKeyFromKMS( if config.Vendor != kmsVendorAWS { return nil, errs.ErrEncryptionKMS.GenWithStack("unsupported KMS vendor: %s", config.Vendor) } - credentials, err := newAwsCredentials() - if err != nil { - return nil, err - } - session, err := session.NewSession(&aws.Config{ - Credentials: credentials, - Region: &config.Region, - Endpoint: &config.Endpoint, - }) + + cfg, err := sdkconfig.LoadDefaultConfig(context.TODO(), + sdkconfig.WithRegion(config.Region), + ) if err != nil { return nil, errs.ErrEncryptionKMS.Wrap(err).GenWithStack( - "fail to create AWS session to access KMS CMK") + "fail to load default config") } - client := kms.New(session) + + // Credentials from K8S IAM role. + roleArn := os.Getenv(envAwsRoleArn) + tokenFile := os.Getenv(envAwsWebIdentityTokenFile) + sessionName := os.Getenv(envAwsRoleSessionName) + optFn := func(options *kms.Options) {} + // Session name is optional. + if roleArn != "" && tokenFile != "" { + client := sts.NewFromConfig(cfg) + webIdentityRoleProvider := stscreds.NewWebIdentityRoleProvider( + client, + roleArn, + stscreds.IdentityTokenFile(tokenFile), + func(o *stscreds.WebIdentityRoleOptions) { + o.RoleSessionName = sessionName + }, + ) + optFn = func(options *kms.Options) { + options.Credentials = webIdentityRoleProvider + } + } + client := kms.NewFromConfig(cfg, optFn) if len(ciphertextKey) == 0 { - numberOfBytes := int64(masterKeyLength) + numberOfBytes := int32(masterKeyLength) // Create a new data key. - output, err := client.GenerateDataKey(&kms.GenerateDataKeyInput{ + output, err := client.GenerateDataKey(context.Background(), &kms.GenerateDataKeyInput{ KeyId: &config.KeyId, NumberOfBytes: &numberOfBytes, }) @@ -84,7 +99,7 @@ func newMasterKeyFromKMS( } } else { // Decrypt existing data key. - output, err := client.Decrypt(&kms.DecryptInput{ + output, err := client.Decrypt(context.Background(), &kms.DecryptInput{ KeyId: &config.KeyId, CiphertextBlob: ciphertextKey, }) @@ -104,36 +119,3 @@ func newMasterKeyFromKMS( } return } - -func newAwsCredentials() (*credentials.Credentials, error) { - var providers []credentials.Provider - - // Credentials from K8S IAM role. - roleArn := os.Getenv(envAwsRoleArn) - tokenFile := os.Getenv(envAwsWebIdentityTokenFile) - sessionName := os.Getenv(envAwsRoleSessionName) - // Session name is optional. - if roleArn != "" && tokenFile != "" { - session, err := session.NewSession() - if err != nil { - return nil, errs.ErrEncryptionKMS.Wrap(err).GenWithStack( - "fail to create AWS session to create a WebIdentityRoleProvider") - } - webIdentityProvider := stscreds.NewWebIdentityRoleProvider( - sts.New(session), roleArn, sessionName, tokenFile) - providers = append(providers, webIdentityProvider) - } - - providers = append(providers, - // Credentials from AWS environment variables. - &credentials.EnvProvider{}, - // Credentials from default AWS credentials file. 
- &credentials.SharedCredentialsProvider{ - Filename: "", - Profile: "", - }, - ) - - credentials := credentials.NewChainCredentials(providers) - return credentials, nil -} diff --git a/tests/integrations/client/go.mod b/tests/integrations/client/go.mod index cc220d4bd21d..d2019d25f91b 100644 --- a/tests/integrations/client/go.mod +++ b/tests/integrations/client/go.mod @@ -33,7 +33,19 @@ require ( github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect github.com/VividCortex/mysqlerr v1.0.0 // indirect github.com/Xeoncross/go-aesctr-with-hmac v0.0.0-20200623134604-12b17a7ff502 // indirect - github.com/aws/aws-sdk-go v1.35.3 // indirect + github.com/aws/aws-sdk-go-v2 v1.17.7 // indirect + github.com/aws/aws-sdk-go-v2/config v1.18.19 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.13.18 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.1 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.31 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.25 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.3.32 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.25 // indirect + github.com/aws/aws-sdk-go-v2/service/kms v1.20.8 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.12.6 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.6 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.18.7 // indirect + github.com/aws/smithy-go v1.13.5 // indirect github.com/benbjohnson/clock v1.1.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bitly/go-simplejson v0.5.0 // indirect @@ -83,7 +95,6 @@ require ( github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect - github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/joho/godotenv v1.4.0 // indirect github.com/jonboulle/clockwork v0.2.2 // indirect github.com/joomcode/errorx v1.0.1 // indirect diff --git a/tests/integrations/client/go.sum b/tests/integrations/client/go.sum index 7d9967689c48..d6d55c268152 100644 --- a/tests/integrations/client/go.sum +++ b/tests/integrations/client/go.sum @@ -30,8 +30,32 @@ github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kd github.com/appleboy/gofight/v2 v2.1.2 h1:VOy3jow4vIK8BRQJoC/I9muxyYlJ2yb9ht2hZoS3rf4= github.com/appleboy/gofight/v2 v2.1.2/go.mod h1:frW+U1QZEdDgixycTj4CygQ48yLTUhplt43+Wczp3rw= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= -github.com/aws/aws-sdk-go v1.35.3 h1:r0puXncSaAfRt7Btml2swUo74Kao+vKhO3VLjwDjK54= -github.com/aws/aws-sdk-go v1.35.3/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/+8rV9s48= +github.com/aws/aws-sdk-go-v2 v1.17.7 h1:CLSjnhJSTSogvqUGhIC6LqFKATMRexcxLZ0i/Nzk9Eg= +github.com/aws/aws-sdk-go-v2 v1.17.7/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= +github.com/aws/aws-sdk-go-v2/config v1.18.19 h1:AqFK6zFNtq4i1EYu+eC7lcKHYnZagMn6SW171la0bGw= +github.com/aws/aws-sdk-go-v2/config v1.18.19/go.mod h1:XvTmGMY8d52ougvakOv1RpiTLPz9dlG/OQHsKU/cMmY= +github.com/aws/aws-sdk-go-v2/credentials v1.13.18 h1:EQMdtHwz0ILTW1hoP+EwuWhwCG1hD6l3+RWFQABET4c= +github.com/aws/aws-sdk-go-v2/credentials v1.13.18/go.mod h1:vnwlwjIe+3XJPBYKu1et30ZPABG3VaXJYr8ryohpIyM= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.1 h1:gt57MN3liKiyGopcqgNzJb2+d9MJaKT/q1OksHNXVE4= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.1/go.mod 
h1:lfUx8puBRdM5lVVMQlwt2v+ofiG/X6Ms+dy0UkG/kXw= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.31 h1:sJLYcS+eZn5EeNINGHSCRAwUJMFVqklwkH36Vbyai7M= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.31/go.mod h1:QT0BqUvX1Bh2ABdTGnjqEjvjzrCfIniM9Sc8zn9Yndo= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.25 h1:1mnRASEKnkqsntcxHaysxwgVoUUp5dkiB+l3llKnqyg= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.25/go.mod h1:zBHOPwhBc3FlQjQJE/D3IfPWiWaQmT06Vq9aNukDo0k= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.32 h1:p5luUImdIqywn6JpQsW3tq5GNOxKmOnEpybzPx+d1lk= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.32/go.mod h1:XGhIBZDEgfqmFIugclZ6FU7v75nHhBDtzuB4xB/tEi4= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.25 h1:5LHn8JQ0qvjD9L9JhMtylnkcw7j05GDZqM9Oin6hpr0= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.25/go.mod h1:/95IA+0lMnzW6XzqYJRpjjsAbKEORVeO0anQqjd2CNU= +github.com/aws/aws-sdk-go-v2/service/kms v1.20.8 h1:R5f4VOFi3ScTe7TtePyxLqEhNqTJIAxL57MzrXFNs6I= +github.com/aws/aws-sdk-go-v2/service/kms v1.20.8/go.mod h1:OtP3pBOgmJM+acQyQcQXtQHets3yJoVuanCx2T5M7v4= +github.com/aws/aws-sdk-go-v2/service/sso v1.12.6 h1:5V7DWLBd7wTELVz5bPpwzYy/sikk0gsgZfj40X+l5OI= +github.com/aws/aws-sdk-go-v2/service/sso v1.12.6/go.mod h1:Y1VOmit/Fn6Tz1uFAeCO6Q7M2fmfXSCLeL5INVYsLuY= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.6 h1:B8cauxOH1W1v7rd8RdI/MWnoR4Ze0wIHWrb90qczxj4= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.6/go.mod h1:Lh/bc9XUf8CfOY6Jp5aIkQtN+j1mc+nExc+KXj9jx2s= +github.com/aws/aws-sdk-go-v2/service/sts v1.18.7 h1:bWNgNdRko2x6gqa0blfATqAZKZokPIeM1vfmQt2pnvM= +github.com/aws/aws-sdk-go-v2/service/sts v1.18.7/go.mod h1:JuTnSoeePXmMVe9G8NcjjwgOKEfZ4cOjMuT2IBT/2eI= +github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8= +github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= @@ -143,7 +167,6 @@ github.com/go-playground/validator/v10 v10.10.0 h1:I7mrTYv78z8k8VXa/qJlOlEXn/nBh github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos= github.com/go-resty/resty/v2 v2.6.0 h1:joIR5PNLM2EFqqESUjCMGXrWmXNHEU9CEiK813oKYS4= github.com/go-resty/resty/v2 v2.6.0/go.mod h1:PwvJS6hvaPkjtjNg9ph+VrSD92bi5Zq73w/BIH7cC3Q= -github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.7.0 h1:ueSltNNllEqE3qcWBTD0iQd3IpL/6U+mJxLkazJ7YPc= github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= @@ -198,6 +221,7 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod 
h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -248,9 +272,7 @@ github.com/jinzhu/now v1.1.2/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/ github.com/jinzhu/now v1.1.4/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= -github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= -github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/joho/godotenv v1.4.0 h1:3l4+N6zfMWnkbPEXKng2o2/MR5mSwTrBih4ZEkkz1lg= github.com/joho/godotenv v1.4.0/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= @@ -601,7 +623,6 @@ golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= diff --git a/tests/integrations/mcs/go.mod b/tests/integrations/mcs/go.mod index e582d69942e8..bfbe31a98788 100644 --- a/tests/integrations/mcs/go.mod +++ b/tests/integrations/mcs/go.mod @@ -32,7 +32,19 @@ require ( github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect github.com/VividCortex/mysqlerr v1.0.0 // indirect github.com/Xeoncross/go-aesctr-with-hmac v0.0.0-20200623134604-12b17a7ff502 // indirect - github.com/aws/aws-sdk-go v1.35.3 // indirect + github.com/aws/aws-sdk-go-v2 v1.17.7 // indirect + github.com/aws/aws-sdk-go-v2/config v1.18.19 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.13.18 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.1 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.31 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.25 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.3.32 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.25 // indirect + github.com/aws/aws-sdk-go-v2/service/kms v1.20.8 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.12.6 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.6 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.18.7 // indirect + github.com/aws/smithy-go v1.13.5 // indirect github.com/benbjohnson/clock v1.1.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bitly/go-simplejson v0.5.0 // indirect @@ -83,7 +95,6 @@ require ( github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect - 
github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/joho/godotenv v1.4.0 // indirect github.com/jonboulle/clockwork v0.2.2 // indirect github.com/joomcode/errorx v1.0.1 // indirect diff --git a/tests/integrations/mcs/go.sum b/tests/integrations/mcs/go.sum index c47e1f082ffe..996cd2f0d7a1 100644 --- a/tests/integrations/mcs/go.sum +++ b/tests/integrations/mcs/go.sum @@ -30,8 +30,32 @@ github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kd github.com/appleboy/gofight/v2 v2.1.2 h1:VOy3jow4vIK8BRQJoC/I9muxyYlJ2yb9ht2hZoS3rf4= github.com/appleboy/gofight/v2 v2.1.2/go.mod h1:frW+U1QZEdDgixycTj4CygQ48yLTUhplt43+Wczp3rw= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= -github.com/aws/aws-sdk-go v1.35.3 h1:r0puXncSaAfRt7Btml2swUo74Kao+vKhO3VLjwDjK54= -github.com/aws/aws-sdk-go v1.35.3/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/+8rV9s48= +github.com/aws/aws-sdk-go-v2 v1.17.7 h1:CLSjnhJSTSogvqUGhIC6LqFKATMRexcxLZ0i/Nzk9Eg= +github.com/aws/aws-sdk-go-v2 v1.17.7/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= +github.com/aws/aws-sdk-go-v2/config v1.18.19 h1:AqFK6zFNtq4i1EYu+eC7lcKHYnZagMn6SW171la0bGw= +github.com/aws/aws-sdk-go-v2/config v1.18.19/go.mod h1:XvTmGMY8d52ougvakOv1RpiTLPz9dlG/OQHsKU/cMmY= +github.com/aws/aws-sdk-go-v2/credentials v1.13.18 h1:EQMdtHwz0ILTW1hoP+EwuWhwCG1hD6l3+RWFQABET4c= +github.com/aws/aws-sdk-go-v2/credentials v1.13.18/go.mod h1:vnwlwjIe+3XJPBYKu1et30ZPABG3VaXJYr8ryohpIyM= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.1 h1:gt57MN3liKiyGopcqgNzJb2+d9MJaKT/q1OksHNXVE4= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.1/go.mod h1:lfUx8puBRdM5lVVMQlwt2v+ofiG/X6Ms+dy0UkG/kXw= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.31 h1:sJLYcS+eZn5EeNINGHSCRAwUJMFVqklwkH36Vbyai7M= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.31/go.mod h1:QT0BqUvX1Bh2ABdTGnjqEjvjzrCfIniM9Sc8zn9Yndo= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.25 h1:1mnRASEKnkqsntcxHaysxwgVoUUp5dkiB+l3llKnqyg= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.25/go.mod h1:zBHOPwhBc3FlQjQJE/D3IfPWiWaQmT06Vq9aNukDo0k= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.32 h1:p5luUImdIqywn6JpQsW3tq5GNOxKmOnEpybzPx+d1lk= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.32/go.mod h1:XGhIBZDEgfqmFIugclZ6FU7v75nHhBDtzuB4xB/tEi4= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.25 h1:5LHn8JQ0qvjD9L9JhMtylnkcw7j05GDZqM9Oin6hpr0= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.25/go.mod h1:/95IA+0lMnzW6XzqYJRpjjsAbKEORVeO0anQqjd2CNU= +github.com/aws/aws-sdk-go-v2/service/kms v1.20.8 h1:R5f4VOFi3ScTe7TtePyxLqEhNqTJIAxL57MzrXFNs6I= +github.com/aws/aws-sdk-go-v2/service/kms v1.20.8/go.mod h1:OtP3pBOgmJM+acQyQcQXtQHets3yJoVuanCx2T5M7v4= +github.com/aws/aws-sdk-go-v2/service/sso v1.12.6 h1:5V7DWLBd7wTELVz5bPpwzYy/sikk0gsgZfj40X+l5OI= +github.com/aws/aws-sdk-go-v2/service/sso v1.12.6/go.mod h1:Y1VOmit/Fn6Tz1uFAeCO6Q7M2fmfXSCLeL5INVYsLuY= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.6 h1:B8cauxOH1W1v7rd8RdI/MWnoR4Ze0wIHWrb90qczxj4= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.6/go.mod h1:Lh/bc9XUf8CfOY6Jp5aIkQtN+j1mc+nExc+KXj9jx2s= +github.com/aws/aws-sdk-go-v2/service/sts v1.18.7 h1:bWNgNdRko2x6gqa0blfATqAZKZokPIeM1vfmQt2pnvM= +github.com/aws/aws-sdk-go-v2/service/sts v1.18.7/go.mod h1:JuTnSoeePXmMVe9G8NcjjwgOKEfZ4cOjMuT2IBT/2eI= +github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8= 
+github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= @@ -145,7 +169,6 @@ github.com/go-playground/validator/v10 v10.10.0 h1:I7mrTYv78z8k8VXa/qJlOlEXn/nBh github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos= github.com/go-resty/resty/v2 v2.6.0 h1:joIR5PNLM2EFqqESUjCMGXrWmXNHEU9CEiK813oKYS4= github.com/go-resty/resty/v2 v2.6.0/go.mod h1:PwvJS6hvaPkjtjNg9ph+VrSD92bi5Zq73w/BIH7cC3Q= -github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.7.0 h1:ueSltNNllEqE3qcWBTD0iQd3IpL/6U+mJxLkazJ7YPc= github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= @@ -200,6 +223,7 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -250,9 +274,7 @@ github.com/jinzhu/now v1.1.2/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/ github.com/jinzhu/now v1.1.4/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= -github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= -github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/joho/godotenv v1.4.0 h1:3l4+N6zfMWnkbPEXKng2o2/MR5mSwTrBih4ZEkkz1lg= github.com/joho/godotenv v1.4.0/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= @@ -602,7 +624,6 @@ golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod 
h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= diff --git a/tests/integrations/tso/go.mod b/tests/integrations/tso/go.mod index b35bddd5e5c6..4e831ba39198 100644 --- a/tests/integrations/tso/go.mod +++ b/tests/integrations/tso/go.mod @@ -31,7 +31,19 @@ require ( github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect github.com/VividCortex/mysqlerr v1.0.0 // indirect github.com/Xeoncross/go-aesctr-with-hmac v0.0.0-20200623134604-12b17a7ff502 // indirect - github.com/aws/aws-sdk-go v1.35.3 // indirect + github.com/aws/aws-sdk-go-v2 v1.17.7 // indirect + github.com/aws/aws-sdk-go-v2/config v1.18.19 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.13.18 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.1 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.31 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.25 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.3.32 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.25 // indirect + github.com/aws/aws-sdk-go-v2/service/kms v1.20.8 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.12.6 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.6 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.18.7 // indirect + github.com/aws/smithy-go v1.13.5 // indirect github.com/benbjohnson/clock v1.1.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bitly/go-simplejson v0.5.0 // indirect @@ -81,7 +93,6 @@ require ( github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect - github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/joho/godotenv v1.4.0 // indirect github.com/jonboulle/clockwork v0.2.2 // indirect github.com/joomcode/errorx v1.0.1 // indirect diff --git a/tests/integrations/tso/go.sum b/tests/integrations/tso/go.sum index f8b9898a4c6d..5b2f409ebcc7 100644 --- a/tests/integrations/tso/go.sum +++ b/tests/integrations/tso/go.sum @@ -30,8 +30,32 @@ github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kd github.com/appleboy/gofight/v2 v2.1.2 h1:VOy3jow4vIK8BRQJoC/I9muxyYlJ2yb9ht2hZoS3rf4= github.com/appleboy/gofight/v2 v2.1.2/go.mod h1:frW+U1QZEdDgixycTj4CygQ48yLTUhplt43+Wczp3rw= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= -github.com/aws/aws-sdk-go v1.35.3 h1:r0puXncSaAfRt7Btml2swUo74Kao+vKhO3VLjwDjK54= -github.com/aws/aws-sdk-go v1.35.3/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/+8rV9s48= +github.com/aws/aws-sdk-go-v2 v1.17.7 h1:CLSjnhJSTSogvqUGhIC6LqFKATMRexcxLZ0i/Nzk9Eg= +github.com/aws/aws-sdk-go-v2 v1.17.7/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= +github.com/aws/aws-sdk-go-v2/config v1.18.19 h1:AqFK6zFNtq4i1EYu+eC7lcKHYnZagMn6SW171la0bGw= +github.com/aws/aws-sdk-go-v2/config v1.18.19/go.mod h1:XvTmGMY8d52ougvakOv1RpiTLPz9dlG/OQHsKU/cMmY= +github.com/aws/aws-sdk-go-v2/credentials v1.13.18 h1:EQMdtHwz0ILTW1hoP+EwuWhwCG1hD6l3+RWFQABET4c= +github.com/aws/aws-sdk-go-v2/credentials v1.13.18/go.mod h1:vnwlwjIe+3XJPBYKu1et30ZPABG3VaXJYr8ryohpIyM= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.1 h1:gt57MN3liKiyGopcqgNzJb2+d9MJaKT/q1OksHNXVE4= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.1/go.mod h1:lfUx8puBRdM5lVVMQlwt2v+ofiG/X6Ms+dy0UkG/kXw= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.31 h1:sJLYcS+eZn5EeNINGHSCRAwUJMFVqklwkH36Vbyai7M= 
+github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.31/go.mod h1:QT0BqUvX1Bh2ABdTGnjqEjvjzrCfIniM9Sc8zn9Yndo= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.25 h1:1mnRASEKnkqsntcxHaysxwgVoUUp5dkiB+l3llKnqyg= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.25/go.mod h1:zBHOPwhBc3FlQjQJE/D3IfPWiWaQmT06Vq9aNukDo0k= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.32 h1:p5luUImdIqywn6JpQsW3tq5GNOxKmOnEpybzPx+d1lk= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.32/go.mod h1:XGhIBZDEgfqmFIugclZ6FU7v75nHhBDtzuB4xB/tEi4= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.25 h1:5LHn8JQ0qvjD9L9JhMtylnkcw7j05GDZqM9Oin6hpr0= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.25/go.mod h1:/95IA+0lMnzW6XzqYJRpjjsAbKEORVeO0anQqjd2CNU= +github.com/aws/aws-sdk-go-v2/service/kms v1.20.8 h1:R5f4VOFi3ScTe7TtePyxLqEhNqTJIAxL57MzrXFNs6I= +github.com/aws/aws-sdk-go-v2/service/kms v1.20.8/go.mod h1:OtP3pBOgmJM+acQyQcQXtQHets3yJoVuanCx2T5M7v4= +github.com/aws/aws-sdk-go-v2/service/sso v1.12.6 h1:5V7DWLBd7wTELVz5bPpwzYy/sikk0gsgZfj40X+l5OI= +github.com/aws/aws-sdk-go-v2/service/sso v1.12.6/go.mod h1:Y1VOmit/Fn6Tz1uFAeCO6Q7M2fmfXSCLeL5INVYsLuY= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.6 h1:B8cauxOH1W1v7rd8RdI/MWnoR4Ze0wIHWrb90qczxj4= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.6/go.mod h1:Lh/bc9XUf8CfOY6Jp5aIkQtN+j1mc+nExc+KXj9jx2s= +github.com/aws/aws-sdk-go-v2/service/sts v1.18.7 h1:bWNgNdRko2x6gqa0blfATqAZKZokPIeM1vfmQt2pnvM= +github.com/aws/aws-sdk-go-v2/service/sts v1.18.7/go.mod h1:JuTnSoeePXmMVe9G8NcjjwgOKEfZ4cOjMuT2IBT/2eI= +github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8= +github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= @@ -143,7 +167,6 @@ github.com/go-playground/validator/v10 v10.10.0 h1:I7mrTYv78z8k8VXa/qJlOlEXn/nBh github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos= github.com/go-resty/resty/v2 v2.6.0 h1:joIR5PNLM2EFqqESUjCMGXrWmXNHEU9CEiK813oKYS4= github.com/go-resty/resty/v2 v2.6.0/go.mod h1:PwvJS6hvaPkjtjNg9ph+VrSD92bi5Zq73w/BIH7cC3Q= -github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.7.0 h1:ueSltNNllEqE3qcWBTD0iQd3IpL/6U+mJxLkazJ7YPc= github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= @@ -198,6 +221,7 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -248,9 +272,7 @@ github.com/jinzhu/now 
v1.1.2/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/ github.com/jinzhu/now v1.1.4/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= -github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= -github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/joho/godotenv v1.4.0 h1:3l4+N6zfMWnkbPEXKng2o2/MR5mSwTrBih4ZEkkz1lg= github.com/joho/godotenv v1.4.0/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= @@ -601,7 +623,6 @@ golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= From 39558b311065f27e69bf42c35393764ebbf4633d Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Tue, 4 Apr 2023 13:08:57 +0800 Subject: [PATCH 06/16] cgroup: fix the path problem due to special container name (#6267) close tikv/pd#6266 Signed-off-by: Ryan Leung Co-authored-by: Ti Chi Robot --- pkg/cgroup/cgroup.go | 11 ++++- pkg/cgroup/cgroup_mock_test.go | 80 ++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/pkg/cgroup/cgroup.go b/pkg/cgroup/cgroup.go index 316383c6ad50..72e718611820 100644 --- a/pkg/cgroup/cgroup.go +++ b/pkg/cgroup/cgroup.go @@ -174,7 +174,7 @@ func detectControlPath(cgroupFilePath string, controller string) (string, error) var unifiedPathIfFound string for scanner.Scan() { fields := bytes.Split(scanner.Bytes(), []byte{':'}) - if len(fields) != 3 { + if len(fields) < 3 { // The lines should always have three fields, there's something fishy here. continue } @@ -186,7 +186,14 @@ func detectControlPath(cgroupFilePath string, controller string) (string, error) if f0 == "0" && f1 == "" { unifiedPathIfFound = string(fields[2]) } else if f1 == controller { - return string(fields[2]), nil + var result []byte + // In some case, the cgroup path contains `:`. We need to join them back. 
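
To make the comment above concrete: container runtimes such as cri-containerd produce cgroup paths that themselves contain ':', so splitting the /proc/self/cgroup line on ':' yields more than three fields. A small hypothetical sketch of the split-and-rejoin (the literal is a shortened variant of the mock-test fixtures added below):

package main

import (
	"bytes"
	"fmt"
)

func main() {
	// Hierarchy ID, controller, and a path that itself contains ':'.
	line := []byte("5:memory:/kubepods.slice:cri-containerd:0ac322a0")
	fields := bytes.Split(line, []byte{':'}) // 5 fields, not 3
	// Rejoin everything after the controller to recover the full path.
	path := bytes.Join(fields[2:], []byte(":"))
	fmt.Println(string(path)) // /kubepods.slice:cri-containerd:0ac322a0
}
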
+ if len(fields) > 3 { + result = bytes.Join(fields[2:], []byte(":")) + } else { + result = fields[2] + } + return string(result), nil } } diff --git a/pkg/cgroup/cgroup_mock_test.go b/pkg/cgroup/cgroup_mock_test.go index becedaa9c963..4d0f4c9f5b1c 100644 --- a/pkg/cgroup/cgroup_mock_test.go +++ b/pkg/cgroup/cgroup_mock_test.go @@ -168,6 +168,14 @@ func TestCgroupsGetMemoryInactiveFileUsage(t *testing.T) { }, value: 1363746816, }, + { + paths: map[string]string{ + "/proc/self/cgroup": v1CgroupWithEccentricMemoryController, + "/proc/self/mountinfo": v1MountsWithEccentricMemController, + "/sys/fs/cgroup/memory/memory.stat": v1MemoryStat, + }, + value: 1363746816, + }, { paths: map[string]string{ "/proc/self/cgroup": v2CgroupWithMemoryController, @@ -280,6 +288,14 @@ func TestCgroupsGetMemoryLimit(t *testing.T) { }, limit: 9223372036854775807, }, + { + paths: map[string]string{ + "/proc/self/cgroup": v1CgroupWithEccentricMemoryController, + "/proc/self/mountinfo": v1MountsWithEccentricMemController, + "/sys/fs/cgroup/memory/memory.stat": v1MemoryStat, + }, + limit: 2936016896, + }, } { dir := createFiles(t, tc.paths) limit, err := getCgroupMemLimit(dir) @@ -289,6 +305,70 @@ func TestCgroupsGetMemoryLimit(t *testing.T) { } } +//nolint:gosec +const ( + v1CgroupWithEccentricMemoryController = ` +13:devices:/system.slice/containerd.service/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 +12:freezer:/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 +11:cpu,cpuacct:/system.slice/containerd.service/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 +10:perf_event:/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 +9:rdma:/ +8:pids:/system.slice/containerd.service/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 +7:blkio:/system.slice/containerd.service/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 +6:hugetlb:/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 +5:memory:/system.slice/containerd.service/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 +4:cpuset:/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 +3:files:/ +2:net_cls,net_prio:/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 +1:name=systemd:/system.slice/containerd.service/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 +0::/ +` + v1MountsWithEccentricMemController = ` +1421 1021 0:133 / / rw,relatime master:412 - overlay overlay 
rw,lowerdir=/apps/data/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/1285288/fs:/apps/data/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/1285287/fs:/apps/data/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/1285286/fs:/apps/data/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/1285285/fs:/apps/data/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/1283928/fs,upperdir=/apps/data/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/1287880/fs,workdir=/apps/data/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/1287880/work +1442 1421 0:136 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw +1443 1421 0:137 / /dev rw,nosuid - tmpfs tmpfs rw,size=65536k,mode=755 +1444 1443 0:138 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666 +2303 1443 0:119 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw +2304 1421 0:129 / /sys ro,nosuid,nodev,noexec,relatime - sysfs sysfs ro +2305 2304 0:139 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755 +2306 2305 0:25 /system.slice/containerd.service/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 /sys/fs/cgroup/systemd ro,nosuid,nodev,noexec,relatime master:5 - cgroup cgroup rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd +2307 2305 0:28 /kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 /sys/fs/cgroup/net_cls,net_prio ro,nosuid,nodev,noexec,relatime master:6 - cgroup cgroup rw,net_cls,net_prio +2308 2305 0:29 / /sys/fs/cgroup/files ro,nosuid,nodev,noexec,relatime master:7 - cgroup cgroup rw,files +2309 2305 0:30 /kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 /sys/fs/cgroup/cpuset ro,nosuid,nodev,noexec,relatime master:8 - cgroup cgroup rw,cpuset +2310 2305 0:31 /system.slice/containerd.service/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime master:9 - cgroup cgroup rw,memory +2311 2305 0:32 /kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 /sys/fs/cgroup/hugetlb ro,nosuid,nodev,noexec,relatime master:10 - cgroup cgroup rw,hugetlb +2312 2305 0:33 /system.slice/containerd.service/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 /sys/fs/cgroup/blkio ro,nosuid,nodev,noexec,relatime master:11 - cgroup cgroup rw,blkio +2313 2305 0:34 /system.slice/containerd.service/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 /sys/fs/cgroup/pids ro,nosuid,nodev,noexec,relatime master:12 - cgroup cgroup rw,pids +2314 2305 0:35 / /sys/fs/cgroup/rdma ro,nosuid,nodev,noexec,relatime master:13 - cgroup cgroup rw,rdma +2315 2305 0:36 /kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 /sys/fs/cgroup/perf_event ro,nosuid,nodev,noexec,relatime master:14 - cgroup cgroup rw,perf_event +2316 2305 0:37 
/system.slice/containerd.service/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 /sys/fs/cgroup/cpu,cpuacct ro,nosuid,nodev,noexec,relatime master:15 - cgroup cgroup rw,cpu,cpuacct +2317 2305 0:38 /kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 /sys/fs/cgroup/freezer ro,nosuid,nodev,noexec,relatime master:16 - cgroup cgroup rw,freezer +2318 2305 0:39 /system.slice/containerd.service/kubepods-burstable-pod94598a35_ad1e_4a00_91b1_1db37e8f52f6.slice:cri-containerd:0ac322a00cf64a4d58144a1974b993d91537f3ceec12928b10d881af6be8bbb2 /sys/fs/cgroup/devices ro,nosuid,nodev,noexec,relatime master:17 - cgroup cgroup rw,devices +2319 1421 0:101 / /etc/podinfo ro,relatime - tmpfs tmpfs rw +2320 1421 253:3 /data/containerd/io.containerd.grpc.v1.cri/sandboxes/22c18c845c47667097eb8973fd0ec05256be685cd1b1a8b0fe7c748a04401cdb/hostname /etc/hostname rw,relatime - xfs /dev/mapper/vg1-lvm1k8s rw,attr2,inode64,sunit=512,swidth=512,noquota +2321 1421 253:3 /data/kubelet/pods/94598a35-ad1e-4a00-91b1-1db37e8f52f6/volumes/kubernetes.io~configmap/config /etc/tikv ro,relatime - xfs /dev/mapper/vg1-lvm1k8s rw,attr2,inode64,sunit=512,swidth=512,noquota +2322 1443 0:104 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k +2323 1421 253:3 /data/kubelet/pods/94598a35-ad1e-4a00-91b1-1db37e8f52f6/etc-hosts /etc/hosts rw,relatime - xfs /dev/mapper/vg1-lvm1k8s rw,attr2,inode64,sunit=512,swidth=512,noquota +2324 1443 253:3 /data/kubelet/pods/94598a35-ad1e-4a00-91b1-1db37e8f52f6/containers/tikv/0981845c /dev/termination-log rw,relatime - xfs /dev/mapper/vg1-lvm1k8s rw,attr2,inode64,sunit=512,swidth=512,noquota +2325 1421 253:3 /data/containerd/io.containerd.grpc.v1.cri/sandboxes/22c18c845c47667097eb8973fd0ec05256be685cd1b1a8b0fe7c748a04401cdb/resolv.conf /etc/resolv.conf rw,relatime - xfs /dev/mapper/vg1-lvm1k8s rw,attr2,inode64,sunit=512,swidth=512,noquota +2326 1421 253:2 /pv03 /var/lib/tikv rw,relatime - xfs /dev/mapper/vg2-lvm2k8s rw,attr2,inode64,sunit=512,swidth=512,noquota +2327 1421 253:3 /data/kubelet/pods/94598a35-ad1e-4a00-91b1-1db37e8f52f6/volumes/kubernetes.io~configmap/startup-script /usr/local/bin ro,relatime - xfs /dev/mapper/vg1-lvm1k8s rw,attr2,inode64,sunit=512,swidth=512,noquota +2328 1421 0:102 / /run/secrets/kubernetes.io/serviceaccount ro,relatime - tmpfs tmpfs rw +1022 1442 0:136 /bus /proc/bus ro,nosuid,nodev,noexec,relatime - proc proc rw +1034 1442 0:136 /fs /proc/fs ro,nosuid,nodev,noexec,relatime - proc proc rw +1035 1442 0:136 /irq /proc/irq ro,nosuid,nodev,noexec,relatime - proc proc rw +1036 1442 0:136 /sys /proc/sys ro,nosuid,nodev,noexec,relatime - proc proc rw +1037 1442 0:136 /sysrq-trigger /proc/sysrq-trigger ro,nosuid,nodev,noexec,relatime - proc proc rw +1038 1442 0:161 / /proc/acpi ro,relatime - tmpfs tmpfs ro +1039 1442 0:137 /null /proc/kcore rw,nosuid - tmpfs tmpfs rw,size=65536k,mode=755 +1040 1442 0:137 /null /proc/keys rw,nosuid - tmpfs tmpfs rw,size=65536k,mode=755 +1041 1442 0:137 /null /proc/timer_list rw,nosuid - tmpfs tmpfs rw,size=65536k,mode=755 +1042 1442 0:137 /null /proc/sched_debug rw,nosuid - tmpfs tmpfs rw,size=65536k,mode=755 +1043 1442 0:162 / /proc/scsi ro,relatime - tmpfs tmpfs ro +1044 2304 0:163 / /sys/firmware ro,relatime - tmpfs tmpfs ro +` +) + func TestCgroupsGetCPU(t *testing.T) { for _, tc := range []struct { name string From 
19f48a49db3ffeb1f0d7a047992715ae5ec43e1e Mon Sep 17 00:00:00 2001 From: Hu# Date: Tue, 4 Apr 2023 16:52:59 +0800 Subject: [PATCH 07/16] store: ignore the store if the capacity is 0B (#6253) close tikv/pd#6252 ignore the store if the capacity is 0B Signed-off-by: husharp --- pkg/core/store.go | 3 ++- pkg/core/store_test.go | 2 ++ pkg/schedule/checker/checker_controller.go | 2 +- pkg/schedule/filter/filters.go | 4 ++-- pkg/schedule/plan/status.go | 2 +- pkg/statistics/store_collection_test.go | 10 +++++++++- tests/integrations/client/client_test.go | 9 +++++++++ tests/integrations/client/go.mod | 2 +- tests/pdctl/helper.go | 11 ++++++++++- 9 files changed, 37 insertions(+), 8 deletions(-) diff --git a/pkg/core/store.go b/pkg/core/store.go index c1a6410b0bb7..776f4222fe43 100644 --- a/pkg/core/store.go +++ b/pkg/core/store.go @@ -441,7 +441,8 @@ func (s *StoreInfo) IsLowSpace(lowSpaceRatio float64) bool { } // See https://github.com/tikv/pd/issues/3444 and https://github.com/tikv/pd/issues/5391 // TODO: we need find a better way to get the init region number when starting a new cluster. - if s.regionCount < InitClusterRegionThreshold && s.GetAvailable() > initialMinSpace { + // We don't need to consider the store as low space when the capacity is 0. + if s.regionCount < InitClusterRegionThreshold && s.GetAvailable() > initialMinSpace || s.GetCapacity() == 0 { return false } return s.AvailableRatio() < 1-lowSpaceRatio diff --git a/pkg/core/store_test.go b/pkg/core/store_test.go index 33451a22935d..f6f2518c241f 100644 --- a/pkg/core/store_test.go +++ b/pkg/core/store_test.go @@ -141,6 +141,8 @@ func TestLowSpaceRatio(t *testing.T) { re.True(store.IsLowSpace(0.8)) store.rawStats.Available = store.rawStats.Capacity >> 2 re.False(store.IsLowSpace(0.8)) + store.rawStats.Capacity = 0 + re.False(store.IsLowSpace(0.8)) } func TestLowSpaceScoreV2(t *testing.T) { diff --git a/pkg/schedule/checker/checker_controller.go b/pkg/schedule/checker/checker_controller.go index 7d2fda4a8947..9f02ede6ab8f 100644 --- a/pkg/schedule/checker/checker_controller.go +++ b/pkg/schedule/checker/checker_controller.go @@ -72,7 +72,7 @@ func NewController(ctx context.Context, cluster schedule.Cluster, conf config.Co // CheckRegion will check the region and add a new operator if needed. func (c *Controller) CheckRegion(region *core.RegionInfo) []*operator.Operator { - // If PD has restarted, it need to check learners added before and promote them. + // If PD has restarted, it needs to check learners added before and promote them. // Don't check isRaftLearnerEnabled cause it maybe disable learner feature but there are still some learners to promote. opController := c.opController diff --git a/pkg/schedule/filter/filters.go b/pkg/schedule/filter/filters.go index ca8d96a9d32d..c197ba4d2a8a 100644 --- a/pkg/schedule/filter/filters.go +++ b/pkg/schedule/filter/filters.go @@ -132,9 +132,9 @@ type Filter interface { // Scope is used to indicate where the filter will act on. 
Scope() string Type() filterType - // Return plan.Status to show whether be filtered as source + // Source Return plan.Status to show whether be filtered as source Source(conf config.Config, store *core.StoreInfo) *plan.Status - // Return plan.Status to show whether be filtered as target + // Target Return plan.Status to show whether be filtered as target Target(conf config.Config, store *core.StoreInfo) *plan.Status } diff --git a/pkg/schedule/plan/status.go b/pkg/schedule/plan/status.go index 1170e92b32cb..4242b6314939 100644 --- a/pkg/schedule/plan/status.go +++ b/pkg/schedule/plan/status.go @@ -29,7 +29,7 @@ const ( StatusStoreScoreDisallowed = iota + 100 // StatusStoreAlreadyHasPeer represents the store is excluded due to the existed region peer. StatusStoreAlreadyHasPeer - // StatusNotMatchRule represents the placement rule cannot satisfy the requirement. + // StatusStoreNotMatchRule represents the placement rule cannot satisfy the requirement. StatusStoreNotMatchRule ) diff --git a/pkg/statistics/store_collection_test.go b/pkg/statistics/store_collection_test.go index 878ebf656274..2d701dbf1691 100644 --- a/pkg/statistics/store_collection_test.go +++ b/pkg/statistics/store_collection_test.go @@ -18,7 +18,9 @@ import ( "testing" "time" + "github.com/docker/go-units" "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pingcap/kvproto/pkg/pdpb" "github.com/stretchr/testify/require" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/mock/mockconfig" @@ -54,6 +56,12 @@ func TestStoreStatistics(t *testing.T) { stores[3] = store3 store4 := stores[4].Clone(core.SetLastHeartbeatTS(stores[4].GetLastHeartbeatTS().Add(-time.Hour))) stores[4] = store4 + store5 := stores[5].Clone(core.SetStoreStats(&pdpb.StoreStats{ + Capacity: 512 * units.MiB, + Available: 100 * units.MiB, + UsedSize: 0, + })) + stores[5] = store5 storeStats := NewStoreStatisticsMap(opt, nil) for _, store := range stores { storeStats.Observe(store, storesStats) @@ -72,7 +80,7 @@ func TestStoreStatistics(t *testing.T) { re.Equal(0, stats.Unhealthy) re.Equal(0, stats.Disconnect) re.Equal(1, stats.Tombstone) - re.Equal(8, stats.LowSpace) + re.Equal(1, stats.LowSpace) re.Equal(2, stats.LabelCounter["zone:z1"]) re.Equal(2, stats.LabelCounter["zone:z2"]) re.Equal(2, stats.LabelCounter["zone:z3"]) diff --git a/tests/integrations/client/client_test.go b/tests/integrations/client/client_test.go index 48f6b8eaeb53..e6cddc7eb1a8 100644 --- a/tests/integrations/client/client_test.go +++ b/tests/integrations/client/client_test.go @@ -28,6 +28,7 @@ import ( "testing" "time" + "github.com/docker/go-units" "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/meta_storagepb" "github.com/pingcap/kvproto/pkg/metapb" @@ -786,6 +787,14 @@ func (suite *clientTestSuite) SetupSuite() { LastHeartbeat: now, }, }) + + storeInfo := suite.grpcSvr.GetRaftCluster().GetStore(store.GetId()) + newStore := storeInfo.Clone(core.SetStoreStats(&pdpb.StoreStats{ + Capacity: uint64(10 * units.GiB), + UsedSize: uint64(9 * units.GiB), + Available: uint64(1 * units.GiB), + })) + suite.grpcSvr.GetRaftCluster().GetBasicCluster().PutStore(newStore) } cluster.GetStoreConfig().SetRegionBucketEnabled(true) } diff --git a/tests/integrations/client/go.mod b/tests/integrations/client/go.mod index d2019d25f91b..67d1a6725df9 100644 --- a/tests/integrations/client/go.mod +++ b/tests/integrations/client/go.mod @@ -11,6 +11,7 @@ replace ( replace google.golang.org/grpc v1.51.0 => google.golang.org/grpc v1.26.0 require ( + github.com/docker/go-units v0.4.0 
github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 github.com/pingcap/kvproto v0.0.0-20230317010544-b47a4830141f github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 @@ -57,7 +58,6 @@ require ( github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/docker/go-units v0.4.0 // indirect github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4 // indirect github.com/elliotchance/pie/v2 v2.1.0 // indirect github.com/fogleman/gg v1.3.0 // indirect diff --git a/tests/pdctl/helper.go b/tests/pdctl/helper.go index 2b958a6d41f0..67729fa5ce28 100644 --- a/tests/pdctl/helper.go +++ b/tests/pdctl/helper.go @@ -20,6 +20,7 @@ import ( "fmt" "sort" + "github.com/docker/go-units" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/spf13/cobra" @@ -35,7 +36,7 @@ import ( // ExecuteCommand is used for test purpose. func ExecuteCommand(root *cobra.Command, args ...string) (output []byte, err error) { buf := new(bytes.Buffer) - root.SetOutput(buf) + root.SetOut(buf) root.SetArgs(args) err = root.Execute() return buf.Bytes(), err @@ -101,6 +102,14 @@ func MustPutStore(re *require.Assertions, svr *server.Server, store *metapb.Stor Store: store, }) re.NoError(err) + + storeInfo := grpcServer.GetRaftCluster().GetStore(store.GetId()) + newStore := storeInfo.Clone(core.SetStoreStats(&pdpb.StoreStats{ + Capacity: uint64(10 * units.GiB), + UsedSize: uint64(9 * units.GiB), + Available: uint64(1 * units.GiB), + })) + grpcServer.GetRaftCluster().GetBasicCluster().PutStore(newStore) } // MustPutRegion is used for test purpose. From f448b61a5750efeee1272e1ff29f05773cf2a810 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Tue, 4 Apr 2023 17:12:57 +0800 Subject: [PATCH 08/16] config: enable diagnosis by default (#6265) ref tikv/pd#5257 config: enable diagnosis by default Signed-off-by: nolouch Co-authored-by: Ti Chi Robot --- server/api/diagnostic_test.go | 2 +- server/config/config.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/api/diagnostic_test.go b/server/api/diagnostic_test.go index ae23d77a3f91..64e3d589607c 100644 --- a/server/api/diagnostic_test.go +++ b/server/api/diagnostic_test.go @@ -81,7 +81,7 @@ func (suite *diagnosticTestSuite) TestSchedulerDiagnosticAPI() { suite.NoError(err) suite.NoError(tu.ReadGetJSON(re, testDialClient, addr, cfg)) - suite.False(cfg.Schedule.EnableDiagnostic) + suite.True(cfg.Schedule.EnableDiagnostic) ms := map[string]interface{}{ "enable-diagnostic": "true", diff --git a/server/config/config.go b/server/config/config.go index 070d6163266b..420222c2f230 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -698,7 +698,7 @@ const ( defaultMaxMergeRegionSize = 20 defaultSplitMergeInterval = time.Hour defaultSwitchWitnessInterval = time.Hour - defaultEnableDiagnostic = false + defaultEnableDiagnostic = true defaultPatrolRegionInterval = 10 * time.Millisecond defaultMaxStoreDownTime = 30 * time.Minute defaultLeaderScheduleLimit = 4 From da43376badee1063d4c6ca0ce56a39ffef69aeb7 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Tue, 4 Apr 2023 17:52:58 +0800 Subject: [PATCH 09/16] server: fix watch keyspace revision (#6251) ref tikv/pd#5895 Signed-off-by: Ryan Leung Co-authored-by: Ti Chi Robot --- server/keyspace_service.go | 52 +++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git 
a/server/keyspace_service.go b/server/keyspace_service.go index 5255d7258155..64e646119ed7 100644 --- a/server/keyspace_service.go +++ b/server/keyspace_service.go @@ -22,9 +22,12 @@ import ( "github.com/gogo/protobuf/proto" "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/keyspace" "github.com/tikv/pd/pkg/storage/endpoint" "go.etcd.io/etcd/clientv3" + "go.uber.org/zap" ) // KeyspaceServer wraps GrpcServer to provide keyspace service. @@ -82,18 +85,32 @@ func (s *KeyspaceServer) WatchKeyspaces(request *keyspacepb.WatchKeyspacesReques ctx, cancel := context.WithCancel(s.Context()) defer cancel() - err := s.sendAllKeyspaceMeta(ctx, stream) + revision, err := s.sendAllKeyspaceMeta(ctx, stream) if err != nil { return err } - watchChan := s.client.Watch(ctx, path.Join(s.rootPath, endpoint.KeyspaceMetaPrefix()), clientv3.WithPrefix()) + + watcher := clientv3.NewWatcher(s.client) + defer watcher.Close() + for { - select { - case <-ctx.Done(): - return nil - case res := <-watchChan: - keyspaces := make([]*keyspacepb.KeyspaceMeta, 0, len(res.Events)) - for _, event := range res.Events { + rch := watcher.Watch(ctx, path.Join(s.rootPath, endpoint.KeyspaceMetaPrefix()), clientv3.WithPrefix(), clientv3.WithRev(revision)) + for wresp := range rch { + if wresp.CompactRevision != 0 { + log.Warn("required revision has been compacted, use the compact revision", + zap.Int64("required-revision", revision), + zap.Int64("compact-revision", wresp.CompactRevision)) + revision = wresp.CompactRevision + break + } + if wresp.Canceled { + log.Error("watcher is canceled with", + zap.Int64("revision", revision), + errs.ZapError(errs.ErrEtcdWatcherCancel, wresp.Err())) + return wresp.Err() + } + keyspaces := make([]*keyspacepb.KeyspaceMeta, 0, len(wresp.Events)) + for _, event := range wresp.Events { if event.Type != clientv3.EventTypePut { continue } @@ -109,23 +126,34 @@ func (s *KeyspaceServer) WatchKeyspaces(request *keyspacepb.WatchKeyspacesReques } } } + select { + case <-ctx.Done(): + // server closed, return + return nil + default: + } } } -func (s *KeyspaceServer) sendAllKeyspaceMeta(ctx context.Context, stream keyspacepb.Keyspace_WatchKeyspacesServer) error { +func (s *KeyspaceServer) sendAllKeyspaceMeta(ctx context.Context, stream keyspacepb.Keyspace_WatchKeyspacesServer) (int64, error) { getResp, err := s.client.Get(ctx, path.Join(s.rootPath, endpoint.KeyspaceMetaPrefix()), clientv3.WithPrefix()) if err != nil { - return err + return 0, err } metas := make([]*keyspacepb.KeyspaceMeta, getResp.Count) for i, kv := range getResp.Kvs { meta := &keyspacepb.KeyspaceMeta{} if err = proto.Unmarshal(kv.Value, meta); err != nil { - return err + return 0, err } metas[i] = meta } - return stream.Send(&keyspacepb.WatchKeyspacesResponse{Header: s.header(), Keyspaces: metas}) + var revision int64 + if getResp.Header != nil { + // start from the next revision + revision = getResp.Header.GetRevision() + 1 + } + return revision, stream.Send(&keyspacepb.WatchKeyspacesResponse{Header: s.header(), Keyspaces: metas}) } // UpdateKeyspaceState updates the state of keyspace specified in the request. From 33daef0457b99f3a3980a56c5865fba41fe387a8 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Thu, 6 Apr 2023 10:02:58 +0700 Subject: [PATCH 10/16] tso, server: refine the TSO allocator manager parameters (#6269) ref tikv/pd#5895 - Refine the TSO allocator manager parameters. 
- Always run `tsoAllocatorLoop` to advance the Global TSO. Signed-off-by: JmPotato --- pkg/tso/allocator_manager.go | 39 +++++++++++++------------- pkg/tso/keyspace_group_manager.go | 11 ++------ server/server.go | 46 +++++++++++++++++-------------- 3 files changed, 49 insertions(+), 47 deletions(-) diff --git a/pkg/tso/allocator_manager.go b/pkg/tso/allocator_manager.go index 3c93eae8a854..8709f0d76450 100644 --- a/pkg/tso/allocator_manager.go +++ b/pkg/tso/allocator_manager.go @@ -143,6 +143,16 @@ type ElectionMember interface { PrecheckLeader() error } +// ConfigProvider is used to provide TSO configuration. +type ConfigProvider interface { + IsLocalTSOEnabled() bool + GetLeaderLease() int64 + GetTSOSaveInterval() time.Duration + GetTSOUpdatePhysicalInterval() time.Duration + GetMaxResetTSGap() time.Duration + GetTLSConfig() *grpcutil.TLSConfig +} + // AllocatorManager is used to manage the TSO Allocators a PD server holds. // It is in charge of maintaining TSO allocators' leadership, checking election // priority, and forwarding TSO allocation requests to correct TSO Allocators. @@ -193,17 +203,12 @@ type AllocatorManager struct { // NewAllocatorManager creates a new TSO Allocator Manager. func NewAllocatorManager( ctx context.Context, - startGlobalLeaderLoop bool, keyspaceGroupID uint32, member ElectionMember, rootPath string, storage endpoint.TSOStorage, - enableLocalTSO bool, - saveInterval time.Duration, - updatePhysicalInterval time.Duration, - leaderLease int64, - tlsConfig *grpcutil.TLSConfig, - maxResetTSGap func() time.Duration, + configProvider ConfigProvider, + startGlobalLeaderLoop bool, ) *AllocatorManager { ctx, cancel := context.WithCancel(ctx) am := &AllocatorManager{ @@ -213,12 +218,12 @@ func NewAllocatorManager( member: member, rootPath: rootPath, storage: storage, - enableLocalTSO: enableLocalTSO, - saveInterval: saveInterval, - updatePhysicalInterval: updatePhysicalInterval, - leaderLease: leaderLease, - maxResetTSGap: maxResetTSGap, - securityConfig: tlsConfig, + enableLocalTSO: configProvider.IsLocalTSOEnabled(), + saveInterval: configProvider.GetTSOSaveInterval(), + updatePhysicalInterval: configProvider.GetTSOUpdatePhysicalInterval(), + leaderLease: configProvider.GetLeaderLease(), + maxResetTSGap: configProvider.GetMaxResetTSGap, + securityConfig: configProvider.GetTLSConfig(), } am.mu.allocatorGroups = make(map[string]*allocatorGroup) am.mu.clusterDCLocations = make(map[string]*DCLocationInfo) @@ -226,6 +231,8 @@ func NewAllocatorManager( // Set up the Global TSO Allocator here, it will be initialized once the member campaigns leader successfully. am.SetUpGlobalAllocator(am.ctx, am.member.GetLeadership(), startGlobalLeaderLoop) + am.svcLoopWG.Add(1) + go am.tsoAllocatorLoop() return am } @@ -247,11 +254,6 @@ func (am *AllocatorManager) SetUpGlobalAllocator(ctx context.Context, leadership leadership: leadership, allocator: allocator, } - - if startGlobalLeaderLoop { - am.svcLoopWG.Add(1) - go am.tsoAllocatorLoop() - } } // setUpLocalAllocator is used to set up an allocator, which will initialize the allocator and put it into allocator daemon. @@ -279,7 +281,6 @@ func (am *AllocatorManager) setUpLocalAllocator(parentCtx context.Context, dcLoc } // tsoAllocatorLoop is used to run the TSO Allocator updating daemon. -// tso service starts the loop here, but pd starts its own loop. 
func (am *AllocatorManager) tsoAllocatorLoop() { defer logutil.LogPanic() defer am.svcLoopWG.Done() diff --git a/pkg/tso/keyspace_group_manager.go b/pkg/tso/keyspace_group_manager.go index ccd5ddce1692..2598da4a6b01 100644 --- a/pkg/tso/keyspace_group_manager.go +++ b/pkg/tso/keyspace_group_manager.go @@ -18,7 +18,6 @@ import ( "context" "fmt" "path" - "time" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" @@ -74,8 +73,7 @@ type KeyspaceGroupManager struct { // Note: The {group} is 5 digits integer with leading zeros. tsoSvcRootPath string // cfg is the TSO config - cfg ServiceConfig - maxResetTSGap func() time.Duration + cfg ServiceConfig } // NewKeyspaceGroupManager creates a new Keyspace Group Manager. @@ -102,7 +100,6 @@ func NewKeyspaceGroupManager( defaultKsgStorageTSRootPath: defaultKsgStorageTSRootPath, tsoSvcRootPath: tsoSvcRootPath, cfg: cfg, - maxResetTSGap: func() time.Duration { return cfg.GetMaxResetTSGap() }, } return ksgMgr @@ -127,11 +124,9 @@ func (kgm *KeyspaceGroupManager) initDefaultKeyspaceGroup() { defaultKsgGroupStorage := endpoint.NewStorageEndpoint(kv.NewEtcdKVBase(kgm.etcdClient, kgm.defaultKsgStorageTSRootPath), nil) kgm.ksgAllocatorManagers[mcsutils.DefaultKeySpaceGroupID] = NewAllocatorManager( - kgm.ctx, true, mcsutils.DefaultKeySpaceGroupID, participant, + kgm.ctx, mcsutils.DefaultKeySpaceGroupID, participant, kgm.defaultKsgStorageTSRootPath, defaultKsgGroupStorage, - kgm.cfg.IsLocalTSOEnabled(), kgm.cfg.GetTSOSaveInterval(), - kgm.cfg.GetTSOUpdatePhysicalInterval(), kgm.cfg.GetLeaderLease(), - kgm.cfg.GetTLSConfig(), kgm.maxResetTSGap) + kgm.cfg, true) } // GetAllocatorManager returns the AllocatorManager of the given keyspace group diff --git a/server/server.go b/server/server.go index 23be7dba8266..b2573032d7ff 100644 --- a/server/server.go +++ b/server/server.go @@ -405,12 +405,7 @@ func (s *Server) startServer(ctx context.Context) error { s.tsoProtoFactory = &tsoutil.TSOProtoFactory{} s.pdProtoFactory = &tsoutil.PDProtoFactory{} if !s.IsAPIServiceMode() { - s.tsoAllocatorManager = tso.NewAllocatorManager( - s.ctx, false, mcs.DefaultKeySpaceGroupID, s.member, s.rootPath, s.storage, s.cfg.IsLocalTSOEnabled(), - s.cfg.GetTSOSaveInterval(), s.cfg.GetTSOUpdatePhysicalInterval(), s.cfg.GetLeaderLease(), s.cfg.GetTLSConfig(), - func() time.Duration { return s.persistOptions.GetMaxResetTSGap() }) - // Set up the Global TSO Allocator here, it will be initialized once the PD campaigns leader successfully. - s.tsoAllocatorManager.SetUpGlobalAllocator(ctx, s.member.GetLeadership(), false) + s.tsoAllocatorManager = tso.NewAllocatorManager(s.ctx, mcs.DefaultKeySpaceGroupID, s.member, s.rootPath, s.storage, s, false) // When disabled the Local TSO, we should clean up the Local TSO Allocator's meta info written in etcd if it exists. if !s.cfg.EnableLocalTSO { if err = s.tsoAllocatorManager.CleanUpDCLocation(); err != nil { @@ -564,9 +559,6 @@ func (s *Server) startServerLoop(ctx context.Context) { if s.IsAPIServiceMode() { // disable tso service in api server s.serverLoopWg.Add(1) go s.watchServicePrimaryAddrLoop(mcs.TSOServiceName) - } else { // enable tso service - s.serverLoopWg.Add(1) - go s.tsoAllocatorLoop() } } @@ -592,17 +584,6 @@ func (s *Server) serverMetricsLoop() { } } -// tsoAllocatorLoop is used to run the TSO Allocator updating daemon. 
-func (s *Server) tsoAllocatorLoop() { - defer logutil.LogPanic() - defer s.serverLoopWg.Done() - - ctx, cancel := context.WithCancel(s.serverLoopCtx) - defer cancel() - s.tsoAllocatorManager.AllocatorDaemon(ctx) - log.Info("server is closed, exit allocator loop") -} - // encryptionKeyManagerLoop is used to start monitor encryption key changes. func (s *Server) encryptionKeyManagerLoop() { defer logutil.LogPanic() @@ -1813,3 +1794,28 @@ func (s *Server) SetExternalTS(externalTS, globalTS uint64) error { s.GetRaftCluster().SetExternalTS(externalTS) return nil } + +// IsLocalTSOEnabled returns if the local TSO is enabled. +func (s *Server) IsLocalTSOEnabled() bool { + return s.cfg.IsLocalTSOEnabled() +} + +// GetLeaderLease returns the leader lease. +func (s *Server) GetLeaderLease() int64 { + return s.cfg.GetLeaderLease() +} + +// GetTSOSaveInterval returns TSO save interval. +func (s *Server) GetTSOSaveInterval() time.Duration { + return s.cfg.GetTSOSaveInterval() +} + +// GetTSOUpdatePhysicalInterval returns TSO update physical interval. +func (s *Server) GetTSOUpdatePhysicalInterval() time.Duration { + return s.cfg.GetTSOUpdatePhysicalInterval() +} + +// GetMaxResetTSGap gets the max gap to reset the tso. +func (s *Server) GetMaxResetTSGap() time.Duration { + return s.persistOptions.GetMaxResetTSGap() +} From 9aa3e974bad10b35fc03976b1c1581cc31735805 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Thu, 6 Apr 2023 10:38:57 +0700 Subject: [PATCH 11/16] tso: unify the TSO ServiceConfig and ConfigProvider interfaces (#6272) ref tikv/pd#5895 Unify the TSO `ServiceConfig` and `ConfigProvider` interfaces. Signed-off-by: JmPotato --- pkg/tso/allocator_manager.go | 24 +++++++----------------- pkg/tso/config.go | 6 ++++++ 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/pkg/tso/allocator_manager.go b/pkg/tso/allocator_manager.go index 8709f0d76450..362ff6763b47 100644 --- a/pkg/tso/allocator_manager.go +++ b/pkg/tso/allocator_manager.go @@ -143,16 +143,6 @@ type ElectionMember interface { PrecheckLeader() error } -// ConfigProvider is used to provide TSO configuration. -type ConfigProvider interface { - IsLocalTSOEnabled() bool - GetLeaderLease() int64 - GetTSOSaveInterval() time.Duration - GetTSOUpdatePhysicalInterval() time.Duration - GetMaxResetTSGap() time.Duration - GetTLSConfig() *grpcutil.TLSConfig -} - // AllocatorManager is used to manage the TSO Allocators a PD server holds. // It is in charge of maintaining TSO allocators' leadership, checking election // priority, and forwarding TSO allocation requests to correct TSO Allocators. 
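Passing the server itself as the TSO config works because Go interfaces are satisfied implicitly: once *Server exposes the required getters (several are added at the end of server.go above), it can be handed to NewAllocatorManager wherever a Config/ConfigProvider is expected. A minimal sketch of the pattern, with illustrative names and a trimmed-down method set:

    package main

    import (
        "fmt"
        "time"
    )

    // Config mirrors the shape of the TSO config interface: the consumer only
    // declares the getters it needs.
    type Config interface {
        IsLocalTSOEnabled() bool
        GetLeaderLease() int64
        GetTSOSaveInterval() time.Duration
    }

    // server stands in for the PD server; it satisfies Config implicitly,
    // simply by implementing the methods.
    type server struct {
        localTSO     bool
        leaderLease  int64
        saveInterval time.Duration
    }

    func (s *server) IsLocalTSOEnabled() bool           { return s.localTSO }
    func (s *server) GetLeaderLease() int64             { return s.leaderLease }
    func (s *server) GetTSOSaveInterval() time.Duration { return s.saveInterval }

    // newAllocatorManager copies the values it needs out of cfg once, the same
    // way NewAllocatorManager does with cfg.IsLocalTSOEnabled() and friends.
    func newAllocatorManager(cfg Config) {
        fmt.Println(cfg.IsLocalTSOEnabled(), cfg.GetLeaderLease(), cfg.GetTSOSaveInterval())
    }

    func main() {
        newAllocatorManager(&server{localTSO: false, leaderLease: 3, saveInterval: 3 * time.Second})
    }
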
@@ -207,7 +197,7 @@ func NewAllocatorManager( member ElectionMember, rootPath string, storage endpoint.TSOStorage, - configProvider ConfigProvider, + cfg Config, startGlobalLeaderLoop bool, ) *AllocatorManager { ctx, cancel := context.WithCancel(ctx) @@ -218,12 +208,12 @@ func NewAllocatorManager( member: member, rootPath: rootPath, storage: storage, - enableLocalTSO: configProvider.IsLocalTSOEnabled(), - saveInterval: configProvider.GetTSOSaveInterval(), - updatePhysicalInterval: configProvider.GetTSOUpdatePhysicalInterval(), - leaderLease: configProvider.GetLeaderLease(), - maxResetTSGap: configProvider.GetMaxResetTSGap, - securityConfig: configProvider.GetTLSConfig(), + enableLocalTSO: cfg.IsLocalTSOEnabled(), + saveInterval: cfg.GetTSOSaveInterval(), + updatePhysicalInterval: cfg.GetTSOUpdatePhysicalInterval(), + leaderLease: cfg.GetLeaderLease(), + maxResetTSGap: cfg.GetMaxResetTSGap, + securityConfig: cfg.GetTLSConfig(), } am.mu.allocatorGroups = make(map[string]*allocatorGroup) am.mu.clusterDCLocations = make(map[string]*DCLocationInfo) diff --git a/pkg/tso/config.go b/pkg/tso/config.go index dae60715fb95..598f76004b1e 100644 --- a/pkg/tso/config.go +++ b/pkg/tso/config.go @@ -30,6 +30,12 @@ type ServiceConfig interface { GetListenAddr() string // GetAdvertiseListenAddr returns the AdvertiseListenAddr GetAdvertiseListenAddr() string + // TSO-related configuration + Config +} + +// Config is used to provide TSO configuration. +type Config interface { // GetLeaderLease returns the leader lease. GetLeaderLease() int64 // IsLocalTSOEnabled returns if the local TSO is enabled. From 9fcde123fd7c7baddc6d1898dfc07cf2a461f6f7 Mon Sep 17 00:00:00 2001 From: Bin Shi <39923490+binshi-bing@users.noreply.github.com> Date: Thu, 6 Apr 2023 01:08:58 -0700 Subject: [PATCH 12/16] Load initial assignment and dynamically watch/apply keyspace groups' membership/distribution change (#6247) ref tikv/pd#6232 Load initial keyspace group assignment. Dynamically watch/apply keyspace groups' membership/distribution change. Signed-off-by: Bin Shi --- client/client.go | 4 +- client/errs/errno.go | 14 +- errors.toml | 20 + go.mod | 2 +- pkg/errs/errno.go | 32 +- pkg/mcs/tso/server/handler.go | 9 +- pkg/mcs/tso/server/server.go | 43 +- pkg/storage/endpoint/key_path.go | 17 + pkg/storage/endpoint/key_path_test.go | 48 +++ pkg/storage/endpoint/tso_keyspace_group.go | 10 +- pkg/tso/allocator_manager.go | 5 +- pkg/tso/global_allocator.go | 6 +- pkg/tso/keyspace_group_manager.go | 462 ++++++++++++++++++--- pkg/tso/keyspace_group_manager_test.go | 414 ++++++++++++++++-- server/server.go | 2 +- 15 files changed, 974 insertions(+), 114 deletions(-) diff --git a/client/client.go b/client/client.go index 22b037d1ede7..249caa4d1496 100644 --- a/client/client.go +++ b/client/client.go @@ -1167,7 +1167,9 @@ func IsLeaderChange(err error) bool { return true } errMsg := err.Error() - return strings.Contains(errMsg, errs.NotLeaderErr) || strings.Contains(errMsg, errs.MismatchLeaderErr) + return strings.Contains(errMsg, errs.NotLeaderErr) || + strings.Contains(errMsg, errs.MismatchLeaderErr) || + strings.Contains(errMsg, errs.NotServedErr) } func trimHTTPPrefix(str string) string { diff --git a/client/errs/errno.go b/client/errs/errno.go index 43c08396f1ea..9ed860681bed 100644 --- a/client/errs/errno.go +++ b/client/errs/errno.go @@ -21,11 +21,19 @@ import ( ) const ( - // NotLeaderErr indicates the the non-leader member received the requests which should be received by leader. 
+ // NotLeaderErr indicates the non-leader member received the requests which should be received by leader. + // Note: keep the same as the ones defined on the server side, because the client side checks if an error message + // contains this string to judge whether the leader is changed. NotLeaderErr = "is not leader" - // MismatchLeaderErr indicates the the non-leader member received the requests which should be received by leader. + // MismatchLeaderErr indicates the non-leader member received the requests which should be received by leader. + // Note: keep the same as the ones defined on the server side, because the client side checks if an error message + // contains this string to judge whether the leader is changed. MismatchLeaderErr = "mismatch leader id" - RetryTimeoutErr = "retry timeout" + // NotServedErr indicates an tso node/pod received the requests for the keyspace groups which are not served by it. + // Note: keep the same as the ones defined on the server side, because the client side checks if an error message + // contains this string to judge whether the leader is changed. + NotServedErr = "is not served" + RetryTimeoutErr = "retry timeout" ) // client errors diff --git a/errors.toml b/errors.toml index 440015b7bf55..5b9ecd0a3458 100644 --- a/errors.toml +++ b/errors.toml @@ -1,6 +1,16 @@ # AUTOGENERATED BY github.com/pingcap/errors/errdoc-gen # YOU CAN CHANGE THE 'description'/'workaround' FIELDS IF THEM ARE IMPROPER. +["ErrLoadKeyspaceGroupsTerminated"] +error = ''' +load keyspace groups terminated +''' + +["ErrLoadKeyspaceGroupsTimeout"] +error = ''' +load keyspace groups timeout +''' + ["PD:ErrEncryptionKMS"] error = ''' KMS error @@ -731,11 +741,21 @@ error = ''' get allocator failed, %s ''' +["PD:tso:ErrGetAllocatorManager"] +error = ''' +get allocator manager failed, %s +''' + ["PD:tso:ErrGetLocalAllocator"] error = ''' get local allocator failed, %s ''' +["PD:tso:ErrKeyspaceGroupIDInvalid"] +error = ''' +the keyspace group id is invalid, %s +''' + ["PD:tso:ErrLogicOverflow"] error = ''' logic part overflow diff --git a/go.mod b/go.mod index 4c4090694b94..fd7a07c0d534 100644 --- a/go.mod +++ b/go.mod @@ -111,7 +111,7 @@ require ( github.com/golang/snappy v0.0.4 // indirect github.com/google/pprof v0.0.0-20211122183932-1daafda22083 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect - github.com/google/uuid v1.3.0 // indirect + github.com/google/uuid v1.3.0 github.com/gorilla/websocket v1.4.2 // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index 84793917ca92..1deb285df948 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -17,10 +17,18 @@ package errs import "github.com/pingcap/errors" const ( - // NotLeaderErr indicates the the non-leader member received the requests which should be received by leader. + // NotLeaderErr indicates the non-leader member received the requests which should be received by leader. + // Note: keep the same as the ones defined on the client side, because the client side checks if an error message + // contains this string to judge whether the leader is changed. NotLeaderErr = "is not leader" - // MismatchLeaderErr indicates the the non-leader member received the requests which should be received by leader. + // MismatchLeaderErr indicates the non-leader member received the requests which should be received by leader. 
+ // Note: keep the same as the ones defined on the client side, because the client side checks if an error message + // contains this string to judge whether the leader is changed. MismatchLeaderErr = "mismatch leader id" + // NotServedErr indicates an tso node/pod received the requests for the keyspace groups which are not served by it. + // Note: keep the same as the ones defined on the client side, because the client side checks if an error message + // contains this string to judge whether the leader is changed. + NotServedErr = "is not served" ) // common error in multiple packages @@ -31,14 +39,18 @@ var ( // tso errors var ( - ErrSetLocalTSOConfig = errors.Normalize("set local tso config failed, %s", errors.RFCCodeText("PD:tso:ErrSetLocalTSOConfig")) - ErrGetAllocator = errors.Normalize("get allocator failed, %s", errors.RFCCodeText("PD:tso:ErrGetAllocator")) - ErrGetLocalAllocator = errors.Normalize("get local allocator failed, %s", errors.RFCCodeText("PD:tso:ErrGetLocalAllocator")) - ErrSyncMaxTS = errors.Normalize("sync max ts failed, %s", errors.RFCCodeText("PD:tso:ErrSyncMaxTS")) - ErrResetUserTimestamp = errors.Normalize("reset user timestamp failed, %s", errors.RFCCodeText("PD:tso:ErrResetUserTimestamp")) - ErrGenerateTimestamp = errors.Normalize("generate timestamp failed, %s", errors.RFCCodeText("PD:tso:ErrGenerateTimestamp")) - ErrLogicOverflow = errors.Normalize("logic part overflow", errors.RFCCodeText("PD:tso:ErrLogicOverflow")) - ErrProxyTSOTimeout = errors.Normalize("proxy tso timeout", errors.RFCCodeText("PD:tso:ErrProxyTSOTimeout")) + ErrSetLocalTSOConfig = errors.Normalize("set local tso config failed, %s", errors.RFCCodeText("PD:tso:ErrSetLocalTSOConfig")) + ErrGetAllocator = errors.Normalize("get allocator failed, %s", errors.RFCCodeText("PD:tso:ErrGetAllocator")) + ErrGetLocalAllocator = errors.Normalize("get local allocator failed, %s", errors.RFCCodeText("PD:tso:ErrGetLocalAllocator")) + ErrSyncMaxTS = errors.Normalize("sync max ts failed, %s", errors.RFCCodeText("PD:tso:ErrSyncMaxTS")) + ErrResetUserTimestamp = errors.Normalize("reset user timestamp failed, %s", errors.RFCCodeText("PD:tso:ErrResetUserTimestamp")) + ErrGenerateTimestamp = errors.Normalize("generate timestamp failed, %s", errors.RFCCodeText("PD:tso:ErrGenerateTimestamp")) + ErrLogicOverflow = errors.Normalize("logic part overflow", errors.RFCCodeText("PD:tso:ErrLogicOverflow")) + ErrProxyTSOTimeout = errors.Normalize("proxy tso timeout", errors.RFCCodeText("PD:tso:ErrProxyTSOTimeout")) + ErrKeyspaceGroupIDInvalid = errors.Normalize("the keyspace group id is invalid, %s", errors.RFCCodeText("PD:tso:ErrKeyspaceGroupIDInvalid")) + ErrGetAllocatorManager = errors.Normalize("get allocator manager failed, %s", errors.RFCCodeText("PD:tso:ErrGetAllocatorManager")) + ErrLoadKeyspaceGroupsTimeout = errors.Normalize("load keyspace groups timeout", errors.RFCCodeText("ErrLoadKeyspaceGroupsTimeout")) + ErrLoadKeyspaceGroupsTerminated = errors.Normalize("load keyspace groups terminated", errors.RFCCodeText("ErrLoadKeyspaceGroupsTerminated")) ) // member errors diff --git a/pkg/mcs/tso/server/handler.go b/pkg/mcs/tso/server/handler.go index 40ddd2f726b8..14f83bccc613 100644 --- a/pkg/mcs/tso/server/handler.go +++ b/pkg/mcs/tso/server/handler.go @@ -17,6 +17,7 @@ package server import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" + mcsutils "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/tso" "go.uber.org/zap" ) @@ -31,12 +32,18 @@ func newHandler(s *Server) *Handler { } // ResetTS 
resets the ts with specified tso. +// TODO: Support multiple keyspace groups. func (h *Handler) ResetTS(ts uint64, ignoreSmaller, skipUpperBoundCheck bool) error { log.Info("reset-ts", zap.Uint64("new-ts", ts), zap.Bool("ignore-smaller", ignoreSmaller), zap.Bool("skip-upper-bound-check", skipUpperBoundCheck)) - tsoAllocator, err := h.s.GetTSOAllocatorManager().GetAllocator(tso.GlobalDCLocation) + tsoAllocatorManager, err := h.s.GetTSOAllocatorManager(mcsutils.DefaultKeySpaceGroupID) + if err != nil { + log.Error("failed to get allocator manager", errs.ZapError(err)) + return err + } + tsoAllocator, err := tsoAllocatorManager.GetAllocator(tso.GlobalDCLocation) if err != nil { return err } diff --git a/pkg/mcs/tso/server/server.go b/pkg/mcs/tso/server/server.go index db8e8a239f2c..642952b16e48 100644 --- a/pkg/mcs/tso/server/server.go +++ b/pkg/mcs/tso/server/server.go @@ -111,7 +111,10 @@ type Server struct { // Callback functions for different stages // startCallbacks will be called after the server is started. - startCallbacks []func() + startCallbacks []func() + + // for service registry + serviceID *discovery.ServiceRegistryEntry serviceRegister *discovery.ServiceRegister } @@ -199,14 +202,30 @@ func (s *Server) AddStartCallback(callbacks ...func()) { // IsServing implements basicserver. It returns whether the server is the leader // if there is embedded etcd, or the primary otherwise. +// TODO: support multiple keyspace groups func (s *Server) IsServing() bool { - return atomic.LoadInt64(&s.isRunning) == 1 && s.keyspaceGroupManager.GetElectionMember(mcsutils.DefaultKeySpaceGroupID).IsLeader() + if atomic.LoadInt64(&s.isRunning) == 0 { + return false + } + + member, err := s.keyspaceGroupManager.GetElectionMember(mcsutils.DefaultKeySpaceGroupID) + if err != nil { + log.Error("failed to get election member", errs.ZapError(err)) + return false + } + return member.IsLeader() } // GetLeaderListenUrls gets service endpoints from the leader in election group. // The entry at the index 0 is the primary's service endpoint. func (s *Server) GetLeaderListenUrls() []string { - return s.keyspaceGroupManager.GetElectionMember(mcsutils.DefaultKeySpaceGroupID).GetLeaderListenUrls() + member, err := s.keyspaceGroupManager.GetElectionMember(mcsutils.DefaultKeySpaceGroupID) + if err != nil { + log.Error("failed to get election member", errs.ZapError(err)) + return nil + } + + return member.GetLeaderListenUrls() } // AddServiceReadyCallback implements basicserver. @@ -229,8 +248,8 @@ func (s *Server) IsClosed() bool { } // GetTSOAllocatorManager returns the manager of TSO Allocator. 
-func (s *Server) GetTSOAllocatorManager() *tso.AllocatorManager { - return s.keyspaceGroupManager.GetAllocatorManager(mcsutils.DefaultKeySpaceGroupID) +func (s *Server) GetTSOAllocatorManager(keyspaceGroupID uint32) (*tso.AllocatorManager, error) { + return s.keyspaceGroupManager.GetAllocatorManager(keyspaceGroupID) } // IsLocalRequest checks if the forwarded host is the current host @@ -416,11 +435,16 @@ func (s *Server) startServer() (err error) { } s.serverLoopCtx, s.serverLoopCancel = context.WithCancel(s.ctx) - defaultKsgStorageTSRootPath := path.Join(pdRootPath, strconv.FormatUint(s.clusterID, 10)) + legacySvcRootPath := path.Join(pdRootPath, strconv.FormatUint(s.clusterID, 10)) tsoSvcRootPath := fmt.Sprintf(tsoSvcRootPathFormat, s.clusterID) + s.serviceID = &discovery.ServiceRegistryEntry{ServiceAddr: s.cfg.AdvertiseListenAddr} s.keyspaceGroupManager = tso.NewKeyspaceGroupManager( - s.serverLoopCtx, s.etcdClient, s.listenURL.Host, defaultKsgStorageTSRootPath, tsoSvcRootPath, s.cfg) - s.keyspaceGroupManager.Initialize() + s.serverLoopCtx, s.serviceID, s.etcdClient, s.listenURL.Host, legacySvcRootPath, tsoSvcRootPath, s.cfg) + // The param `false` means that we don't initialize the keyspace group manager + // by loading the keyspace group meta from etcd. + if err := s.keyspaceGroupManager.Initialize(false); err != nil { + return err + } s.tsoProtoFactory = &tsoutil.TSOProtoFactory{} s.service = &Service{Server: s} @@ -448,8 +472,7 @@ func (s *Server) startServer() (err error) { } // Server has started. - entry := &discovery.ServiceRegistryEntry{ServiceAddr: s.cfg.AdvertiseListenAddr} - serializedEntry, err := entry.Serialize() + serializedEntry, err := s.serviceID.Serialize() if err != nil { return err } diff --git a/pkg/storage/endpoint/key_path.go b/pkg/storage/endpoint/key_path.go index 1cf258c08f90..f84459fbd7d4 100644 --- a/pkg/storage/endpoint/key_path.go +++ b/pkg/storage/endpoint/key_path.go @@ -17,6 +17,7 @@ package endpoint import ( "fmt" "path" + "regexp" "strconv" "strings" @@ -239,6 +240,22 @@ func KeyspaceGroupIDPath(id uint32) string { return path.Join(tsoKeyspaceGroupPrefix, keyspaceGroupMembershipKey, encodeKeyspaceGroupID(id)) } +// ExtractKeyspaceGroupIDFromPath extracts keyspace group id from the given path, which contains +// the pattern of `tso/keyspace_groups/membership/(\d{5})$`. +func ExtractKeyspaceGroupIDFromPath(path string) (uint32, error) { + pattern := strings.Join([]string{KeyspaceGroupIDPrefix(), `(\d{5})$`}, "/") + re := regexp.MustCompile(pattern) + match := re.FindStringSubmatch(path) + if match == nil { + return 0, fmt.Errorf("invalid keyspace group id path: %s", path) + } + id, err := strconv.ParseUint(match[1], 10, 32) + if err != nil { + return 0, fmt.Errorf("failed to parse keyspace group ID: %v", err) + } + return uint32(id), nil +} + // encodeKeyspaceGroupID from uint32 to string. 
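The membership keys end in a fixed-width, zero-padded group ID (the encoder below uses %05d), which is exactly what the (\d{5})$ pattern in ExtractKeyspaceGroupIDFromPath relies on. A self-contained sketch of the same round-trip, independent of the PD key-path helpers:

    package main

    import (
        "fmt"
        "regexp"
        "strconv"
    )

    // idSuffix mirrors the pattern used by ExtractKeyspaceGroupIDFromPath.
    var idSuffix = regexp.MustCompile(`tso/keyspace_groups/membership/(\d{5})$`)

    // encodeID zero-pads a group ID to 5 digits, like encodeKeyspaceGroupID.
    func encodeID(id uint32) string { return fmt.Sprintf("%05d", id) }

    // extractID pulls the trailing 5-digit ID back out of a membership path.
    func extractID(path string) (uint32, error) {
        m := idSuffix.FindStringSubmatch(path)
        if m == nil {
            return 0, fmt.Errorf("invalid keyspace group id path: %s", path)
        }
        id, err := strconv.ParseUint(m[1], 10, 32)
        if err != nil {
            return 0, err
        }
        return uint32(id), nil
    }

    func main() {
        p := "/pd/{cluster_id}/tso/keyspace_groups/membership/" + encodeID(12345)
        fmt.Println(extractID(p)) // 12345 <nil>
    }
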
func encodeKeyspaceGroupID(groupID uint32) string { return fmt.Sprintf("%05d", groupID) diff --git a/pkg/storage/endpoint/key_path_test.go b/pkg/storage/endpoint/key_path_test.go index d6ef584105a4..270d1e266fe8 100644 --- a/pkg/storage/endpoint/key_path_test.go +++ b/pkg/storage/endpoint/key_path_test.go @@ -27,3 +27,51 @@ func BenchmarkRegionPath(b *testing.B) { _ = RegionPath(uint64(i)) } } + +func TestExtractKeyspaceGroupIDFromPath(t *testing.T) { + re := require.New(t) + + rightCases := []struct { + path string + id uint32 + }{ + {path: "/pd/{cluster_id}/tso/keyspace_groups/membership/00000", id: 0}, + {path: "/pd/{cluster_id}/tso/keyspace_groups/membership/00001", id: 1}, + {path: "/pd/{cluster_id}/tso/keyspace_groups/membership/12345", id: 12345}, + {path: "/pd/{cluster_id}/tso/keyspace_groups/membership/99999", id: 99999}, + {path: "tso/keyspace_groups/membership/00000", id: 0}, + {path: "tso/keyspace_groups/membership/00001", id: 1}, + {path: "tso/keyspace_groups/membership/12345", id: 12345}, + {path: "tso/keyspace_groups/membership/99999", id: 99999}, + } + + for _, tt := range rightCases { + id, err := ExtractKeyspaceGroupIDFromPath(tt.path) + re.Equal(tt.id, id) + re.NoError(err) + } + + wrongCases := []struct { + path string + }{ + {path: ""}, + {path: "00001"}, + {path: "xxx/keyspace_groups/membership/00001"}, + {path: "tso/xxxxxxxxxxxxxxx/membership/00001"}, + {path: "tso/keyspace_groups/xxxxxxxxxx/00001"}, + {path: "/pd/{cluster_id}/tso/keyspace_groups/xxxxxxxxxx/00001"}, + {path: "/pd/{cluster_id}/xxx/keyspace_groups/membership/00001"}, + {path: "/pd/{cluster_id}/tso/xxxxxxxxxxxxxxx/membership/00001"}, + {path: "/pd/{cluster_id}/tso/keyspace_groups/membership/"}, + {path: "/pd/{cluster_id}/tso/keyspace_groups/membership/0"}, + {path: "/pd/{cluster_id}/tso/keyspace_groups/membership/0001"}, + {path: "/pd/{cluster_id}/tso/keyspace_groups/membership/123456"}, + {path: "/pd/{cluster_id}/tso/keyspace_groups/membership/1234a"}, + {path: "/pd/{cluster_id}/tso/keyspace_groups/membership/12345a"}, + } + + for _, tt := range wrongCases { + _, err := ExtractKeyspaceGroupIDFromPath(tt.path) + re.Error(err) + } +} diff --git a/pkg/storage/endpoint/tso_keyspace_group.go b/pkg/storage/endpoint/tso_keyspace_group.go index 2d65dfee28a5..3e4b5f2235e6 100644 --- a/pkg/storage/endpoint/tso_keyspace_group.go +++ b/pkg/storage/endpoint/tso_keyspace_group.go @@ -22,11 +22,19 @@ import ( "go.etcd.io/etcd/clientv3" ) +// KeyspaceGroupMember defines an election member which campaigns for the primary of the keyspace group. +type KeyspaceGroupMember struct { + Address string `json:"address"` +} + // KeyspaceGroup is the keyspace group. type KeyspaceGroup struct { ID uint32 `json:"id"` UserKind string `json:"user-kind"` - // TODO: add `Members` field + // Members are the election members which campaign for the primary of the keyspace group. + Members []KeyspaceGroupMember `json:"members"` + // Keyspaces are the keyspace IDs which belong to the keyspace group. + Keyspaces []uint32 `json:"keyspaces"` } // KeyspaceGroupStorage is the interface for keyspace group storage. 
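With the Members and Keyspaces fields added, the value stored under each membership key is a small JSON document. A sketch of what the keyspace group manager later unmarshals, using local copies of the structs and illustrative field values:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // Local copies for illustration; the real definitions live in
    // pkg/storage/endpoint/tso_keyspace_group.go.
    type KeyspaceGroupMember struct {
        Address string `json:"address"`
    }

    type KeyspaceGroup struct {
        ID        uint32                `json:"id"`
        UserKind  string                `json:"user-kind"`
        Members   []KeyspaceGroupMember `json:"members"`
        Keyspaces []uint32              `json:"keyspaces"`
    }

    func main() {
        group := KeyspaceGroup{
            ID:        1,
            UserKind:  "basic", // illustrative value
            Members:   []KeyspaceGroupMember{{Address: "http://127.0.0.1:3379"}},
            Keyspaces: []uint32{1},
        }
        value, _ := json.Marshal(group)
        // This is the shape of the value read back when the manager loads or
        // watches /pd/{cluster_id}/tso/keyspace_groups/membership/00001.
        fmt.Println(string(value))
    }
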
diff --git a/pkg/tso/allocator_manager.go b/pkg/tso/allocator_manager.go index 362ff6763b47..c074e25a6d9d 100644 --- a/pkg/tso/allocator_manager.go +++ b/pkg/tso/allocator_manager.go @@ -294,8 +294,8 @@ func (am *AllocatorManager) close() { log.Info("closed the allocator manager") } -func (am *AllocatorManager) getMember() *ElectionMember { - return &am.member +func (am *AllocatorManager) getMember() ElectionMember { + return am.member } // SetLocalTSOConfig receives the zone label of this PD server and write it into etcd as dc-location @@ -1072,6 +1072,7 @@ func (am *AllocatorManager) HandleRequest(dcLocation string, count uint32) (pdpb err := errs.ErrGetAllocator.FastGenByArgs(fmt.Sprintf("%s allocator not found, generate timestamp failed", dcLocation)) return pdpb.Timestamp{}, err } + return allocatorGroup.allocator.GenerateTSO(count) } diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index 09febeccc3b8..d79a9b2f0b7c 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -193,7 +193,7 @@ func (gta *GlobalTSOAllocator) GenerateTSO(count uint32) (pdpb.Timestamp, error) // Have dc-locations configured in the cluster, use the Global TSO generation way. // (whit synchronization with other Local TSO Allocators) - ctx, cancel := context.WithCancel(context.Background()) + ctx, cancel := context.WithCancel(gta.ctx) defer cancel() for i := 0; i < maxRetryCount; i++ { var ( @@ -237,7 +237,7 @@ func (gta *GlobalTSOAllocator) GenerateTSO(count uint32) (pdpb.Timestamp, error) skipCheck = true goto SETTING_PHASE } - // Is skipCheck is false and globalTSOResp remains the same, it means the estimatedTSO is valide. + // Is skipCheck is false and globalTSOResp remains the same, it means the estimatedTSO is valid. if !skipCheck && tsoutil.CompareTimestamp(&globalTSOResp, estimatedMaxTSO) == 0 { tsoCounter.WithLabelValues("global_tso_estimate", gta.timestampOracle.dcLocation).Inc() } @@ -309,7 +309,7 @@ type syncResp struct { // SyncMaxTS is used to sync MaxTS with all Local TSO Allocator leaders in dcLocationMap. // If maxTSO is the biggest TSO among all Local TSO Allocators, it will be written into -// each allocator and remines the same after the synchronization. +// each allocator and remains the same after the synchronization. // If not, it will be replaced with the new max Local TSO and return. 
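A related detail a few hunks above: GenerateTSO now derives its per-call context from gta.ctx instead of context.Background(), so work started there is cancelled together with the allocator rather than outliving it. The difference is plain cancellation propagation, shown in this minimal sketch:

    package main

    import (
        "context"
        "fmt"
    )

    func main() {
        // parent plays the role of gta.ctx; cancelling it models closing the allocator.
        parent, closeAllocator := context.WithCancel(context.Background())

        // Derived the way GenerateTSO now derives it: cancelled together with the parent.
        derived, cancelDerived := context.WithCancel(parent)
        defer cancelDerived()

        // Derived from Background(), as before the change: unaffected by the parent.
        detached, cancelDetached := context.WithCancel(context.Background())
        defer cancelDetached()

        closeAllocator()

        fmt.Println("derived cancelled:", derived.Err() != nil)   // true
        fmt.Println("detached cancelled:", detached.Err() != nil) // false
    }
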
func (gta *GlobalTSOAllocator) SyncMaxTS( ctx context.Context, diff --git a/pkg/tso/keyspace_group_manager.go b/pkg/tso/keyspace_group_manager.go index 2598da4a6b01..e2cfba24658b 100644 --- a/pkg/tso/keyspace_group_manager.go +++ b/pkg/tso/keyspace_group_manager.go @@ -16,53 +16,81 @@ package tso import ( "context" + "encoding/json" "fmt" "path" + "strings" + "sync" + "sync/atomic" + "time" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/mcs/discovery" mcsutils "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/member" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/storage/kv" + "github.com/tikv/pd/pkg/utils/etcdutil" + "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/pkg/utils/memberutil" "go.etcd.io/etcd/clientv3" "go.uber.org/zap" ) -// primaryElectionSuffix is the suffix of the key for keyspace group primary election -const primaryElectionSuffix = "primary" +const ( + // primaryElectionSuffix is the suffix of the key for keyspace group primary election + primaryElectionSuffix = "primary" + // defaultLoadKeyspaceGroupsTimeout is the default timeout for loading the initial + // keyspace group assignment + defaultLoadKeyspaceGroupsTimeout = 30 * time.Second + defaultLoadKeyspaceGroupsBatchSize = int64(400) + loadFromEtcdMaxRetryTimes = 6 + loadFromEtcdRetryInterval = 500 * time.Millisecond + watchKEtcdChangeRetryInterval = 1 * time.Second +) -// KeyspaceGroupManager manages the primary/secondaries of the keyspace groups -// assigned to this host. The primaries provide the tso service for the corresponding +// KeyspaceGroupManager manages the members of the keyspace groups assigned to this host. +// The replicas campaign for the leaders which provide the tso service for the corresponding // keyspace groups. type KeyspaceGroupManager struct { - // ksgAllocatorManagers[i] stores the AllocatorManager of the keyspace group i. + // ams stores the allocator managers of the keyspace groups. Each keyspace group is assigned + // with an allocator manager managing its global/local tso allocators. // Use a fixed size array to maximize the efficiency of concurrent access to // different keyspace groups for tso service. - // TODO: change item type to atomic.Value stored as *AllocatorManager after we - // support online keyspace group assignment. - ksgAllocatorManagers [mcsutils.MaxKeyspaceGroupCountInUse]*AllocatorManager + ams [mcsutils.MaxKeyspaceGroupCountInUse]atomic.Pointer[AllocatorManager] + // ksgs stores the keyspace groups' membership/distribution meta. + ksgs [mcsutils.MaxKeyspaceGroupCountInUse]atomic.Pointer[endpoint.KeyspaceGroup] + + ctx context.Context + cancel context.CancelFunc + wg sync.WaitGroup - ctx context.Context - cancel context.CancelFunc - etcdClient *clientv3.Client + // tsoServiceID is the service ID of the TSO service, registered in the service discovery + tsoServiceID *discovery.ServiceRegistryEntry + etcdClient *clientv3.Client // electionNamePrefix is the name prefix to generate the unique name of a participant, // which participate in the election of its keyspace group's primary, in the format of // "electionNamePrefix:keyspace-group-id" electionNamePrefix string - // defaultKsgStorageTSRootPath is the root path of the default keyspace group in the - // storage endpoiont which is used for LoadTimestamp/SaveTimestamp. - // This is the legacy root path in the format of "/pd/{cluster_id}". 
- // Below is the entire path of in the legacy format (used by the default keyspace group) - // Key: /pd/{cluster_id}/timestamp - // Value: ts(time.Time) - // Key: /pd/{cluster_id}/lta/{dc-location}/timestamp - // Value: ts(time.Time) - defaultKsgStorageTSRootPath string - // tsoSvcRootPath defines the root path for all etcd paths used for different purposes. + // legacySvcRootPath defines the legacy root path for all etcd paths which derives from + // the PD/API service. It's in the format of "/pd/{cluster_id}". + // The main paths for different usages include: + // 1. The path, used by the default keyspace group, for LoadTimestamp/SaveTimestamp in the + // storage endpoint. + // Key: /pd/{cluster_id}/timestamp + // Value: ts(time.Time) + // Key: /pd/{cluster_id}/lta/{dc-location}/timestamp + // Value: ts(time.Time) + // 2. The path for storing keyspace group membership/distribution metadata. + // Key: /pd/{cluster_id}/tso/keyspace_groups/membership/{group} + // Value: endpoint.KeyspaceGroup + // Note: The {group} is 5 digits integer with leading zeros. + legacySvcRootPath string + // tsoSvcRootPath defines the root path for all etcd paths used in the tso microservices. // It is in the format of "/ms//tso". - // The main paths for different usages in the tso microservice include: + // The main paths for different usages include: // 1. The path for keyspace group primary election. Format: "/ms/{cluster_id}/tso/{group}/primary" // 2. The path for LoadTimestamp/SaveTimestamp in the storage endpoint for all the non-default // keyspace groups. @@ -72,16 +100,24 @@ type KeyspaceGroupManager struct { // Value: ts(time.Time) // Note: The {group} is 5 digits integer with leading zeros. tsoSvcRootPath string + // legacySvcStorage is storage with legacySvcRootPath. + legacySvcStorage *endpoint.StorageEndpoint + // tsoSvcStorage is storage with tsoSvcRootPath. + tsoSvcStorage *endpoint.StorageEndpoint // cfg is the TSO config cfg ServiceConfig + // loadKeyspaceGroupsTimeout is the timeout for loading the initial keyspace group assignment. + loadKeyspaceGroupsTimeout time.Duration + loadKeyspaceGroupsBatchSize int64 } // NewKeyspaceGroupManager creates a new Keyspace Group Manager. 
func NewKeyspaceGroupManager( ctx context.Context, + tsoServiceID *discovery.ServiceRegistryEntry, etcdClient *clientv3.Client, electionNamePrefix string, - defaultKsgStorageTSRootPath string, + legacySvcRootPath string, tsoSvcRootPath string, cfg ServiceConfig, ) *KeyspaceGroupManager { @@ -92,60 +128,380 @@ func NewKeyspaceGroupManager( } ctx, cancel := context.WithCancel(ctx) - ksgMgr := &KeyspaceGroupManager{ + kgm := &KeyspaceGroupManager{ ctx: ctx, cancel: cancel, + tsoServiceID: tsoServiceID, etcdClient: etcdClient, electionNamePrefix: electionNamePrefix, - defaultKsgStorageTSRootPath: defaultKsgStorageTSRootPath, + legacySvcRootPath: legacySvcRootPath, tsoSvcRootPath: tsoSvcRootPath, cfg: cfg, + loadKeyspaceGroupsTimeout: defaultLoadKeyspaceGroupsTimeout, + loadKeyspaceGroupsBatchSize: defaultLoadKeyspaceGroupsBatchSize, } - return ksgMgr + kgm.legacySvcStorage = endpoint.NewStorageEndpoint( + kv.NewEtcdKVBase(kgm.etcdClient, kgm.legacySvcRootPath), nil) + kgm.tsoSvcStorage = endpoint.NewStorageEndpoint( + kv.NewEtcdKVBase(kgm.etcdClient, kgm.tsoSvcRootPath), nil) + return kgm } // Initialize this KeyspaceGroupManager -func (kgm *KeyspaceGroupManager) Initialize() { - // TODO: dynamically load keyspace group assignment from the persistent storage and add watch for the assignment change - kgm.initDefaultKeyspaceGroup() +func (kgm *KeyspaceGroupManager) Initialize(loadFromStorage bool) error { + // Initialize the default keyspace group if not loading from storage + if !loadFromStorage { + group := &endpoint.KeyspaceGroup{ + ID: mcsutils.DefaultKeySpaceGroupID, + Members: []endpoint.KeyspaceGroupMember{{Address: kgm.tsoServiceID.ServiceAddr}}, + Keyspaces: []uint32{mcsutils.DefaultKeyspaceID}, + } + kgm.updateKeyspaceGroup(group) + return nil + } + + // Load the initial keyspace group assignment from storage with time limit + done := make(chan struct{}, 1) + ctx, cancel := context.WithCancel(kgm.ctx) + go kgm.checkInitProgress(ctx, cancel, done) + watchStartRevision, err := kgm.initAssignment(ctx) + done <- struct{}{} + if err != nil { + log.Error("failed to initialize keyspace group manager", errs.ZapError(err)) + // We might have partially loaded/initialized the keyspace groups. Close the manager to clean up. + kgm.Close() + return err + } + + // Watch/apply keyspace group membership/distribution meta changes dynamically. + kgm.wg.Add(1) + go kgm.startKeyspaceGroupsMetaWatchLoop(watchStartRevision) + + return nil +} + +// Close this KeyspaceGroupManager +func (kgm *KeyspaceGroupManager) Close() { + log.Info("closing keyspace group manager") + + // Note: don't change the order. We need to cancel all service loops in the keyspace group manager + // before closing all keyspace groups. It's to prevent concurrent addition/removal of keyspace groups + // during critical periods such as service shutdown and online keyspace group, while the former requires + // snapshot isolation to ensure all keyspace groups are properly closed and no new keyspace group is + // added/initialized after that. 
+ kgm.cancel() + kgm.wg.Wait() + kgm.closeKeyspaceGroups() + + log.Info("keyspace group manager closed") +} + +func (kgm *KeyspaceGroupManager) closeKeyspaceGroups() { + log.Info("closing all keyspace groups") + + wg := sync.WaitGroup{} + for i := range kgm.ams { + if am := kgm.ams[i].Load(); am != nil { + wg.Add(1) + go func(am *AllocatorManager) { + defer wg.Done() + am.close() + log.Info("keyspace group closed", zap.Uint32("keyspace-group-id", am.ksgID)) + }(am) + } + } + wg.Wait() + + log.Info("All keyspace groups closed") +} + +func (kgm *KeyspaceGroupManager) checkInitProgress(ctx context.Context, cancel context.CancelFunc, done chan struct{}) { + select { + case <-done: + return + case <-time.After(kgm.loadKeyspaceGroupsTimeout): + log.Error("failed to initialize keyspace group manager", + zap.Any("timeout-setting", kgm.loadKeyspaceGroupsTimeout), + errs.ZapError(errs.ErrLoadKeyspaceGroupsTimeout)) + cancel() + case <-ctx.Done(): + } + <-done } -// Initialize this the default keyspace group -func (kgm *KeyspaceGroupManager) initDefaultKeyspaceGroup() { - uniqueName := fmt.Sprintf("%s-%05d", kgm.electionNamePrefix, mcsutils.DefaultKeySpaceGroupID) - uniqueID := memberutil.GenerateUniqueID(uniqueName) - log.Info("joining primary election", zap.String("participant-name", uniqueName), zap.Uint64("participant-id", uniqueID)) +// initAssignment loads initial keyspace group assignment from storage and initialize the group manager. +func (kgm *KeyspaceGroupManager) initAssignment(ctx context.Context) (int64, error) { + var ( + // The start revision for watching keyspace group membership/distribution change + watchStartRevision int64 + groups []*endpoint.KeyspaceGroup + more bool + err error + keyspaceGroupsLoaded uint32 + revision int64 + ) + + // Load all keyspace groups from etcd and apply the ones assigned to this tso service. + for { + revision, groups, more, err = kgm.loadKeyspaceGroups(ctx, keyspaceGroupsLoaded, kgm.loadKeyspaceGroupsBatchSize) + if err != nil { + return 0, err + } + + keyspaceGroupsLoaded += uint32(len(groups)) + + if watchStartRevision == 0 || revision < watchStartRevision { + watchStartRevision = revision + } + + // Update the keyspace groups + for _, group := range groups { + select { + case <-ctx.Done(): + return watchStartRevision, errs.ErrLoadKeyspaceGroupsTerminated + default: + } - participant := member.NewParticipant(kgm.etcdClient) - participant.InitInfo(uniqueName, uniqueID, path.Join(kgm.tsoSvcRootPath, fmt.Sprintf("%05d", mcsutils.DefaultKeySpaceGroupID)), - primaryElectionSuffix, "keyspace group primary election", kgm.cfg.GetAdvertiseListenAddr()) + kgm.updateKeyspaceGroup(group) + } - defaultKsgGroupStorage := endpoint.NewStorageEndpoint(kv.NewEtcdKVBase(kgm.etcdClient, kgm.defaultKsgStorageTSRootPath), nil) - kgm.ksgAllocatorManagers[mcsutils.DefaultKeySpaceGroupID] = - NewAllocatorManager( - kgm.ctx, mcsutils.DefaultKeySpaceGroupID, participant, - kgm.defaultKsgStorageTSRootPath, defaultKsgGroupStorage, - kgm.cfg, true) + if !more { + break + } + } + + log.Info("loaded keyspace groups", zap.Uint32("keyspace-groups-loaded", keyspaceGroupsLoaded)) + return watchStartRevision, nil +} + +// loadKeyspaceGroups loads keyspace groups from the start ID with limit. +// If limit is 0, it will load all keyspace groups from the start ID. 
+func (kgm *KeyspaceGroupManager) loadKeyspaceGroups( + ctx context.Context, startID uint32, limit int64, +) (revison int64, ksgs []*endpoint.KeyspaceGroup, more bool, err error) { + rootPath := kgm.legacySvcRootPath + startKey := strings.Join([]string{rootPath, endpoint.KeyspaceGroupIDPath(startID)}, "/") + endKey := strings.Join( + []string{rootPath, clientv3.GetPrefixRangeEnd(endpoint.KeyspaceGroupIDPrefix())}, "/") + opOption := []clientv3.OpOption{clientv3.WithRange(endKey), clientv3.WithLimit(limit)} + + var resp *clientv3.GetResponse + for i := 0; i < loadFromEtcdMaxRetryTimes; i++ { + resp, err = etcdutil.EtcdKVGet(kgm.etcdClient, startKey, opOption...) + if err == nil && resp != nil { + break + } + select { + case <-ctx.Done(): + return 0, []*endpoint.KeyspaceGroup{}, false, errs.ErrLoadKeyspaceGroupsTerminated + case <-time.After(loadFromEtcdRetryInterval): + } + } + + kgs := make([]*endpoint.KeyspaceGroup, 0, len(resp.Kvs)) + for _, item := range resp.Kvs { + kg := &endpoint.KeyspaceGroup{} + if err = json.Unmarshal(item.Value, kg); err != nil { + return 0, nil, false, err + } + kgs = append(kgs, kg) + } + + if resp.Header != nil { + revison = resp.Header.Revision + } + + return revison, kgs, resp.More, nil +} + +// startKeyspaceGroupsMetaWatchLoop Repeatedly watches any change in keyspace group membership/distribution +// and apply the change dynamically. +func (kgm *KeyspaceGroupManager) startKeyspaceGroupsMetaWatchLoop(revision int64) { + defer logutil.LogPanic() + defer kgm.wg.Done() + + // Repeatedly watch/apply keyspace group membership/distribution changes until the context is canceled. + for { + select { + case <-kgm.ctx.Done(): + return + default: + } + + nextRevision, err := kgm.watchKeyspaceGroupsMetaChange(revision) + if err != nil { + log.Error("watcher canceled unexpectedly. Will start a new watcher after a while", + zap.Int64("next-revision", nextRevision), + zap.Time("retry-at", time.Now().Add(watchKEtcdChangeRetryInterval)), + zap.Error(err)) + time.Sleep(watchKEtcdChangeRetryInterval) + } + } +} + +// watchKeyspaceGroupsMetaChange watches any change in keyspace group membership/distribution +// and apply the change dynamically. 
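loadKeyspaceGroups above pages through the membership prefix with clientv3.WithRange/WithLimit, and the watch loop below resumes from an explicit revision, treating compaction as a recoverable signal by re-creating the watch from the compact revision. Reduced to a standalone sketch of that watch-and-apply idiom (assuming only an etcd clientv3 client and Go 1.19+ for atomic.Pointer; the names and the 4096 bound are illustrative, not the PD implementation):

    package main

    import (
        "context"
        "log"
        "strconv"
        "strings"
        "sync/atomic"

        "go.etcd.io/etcd/clientv3"
    )

    // group stands in for endpoint.KeyspaceGroup; only the raw value is kept here.
    type group struct{ raw []byte }

    // slots plays the role of the manager's ams/ksgs arrays (4096 stands in for
    // MaxKeyspaceGroupCountInUse): one lock-free slot per keyspace group, written
    // by the watch loop and read concurrently by the TSO path via Load.
    var slots [4096]atomic.Pointer[group]

    // idFromKey is a stand-in for ExtractKeyspaceGroupIDFromPath: it parses the
    // trailing group ID from the membership key, returning -1 on failure.
    func idFromKey(key []byte) int {
        s := string(key)
        id, err := strconv.Atoi(s[strings.LastIndexByte(s, '/')+1:])
        if err != nil {
            return -1
        }
        return id
    }

    // watchAndApply keeps the prefix watched from the given revision, resuming from
    // the compact revision when the requested one has already been compacted away.
    func watchAndApply(ctx context.Context, cli *clientv3.Client, prefix string, revision int64) error {
        watcher := clientv3.NewWatcher(cli)
        defer watcher.Close()
        for {
            wch := watcher.Watch(ctx, prefix, clientv3.WithPrefix(), clientv3.WithRev(revision))
            for wresp := range wch {
                if wresp.CompactRevision != 0 {
                    log.Printf("revision %d compacted, resuming from %d", revision, wresp.CompactRevision)
                    revision = wresp.CompactRevision
                    break
                }
                if err := wresp.Err(); err != nil {
                    return err
                }
                for _, ev := range wresp.Events {
                    id := idFromKey(ev.Kv.Key)
                    if id < 0 || id >= len(slots) {
                        continue // ignore IDs outside the supported range
                    }
                    switch ev.Type {
                    case clientv3.EventTypePut:
                        slots[id].Store(&group{raw: ev.Kv.Value})
                    case clientv3.EventTypeDelete:
                        if old := slots[id].Swap(nil); old != nil {
                            log.Printf("keyspace group %d removed", id)
                        }
                    }
                }
                revision = wresp.Header.Revision
            }
            select {
            case <-ctx.Done():
                return nil
            default:
            }
        }
    }

    func main() {
        cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"127.0.0.1:2379"}})
        if err != nil {
            log.Fatal(err)
        }
        defer cli.Close()
        // An initial loader would pass GetResponse.Header.Revision+1 here so nothing
        // is missed between the load and the watch; 0 means "watch from now on".
        _ = watchAndApply(context.Background(), cli, "tso/keyspace_groups/membership/", 0)
    }
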
+func (kgm *KeyspaceGroupManager) watchKeyspaceGroupsMetaChange(revision int64) (int64, error) { + watcher := clientv3.NewWatcher(kgm.etcdClient) + defer watcher.Close() + + ksgPrefix := strings.Join([]string{kgm.legacySvcRootPath, endpoint.KeyspaceGroupIDPrefix()}, "/") + + for { + watchChan := watcher.Watch(kgm.ctx, ksgPrefix, clientv3.WithPrefix(), clientv3.WithRev(revision)) + for wresp := range watchChan { + if wresp.CompactRevision != 0 { + log.Warn("Required revision has been compacted, the watcher will watch again with the compact revision", + zap.Int64("required-revision", revision), + zap.Int64("compact-revision", wresp.CompactRevision)) + revision = wresp.CompactRevision + break + } + if wresp.Err() != nil { + log.Error("watch is canceled or closed", + zap.Int64("required-revision", revision), + errs.ZapError(errs.ErrEtcdWatcherCancel, wresp.Err())) + return revision, wresp.Err() + } + for _, event := range wresp.Events { + id, err := endpoint.ExtractKeyspaceGroupIDFromPath(string(event.Kv.Key)) + if err != nil { + log.Warn("failed to extract keyspace group ID from the key path", + zap.String("key-path", string(event.Kv.Key)), zap.Error(err)) + continue + } + + switch event.Type { + case clientv3.EventTypePut: + group := &endpoint.KeyspaceGroup{} + if err := json.Unmarshal(event.Kv.Value, group); err != nil { + log.Warn("failed to unmarshal keyspace group", + zap.Uint32("keysapce-group-id", id), + zap.Error(errs.ErrJSONUnmarshal.Wrap(err).FastGenWithCause())) + } else { + kgm.updateKeyspaceGroup(group) + } + case clientv3.EventTypeDelete: + kgm.deleteKeyspaceGroup(id) + } + } + revision = wresp.Header.Revision + } + + select { + case <-kgm.ctx.Done(): + return revision, nil + default: + } + } +} + +func (kgm *KeyspaceGroupManager) isAssignedToMe(group *endpoint.KeyspaceGroup) bool { + for _, member := range group.Members { + if member.Address == kgm.tsoServiceID.ServiceAddr { + return true + } + } + return false +} + +// updateKeyspaceGroup applies the given keyspace group. If the keyspace group is just assigned to +// this host/pod, it will join the primary election. +func (kgm *KeyspaceGroupManager) updateKeyspaceGroup(group *endpoint.KeyspaceGroup) { + if group.ID >= uint32(len(kgm.ams)) { + log.Warn("keyspace group ID is out of range, ignore it", + zap.Uint32("keyspace-group-id", group.ID), zap.Int("max-keyspace-group-id", len(kgm.ams)-1)) + return + } + + assignedToMe := kgm.isAssignedToMe(group) + if assignedToMe { + if kgm.ams[group.ID].Load() != nil { + log.Info("keyspace group already initialized, so update meta only", + zap.Uint32("keyspace-group-id", group.ID)) + kgm.ksgs[group.ID].Store(group) + return + } + + uniqueName := fmt.Sprintf("%s-%05d", kgm.electionNamePrefix, group.ID) + uniqueID := memberutil.GenerateUniqueID(uniqueName) + log.Info("joining primary election", + zap.Uint32("keyspace-group-id", group.ID), + zap.String("participant-name", uniqueName), + zap.Uint64("participant-id", uniqueID)) + + participant := member.NewParticipant(kgm.etcdClient) + participant.InitInfo( + uniqueName, uniqueID, path.Join(kgm.tsoSvcRootPath, fmt.Sprintf("%05d", group.ID)), + primaryElectionSuffix, "keyspace group primary election", kgm.cfg.GetAdvertiseListenAddr()) + + // Only the default keyspace group uses the legacy service root path for LoadTimestamp/SyncTimestamp. 
+ var ( + tsRootPath string + storage *endpoint.StorageEndpoint + ) + if group.ID == mcsutils.DefaultKeySpaceGroupID { + tsRootPath = kgm.legacySvcRootPath + storage = kgm.legacySvcStorage + } else { + tsRootPath = kgm.tsoSvcRootPath + storage = kgm.tsoSvcStorage + } + + kgm.ams[group.ID].Store(NewAllocatorManager(kgm.ctx, group.ID, participant, tsRootPath, storage, kgm.cfg, true)) + kgm.ksgs[group.ID].Store(group) + } else { + // Not assigned to me. If this host/pod owns this keyspace group, it should resign. + kgm.deleteKeyspaceGroup(group.ID) + } +} + +// deleteKeyspaceGroup deletes the given keyspace group. +func (kgm *KeyspaceGroupManager) deleteKeyspaceGroup(id uint32) { + kgm.ksgs[id].Store(nil) + am := kgm.ams[id].Swap(nil) + if am == nil { + return + } + am.close() + log.Info("deleted keyspace group", zap.Uint32("keyspace-group-id", id)) } // GetAllocatorManager returns the AllocatorManager of the given keyspace group -func (kgm *KeyspaceGroupManager) GetAllocatorManager(keyspaceGroupID uint32) *AllocatorManager { - return kgm.ksgAllocatorManagers[keyspaceGroupID] +func (kgm *KeyspaceGroupManager) GetAllocatorManager(id uint32) (*AllocatorManager, error) { + if err := kgm.checkKeySpaceGroupID(id); err != nil { + return nil, err + } + if am := kgm.ams[id].Load(); am != nil { + return am, nil + } + return nil, errs.ErrGetAllocatorManager.FastGenByArgs( + fmt.Sprintf("requested keyspace group with id %d %s by this host/pod", id, errs.NotServedErr)) } // GetElectionMember returns the election member of the given keyspace group -func (kgm *KeyspaceGroupManager) GetElectionMember(keyspaceGroupID uint32) ElectionMember { - return *kgm.ksgAllocatorManagers[keyspaceGroupID].getMember() +func (kgm *KeyspaceGroupManager) GetElectionMember(id uint32) (ElectionMember, error) { + am, err := kgm.GetAllocatorManager(id) + if err != nil { + return nil, err + } + return am.getMember(), nil } // HandleTSORequest forwards TSO allocation requests to correct TSO Allocators of the given keyspace group. 
-func (kgm *KeyspaceGroupManager) HandleTSORequest(keyspaceGroupID uint32, dcLocation string, count uint32) (pdpb.Timestamp, error) { - return kgm.ksgAllocatorManagers[keyspaceGroupID].HandleRequest(dcLocation, count) +func (kgm *KeyspaceGroupManager) HandleTSORequest(id uint32, dcLocation string, count uint32) (pdpb.Timestamp, error) { + am, err := kgm.GetAllocatorManager(id) + if err != nil { + return pdpb.Timestamp{}, err + } + return am.HandleRequest(dcLocation, count) } -// Close this KeyspaceGroupManager -func (kgm *KeyspaceGroupManager) Close() { - kgm.cancel() - kgm.ksgAllocatorManagers[mcsutils.DefaultKeySpaceGroupID].close() +func (kgm *KeyspaceGroupManager) checkKeySpaceGroupID(id uint32) error { + if id < mcsutils.MaxKeyspaceGroupCountInUse { + return nil + } + return errs.ErrKeyspaceGroupIDInvalid.FastGenByArgs( + fmt.Sprintf("invalid keyspace group id %d which shouldn't >= %d", id, mcsutils.MaxKeyspaceGroupCountInUse)) } diff --git a/pkg/tso/keyspace_group_manager_test.go b/pkg/tso/keyspace_group_manager_test.go index c8f7f1a81c2a..bbdd1e2cfd3e 100644 --- a/pkg/tso/keyspace_group_manager_test.go +++ b/pkg/tso/keyspace_group_manager_test.go @@ -16,56 +16,414 @@ package tso import ( "context" + "encoding/json" + "fmt" + "math/rand" "path" + "reflect" + "sort" + "strconv" + "strings" + "sync" "testing" "time" + "github.com/google/uuid" "github.com/stretchr/testify/require" - "github.com/tikv/pd/pkg/mcs/utils" + "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/mcs/discovery" + mcsutils "github.com/tikv/pd/pkg/mcs/utils" + "github.com/tikv/pd/pkg/storage/endpoint" + "github.com/tikv/pd/pkg/utils/memberutil" + "github.com/tikv/pd/pkg/utils/testutil" + "go.etcd.io/etcd/clientv3" + "go.etcd.io/etcd/mvcc/mvccpb" + "go.uber.org/goleak" ) -func TestNewKeyspaceGroupManager(t *testing.T) { - re := require.New(t) - backendpoints, etcdClient, clean := startEmbeddedEtcd(t) - defer clean() +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m, testutil.LeakOptions...) 
+} + +type keyspaceGroupManagerTestSuite struct { + suite.Suite + ctx context.Context + cancel context.CancelFunc + backendEndpoints string + etcdClient *clientv3.Client + clean func() + cfg *TestServiceConfig +} + +func TestKeyspaceGroupManagerTestSuite(t *testing.T) { + suite.Run(t, new(keyspaceGroupManagerTestSuite)) +} - cfg := &TestServiceConfig{ +func (suite *keyspaceGroupManagerTestSuite) SetupSuite() { + t := suite.T() + suite.ctx, suite.cancel = context.WithCancel(context.Background()) + suite.backendEndpoints, suite.etcdClient, suite.clean = startEmbeddedEtcd(t) + + suite.cfg = &TestServiceConfig{ Name: "tso-test-name", - BackendEndpoints: backendpoints, + BackendEndpoints: suite.backendEndpoints, ListenAddr: "http://127.0.0.1:3379", AdvertiseListenAddr: "http://127.0.0.1:3379", - LeaderLease: utils.DefaultLeaderLease, + LeaderLease: mcsutils.DefaultLeaderLease, LocalTSOEnabled: false, TSOUpdatePhysicalInterval: 50 * time.Millisecond, - TSOSaveInterval: time.Duration(utils.DefaultLeaderLease) * time.Second, + TSOSaveInterval: time.Duration(mcsutils.DefaultLeaderLease) * time.Second, MaxResetTSGap: time.Hour * 24, TLSConfig: nil, } +} - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - defaultKsgStorageTSRootPath := path.Join("/pd/1") - tsoSvcRootPath := "/ms/1/tso" - electionNamePrefix := "tso-server-1" +func (suite *keyspaceGroupManagerTestSuite) TearDownSuite() { + suite.clean() + suite.cancel() +} - keyspaceGroupManager := NewKeyspaceGroupManager( - ctx, etcdClient, electionNamePrefix, defaultKsgStorageTSRootPath, tsoSvcRootPath, cfg) - keyspaceGroupManager.Initialize() +// TestNewKeyspaceGroupManager tests the initialization of KeyspaceGroupManager. +// It should initialize the allocator manager with the desired configurations and parameters. 
+func (suite *keyspaceGroupManagerTestSuite) TestNewKeyspaceGroupManager() { + re := suite.Require() - re.Equal(etcdClient, keyspaceGroupManager.etcdClient) - re.Equal(electionNamePrefix, keyspaceGroupManager.electionNamePrefix) - re.Equal(defaultKsgStorageTSRootPath, keyspaceGroupManager.defaultKsgStorageTSRootPath) - re.Equal(tsoSvcRootPath, keyspaceGroupManager.tsoSvcRootPath) - re.Equal(cfg, keyspaceGroupManager.cfg) + tsoServiceID := &discovery.ServiceRegistryEntry{ServiceAddr: suite.cfg.AdvertiseListenAddr} + guid := uuid.New().String() + legacySvcRootPath := path.Join("/pd", guid) + tsoSvcRootPath := path.Join("/ms", guid, "tso") + electionNamePrefix := "tso-server-" + guid - am := keyspaceGroupManager.GetAllocatorManager(utils.DefaultKeySpaceGroupID) + ksgMgr := NewKeyspaceGroupManager( + suite.ctx, tsoServiceID, suite.etcdClient, electionNamePrefix, legacySvcRootPath, tsoSvcRootPath, suite.cfg) + err := ksgMgr.Initialize(false) + re.NoError(err) + + re.Equal(tsoServiceID, ksgMgr.tsoServiceID) + re.Equal(suite.etcdClient, ksgMgr.etcdClient) + re.Equal(electionNamePrefix, ksgMgr.electionNamePrefix) + re.Equal(legacySvcRootPath, ksgMgr.legacySvcRootPath) + re.Equal(tsoSvcRootPath, ksgMgr.tsoSvcRootPath) + re.Equal(suite.cfg, ksgMgr.cfg) + re.Equal(defaultLoadKeyspaceGroupsBatchSize, ksgMgr.loadKeyspaceGroupsBatchSize) + re.Equal(defaultLoadKeyspaceGroupsTimeout, ksgMgr.loadKeyspaceGroupsTimeout) + + am, err := ksgMgr.GetAllocatorManager(mcsutils.DefaultKeySpaceGroupID) + re.NoError(err) re.False(am.enableLocalTSO) - re.Equal(utils.DefaultKeySpaceGroupID, am.ksgID) - re.Equal(utils.DefaultLeaderLease, am.leaderLease) + re.Equal(mcsutils.DefaultKeySpaceGroupID, am.ksgID) + re.Equal(mcsutils.DefaultLeaderLease, am.leaderLease) re.Equal(time.Hour*24, am.maxResetTSGap()) - re.Equal(defaultKsgStorageTSRootPath, am.rootPath) - re.Equal(time.Duration(utils.DefaultLeaderLease)*time.Second, am.saveInterval) + re.Equal(legacySvcRootPath, am.rootPath) + re.Equal(time.Duration(mcsutils.DefaultLeaderLease)*time.Second, am.saveInterval) re.Equal(time.Duration(50)*time.Millisecond, am.updatePhysicalInterval) - keyspaceGroupManager.Close() + ksgMgr.Close() +} + +// TestLoadKeyspaceGroupsAssignment tests the loading of the keyspace group assignment. +func (suite *keyspaceGroupManagerTestSuite) TestLoadKeyspaceGroupsAssignment() { + re := suite.Require() + maxCountInUse := int(mcsutils.MaxKeyspaceGroupCountInUse) + // Test loading of empty keyspace group assignment. + runTestLoadKeyspaceGroupsAssignment(suite.ctx, re, suite.etcdClient, suite.cfg, 0, 0, 100) + // Test loading of single keyspace group assignment. + runTestLoadKeyspaceGroupsAssignment(suite.ctx, re, suite.etcdClient, suite.cfg, 1, 0, 100) + // Test loading of multiple keyspace group assignment. + runTestLoadKeyspaceGroupsAssignment(suite.ctx, re, suite.etcdClient, suite.cfg, 3, 0, 100) + runTestLoadKeyspaceGroupsAssignment(suite.ctx, re, suite.etcdClient, suite.cfg, maxCountInUse-1, 0, 10) + runTestLoadKeyspaceGroupsAssignment(suite.ctx, re, suite.etcdClient, suite.cfg, maxCountInUse, 0, 10) + // Test loading of the keyspace group assignment which exceeds the maximum keyspace group count. + // In this case, the manager should only load/serve the first MaxKeyspaceGroupCountInUse keyspace + // groups and ignore the rest. 
+ runTestLoadKeyspaceGroupsAssignment(suite.ctx, re, suite.etcdClient, suite.cfg, maxCountInUse+1, 0, 10) +} + +// TestLoadWithDifferentBatchSize tests the loading of the keyspace group assignment with the different batch size. +func (suite *keyspaceGroupManagerTestSuite) TestLoadWithDifferentBatchSize() { + re := suite.Require() + + batchSize := int64(17) + maxCount := uint32(1024) + params := []struct { + batchSize int64 + count int + probabilityAssignToMe int // percentage of assigning keyspace groups to this host/pod + }{ + {batchSize: 1, count: 1, probabilityAssignToMe: 100}, + {batchSize: 2, count: int(maxCount / 10), probabilityAssignToMe: 100}, + {batchSize: 7, count: int(maxCount / 10), probabilityAssignToMe: 100}, + {batchSize: batchSize, count: int(batchSize), probabilityAssignToMe: 50}, + {batchSize: int64(maxCount / 13), count: int(maxCount / 13), probabilityAssignToMe: 50}, + {batchSize: int64(maxCount), count: int(maxCount / 13), probabilityAssignToMe: 10}, + } + + for _, param := range params { + runTestLoadKeyspaceGroupsAssignment(suite.ctx, re, suite.etcdClient, suite.cfg, + param.count-1, param.batchSize, param.probabilityAssignToMe) + runTestLoadKeyspaceGroupsAssignment(suite.ctx, re, suite.etcdClient, suite.cfg, + param.count, param.batchSize, param.probabilityAssignToMe) + runTestLoadKeyspaceGroupsAssignment(suite.ctx, re, suite.etcdClient, suite.cfg, + param.count+1, param.batchSize, param.probabilityAssignToMe) + } +} + +// TestWatchAndDynamicallyApplyChanges tests the keyspace group manager watch and dynamically apply +// keyspace groups' membership/distribution meta changes. +func (suite *keyspaceGroupManagerTestSuite) TestWatchAndDynamicallyApplyChanges() { + re := suite.Require() + + // Start with the empty keyspace group assignment. + mgr := newUniqueKeyspaceGroupManager(suite.ctx, suite.etcdClient, suite.cfg, 0, 0) + re.NotNil(mgr) + defer mgr.Close() + err := mgr.Initialize(true) + re.NoError(err) + + rootPath := mgr.legacySvcRootPath + svcAddr := mgr.tsoServiceID.ServiceAddr + + // Initialize PUT/DELETE events + events := []*etcdEvent{} + // Assign keyspace group 0 to this host/pod/keyspace-group-manager. + // final result: [0] + events = generateKeyspaceGroupEvent(events, mvccpb.PUT, 0, []string{svcAddr}) + // Assign keyspace group 1 to this host/pod/keyspace-group-manager. + // final result: [0,1] + events = generateKeyspaceGroupEvent(events, mvccpb.PUT, 1, []string{"unknown", svcAddr}) + // Assign keyspace group 2 to other host/pod/keyspace-group-manager. + // final result: [0,1] + events = generateKeyspaceGroupEvent(events, mvccpb.PUT, 2, []string{"unknown"}) + // Assign keyspace group 3 to this host/pod/keyspace-group-manager. + // final result: [0,1,3] + events = generateKeyspaceGroupEvent(events, mvccpb.PUT, 3, []string{svcAddr}) + // Delete keyspace group 0 + // final result: [1,3] + events = generateKeyspaceGroupEvent(events, mvccpb.DELETE, 0, []string{}) + // Put keyspace group 4 which doesn't belong to anyone. + // final result: [1,3] + events = generateKeyspaceGroupEvent(events, mvccpb.PUT, 4, []string{}) + // Put keyspace group 5 which doesn't belong to anyone. + // final result: [1,3] + events = generateKeyspaceGroupEvent(events, mvccpb.PUT, 5, []string{}) + // Assign keyspace group 2 to this host/pod/keyspace-group-manager. + // final result: [1,2,3] + events = generateKeyspaceGroupEvent(events, mvccpb.PUT, 2, []string{svcAddr}) + // Reassign keyspace group 3 to no one. 
+ // final result: [1,2]
+ events = generateKeyspaceGroupEvent(events, mvccpb.PUT, 3, []string{})
+ // Reassign keyspace group 4 to this host/pod/keyspace-group-manager.
+ // final result: [1,2,4]
+ events = generateKeyspaceGroupEvent(events, mvccpb.PUT, 4, []string{svcAddr})
+
+ // Eventually, this keyspace group manager is expected to serve the following keyspace groups.
+ idsExpected := []int{1, 2, 4}
+
+ // Apply the keyspace group assignment change events to etcd.
+ for _, event := range events {
+ switch event.eventType {
+ case mvccpb.PUT:
+ err = putKeyspaceGroupToEtcd(suite.ctx, suite.etcdClient, rootPath, event.ksg)
+ re.NoError(err)
+ case mvccpb.DELETE:
+ err = deleteKeyspaceGroupInEtcd(suite.ctx, suite.etcdClient, rootPath, event.ksg.ID)
+ re.NoError(err)
+ }
+ }
+
+ // Verify the keyspace group assignment.
+ testutil.Eventually(re, func() bool {
+ idsAssigned := collectAssignedKeyspaceGroupIDs(re, mgr)
+ return reflect.DeepEqual(idsExpected, idsAssigned)
+ })
+}
+
+type etcdEvent struct {
+ eventType mvccpb.Event_EventType
+ ksg *endpoint.KeyspaceGroup
+}
+
+func generateKeyspaceGroupEvent(
+ events []*etcdEvent, eventType mvccpb.Event_EventType, id uint32, addrs []string,
+) []*etcdEvent {
+ members := []endpoint.KeyspaceGroupMember{}
+ for _, addr := range addrs {
+ members = append(members, endpoint.KeyspaceGroupMember{Address: addr})
+ }
+
+ return append(events,
+ &etcdEvent{
+ eventType: eventType,
+ ksg: &endpoint.KeyspaceGroup{
+ ID: id,
+ Members: members,
+ Keyspaces: []uint32{id},
+ },
+ },
+ )
+}
+
+// runTestLoadKeyspaceGroupsAssignment tests the loading of multiple keyspace group assignments.
+func runTestLoadKeyspaceGroupsAssignment(
+ ctx context.Context,
+ re *require.Assertions,
+ etcdClient *clientv3.Client,
+ cfg *TestServiceConfig,
+ numberOfKeyspaceGroupsToAdd int,
+ loadKeyspaceGroupsBatchSize int64, // set to 0 to use the default value
+ probabilityAssignToMe int, // percentage of assigning keyspace groups to this host/pod
+) {
+ idsExpected := []int{}
+ mgr := newUniqueKeyspaceGroupManager(ctx, etcdClient, cfg, 0, loadKeyspaceGroupsBatchSize)
+ re.NotNil(mgr)
+ defer mgr.Close()
+
+ step := 30
+ mux := sync.Mutex{}
+ wg := sync.WaitGroup{}
+ for i := 0; i < numberOfKeyspaceGroupsToAdd; i += step {
+ wg.Add(1)
+ go func(startID int) {
+ defer wg.Done()
+
+ endID := startID + step
+ if endID > numberOfKeyspaceGroupsToAdd {
+ endID = numberOfKeyspaceGroupsToAdd
+ }
+
+ randomGen := rand.New(rand.NewSource(time.Now().UnixNano()))
+ for j := startID; j < endID; j++ {
+ assignToMe := false
+ // Assign the keyspace group to this host/pod with the given probability,
+ // and the keyspace group manager only loads the keyspace groups with id
+ // less than len(mgr.ams).
+ if j < len(mgr.ams) && randomGen.Intn(100) < probabilityAssignToMe {
+ assignToMe = true
+ mux.Lock()
+ idsExpected = append(idsExpected, j)
+ mux.Unlock()
+ }
+ addKeyspaceGroupAssignment(
+ ctx, etcdClient, assignToMe,
+ mgr.legacySvcRootPath, mgr.tsoServiceID.ServiceAddr, uint32(j))
+ }
+ }(i)
+ }
+ wg.Wait()
+
+ err := mgr.Initialize(true)
+ re.NoError(err)
+
+ // Verify the keyspace group assignment.
+ sort.Ints(idsExpected) + idsAssigned := collectAssignedKeyspaceGroupIDs(re, mgr) + re.Equal(idsExpected, idsAssigned) +} + +func newUniqueKeyspaceGroupManager( + ctx context.Context, etcdClient *clientv3.Client, cfg *TestServiceConfig, + loadKeyspaceGroupsTimeout time.Duration, // set to 0 to use the default value + loadKeyspaceGroupsBatchSize int64, // set to 0 to use the default value +) *KeyspaceGroupManager { + tsoServiceID := &discovery.ServiceRegistryEntry{ServiceAddr: cfg.AdvertiseListenAddr} + uniqueID := memberutil.GenerateUniqueID(uuid.New().String()) + uniqueStr := strconv.FormatUint(uniqueID, 10) + legacySvcRootPath := path.Join("/pd", uniqueStr) + tsoSvcRootPath := path.Join("/ms", uniqueStr, "tso") + electionNamePrefix := "kgm-test-" + uniqueStr + + keyspaceGroupManager := NewKeyspaceGroupManager( + ctx, tsoServiceID, etcdClient, electionNamePrefix, legacySvcRootPath, tsoSvcRootPath, cfg) + if loadKeyspaceGroupsTimeout != 0 { + keyspaceGroupManager.loadKeyspaceGroupsTimeout = loadKeyspaceGroupsTimeout + } + if loadKeyspaceGroupsBatchSize != 0 { + keyspaceGroupManager.loadKeyspaceGroupsBatchSize = loadKeyspaceGroupsBatchSize + } + return keyspaceGroupManager +} + +// putKeyspaceGroupToEtcd puts a keyspace group to etcd. +func putKeyspaceGroupToEtcd( + ctx context.Context, etcdClient *clientv3.Client, + rootPath string, group *endpoint.KeyspaceGroup, +) error { + key := strings.Join([]string{rootPath, endpoint.KeyspaceGroupIDPath(group.ID)}, "/") + value, err := json.Marshal(group) + if err != nil { + return err + } + + if _, err := etcdClient.Put(ctx, key, string(value)); err != nil { + return err + } + + return nil +} + +// deleteKeyspaceGroupInEtcd deletes a keyspace group in etcd. +func deleteKeyspaceGroupInEtcd( + ctx context.Context, etcdClient *clientv3.Client, + rootPath string, id uint32, +) error { + key := strings.Join([]string{rootPath, endpoint.KeyspaceGroupIDPath(id)}, "/") + + if _, err := etcdClient.Delete(ctx, key); err != nil { + return err + } + + return nil +} + +// addKeyspaceGroupAssignment adds a keyspace group assignment to etcd. 
+func addKeyspaceGroupAssignment(
+ ctx context.Context, etcdClient *clientv3.Client,
+ assignToMe bool, rootPath, svcAddr string, id uint32,
+) error {
+ var location string
+ if assignToMe {
+ location = svcAddr
+ } else {
+ location = uuid.NewString()
+ }
+ group := &endpoint.KeyspaceGroup{
+ ID: id,
+ Members: []endpoint.KeyspaceGroupMember{{Address: location}},
+ Keyspaces: []uint32{id},
+ }
+
+ key := strings.Join([]string{rootPath, endpoint.KeyspaceGroupIDPath(id)}, "/")
+ value, err := json.Marshal(group)
+ if err != nil {
+ return err
+ }
+
+ if _, err := etcdClient.Put(ctx, key, string(value)); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func collectAssignedKeyspaceGroupIDs(re *require.Assertions, ksgMgr *KeyspaceGroupManager) []int {
+ ids := []int{}
+ for i := 0; i < len(ksgMgr.ksgs); i++ {
+ ksg := ksgMgr.ksgs[i].Load()
+ if ksg == nil {
+ re.Nil(ksgMgr.ams[i].Load(), fmt.Sprintf("ksg is nil but am is not nil for id %d", i))
+ } else {
+ am := ksgMgr.ams[i].Load()
+ re.NotNil(am, fmt.Sprintf("ksg is not nil but am is nil for id %d", i))
+ re.Equal(i, int(am.ksgID))
+ re.Equal(i, int(ksg.ID))
+ for _, m := range ksg.Members {
+ if m.Address == ksgMgr.tsoServiceID.ServiceAddr {
+ ids = append(ids, i)
+ break
+ }
+ }
+ }
+ }
+
+ return ids
 }
diff --git a/server/server.go b/server/server.go
index b2573032d7ff..b85d8f026e40 100644
--- a/server/server.go
+++ b/server/server.go
@@ -1299,7 +1299,7 @@ func (s *Server) GetServiceRateLimiter() *ratelimit.Limiter {
 return s.serviceRateLimiter
 }
-// IsInRateLimitAllowList returns whethis given service label is in allow lost
+// IsInRateLimitAllowList returns whether the given service label is in the allow list
 func (s *Server) IsInRateLimitAllowList(serviceLabel string) bool {
 return s.serviceRateLimiter.IsInAllowList(serviceLabel)
 }
From 69ec7fc04005c3c5982d2445744ce5a5498a3118 Mon Sep 17 00:00:00 2001
From: Ryan Leung
Date: Thu, 6 Apr 2023 17:14:58 +0800
Subject: [PATCH 13/16] *: define user kind for keyspace group (#6241)

ref tikv/pd#6231

Signed-off-by: Ryan Leung
---
 pkg/keyspace/keyspace.go | 35 ++++++++++++++++-
 pkg/keyspace/keyspace_test.go | 14 ++++++-
 pkg/keyspace/tso_keyspace_group.go | 11 ++++--
 pkg/keyspace/tso_keyspace_group_test.go | 21 ++++++----
 pkg/storage/endpoint/tso_keyspace_group.go | 38 +++++++++++++++++++
 server/server.go | 2 +-
 tests/server/apiv2/handlers/keyspace_test.go | 4 ++
 .../apiv2/handlers/tso_keyspace_group_test.go | 18 ++++-----
 8 files changed, 121 insertions(+), 22 deletions(-)

diff --git a/pkg/keyspace/keyspace.go b/pkg/keyspace/keyspace.go
index 1d6fe227891e..189cba7fcbbe 100644
--- a/pkg/keyspace/keyspace.go
+++ b/pkg/keyspace/keyspace.go
@@ -46,6 +46,10 @@ const (
 regionLabelIDPrefix = "keyspaces/"
 // regionLabelKey is the key for keyspace id in keyspace region label.
 regionLabelKey = "id"
+ // UserKindKey is the key for user kind in keyspace config.
+ UserKindKey = "user_kind"
+ // TSOKeyspaceGroupIDKey is the key for tso keyspace group id in keyspace config.
+ TSOKeyspaceGroupIDKey = "tso_keyspace_group_id"
 )
 // Config is the interface for keyspace config.
@@ -68,6 +72,7 @@ type Manager struct {
 ctx context.Context
 // config is the configurations of the manager.
 config Config
+ kgm *GroupManager
 }
 // CreateKeyspaceRequest represents necessary arguments to create a keyspace.
@@ -85,6 +90,7 @@ func NewKeyspaceManager(store endpoint.KeyspaceStorage, cluster schedule.Cluster, idAllocator id.Allocator, config Config, + kgm *GroupManager, ) *Manager { return &Manager{ metaLock: syncutil.NewLockGroup(syncutil.WithHash(keyspaceIDHash)), @@ -93,6 +99,7 @@ func NewKeyspaceManager(store endpoint.KeyspaceStorage, cluster: cluster, ctx: context.TODO(), config: config, + kgm: kgm, } } @@ -103,14 +110,22 @@ func (manager *Manager) Bootstrap() error { return err } now := time.Now().Unix() + id, err := manager.kgm.GetAvailableKeyspaceGroupIDByKind(endpoint.Basic) + if err != nil { + return err + } defaultKeyspace := &keyspacepb.KeyspaceMeta{ Id: DefaultKeyspaceID, Name: DefaultKeyspaceName, State: keyspacepb.KeyspaceState_ENABLED, CreatedAt: now, StateChangedAt: now, + Config: map[string]string{ + UserKindKey: endpoint.Basic.String(), + TSOKeyspaceGroupIDKey: id, + }, } - err := manager.saveNewKeyspace(defaultKeyspace) + err = manager.saveNewKeyspace(defaultKeyspace) // It's possible that default keyspace already exists in the storage (e.g. PD restart/recover), // so we ignore the keyspaceExists error. if err != nil && err != ErrKeyspaceExists { @@ -120,9 +135,17 @@ func (manager *Manager) Bootstrap() error { // Initialize pre-alloc keyspace. preAlloc := manager.config.GetPreAlloc() for _, keyspaceName := range preAlloc { + id, err := manager.kgm.GetAvailableKeyspaceGroupIDByKind(endpoint.Basic) + if err != nil { + return err + } _, err = manager.CreateKeyspace(&CreateKeyspaceRequest{ Name: keyspaceName, Now: now, + Config: map[string]string{ + UserKindKey: endpoint.Basic.String(), + TSOKeyspaceGroupIDKey: id, + }, }) // Ignore the keyspaceExists error for the same reason as saving default keyspace. if err != nil && err != ErrKeyspaceExists { @@ -148,6 +171,16 @@ func (manager *Manager) CreateKeyspace(request *CreateKeyspaceRequest) (*keyspac if err != nil { return nil, err } + userKind := endpoint.StringUserKind(request.Config[UserKindKey]) + id, err := manager.kgm.GetAvailableKeyspaceGroupIDByKind(userKind) + if err != nil { + return nil, err + } + if request.Config == nil { + request.Config = make(map[string]string) + } + request.Config[TSOKeyspaceGroupIDKey] = id + request.Config[UserKindKey] = userKind.String() // Create and save keyspace metadata. 
keyspace := &keyspacepb.KeyspaceMeta{ Id: newID, diff --git a/pkg/keyspace/keyspace_test.go b/pkg/keyspace/keyspace_test.go index 1fc7252bc6a2..de1cd71041c0 100644 --- a/pkg/keyspace/keyspace_test.go +++ b/pkg/keyspace/keyspace_test.go @@ -15,6 +15,7 @@ package keyspace import ( + "context" "fmt" "math" "strconv" @@ -39,6 +40,8 @@ const ( type keyspaceTestSuite struct { suite.Suite + ctx context.Context + cancel context.CancelFunc manager *Manager } @@ -53,12 +56,18 @@ type mockConfig struct { func (m *mockConfig) GetPreAlloc() []string { return m.PreAlloc } func (suite *keyspaceTestSuite) SetupTest() { + suite.ctx, suite.cancel = context.WithCancel(context.Background()) store := endpoint.NewStorageEndpoint(kv.NewMemoryKV(), nil) allocator := mockid.NewIDAllocator() - suite.manager = NewKeyspaceManager(store, nil, allocator, &mockConfig{}) + kgm := NewKeyspaceGroupManager(suite.ctx, store) + suite.manager = NewKeyspaceManager(store, nil, allocator, &mockConfig{}, kgm) suite.NoError(suite.manager.Bootstrap()) } +func (suite *keyspaceTestSuite) TearDownTest() { + suite.cancel() +} + func (suite *keyspaceTestSuite) SetupSuite() { suite.NoError(failpoint.Enable("github.com/tikv/pd/pkg/keyspace/skipSplitRegion", "return(true)")) } @@ -155,6 +164,9 @@ func (suite *keyspaceTestSuite) TestUpdateKeyspaceConfig() { // Changing config of DEFAULT keyspace is allowed. updated, err := manager.UpdateKeyspaceConfig(DefaultKeyspaceName, mutations) re.NoError(err) + // remove auto filled fields + delete(updated.Config, TSOKeyspaceGroupIDKey) + delete(updated.Config, UserKindKey) checkMutations(re, nil, updated.Config, mutations) } diff --git a/pkg/keyspace/tso_keyspace_group.go b/pkg/keyspace/tso_keyspace_group.go index a2165666aadf..ae3dd7b9c168 100644 --- a/pkg/keyspace/tso_keyspace_group.go +++ b/pkg/keyspace/tso_keyspace_group.go @@ -40,9 +40,8 @@ func NewKeyspaceGroupManager(ctx context.Context, store endpoint.KeyspaceGroupSt // Bootstrap saves default keyspace group info. func (m *GroupManager) Bootstrap() error { defaultKeyspaceGroup := &endpoint.KeyspaceGroup{ - ID: utils.DefaultKeySpaceGroupID, - // TODO: define a user kind type - UserKind: "default", + ID: utils.DefaultKeySpaceGroupID, + UserKind: endpoint.Basic.String(), } err := m.saveKeyspaceGroups([]*endpoint.KeyspaceGroup{defaultKeyspaceGroup}) // It's possible that default keyspace group already exists in the storage (e.g. PD restart/recover), @@ -111,3 +110,9 @@ func (m *GroupManager) saveKeyspaceGroups(keyspaceGroups []*endpoint.KeyspaceGro return nil }) } + +// GetAvailableKeyspaceGroupIDByKind returns the available keyspace group id by user kind. 
+func (m *GroupManager) GetAvailableKeyspaceGroupIDByKind(userKind endpoint.UserKind) (string, error) { + // TODO: implement it + return "0", nil +} diff --git a/pkg/keyspace/tso_keyspace_group_test.go b/pkg/keyspace/tso_keyspace_group_test.go index f55d3ce80280..0bb3864b94c7 100644 --- a/pkg/keyspace/tso_keyspace_group_test.go +++ b/pkg/keyspace/tso_keyspace_group_test.go @@ -25,6 +25,8 @@ import ( type keyspaceGroupTestSuite struct { suite.Suite + ctx context.Context + cancel context.CancelFunc manager *GroupManager } @@ -33,26 +35,31 @@ func TestKeyspaceGroupTestSuite(t *testing.T) { } func (suite *keyspaceGroupTestSuite) SetupTest() { + suite.ctx, suite.cancel = context.WithCancel(context.Background()) store := endpoint.NewStorageEndpoint(kv.NewMemoryKV(), nil) - suite.manager = NewKeyspaceGroupManager(context.Background(), store) + suite.manager = NewKeyspaceGroupManager(suite.ctx, store) suite.NoError(suite.manager.Bootstrap()) } +func (suite *keyspaceGroupTestSuite) TearDownTest() { + suite.cancel() +} + func (suite *keyspaceGroupTestSuite) TestKeyspaceGroupOperations() { re := suite.Require() keyspaceGroups := []*endpoint.KeyspaceGroup{ { ID: uint32(1), - UserKind: "business", + UserKind: endpoint.Standard.String(), }, { ID: uint32(2), - UserKind: "business", + UserKind: endpoint.Standard.String(), }, { ID: uint32(3), - UserKind: "business", + UserKind: endpoint.Standard.String(), }, } err := suite.manager.CreateKeyspaceGroups(keyspaceGroups) @@ -69,11 +76,11 @@ func (suite *keyspaceGroupTestSuite) TestKeyspaceGroupOperations() { kg, err := suite.manager.GetKeyspaceGroupByID(0) re.NoError(err) re.Equal(uint32(0), kg.ID) - re.Equal("default", kg.UserKind) + re.Equal(endpoint.Basic.String(), kg.UserKind) kg, err = suite.manager.GetKeyspaceGroupByID(3) re.NoError(err) re.Equal(uint32(3), kg.ID) - re.Equal("business", kg.UserKind) + re.Equal(endpoint.Standard.String(), kg.UserKind) // remove the keyspace group 3 err = suite.manager.DeleteKeyspaceGroupByID(3) re.NoError(err) @@ -83,7 +90,7 @@ func (suite *keyspaceGroupTestSuite) TestKeyspaceGroupOperations() { re.Empty(kg) // create an existing keyspace group - keyspaceGroups = []*endpoint.KeyspaceGroup{{ID: uint32(1), UserKind: "business"}} + keyspaceGroups = []*endpoint.KeyspaceGroup{{ID: uint32(1), UserKind: endpoint.Standard.String()}} err = suite.manager.CreateKeyspaceGroups(keyspaceGroups) re.Error(err) } diff --git a/pkg/storage/endpoint/tso_keyspace_group.go b/pkg/storage/endpoint/tso_keyspace_group.go index 3e4b5f2235e6..91268d2c7392 100644 --- a/pkg/storage/endpoint/tso_keyspace_group.go +++ b/pkg/storage/endpoint/tso_keyspace_group.go @@ -22,6 +22,44 @@ import ( "go.etcd.io/etcd/clientv3" ) +// UserKind represents the user kind. +type UserKind int + +// Different user kinds. +const ( + Basic UserKind = iota + Standard + Enterprise + + UserKindCount +) + +// StringUserKind creates a UserKind with string. +func StringUserKind(input string) UserKind { + switch input { + case Basic.String(): + return Basic + case Standard.String(): + return Standard + case Enterprise.String(): + return Enterprise + default: + return Basic + } +} + +func (k UserKind) String() string { + switch k { + case Basic: + return "basic" + case Standard: + return "standard" + case Enterprise: + return "enterprise" + } + return "unknown UserKind" +} + // KeyspaceGroupMember defines an election member which campaigns for the primary of the keyspace group. 
type KeyspaceGroupMember struct { Address string `json:"address"` diff --git a/server/server.go b/server/server.go index b85d8f026e40..06ad2198288d 100644 --- a/server/server.go +++ b/server/server.go @@ -435,8 +435,8 @@ func (s *Server) startServer(ctx context.Context) error { Member: s.member.MemberValue(), Step: keyspace.AllocStep, }) - s.keyspaceManager = keyspace.NewKeyspaceManager(s.storage, s.cluster, keyspaceIDAllocator, &s.cfg.Keyspace) s.keyspaceGroupManager = keyspace.NewKeyspaceGroupManager(s.ctx, s.storage) + s.keyspaceManager = keyspace.NewKeyspaceManager(s.storage, s.cluster, keyspaceIDAllocator, &s.cfg.Keyspace, s.keyspaceGroupManager) s.hbStreams = hbstream.NewHeartbeatStreams(ctx, s.clusterID, s.cluster) // initial hot_region_storage in here. s.hotRegionStorage, err = storage.NewHotRegionsStorage( diff --git a/tests/server/apiv2/handlers/keyspace_test.go b/tests/server/apiv2/handlers/keyspace_test.go index b9b9742b2dc9..2a3a197c7406 100644 --- a/tests/server/apiv2/handlers/keyspace_test.go +++ b/tests/server/apiv2/handlers/keyspace_test.go @@ -27,6 +27,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/keyspace" + "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server/apiv2/handlers" "github.com/tikv/pd/tests" @@ -218,6 +219,9 @@ func mustCreateKeyspace(re *require.Assertions, server *tests.TestServer, reques re.NoError(err) meta := &handlers.KeyspaceMeta{} re.NoError(json.Unmarshal(data, meta)) + // When creating a keyspace, it will be assigned a keyspace group id. + request.Config[keyspace.TSOKeyspaceGroupIDKey] = "0" + request.Config[keyspace.UserKindKey] = endpoint.Basic.String() checkCreateRequest(re, request, meta.KeyspaceMeta) return meta.KeyspaceMeta } diff --git a/tests/server/apiv2/handlers/tso_keyspace_group_test.go b/tests/server/apiv2/handlers/tso_keyspace_group_test.go index cee3c2f69f54..c800af324df3 100644 --- a/tests/server/apiv2/handlers/tso_keyspace_group_test.go +++ b/tests/server/apiv2/handlers/tso_keyspace_group_test.go @@ -33,7 +33,8 @@ const keyspaceGroupsPrefix = "/pd/api/v2/tso/keyspace-groups" type keyspaceGroupTestSuite struct { suite.Suite - cleanup func() + ctx context.Context + cancel context.CancelFunc cluster *tests.TestCluster server *tests.TestServer } @@ -43,9 +44,8 @@ func TestKeyspaceGroupTestSuite(t *testing.T) { } func (suite *keyspaceGroupTestSuite) SetupTest() { - ctx, cancel := context.WithCancel(context.Background()) - suite.cleanup = cancel - cluster, err := tests.NewTestCluster(ctx, 1) + suite.ctx, suite.cancel = context.WithCancel(context.Background()) + cluster, err := tests.NewTestCluster(suite.ctx, 1) suite.cluster = cluster suite.NoError(err) suite.NoError(cluster.RunInitialServers()) @@ -55,7 +55,7 @@ func (suite *keyspaceGroupTestSuite) SetupTest() { } func (suite *keyspaceGroupTestSuite) TearDownTest() { - suite.cleanup() + suite.cancel() suite.cluster.Destroy() } @@ -64,11 +64,11 @@ func (suite *keyspaceGroupTestSuite) TestCreateKeyspaceGroups() { kgs := &handlers.CreateKeyspaceGroupParams{KeyspaceGroups: []*endpoint.KeyspaceGroup{ { ID: uint32(1), - UserKind: "business", + UserKind: endpoint.Standard.String(), }, { ID: uint32(2), - UserKind: "business", + UserKind: endpoint.Standard.String(), }, }} @@ -80,11 +80,11 @@ func (suite *keyspaceGroupTestSuite) TestLoadKeyspaceGroup() { kgs := &handlers.CreateKeyspaceGroupParams{KeyspaceGroups: []*endpoint.KeyspaceGroup{ { ID: uint32(1), - UserKind: "business", 
+ UserKind: endpoint.Standard.String(), }, { ID: uint32(2), - UserKind: "business", + UserKind: endpoint.Standard.String(), }, }} From 6bac6b7a68035b7744eeccb8c116ee7b2b5aa1fd Mon Sep 17 00:00:00 2001 From: Bin Shi <39923490+binshi-bing@users.noreply.github.com> Date: Thu, 6 Apr 2023 20:20:58 -0700 Subject: [PATCH 14/16] Add more failure tests when tso service loading initial keyspace groups assignment (#6280) ref tikv/pd#6232 Add more failure tests when tso service loading initial keyspace groups assignment Signed-off-by: Bin Shi --- errors.toml | 5 ++ pkg/errs/errno.go | 25 ++++---- pkg/tso/keyspace_group_manager.go | 36 +++++++++-- pkg/tso/keyspace_group_manager_test.go | 82 +++++++++++++++++++++++--- 4 files changed, 124 insertions(+), 24 deletions(-) diff --git a/errors.toml b/errors.toml index 5b9ecd0a3458..1133a8bf12ac 100644 --- a/errors.toml +++ b/errors.toml @@ -1,6 +1,11 @@ # AUTOGENERATED BY github.com/pingcap/errors/errdoc-gen # YOU CAN CHANGE THE 'description'/'workaround' FIELDS IF THEM ARE IMPROPER. +["ErrLoadKeyspaceGroupsRetryExhaustd"] +error = ''' +load keyspace groups retry exhausted, %s +''' + ["ErrLoadKeyspaceGroupsTerminated"] error = ''' load keyspace groups terminated diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index 1deb285df948..d14275bb5e30 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -39,18 +39,19 @@ var ( // tso errors var ( - ErrSetLocalTSOConfig = errors.Normalize("set local tso config failed, %s", errors.RFCCodeText("PD:tso:ErrSetLocalTSOConfig")) - ErrGetAllocator = errors.Normalize("get allocator failed, %s", errors.RFCCodeText("PD:tso:ErrGetAllocator")) - ErrGetLocalAllocator = errors.Normalize("get local allocator failed, %s", errors.RFCCodeText("PD:tso:ErrGetLocalAllocator")) - ErrSyncMaxTS = errors.Normalize("sync max ts failed, %s", errors.RFCCodeText("PD:tso:ErrSyncMaxTS")) - ErrResetUserTimestamp = errors.Normalize("reset user timestamp failed, %s", errors.RFCCodeText("PD:tso:ErrResetUserTimestamp")) - ErrGenerateTimestamp = errors.Normalize("generate timestamp failed, %s", errors.RFCCodeText("PD:tso:ErrGenerateTimestamp")) - ErrLogicOverflow = errors.Normalize("logic part overflow", errors.RFCCodeText("PD:tso:ErrLogicOverflow")) - ErrProxyTSOTimeout = errors.Normalize("proxy tso timeout", errors.RFCCodeText("PD:tso:ErrProxyTSOTimeout")) - ErrKeyspaceGroupIDInvalid = errors.Normalize("the keyspace group id is invalid, %s", errors.RFCCodeText("PD:tso:ErrKeyspaceGroupIDInvalid")) - ErrGetAllocatorManager = errors.Normalize("get allocator manager failed, %s", errors.RFCCodeText("PD:tso:ErrGetAllocatorManager")) - ErrLoadKeyspaceGroupsTimeout = errors.Normalize("load keyspace groups timeout", errors.RFCCodeText("ErrLoadKeyspaceGroupsTimeout")) - ErrLoadKeyspaceGroupsTerminated = errors.Normalize("load keyspace groups terminated", errors.RFCCodeText("ErrLoadKeyspaceGroupsTerminated")) + ErrSetLocalTSOConfig = errors.Normalize("set local tso config failed, %s", errors.RFCCodeText("PD:tso:ErrSetLocalTSOConfig")) + ErrGetAllocator = errors.Normalize("get allocator failed, %s", errors.RFCCodeText("PD:tso:ErrGetAllocator")) + ErrGetLocalAllocator = errors.Normalize("get local allocator failed, %s", errors.RFCCodeText("PD:tso:ErrGetLocalAllocator")) + ErrSyncMaxTS = errors.Normalize("sync max ts failed, %s", errors.RFCCodeText("PD:tso:ErrSyncMaxTS")) + ErrResetUserTimestamp = errors.Normalize("reset user timestamp failed, %s", errors.RFCCodeText("PD:tso:ErrResetUserTimestamp")) + ErrGenerateTimestamp = errors.Normalize("generate timestamp 
failed, %s", errors.RFCCodeText("PD:tso:ErrGenerateTimestamp")) + ErrLogicOverflow = errors.Normalize("logic part overflow", errors.RFCCodeText("PD:tso:ErrLogicOverflow")) + ErrProxyTSOTimeout = errors.Normalize("proxy tso timeout", errors.RFCCodeText("PD:tso:ErrProxyTSOTimeout")) + ErrKeyspaceGroupIDInvalid = errors.Normalize("the keyspace group id is invalid, %s", errors.RFCCodeText("PD:tso:ErrKeyspaceGroupIDInvalid")) + ErrGetAllocatorManager = errors.Normalize("get allocator manager failed, %s", errors.RFCCodeText("PD:tso:ErrGetAllocatorManager")) + ErrLoadKeyspaceGroupsTimeout = errors.Normalize("load keyspace groups timeout", errors.RFCCodeText("ErrLoadKeyspaceGroupsTimeout")) + ErrLoadKeyspaceGroupsTerminated = errors.Normalize("load keyspace groups terminated", errors.RFCCodeText("ErrLoadKeyspaceGroupsTerminated")) + ErrLoadKeyspaceGroupsRetryExhaustd = errors.Normalize("load keyspace groups retry exhausted, %s", errors.RFCCodeText("ErrLoadKeyspaceGroupsRetryExhaustd")) ) // member errors diff --git a/pkg/tso/keyspace_group_manager.go b/pkg/tso/keyspace_group_manager.go index e2cfba24658b..2f97299fede9 100644 --- a/pkg/tso/keyspace_group_manager.go +++ b/pkg/tso/keyspace_group_manager.go @@ -17,6 +17,7 @@ package tso import ( "context" "encoding/json" + "errors" "fmt" "path" "strings" @@ -24,6 +25,7 @@ import ( "sync/atomic" "time" + "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" @@ -46,8 +48,8 @@ const ( // keyspace group assignment defaultLoadKeyspaceGroupsTimeout = 30 * time.Second defaultLoadKeyspaceGroupsBatchSize = int64(400) - loadFromEtcdMaxRetryTimes = 6 - loadFromEtcdRetryInterval = 500 * time.Millisecond + defaultLoadFromEtcdRetryInterval = 500 * time.Millisecond + defaultLoadFromEtcdMaxRetryTimes = int(defaultLoadKeyspaceGroupsTimeout / defaultLoadFromEtcdRetryInterval) watchKEtcdChangeRetryInterval = 1 * time.Second ) @@ -109,6 +111,7 @@ type KeyspaceGroupManager struct { // loadKeyspaceGroupsTimeout is the timeout for loading the initial keyspace group assignment. loadKeyspaceGroupsTimeout time.Duration loadKeyspaceGroupsBatchSize int64 + loadFromEtcdMaxRetryTimes int } // NewKeyspaceGroupManager creates a new Keyspace Group Manager. @@ -139,6 +142,7 @@ func NewKeyspaceGroupManager( cfg: cfg, loadKeyspaceGroupsTimeout: defaultLoadKeyspaceGroupsTimeout, loadKeyspaceGroupsBatchSize: defaultLoadKeyspaceGroupsBatchSize, + loadFromEtcdMaxRetryTimes: defaultLoadFromEtcdMaxRetryTimes, } kgm.legacySvcStorage = endpoint.NewStorageEndpoint( @@ -286,19 +290,41 @@ func (kgm *KeyspaceGroupManager) loadKeyspaceGroups( []string{rootPath, clientv3.GetPrefixRangeEnd(endpoint.KeyspaceGroupIDPrefix())}, "/") opOption := []clientv3.OpOption{clientv3.WithRange(endKey), clientv3.WithLimit(limit)} - var resp *clientv3.GetResponse - for i := 0; i < loadFromEtcdMaxRetryTimes; i++ { + var ( + i int + resp *clientv3.GetResponse + ) + for ; i < kgm.loadFromEtcdMaxRetryTimes; i++ { resp, err = etcdutil.EtcdKVGet(kgm.etcdClient, startKey, opOption...) 
+ + failpoint.Inject("delayLoadKeyspaceGroups", func(val failpoint.Value) { + if sleepIntervalSeconds, ok := val.(int); ok && sleepIntervalSeconds > 0 { + time.Sleep(time.Duration(sleepIntervalSeconds) * time.Second) + } + }) + + failpoint.Inject("loadKeyspaceGroupsTemporaryFail", func(val failpoint.Value) { + if maxFailTimes, ok := val.(int); ok && i < maxFailTimes { + err = errors.New("fail to read from etcd") + failpoint.Continue() + } + }) + if err == nil && resp != nil { break } + select { case <-ctx.Done(): return 0, []*endpoint.KeyspaceGroup{}, false, errs.ErrLoadKeyspaceGroupsTerminated - case <-time.After(loadFromEtcdRetryInterval): + case <-time.After(defaultLoadFromEtcdRetryInterval): } } + if i == kgm.loadFromEtcdMaxRetryTimes { + return 0, []*endpoint.KeyspaceGroup{}, false, errs.ErrLoadKeyspaceGroupsRetryExhaustd.FastGenByArgs(err) + } + kgs := make([]*endpoint.KeyspaceGroup, 0, len(resp.Kvs)) for _, item := range resp.Kvs { kg := &endpoint.KeyspaceGroup{} diff --git a/pkg/tso/keyspace_group_manager_test.go b/pkg/tso/keyspace_group_manager_test.go index bbdd1e2cfd3e..64f1a4622933 100644 --- a/pkg/tso/keyspace_group_manager_test.go +++ b/pkg/tso/keyspace_group_manager_test.go @@ -29,8 +29,10 @@ import ( "time" "github.com/google/uuid" + "github.com/pingcap/failpoint" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/mcs/discovery" mcsutils "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/storage/endpoint" @@ -168,13 +170,80 @@ func (suite *keyspaceGroupManagerTestSuite) TestLoadWithDifferentBatchSize() { } } +// TestLoadKeyspaceGroupsTimeout tests there is timeout when loading the initial keyspace group assignment +// from etcd. The initialization of the keyspace group manager should fail. +func (suite *keyspaceGroupManagerTestSuite) TestLoadKeyspaceGroupsTimeout() { + re := suite.Require() + + mgr := newUniqueKeyspaceGroupManager(suite.ctx, suite.etcdClient, suite.cfg, 1) + re.NotNil(mgr) + defer mgr.Close() + + addKeyspaceGroupAssignment( + suite.ctx, suite.etcdClient, true, + mgr.legacySvcRootPath, mgr.tsoServiceID.ServiceAddr, uint32(0)) + + // Set the timeout to 1 second and inject the delayLoadKeyspaceGroups to return 3 seconds to let + // the loading sleep 3 seconds. + mgr.loadKeyspaceGroupsTimeout = time.Second + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/tso/delayLoadKeyspaceGroups", "return(3)")) + err := mgr.Initialize(true) + // If loading keyspace groups timeout, the initialization should fail with ErrLoadKeyspaceGroupsTerminated. + re.Equal(errs.ErrLoadKeyspaceGroupsTerminated, err) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/tso/delayLoadKeyspaceGroups")) +} + +// TestLoadKeyspaceGroupsSucceedWithTempFailures tests the initialization should succeed when there are temporary +// failures during loading the initial keyspace group assignment from etcd. +func (suite *keyspaceGroupManagerTestSuite) TestLoadKeyspaceGroupsSucceedWithTempFailures() { + re := suite.Require() + + mgr := newUniqueKeyspaceGroupManager(suite.ctx, suite.etcdClient, suite.cfg, 1) + re.NotNil(mgr) + defer mgr.Close() + + addKeyspaceGroupAssignment( + suite.ctx, suite.etcdClient, true, + mgr.legacySvcRootPath, mgr.tsoServiceID.ServiceAddr, uint32(0)) + + // Set the max retry times to 3 and inject the loadKeyspaceGroupsTemporaryFail to return 2 to let + // loading from etcd fail 2 times but the whole initialization still succeeds. 
+ mgr.loadFromEtcdMaxRetryTimes = 3 + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/tso/loadKeyspaceGroupsTemporaryFail", "return(2)")) + err := mgr.Initialize(true) + re.NoError(err) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/tso/loadKeyspaceGroupsTemporaryFail")) +} + +// TestLoadKeyspaceGroupsFailed tests the initialization should fail when there are too many failures +// during loading the initial keyspace group assignment from etcd. +func (suite *keyspaceGroupManagerTestSuite) TestLoadKeyspaceGroupsFailed() { + re := suite.Require() + + mgr := newUniqueKeyspaceGroupManager(suite.ctx, suite.etcdClient, suite.cfg, 1) + re.NotNil(mgr) + defer mgr.Close() + + addKeyspaceGroupAssignment( + suite.ctx, suite.etcdClient, true, + mgr.legacySvcRootPath, mgr.tsoServiceID.ServiceAddr, uint32(0)) + + // Set the max retry times to 3 and inject the loadKeyspaceGroupsTemporaryFail to return 3 to let + // loading from etcd fail 3 times which should cause the whole initialization to fail. + mgr.loadFromEtcdMaxRetryTimes = 3 + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/tso/loadKeyspaceGroupsTemporaryFail", "return(3)")) + err := mgr.Initialize(true) + re.Error(err) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/tso/loadKeyspaceGroupsTemporaryFail")) +} + // TestWatchAndDynamicallyApplyChanges tests the keyspace group manager watch and dynamically apply // keyspace groups' membership/distribution meta changes. func (suite *keyspaceGroupManagerTestSuite) TestWatchAndDynamicallyApplyChanges() { re := suite.Require() // Start with the empty keyspace group assignment. - mgr := newUniqueKeyspaceGroupManager(suite.ctx, suite.etcdClient, suite.cfg, 0, 0) + mgr := newUniqueKeyspaceGroupManager(suite.ctx, suite.etcdClient, suite.cfg, 0) re.NotNil(mgr) defer mgr.Close() err := mgr.Initialize(true) @@ -274,7 +343,7 @@ func runTestLoadKeyspaceGroupsAssignment( probabilityAssignToMe int, // percentage of assigning keyspace groups to this host/pod ) { idsExpected := []int{} - mgr := newUniqueKeyspaceGroupManager(ctx, etcdClient, cfg, 0, loadKeyspaceGroupsBatchSize) + mgr := newUniqueKeyspaceGroupManager(ctx, etcdClient, cfg, loadKeyspaceGroupsBatchSize) re.NotNil(mgr) defer mgr.Close() @@ -321,8 +390,9 @@ func runTestLoadKeyspaceGroupsAssignment( } func newUniqueKeyspaceGroupManager( - ctx context.Context, etcdClient *clientv3.Client, cfg *TestServiceConfig, - loadKeyspaceGroupsTimeout time.Duration, // set to 0 to use the default value + ctx context.Context, + etcdClient *clientv3.Client, + cfg *TestServiceConfig, loadKeyspaceGroupsBatchSize int64, // set to 0 to use the default value ) *KeyspaceGroupManager { tsoServiceID := &discovery.ServiceRegistryEntry{ServiceAddr: cfg.AdvertiseListenAddr} @@ -334,9 +404,7 @@ func newUniqueKeyspaceGroupManager( keyspaceGroupManager := NewKeyspaceGroupManager( ctx, tsoServiceID, etcdClient, electionNamePrefix, legacySvcRootPath, tsoSvcRootPath, cfg) - if loadKeyspaceGroupsTimeout != 0 { - keyspaceGroupManager.loadKeyspaceGroupsTimeout = loadKeyspaceGroupsTimeout - } + if loadKeyspaceGroupsBatchSize != 0 { keyspaceGroupManager.loadKeyspaceGroupsBatchSize = loadKeyspaceGroupsBatchSize } From 976be4b0546a48f2644a405adfa14ab0d3d4c210 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Fri, 7 Apr 2023 11:50:59 +0800 Subject: [PATCH 15/16] mcs: fix panic when resp is nil (#6283) close tikv/pd#6278 Signed-off-by: Ryan Leung --- pkg/mcs/meta_storage/server/grpc_service.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git 
a/pkg/mcs/meta_storage/server/grpc_service.go b/pkg/mcs/meta_storage/server/grpc_service.go index 4a25bdff1e01..9d27e817e295 100644 --- a/pkg/mcs/meta_storage/server/grpc_service.go +++ b/pkg/mcs/meta_storage/server/grpc_service.go @@ -163,11 +163,15 @@ func (s *Service) Get(ctx context.Context, req *meta_storagepb.GetRequest) (*met } cli := s.manager.GetClient() res, err := cli.Get(ctx, key, options...) + var revision int64 + if res != nil { + revision = res.Header.GetRevision() + } if err != nil { - return &meta_storagepb.GetResponse{Header: s.wrapErrorAndRevision(res.Header.GetRevision(), meta_storagepb.ErrorType_UNKNOWN, err.Error())}, nil + return &meta_storagepb.GetResponse{Header: s.wrapErrorAndRevision(revision, meta_storagepb.ErrorType_UNKNOWN, err.Error())}, nil } resp := &meta_storagepb.GetResponse{ - Header: &meta_storagepb.ResponseHeader{ClusterId: s.manager.ClusterID(), Revision: res.Header.GetRevision()}, + Header: &meta_storagepb.ResponseHeader{ClusterId: s.manager.ClusterID(), Revision: revision}, Count: res.Count, More: res.More, } @@ -197,12 +201,16 @@ func (s *Service) Put(ctx context.Context, req *meta_storagepb.PutRequest) (*met cli := s.manager.GetClient() res, err := cli.Put(ctx, key, value, options...) + var revision int64 + if res != nil { + revision = res.Header.GetRevision() + } if err != nil { - return &meta_storagepb.PutResponse{Header: s.wrapErrorAndRevision(res.Header.GetRevision(), meta_storagepb.ErrorType_UNKNOWN, err.Error())}, nil + return &meta_storagepb.PutResponse{Header: s.wrapErrorAndRevision(revision, meta_storagepb.ErrorType_UNKNOWN, err.Error())}, nil } resp := &meta_storagepb.PutResponse{ - Header: &meta_storagepb.ResponseHeader{ClusterId: s.manager.ClusterID(), Revision: res.Header.GetRevision()}, + Header: &meta_storagepb.ResponseHeader{ClusterId: s.manager.ClusterID(), Revision: revision}, } if res.PrevKv != nil { resp.PrevKv = &meta_storagepb.KeyValue{Key: res.PrevKv.Key, Value: res.PrevKv.Value} From 34f48f26ec135f3b8adad17875d87efa5574576e Mon Sep 17 00:00:00 2001 From: bufferflies <1045931706@qq.com> Date: Fri, 7 Apr 2023 15:51:14 +0800 Subject: [PATCH 16/16] get add wrong Signed-off-by: bufferflies <1045931706@qq.com> --- pkg/movingaverage/max_filter.go | 3 +++ pkg/movingaverage/median_filter.go | 3 +++ pkg/movingaverage/weight_moving_average.go | 3 +++ pkg/schedule/schedulers/hot_region.go | 3 --- pkg/schedule/schedulers/hot_region_test.go | 2 ++ pkg/statistics/store_hot_peers_infos.go | 1 - 6 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pkg/movingaverage/max_filter.go b/pkg/movingaverage/max_filter.go index e2d049616254..21678147c576 100644 --- a/pkg/movingaverage/max_filter.go +++ b/pkg/movingaverage/max_filter.go @@ -54,6 +54,9 @@ func (r *MaxFilter) Get() float64 { // GetAll returns all the data points. 
func (r *MaxFilter) GetAll() []float64 { + if r.count < r.size { + return r.records[:r.count] + } return r.records } diff --git a/pkg/movingaverage/median_filter.go b/pkg/movingaverage/median_filter.go index da9f0cc3a3a9..77e41f45e624 100644 --- a/pkg/movingaverage/median_filter.go +++ b/pkg/movingaverage/median_filter.go @@ -55,6 +55,9 @@ func (r *MedianFilter) Get() float64 { } func (r *MedianFilter) GetAll() []float64 { + if r.count < r.size { + return r.records[:r.count] + } return r.records } diff --git a/pkg/movingaverage/weight_moving_average.go b/pkg/movingaverage/weight_moving_average.go index cec31292daf7..d00859b7c9e7 100644 --- a/pkg/movingaverage/weight_moving_average.go +++ b/pkg/movingaverage/weight_moving_average.go @@ -68,6 +68,9 @@ func (w *WMA) Get() float64 { // GetAll returns all the data points. func (w *WMA) GetAll() []float64 { + if w.count < w.size { + return w.records[:w.count] + } return w.records } diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index 927f03f6fabd..3e4d7427b3d7 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -267,7 +267,6 @@ func (h *hotScheduler) dispatch(typ statistics.RWType, cluster schedule.Cluster) if h.conf.IsForbidRWType(typ) { return nil } - switch typ { case statistics.Read: return h.balanceHotReadRegions(cluster) @@ -604,7 +603,6 @@ func (bs *balanceSolver) solve() []*operator.Operator { if !bs.isValid() { return nil } - bs.cur = &solution{} tryUpdateBestSolution := func() { if label, ok := bs.filterUniformStore(); ok { @@ -803,7 +801,6 @@ func (bs *balanceSolver) checkSrcByPriorityAndTolerance(minLoad, expectLoad *sta } func (bs *balanceSolver) checkSrcHistoryLoadByPriorityAndTolerance(current, expectLoad *statistics.StoreLoad, toleranceRatio float64) bool { - log.Info("check src history load", zap.Any("current", current), zap.Any("expect-load", expectLoad), zap.Any("bs", bs)) return bs.checkHistoryLoadsByPriority(current.HistoryLoads, func(i int) bool { return slice.AllOf(current.HistoryLoads[i], func(j int) bool { return current.HistoryLoads[i][j] > toleranceRatio*expectLoad.HistoryLoads[i][j] diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index 11d86af508bf..f75544c8700f 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -520,11 +520,13 @@ func TestHotWriteRegionScheduleByteRateOnlyWithTiFlash(t *testing.T) { tikvKeysSum += float64(storesBytes[i]/100) / 10 tikvQuerySum += float64(storesBytes[i]/100) / 10 } + for i := uint64(1); i <= storeCount; i++ { if i != downStoreID { tc.UpdateStorageWrittenBytes(i, storesBytes[i]) } } + { // Check the load expect aliveTiKVCount := float64(aliveTiKVLastID - aliveTiKVStartID + 1) allowLeaderTiKVCount := aliveTiKVCount - 1 // store 5 with evict leader diff --git a/pkg/statistics/store_hot_peers_infos.go b/pkg/statistics/store_hot_peers_infos.go index 57b9e9334adb..91c1c0223e46 100644 --- a/pkg/statistics/store_hot_peers_infos.go +++ b/pkg/statistics/store_hot_peers_infos.go @@ -158,7 +158,6 @@ func summaryStoresLoadByEngine( store := info.StoreInfo id := store.GetID() storeLoads, ok := storesLoads[id] - //storesHistoryLoads, ok1 := storesHistoryLoads[id] if !ok || !collector.Filter(info, kind) { continue }
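
Context for the GetAll changes in [PATCH 16/16] above: the moving-average filters keep their samples in a fixed-size ring buffer, and before the fix GetAll also exposed the zero-valued slots that had not been written yet. The following is a minimal, self-contained sketch of that guard pattern; the ringFilter type and its names are illustrative only and are not the actual pkg/movingaverage implementation.

package main

import "fmt"

// ringFilter keeps the most recent samples in a fixed-size ring buffer.
// It is an illustrative stand-in for the movingaverage filters patched above.
type ringFilter struct {
	records []float64
	size    uint64
	count   uint64
}

func newRingFilter(size int) *ringFilter {
	return &ringFilter{records: make([]float64, size), size: uint64(size)}
}

// Add records a sample, overwriting the oldest slot once the buffer is full.
func (r *ringFilter) Add(n float64) {
	r.records[r.count%r.size] = n
	r.count++
}

// GetAll returns only the slots that have been written; without the
// count < size guard, the zero-valued tail would also be returned.
func (r *ringFilter) GetAll() []float64 {
	if r.count < r.size {
		return r.records[:r.count]
	}
	return r.records
}

func main() {
	f := newRingFilter(5)
	f.Add(1)
	f.Add(2)
	fmt.Println(f.GetAll()) // prints [1 2] rather than [1 2 0 0 0]
}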