Merge branch 'release-5.0' into cherry-pick-3643-to-release-5.0
ti-chi-bot authored Jun 28, 2021
2 parents 9391c28 + 09e8392 · commit f24c091
Showing 6 changed files with 140 additions and 7 deletions.
5 changes: 5 additions & 0 deletions pkg/mock/mockcluster/config.go
@@ -128,6 +128,11 @@ func (mc *Cluster) SetLocationLabels(v []string) {
mc.updateReplicationConfig(func(r *config.ReplicationConfig) { r.LocationLabels = v })
}

// SetIsolationLevel updates the IsolationLevel configuration.
func (mc *Cluster) SetIsolationLevel(v string) {
mc.updateReplicationConfig(func(r *config.ReplicationConfig) { r.IsolationLevel = v })
}

func (mc *Cluster) updateScheduleConfig(f func(*config.ScheduleConfig)) {
s := mc.GetScheduleConfig().Clone()
f(s)
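For context, a minimal sketch of how the new mock setter is intended to be used in checker tests. The calls mirror the tests added later in this commit; the helper name and standalone-file framing are illustrative, not part of the change.

package mockcluster_test

import (
	"github.com/tikv/pd/pkg/mock/mockcluster"
	"github.com/tikv/pd/server/config"
)

// newZoneIsolatedCluster is a hypothetical helper: it builds a mock cluster
// whose replication config requires zone-level isolation.
func newZoneIsolatedCluster() *mockcluster.Cluster {
	opt := config.NewTestOptions()
	tc := mockcluster.NewCluster(opt)
	tc.SetLocationLabels([]string{"zone"})
	// New in this commit: propagate IsolationLevel to the mock's ReplicationConfig
	// so checker tests can exercise isolation-level constraints.
	tc.SetIsolationLevel("zone")
	return tc
}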
2 changes: 1 addition & 1 deletion server/schedule/checker/replica_checker.go
@@ -237,7 +237,7 @@ func (r *ReplicaChecker) fixPeer(region *core.RegionInfo, storeID uint64, status
}

regionStores := r.cluster.GetRegionStores(region)
-target := r.strategy(region).SelectStoreToReplace(regionStores, storeID)
+target := r.strategy(region).SelectStoreToFix(regionStores, storeID)
if target == 0 {
reason := fmt.Sprintf("no-store-%s", status)
checkerCounter.WithLabelValues("replica_checker", reason).Inc()
59 changes: 59 additions & 0 deletions server/schedule/checker/replica_checker_test.go
@@ -512,3 +512,62 @@ func (s *testReplicaCheckerSuite) TestOpts(c *C) {
tc.SetEnableReplaceOfflineReplica(false)
c.Assert(rc.Check(region), IsNil)
}

// See issue: https://github.com/tikv/pd/issues/3705
func (s *testReplicaCheckerSuite) TestFixDownPeer(c *C) {
opt := config.NewTestOptions()
tc := mockcluster.NewCluster(opt)
tc.DisableFeature(versioninfo.JointConsensus)
tc.SetLocationLabels([]string{"zone"})
rc := NewReplicaChecker(tc, cache.NewDefaultCache(10))

tc.AddLabelsStore(1, 1, map[string]string{"zone": "z1"})
tc.AddLabelsStore(2, 1, map[string]string{"zone": "z1"})
tc.AddLabelsStore(3, 1, map[string]string{"zone": "z2"})
tc.AddLabelsStore(4, 1, map[string]string{"zone": "z3"})
tc.AddLabelsStore(5, 1, map[string]string{"zone": "z3"})

tc.AddLeaderRegion(1, 1, 3, 4)
region := tc.GetRegion(1)
c.Assert(rc.Check(region), IsNil)

// Store 4 goes down; the checker should first replace its peer within the same zone (store 5 in z3).
tc.SetStoreDown(4)
region = region.Clone(core.WithDownPeers([]*pdpb.PeerStats{
{Peer: region.GetStorePeer(4), DownSeconds: 6000},
}))
testutil.CheckTransferPeer(c, rc.Check(region), operator.OpRegion, 4, 5)

// With store 5 down as well, the peer can only move to store 2 in z1, even though that leaves two replicas in the same zone.
tc.SetStoreDown(5)
testutil.CheckTransferPeer(c, rc.Check(region), operator.OpRegion, 4, 2)

// Once IsolationLevel is "zone", duplicating a zone is forbidden, so no operator is generated.
tc.SetIsolationLevel("zone")
c.Assert(rc.Check(region), IsNil)
}

// See issue: https://github.com/tikv/pd/issues/3705
func (s *testReplicaCheckerSuite) TestFixOfflinePeer(c *C) {
opt := config.NewTestOptions()
tc := mockcluster.NewCluster(opt)
tc.DisableFeature(versioninfo.JointConsensus)
tc.SetLocationLabels([]string{"zone"})
rc := NewReplicaChecker(tc, cache.NewDefaultCache(10))

tc.AddLabelsStore(1, 1, map[string]string{"zone": "z1"})
tc.AddLabelsStore(2, 1, map[string]string{"zone": "z1"})
tc.AddLabelsStore(3, 1, map[string]string{"zone": "z2"})
tc.AddLabelsStore(4, 1, map[string]string{"zone": "z3"})
tc.AddLabelsStore(5, 1, map[string]string{"zone": "z3"})

tc.AddLeaderRegion(1, 1, 3, 4)
region := tc.GetRegion(1)
c.Assert(rc.Check(region), IsNil)

// Same scenario as TestFixDownPeer, but with stores going offline instead of reporting down peers.
tc.SetStoreOffline(4)
testutil.CheckTransferPeer(c, rc.Check(region), operator.OpRegion, 4, 5)

tc.SetStoreOffline(5)
testutil.CheckTransferPeer(c, rc.Check(region), operator.OpRegion, 4, 2)

tc.SetIsolationLevel("zone")
c.Assert(rc.Check(region), IsNil)
}
9 changes: 4 additions & 5 deletions server/schedule/checker/replica_strategy.go
@@ -80,13 +80,12 @@ func (s *ReplicaStrategy) SelectStoreToAdd(coLocationStores []*core.StoreInfo, e
return target.GetID()
}

-// SelectStoreToReplace returns a store to replace oldStore. The location
-// placement after scheduling should be not worse than original.
-func (s *ReplicaStrategy) SelectStoreToReplace(coLocationStores []*core.StoreInfo, old uint64) uint64 {
+// SelectStoreToFix returns a store to replace down/offline old peer. The location
+// placement after scheduling is allowed to be worse than original.
+func (s *ReplicaStrategy) SelectStoreToFix(coLocationStores []*core.StoreInfo, old uint64) uint64 {
// trick to avoid creating a slice with `old` removed.
s.swapStoreToFirst(coLocationStores, old)
-safeGuard := filter.NewLocationSafeguard(s.checkerName, s.locationLabels, coLocationStores, s.cluster.GetStore(old))
-return s.SelectStoreToAdd(coLocationStores[1:], safeGuard)
+return s.SelectStoreToAdd(coLocationStores[1:])
}

// SelectStoreToImprove returns a store to replace oldStore. The location
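The rename above is the core of this change: when fixing a down or offline peer, the strategy no longer applies the location safeguard, so the replacement may worsen the label distribution instead of leaving the region under-replicated. A condensed, annotated view of the new function follows; the body is exactly what the diff adds, the comments are editorial, and the assumption that IsolationLevel is ultimately enforced by the filters inside SelectStoreToAdd is not something this hunk shows.

func (s *ReplicaStrategy) SelectStoreToFix(coLocationStores []*core.StoreInfo, old uint64) uint64 {
	// Move the old store to index 0 so the candidate slice can simply skip it
	// instead of allocating a copy with `old` removed.
	s.swapStoreToFirst(coLocationStores, old)
	// No LocationSafeguard any more: the replacement may land in a zone that
	// already holds a replica, which the new tests in this commit accept as
	// long as no IsolationLevel forbids it (see tikv/pd#3705).
	return s.SelectStoreToAdd(coLocationStores[1:])
}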
2 changes: 1 addition & 1 deletion server/schedule/checker/rule_checker.go
@@ -145,7 +145,7 @@ func (c *RuleChecker) addRulePeer(region *core.RegionInfo, rf *placement.RuleFit

func (c *RuleChecker) replaceRulePeer(region *core.RegionInfo, rf *placement.RuleFit, peer *metapb.Peer, status string) (*operator.Operator, error) {
ruleStores := c.getRuleFitStores(rf)
-store := c.strategy(region, rf.Rule).SelectStoreToReplace(ruleStores, peer.GetStoreId())
+store := c.strategy(region, rf.Rule).SelectStoreToFix(ruleStores, peer.GetStoreId())
if store == 0 {
checkerCounter.WithLabelValues("rule_checker", "no-store-replace").Inc()
c.regionWaitingList.Put(region.GetID(), nil)
70 changes: 70 additions & 0 deletions server/schedule/checker/rule_checker_test.go
@@ -21,6 +21,7 @@ import (
"github.com/pingcap/kvproto/pkg/pdpb"
"github.com/tikv/pd/pkg/cache"
"github.com/tikv/pd/pkg/mock/mockcluster"
"github.com/tikv/pd/pkg/testutil"
"github.com/tikv/pd/server/config"
"github.com/tikv/pd/server/core"
"github.com/tikv/pd/server/schedule/operator"
@@ -501,3 +501,72 @@ func (s *testRuleCheckerSuite) TestIssue3299(c *C) {
}
}
}

// See issue: https://github.com/tikv/pd/issues/3705
func (s *testRuleCheckerSuite) TestFixDownPeer(c *C) {
s.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1"})
s.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z1"})
s.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z2"})
s.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z3"})
s.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z3"})
s.cluster.AddLeaderRegion(1, 1, 3, 4)
rule := &placement.Rule{
GroupID: "pd",
ID: "test",
Index: 100,
Override: true,
Role: placement.Voter,
Count: 3,
LocationLabels: []string{"zone"},
}
s.ruleManager.SetRule(rule)

region := s.cluster.GetRegion(1)
c.Assert(s.rc.Check(region), IsNil)

s.cluster.SetStoreDown(4)
region = region.Clone(core.WithDownPeers([]*pdpb.PeerStats{
{Peer: region.GetStorePeer(4), DownSeconds: 6000},
}))
testutil.CheckTransferPeer(c, s.rc.Check(region), operator.OpRegion, 4, 5)

s.cluster.SetStoreDown(5)
testutil.CheckTransferPeer(c, s.rc.Check(region), operator.OpRegion, 4, 2)

// Here the isolation level comes from the placement rule rather than the replication config; the effect is the same.
rule.IsolationLevel = "zone"
s.ruleManager.SetRule(rule)
c.Assert(s.rc.Check(region), IsNil)
}

// See issue: https://github.com/tikv/pd/issues/3705
func (s *testRuleCheckerSuite) TestFixOfflinePeer(c *C) {
s.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1"})
s.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z1"})
s.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z2"})
s.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z3"})
s.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z3"})
s.cluster.AddLeaderRegion(1, 1, 3, 4)
rule := &placement.Rule{
GroupID: "pd",
ID: "test",
Index: 100,
Override: true,
Role: placement.Voter,
Count: 3,
LocationLabels: []string{"zone"},
}
s.ruleManager.SetRule(rule)

region := s.cluster.GetRegion(1)
c.Assert(s.rc.Check(region), IsNil)

s.cluster.SetStoreOffline(4)
testutil.CheckTransferPeer(c, s.rc.Check(region), operator.OpRegion, 4, 5)

s.cluster.SetStoreOffline(5)
testutil.CheckTransferPeer(c, s.rc.Check(region), operator.OpRegion, 4, 2)

rule.IsolationLevel = "zone"
s.ruleManager.SetRule(rule)
c.Assert(s.rc.Check(region), IsNil)
}
