diff --git a/client/go.mod b/client/go.mod index 22ef56aa417..893cada680d 100644 --- a/client/go.mod +++ b/client/go.mod @@ -7,7 +7,7 @@ require ( github.com/pingcap/check v0.0.0-20211026125417-57bd13f7b5f0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 - github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748 + github.com/pingcap/kvproto v0.0.0-20220510035547-0e2f26c0a46a github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee github.com/prometheus/client_golang v1.11.0 go.uber.org/goleak v1.1.11 diff --git a/client/go.sum b/client/go.sum index becfbccfe12..6682bdb2893 100644 --- a/client/go.sum +++ b/client/go.sum @@ -106,8 +106,8 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTm github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZFh4N3vQ5HEtld3S+Y+StULhWVvumU0= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= -github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748 h1:i4MBe1zGq9/r3BH6rTRunizi4T59fpNk8hvBCrB5UAY= -github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20220510035547-0e2f26c0a46a h1:TxdHGOFeNa1q1mVv6TgReayf26iI4F8PQUm6RnZ/V/E= +github.com/pingcap/kvproto v0.0.0-20220510035547-0e2f26c0a46a/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee h1:VO2t6IBpfvW34TdtD/G10VvnGqjLic1jzOuHjUb5VqM= github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= diff --git a/go.mod b/go.mod index f31a93574bf..8060fc283d1 100644 --- a/go.mod +++ b/go.mod @@ -30,7 +30,7 @@ require ( github.com/pingcap/errcode v0.3.0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce - github.com/pingcap/kvproto v0.0.0-20220429093005-2839fa5a1ed6 + github.com/pingcap/kvproto v0.0.0-20220510035547-0e2f26c0a46a github.com/pingcap/log v0.0.0-20210906054005-afc726e70354 github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d github.com/pingcap/tidb-dashboard v0.0.0-20220517152638-ae6b14e7065b diff --git a/go.sum b/go.sum index 9c46c253105..0fcddcd9a9b 100644 --- a/go.sum +++ b/go.sum @@ -400,8 +400,8 @@ github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce h1:Y1kCxlCtlPTMt github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= github.com/pingcap/kvproto v0.0.0-20200411081810-b85805c9476c/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= -github.com/pingcap/kvproto v0.0.0-20220429093005-2839fa5a1ed6 h1:gT4uxwuZzTniXdzp4mPoZjhNkDNEuZBt7HESOuLRyMI= -github.com/pingcap/kvproto v0.0.0-20220429093005-2839fa5a1ed6/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20220510035547-0e2f26c0a46a h1:TxdHGOFeNa1q1mVv6TgReayf26iI4F8PQUm6RnZ/V/E= +github.com/pingcap/kvproto v0.0.0-20220510035547-0e2f26c0a46a/go.mod 
h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= diff --git a/server/api/operator_test.go b/server/api/operator_test.go index ff6276d5574..86d99c5e726 100644 --- a/server/api/operator_test.go +++ b/server/api/operator_test.go @@ -405,11 +405,13 @@ func mustPutStore(c *C, svr *server.Server, id uint64, state metapb.StoreState, }, }) c.Assert(err, IsNil) - _, err = s.StoreHeartbeat(context.Background(), &pdpb.StoreHeartbeatRequest{ - Header: &pdpb.RequestHeader{ClusterId: svr.ClusterID()}, - Stats: &pdpb.StoreStats{StoreId: id}, - }) - c.Assert(err, IsNil) + if state == metapb.StoreState_Up { + _, err = s.StoreHeartbeat(context.Background(), &pdpb.StoreHeartbeatRequest{ + Header: &pdpb.RequestHeader{ClusterId: svr.ClusterID()}, + Stats: &pdpb.StoreStats{StoreId: id}, + }) + c.Assert(err, IsNil) + } } func mustRegionHeartbeat(c *C, svr *server.Server, region *core.RegionInfo) { diff --git a/server/api/router.go b/server/api/router.go index b9eaeb4b090..c5553a0caa7 100644 --- a/server/api/router.go +++ b/server/api/router.go @@ -335,8 +335,6 @@ func createRouter(prefix string, svr *server.Server) *mux.Router { unsafeOperationHandler.RemoveFailedStores, setMethods("POST")) registerFunc(clusterRouter, "/admin/unsafe/remove-failed-stores/show", unsafeOperationHandler.GetFailedStoresRemovalStatus, setMethods("GET")) - registerFunc(clusterRouter, "/admin/unsafe/remove-failed-stores/history", - unsafeOperationHandler.GetFailedStoresRemovalHistory, setMethods("GET")) // API to set or unset failpoints failpoint.Inject("enableFailpointAPI", func() { diff --git a/server/api/unsafe_operation.go b/server/api/unsafe_operation.go index 060f533e954..83912c120a0 100644 --- a/server/api/unsafe_operation.go +++ b/server/api/unsafe_operation.go @@ -55,11 +55,17 @@ func (h *unsafeOperationHandler) RemoveFailedStores(w http.ResponseWriter, r *ht h.rd.JSON(w, http.StatusBadRequest, "Store ids are invalid") return } - stores := make(map[uint64]string) + stores := make(map[uint64]struct{}) for _, store := range storeSlice { - stores[store] = "" + stores[store] = struct{}{} } - if err := rc.GetUnsafeRecoveryController().RemoveFailedStores(stores); err != nil { + timeout := uint64(600) + rawTimeout, exists := input["timeout"].(float64) + if exists { + timeout = uint64(rawTimeout) + } + + if err := rc.GetUnsafeRecoveryController().RemoveFailedStores(stores, timeout); err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } @@ -69,19 +75,9 @@ func (h *unsafeOperationHandler) RemoveFailedStores(w http.ResponseWriter, r *ht // @Tags unsafe // @Summary Show the current status of failed stores removal. // @Produce json -// Success 200 {object} []string +// Success 200 {object} []StageOutput // @Router /admin/unsafe/remove-failed-stores/show [GET] func (h *unsafeOperationHandler) GetFailedStoresRemovalStatus(w http.ResponseWriter, r *http.Request) { rc := getCluster(r) h.rd.JSON(w, http.StatusOK, rc.GetUnsafeRecoveryController().Show()) } - -// @Tags unsafe -// @Summary Show the history of failed stores removal. 
-// @Produce json -// Success 200 {object} []string -// @Router /admin/unsafe/remove-failed-stores/history [GET] -func (h *unsafeOperationHandler) GetFailedStoresRemovalHistory(w http.ResponseWriter, r *http.Request) { - rc := getCluster(r) - h.rd.JSON(w, http.StatusOK, rc.GetUnsafeRecoveryController().History()) -} diff --git a/server/api/unsafe_operation_test.go b/server/api/unsafe_operation_test.go index 892795a2cea..f9d060d2fe3 100644 --- a/server/api/unsafe_operation_test.go +++ b/server/api/unsafe_operation_test.go @@ -19,8 +19,10 @@ import ( "fmt" . "github.com/pingcap/check" + "github.com/pingcap/kvproto/pkg/metapb" tu "github.com/tikv/pd/pkg/testutil" "github.com/tikv/pd/server" + "github.com/tikv/pd/server/cluster" ) var _ = Suite(&testUnsafeAPISuite{}) @@ -39,6 +41,7 @@ func (s *testUnsafeAPISuite) SetUpSuite(c *C) { s.urlPrefix = fmt.Sprintf("%s%s/api/v1/admin/unsafe", addr, apiPrefix) mustBootstrapCluster(c, s.svr) + mustPutStore(c, s.svr, 1, metapb.StoreState_Offline, metapb.NodeState_Removing, nil) } func (s *testUnsafeAPISuite) TearDownSuite(c *C) { @@ -51,20 +54,26 @@ func (s *testUnsafeAPISuite) TestRemoveFailedStores(c *C) { err := tu.CheckPostJSON(testDialClient, s.urlPrefix+"/remove-failed-stores", data, tu.StatusNotOK(c), tu.StringEqual(c, "\"[PD:unsaferecovery:ErrUnsafeRecoveryInvalidInput]invalid input no store specified\"\n")) c.Assert(err, IsNil) + input = map[string]interface{}{"stores": []string{"abc", "def"}} data, _ = json.Marshal(input) err = tu.CheckPostJSON(testDialClient, s.urlPrefix+"/remove-failed-stores", data, tu.StatusNotOK(c), tu.StringEqual(c, "\"Store ids are invalid\"\n")) c.Assert(err, IsNil) + input = map[string]interface{}{"stores": []uint64{1, 2}} data, _ = json.Marshal(input) + err = tu.CheckPostJSON(testDialClient, s.urlPrefix+"/remove-failed-stores", data, tu.StatusNotOK(c), + tu.StringEqual(c, "\"[PD:unsaferecovery:ErrUnsafeRecoveryInvalidInput]invalid input store 2 doesn't exist\"\n")) + c.Assert(err, IsNil) + + input = map[string]interface{}{"stores": []uint64{1}} + data, _ = json.Marshal(input) err = tu.CheckPostJSON(testDialClient, s.urlPrefix+"/remove-failed-stores", data, tu.StatusOK(c)) c.Assert(err, IsNil) + // Test show - var output []string + var output []cluster.StageOutput err = tu.ReadGetJSON(c, testDialClient, s.urlPrefix+"/remove-failed-stores/show", &output) c.Assert(err, IsNil) - // Test history - err = tu.ReadGetJSON(c, testDialClient, s.urlPrefix+"/remove-failed-stores/history", &output) - c.Assert(err, IsNil) } diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 888163bceaa..bcedcdb3fa5 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -223,6 +223,7 @@ func (c *RaftCluster) InitCluster( c.progressManager = progress.NewManager() c.changedRegions = make(chan *core.RegionInfo, defaultChangedRegionsLimit) c.prevStoreLimit = make(map[uint64]map[storelimit.Type]float64) + c.unsafeRecoveryController = newUnsafeRecoveryController(c) } // Start starts a cluster. @@ -265,7 +266,6 @@ func (c *RaftCluster) Start(s Server) error { c.coordinator = newCoordinator(c.ctx, cluster, s.GetHBStreams()) c.regionStats = statistics.NewRegionStatistics(c.opt, c.ruleManager, c.storeConfigManager) c.limiter = NewStoreLimiter(s.GetPersistOptions()) - c.unsafeRecoveryController = newUnsafeRecoveryController(cluster) c.wg.Add(8) go c.runCoordinator() @@ -630,8 +630,6 @@ func (c *RaftCluster) RemoveSuspectRegion(id uint64) { // GetUnsafeRecoveryController returns the unsafe recovery controller. 
func (c *RaftCluster) GetUnsafeRecoveryController() *unsafeRecoveryController { - c.RLock() - defer c.RUnlock() return c.unsafeRecoveryController } @@ -991,6 +989,11 @@ func (c *RaftCluster) DropCacheRegion(id uint64) { c.core.RemoveRegionIfExist(id) } +// DropCacheAllRegion removes all regions from the cache. +func (c *RaftCluster) DropCacheAllRegion() { + c.core.ResetRegionCache() +} + // GetMetaStores gets stores from cluster. func (c *RaftCluster) GetMetaStores() []*metapb.Store { return c.core.GetMetaStores() diff --git a/server/cluster/cluster_worker.go b/server/cluster/cluster_worker.go index 33f853bce0a..d48c5bec060 100644 --- a/server/cluster/cluster_worker.go +++ b/server/cluster/cluster_worker.go @@ -42,7 +42,7 @@ func (c *RaftCluster) HandleRegionHeartbeat(region *core.RegionInfo) error { // HandleAskSplit handles the split request. func (c *RaftCluster) HandleAskSplit(request *pdpb.AskSplitRequest) (*pdpb.AskSplitResponse, error) { - if c.GetUnsafeRecoveryController() != nil && c.GetUnsafeRecoveryController().IsRunning() { + if c.GetUnsafeRecoveryController().IsRunning() { return nil, errs.ErrUnsafeRecoveryIsRunning.FastGenByArgs() } reqRegion := request.GetRegion() @@ -101,7 +101,7 @@ func (c *RaftCluster) ValidRequestRegion(reqRegion *metapb.Region) error { // HandleAskBatchSplit handles the batch split request. func (c *RaftCluster) HandleAskBatchSplit(request *pdpb.AskBatchSplitRequest) (*pdpb.AskBatchSplitResponse, error) { - if c.GetUnsafeRecoveryController() != nil && c.GetUnsafeRecoveryController().IsRunning() { + if c.GetUnsafeRecoveryController().IsRunning() { return nil, errs.ErrUnsafeRecoveryIsRunning.FastGenByArgs() } reqRegion := request.GetRegion() diff --git a/server/cluster/coordinator.go b/server/cluster/coordinator.go index 973c628662c..e389dbf88af 100644 --- a/server/cluster/coordinator.go +++ b/server/cluster/coordinator.go @@ -117,7 +117,7 @@ func (c *coordinator) patrolRegions() { log.Info("patrol regions has been stopped") return } - if c.cluster.GetUnsafeRecoveryController() != nil && c.cluster.GetUnsafeRecoveryController().IsRunning() { + if c.cluster.GetUnsafeRecoveryController().IsRunning() { // Skip patrolling regions during unsafe recovery. continue } @@ -528,7 +528,7 @@ func (c *coordinator) collectSchedulerMetrics() { var allowScheduler float64 // If the scheduler is not allowed to schedule, it will disappear in Grafana panel. // See issue #1341. - if !s.IsPaused() { + if !s.IsPaused() && !s.cluster.GetUnsafeRecoveryController().IsRunning() { allowScheduler = 1 } schedulerStatusGauge.WithLabelValues(s.GetName(), "allow").Set(allowScheduler) @@ -904,7 +904,7 @@ func (s *scheduleController) GetInterval() time.Duration { // AllowSchedule returns if a scheduler is allowed to schedule. func (s *scheduleController) AllowSchedule() bool { - return s.Scheduler.IsScheduleAllowed(s.cluster) && !s.IsPaused() && !(s.cluster.GetUnsafeRecoveryController() != nil && s.cluster.GetUnsafeRecoveryController().IsRunning()) + return s.Scheduler.IsScheduleAllowed(s.cluster) && !s.IsPaused() && !s.cluster.GetUnsafeRecoveryController().IsRunning() } // isPaused returns if a scheduler is paused. 
diff --git a/server/cluster/unsafe_recovery_controller.go b/server/cluster/unsafe_recovery_controller.go index 21f7e7eec68..c85db2b2564 100644 --- a/server/cluster/unsafe_recovery_controller.go +++ b/server/cluster/unsafe_recovery_controller.go @@ -16,17 +16,19 @@ package cluster import ( "bytes" + "encoding/json" "fmt" "sort" "strconv" + "strings" "time" - "github.com/gogo/protobuf/proto" - "github.com/google/btree" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/btree" + "github.com/tikv/pd/pkg/codec" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/syncutil" "github.com/tikv/pd/server/core" @@ -36,47 +38,109 @@ import ( type unsafeRecoveryStage int const ( - storeReportRequestInterval = time.Second * 60 - storePlanRequestInterval = time.Second * 60 + storeRequestInterval = time.Second * 40 ) +// Stage transition graph: for more details, please check `unsafeRecoveryController.HandleStoreHeartbeat()` +// +-----------+ +// | | +// | idle | +// | | +// +-----------+ +// | +// | +// | +// v +-----------+ +// +-----------+ | | +-----------+ +-----------+ +// | |------->| force |--------->| | | | +-----------+ +// | collect | | LeaderFor | | force | | exitForce | | | +// | Report | |CommitMerge|----+-----| Leader |-----+---->| Leader |---------->| failed | +// | | | | | | | | | | | | +// +-----------+ +-----------+ | +-----------+ | +-----------+ +-----------+ +// | | | ^ | +// | | | | | +// | | | | | +// | | v | | +// | | +-----------+ | +// | | | | | +// | | | demote | | +// | +-----| Voter |-----+ +// | | | | | +// | | +-----------+ | +// | | | ^ | +// | | | | | +// | | | | | +// v +-----------+ | +-----------+ | +// +-----------+ | | | | | | +// | | | exitForce | v | create | | +// | finished |<------| Leader |<----------| Region |-----+ +// | | | | | | +// +-----------+ +-----------+ +-----------+ +// const ( - ready unsafeRecoveryStage = iota - collectingClusterInfo - recovering + idle unsafeRecoveryStage = iota + collectReport + forceLeaderForCommitMerge + forceLeader + demoteFailedVoter + createEmptyRegion + exitForceLeader finished + failed ) type unsafeRecoveryController struct { syncutil.RWMutex - cluster *RaftCluster - stage unsafeRecoveryStage - failedStores map[uint64]string - storeReportExpires map[uint64]time.Time - storeReports map[uint64]*pdpb.StoreReport // Store info proto - numStoresReported int - storePlanExpires map[uint64]time.Time - storeRecoveryPlans map[uint64]*pdpb.RecoveryPlan // StoreRecoveryPlan proto - executionResults map[uint64]bool // Execution reports for tracking purpose - executionReports map[uint64]*pdpb.StoreReport // Execution reports for tracking purpose - numStoresPlanExecuted int + cluster *RaftCluster + stage unsafeRecoveryStage + // the round of recovery, which is an increasing number to identify the reports of each round + step uint64 + failedStores map[uint64]struct{} + timeout time.Time + + // collected reports from store, if not reported yet, it would be nil + storeReports map[uint64]*pdpb.StoreReport + numStoresReported int + + storePlanExpires map[uint64]time.Time + storeRecoveryPlans map[uint64]*pdpb.RecoveryPlan + + // accumulated output for the whole recovery process + output []StageOutput + affectedTableIDs []int64 + affectedMetaRegions []uint64 + err error +} + +// StageOutput is the information for one stage of the recovery process. 
+type StageOutput struct { + Info string `json:"info,omitempty"` + Time string `json:"time,omitempty"` + Actions map[string][]string `json:"actions,omitempty"` + Details []string `json:"details,omitempty"` } func newUnsafeRecoveryController(cluster *RaftCluster) *unsafeRecoveryController { - return &unsafeRecoveryController{ - cluster: cluster, - stage: ready, - failedStores: make(map[uint64]string), - storeReportExpires: make(map[uint64]time.Time), - storeReports: make(map[uint64]*pdpb.StoreReport), - numStoresReported: 0, - storePlanExpires: make(map[uint64]time.Time), - storeRecoveryPlans: make(map[uint64]*pdpb.RecoveryPlan), - executionResults: make(map[uint64]bool), - executionReports: make(map[uint64]*pdpb.StoreReport), - numStoresPlanExecuted: 0, + u := &unsafeRecoveryController{ + cluster: cluster, } + u.reset() + return u +} + +func (u *unsafeRecoveryController) reset() { + u.stage = idle + u.step = 0 + u.failedStores = make(map[uint64]struct{}) + u.storeReports = make(map[uint64]*pdpb.StoreReport) + u.numStoresReported = 0 + u.storePlanExpires = make(map[uint64]time.Time) + u.storeRecoveryPlans = make(map[uint64]*pdpb.RecoveryPlan) + u.output = make([]StageOutput, 0) + u.affectedTableIDs = make([]int64, 0) + u.affectedMetaRegions = make([]uint64, 0) + u.err = nil } // IsRunning returns whether there is ongoing unsafe recovery process. If yes, further unsafe @@ -84,23 +148,27 @@ func newUnsafeRecoveryController(cluster *RaftCluster) *unsafeRecoveryController func (u *unsafeRecoveryController) IsRunning() bool { u.RLock() defer u.RUnlock() - return u.stage != ready && u.stage != finished + return u.stage != idle && u.stage != finished && u.stage != failed } // RemoveFailedStores removes failed stores from the cluster. -func (u *unsafeRecoveryController) RemoveFailedStores(failedStores map[uint64]string) error { +func (u *unsafeRecoveryController) RemoveFailedStores(failedStores map[uint64]struct{}, timeout uint64) error { if u.IsRunning() { return errs.ErrUnsafeRecoveryIsRunning.FastGenByArgs() } u.Lock() defer u.Unlock() + if len(failedStores) == 0 { return errs.ErrUnsafeRecoveryInvalidInput.FastGenByArgs("no store specified") } - u.reset() + + // validate the stores and mark the store as tombstone forcibly for failedStore := range failedStores { store := u.cluster.GetStore(failedStore) - if store != nil && (store.IsPreparing() || store.IsServing()) && !store.IsDisconnected() { + if store == nil { + return errs.ErrUnsafeRecoveryInvalidInput.FastGenByArgs(fmt.Sprintf("store %v doesn't exist", failedStore)) + } else if (store.IsPreparing() || store.IsServing()) && !store.IsDisconnected() { return errs.ErrUnsafeRecoveryInvalidInput.FastGenByArgs(fmt.Sprintf("store %v is up and connected", failedStore)) } } @@ -110,8 +178,10 @@ func (u *unsafeRecoveryController) RemoveFailedStores(failedStores map[uint64]st return err } } - u.failedStores = failedStores + + u.reset() for _, s := range u.cluster.GetStores() { + // Tiflash isn't supportted yet, so just do not collect store reports of Tiflash if s.IsRemoved() || s.IsPhysicallyDestroyed() || s.IsTiFlash() { continue } @@ -120,462 +190,863 @@ func (u *unsafeRecoveryController) RemoveFailedStores(failedStores map[uint64]st } u.storeReports[s.GetID()] = nil } - u.stage = collectingClusterInfo + + u.timeout = time.Now().Add(time.Duration(timeout) * time.Second) + u.failedStores = failedStores + u.changeStage(collectReport) return nil } +// Show returns the current status of ongoing unsafe recover operation. 
+func (u *unsafeRecoveryController) Show() []StageOutput { + u.Lock() + defer u.Unlock() + + if u.stage == idle { + return []StageOutput{{Info: "No on-going recovery."}} + } + u.checkTimeout() + status := u.output + if u.stage != finished && u.stage != failed { + status = append(status, u.getReportStatus()) + } + return status +} + +func (u *unsafeRecoveryController) getReportStatus() StageOutput { + var status StageOutput + status.Time = time.Now().Format("2006-01-02 15:04:05.000") + if u.numStoresReported != len(u.storeReports) { + status.Info = fmt.Sprintf("Collecting reports from alive stores(%d/%d)", u.numStoresReported, len(u.storeReports)) + var reported, unreported, undispatched string + for storeID, report := range u.storeReports { + str := strconv.FormatUint(storeID, 10) + ", " + if report == nil { + if _, requested := u.storePlanExpires[storeID]; !requested { + undispatched += str + } else { + unreported += str + } + } else { + reported += str + } + } + status.Details = append(status.Details, "Stores that have not dispatched plan: "+strings.Trim(undispatched, ", ")) + status.Details = append(status.Details, "Stores that have reported to PD: "+strings.Trim(reported, ", ")) + status.Details = append(status.Details, "Stores that have not reported to PD: "+strings.Trim(unreported, ", ")) + } else { + status.Info = fmt.Sprintf("Collected reports from all %d alive stores", len(u.storeReports)) + } + return status +} + +func (u *unsafeRecoveryController) checkTimeout() bool { + if u.stage == finished || u.stage == failed { + return false + } + + if time.Now().After(u.timeout) { + u.err = errors.Errorf("Exceeds timeout %v", u.timeout) + return u.handleErr() + } + return false +} + // HandleStoreHeartbeat handles the store heartbeat requests and checks whether the stores need to // send detailed report back. func (u *unsafeRecoveryController) HandleStoreHeartbeat(heartbeat *pdpb.StoreHeartbeatRequest, resp *pdpb.StoreHeartbeatResponse) { + if !u.IsRunning() { + // no recovery in progress, do nothing + return + } u.Lock() defer u.Unlock() - if len(u.failedStores) == 0 { + + if u.checkTimeout() { return } - switch u.stage { - case collectingClusterInfo: - if heartbeat.StoreReport == nil { - if _, isFailedStore := u.failedStores[heartbeat.Stats.StoreId]; isFailedStore { - // This should be unreachable. + + allCollected, err := u.collectReport(heartbeat) + if err != nil { + u.err = err + if u.handleErr() { + return + } + } + + if allCollected { + newestRegionTree, peersMap, err := u.buildUpFromReports() + if err != nil { + u.err = err + if u.handleErr() { return } + } + + // clean up previous plan + u.storePlanExpires = make(map[uint64]time.Time) + u.storeRecoveryPlans = make(map[uint64]*pdpb.RecoveryPlan) - expire, requested := u.storeReportExpires[heartbeat.Stats.StoreId] - now := time.Now() - if !requested || expire.Before(now) { - // Inform the store to send detailed report in the next heartbeat. 
- resp.RequireDetailedReport = true - u.storeReportExpires[heartbeat.Stats.StoreId] = now.Add(storeReportRequestInterval) + stage := u.stage + reCheck := false + for { + switch stage { + case collectReport: + fallthrough + case forceLeaderForCommitMerge: + if u.generateForceLeaderPlan(newestRegionTree, peersMap, true) { + u.changeStage(forceLeaderForCommitMerge) + break + } + fallthrough + case forceLeader: + if u.generateForceLeaderPlan(newestRegionTree, peersMap, false) { + u.changeStage(forceLeader) + break + } + fallthrough + case demoteFailedVoter: + if u.generateDemoteFailedVoterPlan(newestRegionTree, peersMap) { + u.changeStage(demoteFailedVoter) + break + } else if !reCheck { + reCheck = true + stage = forceLeaderForCommitMerge + continue + } + fallthrough + case createEmptyRegion: + if u.generateCreateEmptyRegionPlan(newestRegionTree, peersMap) { + u.changeStage(createEmptyRegion) + break + } + fallthrough + case exitForceLeader: + // no need to generate plan, empty recovery plan triggers exit force leader on TiKV side + if u.generateExitForceLeaderPlan() { + u.changeStage(exitForceLeader) + } + default: + panic("unreachable") } - } else if report, exist := u.storeReports[heartbeat.Stats.StoreId]; exist && report == nil { - u.storeReports[heartbeat.Stats.StoreId] = heartbeat.StoreReport + + hasPlan := len(u.storeRecoveryPlans) != 0 + if u.err != nil { + if u.handleErr() { + return + } + } else if !hasPlan { + u.changeStage(finished) + return + } + break + } + } + + u.dispatchPlan(heartbeat, resp) +} + +func (u *unsafeRecoveryController) handleErr() bool { + if u.err != nil { + if u.stage == exitForceLeader { + u.changeStage(failed) + return true + } + u.storePlanExpires = make(map[uint64]time.Time) + u.storeRecoveryPlans = make(map[uint64]*pdpb.RecoveryPlan) + u.timeout = time.Now().Add(storeRequestInterval) + u.changeStage(exitForceLeader) + } + return false +} + +/// It dispatches recovery plan if any. +func (u *unsafeRecoveryController) dispatchPlan(heartbeat *pdpb.StoreHeartbeatRequest, resp *pdpb.StoreHeartbeatResponse) { + storeID := heartbeat.Stats.StoreId + now := time.Now() + + if reported, exist := u.storeReports[storeID]; reported != nil || !exist { + // the plan has been executed, no need to dispatch again + // or no need to displan plan to this store(e.g. Tiflash) + return + } + + if expire, dispatched := u.storePlanExpires[storeID]; !dispatched || expire.Before(now) { + if dispatched { + log.Info("Unsafe recovery store recovery plan execution timeout, retry", zap.Uint64("store-id", storeID)) + } + // Dispatch the recovery plan to the store, and the plan may be empty. + resp.RecoveryPlan = u.getRecoveryPlan(storeID) + resp.RecoveryPlan.Step = u.step + u.storePlanExpires[storeID] = now.Add(storeRequestInterval) + } +} + +// It collects and checks if store reports have been fully collected. 
+func (u *unsafeRecoveryController) collectReport(heartbeat *pdpb.StoreHeartbeatRequest) (bool, error) { + storeID := heartbeat.Stats.StoreId + if _, isFailedStore := u.failedStores[storeID]; isFailedStore { + return false, errors.Errorf("Receive heartbeat from failed store %d", storeID) + } + + if heartbeat.StoreReport == nil { + return false, nil + } + + if heartbeat.StoreReport.GetStep() != u.step { + log.Info("Unsafe recovery receives invalid store report", + zap.Uint64("store-id", storeID), zap.Uint64("expected-step", u.step), zap.Uint64("obtained-step", heartbeat.StoreReport.GetStep())) + // invalid store report, ignore + return false, nil + } + + if report, exists := u.storeReports[storeID]; exists { + // if receive duplicated report from the same TiKV, use the latest one + u.storeReports[storeID] = heartbeat.StoreReport + if report == nil { u.numStoresReported++ if u.numStoresReported == len(u.storeReports) { - log.Info("Reports have been fully collected, generating plan...") - go u.generateRecoveryPlan() + return true, nil } } - case recovering: - if plan, tasked := u.storeRecoveryPlans[heartbeat.Stats.StoreId]; tasked { - if heartbeat.StoreReport == nil { - expire, requested := u.storePlanExpires[heartbeat.Stats.StoreId] - now := time.Now() - if !requested || expire.Before(now) { - // Sends the recovering plan to the store for execution. - resp.Plan = plan - u.storePlanExpires[heartbeat.Stats.StoreId] = now.Add(storePlanRequestInterval) - } - } else if !u.isPlanExecuted(heartbeat.Stats.StoreId, heartbeat.StoreReport) { - resp.Plan = plan - u.executionReports[heartbeat.Stats.StoreId] = heartbeat.StoreReport - } else { - u.executionResults[heartbeat.Stats.StoreId] = true - u.executionReports[heartbeat.Stats.StoreId] = heartbeat.StoreReport - u.numStoresPlanExecuted++ - if u.numStoresPlanExecuted == len(u.storeRecoveryPlans) { - u.cluster.PauseOrResumeScheduler("all", 0) - log.Info("Recover finished.") - go func() { - for _, history := range u.History() { - log.Info(history) - } - }() - u.stage = finished + } + return false, nil +} + +// Gets the stage of the current unsafe recovery. 
+func (u *unsafeRecoveryController) GetStage() unsafeRecoveryStage { + u.RLock() + defer u.RUnlock() + return u.stage +} + +func (u *unsafeRecoveryController) changeStage(stage unsafeRecoveryStage) { + u.stage = stage + + var output StageOutput + output.Time = time.Now().Format("2006-01-02 15:04:05.000") + switch u.stage { + case idle: + case collectReport: + stores := "" + count := 0 + for store := range u.failedStores { + count += 1 + stores += fmt.Sprintf("%d", store) + if count != len(u.failedStores) { + stores += ", " + } + } + output.Info = fmt.Sprintf("Unsafe recovery enters collect report stage: failed stores %s", stores) + case forceLeaderForCommitMerge: + output.Info = "Unsafe recovery enters force leader for commit merge stage" + output.Actions = u.getForceLeaderPlanDigest() + case forceLeader: + output.Info = "Unsafe recovery enters force leader stage" + output.Actions = u.getForceLeaderPlanDigest() + case demoteFailedVoter: + output.Info = "Unsafe recovery enters demote failed voter stage" + output.Actions = u.getDemoteFailedVoterPlanDigest() + case createEmptyRegion: + output.Info = "Unsafe recovery enters create empty region stage" + output.Actions = u.getCreateEmptyRegionPlanDigest() + case exitForceLeader: + output.Info = "Unsafe recovery enters exit force leader stage" + if u.err != nil { + output.Details = append(output.Details, u.err.Error()) + } + case finished: + if u.step > 1 { + // == 1 means no operation has done, no need to invalid cache + u.cluster.DropCacheAllRegion() + } + output.Info = "Unsafe recovery finished" + output.Details = u.getAffectedTableDigest() + u.storePlanExpires = make(map[uint64]time.Time) + u.storeRecoveryPlans = make(map[uint64]*pdpb.RecoveryPlan) + case failed: + output.Info = fmt.Sprintf("Unsafe recovery failed: %v", u.err) + output.Details = u.getAffectedTableDigest() + u.storePlanExpires = make(map[uint64]time.Time) + u.storeRecoveryPlans = make(map[uint64]*pdpb.RecoveryPlan) + } + + u.output = append(u.output, output) + data, err := json.Marshal(output) + if err != nil { + u.err = err + return + } + log.Info(string(data)) + + // reset store reports to nil instead of delete, because it relays on the item + // to decide which store it needs to collect the report from. 
+ for k := range u.storeReports { + u.storeReports[k] = nil + } + u.numStoresReported = 0 + u.step += 1 +} + +func (u *unsafeRecoveryController) getForceLeaderPlanDigest() map[string][]string { + outputs := make(map[string][]string) + for storeID, plan := range u.storeRecoveryPlans { + forceLeaders := plan.GetForceLeader() + if forceLeaders != nil { + regions := "" + for i, regionID := range forceLeaders.GetEnterForceLeaders() { + regions += fmt.Sprintf("%d", regionID) + if i != len(forceLeaders.GetEnterForceLeaders())-1 { + regions += ", " } } + outputs[fmt.Sprintf("store %d", storeID)] = []string{fmt.Sprintf("force leader on regions: %s", regions)} } } + return outputs } -func (u *unsafeRecoveryController) reset() { - u.stage = ready - u.failedStores = make(map[uint64]string) - u.storeReportExpires = make(map[uint64]time.Time) - u.storeReports = make(map[uint64]*pdpb.StoreReport) - u.numStoresReported = 0 - u.storePlanExpires = make(map[uint64]time.Time) - u.storeRecoveryPlans = make(map[uint64]*pdpb.RecoveryPlan) - u.executionResults = make(map[uint64]bool) - u.executionReports = make(map[uint64]*pdpb.StoreReport) - u.numStoresPlanExecuted = 0 +func (u *unsafeRecoveryController) getDemoteFailedVoterPlanDigest() map[string][]string { + outputs := make(map[string][]string) + for storeID, plan := range u.storeRecoveryPlans { + if len(plan.GetDemotes()) == 0 && len(plan.GetTombstones()) == 0 { + continue + } + output := []string{} + for _, demote := range plan.GetDemotes() { + peers := "" + for _, peer := range demote.GetFailedVoters() { + peers += fmt.Sprintf("{ %v}, ", peer) // the extra space is intentional + } + output = append(output, fmt.Sprintf("region %d demotes peers %s", demote.GetRegionId(), strings.Trim(peers, ", "))) + } + for _, tombstone := range plan.GetTombstones() { + output = append(output, fmt.Sprintf("tombstone the peer of region %d", tombstone)) + } + outputs[fmt.Sprintf("store %d", storeID)] = output + } + return outputs } -func (u *unsafeRecoveryController) isPlanExecuted(storeID uint64, report *pdpb.StoreReport) bool { - targetRegions := make(map[uint64]*metapb.Region) - toBeRemovedRegions := make(map[uint64]bool) - for _, create := range u.storeRecoveryPlans[storeID].Creates { - targetRegions[create.Id] = create +func (u *unsafeRecoveryController) getCreateEmptyRegionPlanDigest() map[string][]string { + outputs := make(map[string][]string) + for storeID, plan := range u.storeRecoveryPlans { + if plan.GetCreates() == nil { + continue + } + output := []string{} + for _, region := range plan.GetCreates() { + info := core.RegionToHexMeta(region).String() + // avoid json escape character to make the output readable + info = strings.ReplaceAll(info, "<", "{ ") // the extra space is intentional + info = strings.ReplaceAll(info, ">", "}") + output = append(output, fmt.Sprintf("create region %v", info)) + } + outputs[fmt.Sprintf("store %d", storeID)] = output } - for _, update := range u.storeRecoveryPlans[storeID].Updates { - targetRegions[update.Id] = update + return outputs +} + +func (u *unsafeRecoveryController) getAffectedTableDigest() []string { + var details []string + if len(u.affectedMetaRegions) != 0 { + regions := "" + for _, r := range u.affectedMetaRegions { + regions += fmt.Sprintf("%d, ", r) + } + details = append(details, "affected meta regions: "+strings.Trim(regions, ", ")) } - for _, del := range u.storeRecoveryPlans[storeID].Deletes { - toBeRemovedRegions[del] = true + if len(u.affectedTableIDs) != 0 { + tables := "" + for _, t := range 
u.affectedTableIDs { + tables += fmt.Sprintf("%d, ", t) + } + details = append(details, "affected table ids: "+strings.Trim(tables, ", ")) } - numFinished := 0 - for _, peerReport := range report.PeerReports { - region := peerReport.RegionState.Region - if _, ok := toBeRemovedRegions[region.Id]; ok { - return false - } else if target, ok := targetRegions[region.Id]; ok { - if bytes.Equal(target.StartKey, region.StartKey) && bytes.Equal(target.EndKey, region.EndKey) && !u.containsFailedPeers(region) { - numFinished += 1 + return details +} + +func (u *unsafeRecoveryController) recordAffectedRegion(region *metapb.Region) { + isMeta, tableID := codec.Key(region.StartKey).MetaOrTable() + if isMeta { + u.affectedMetaRegions = append(u.affectedMetaRegions, region.GetId()) + } else if tableID != 0 { + u.affectedTableIDs = append(u.affectedTableIDs, tableID) + } +} + +func (u *unsafeRecoveryController) canElectLeader(region *metapb.Region, onlyIncoming bool) bool { + hasQuorum := func(voters []*metapb.Peer) bool { + numFailedVoters := 0 + numLiveVoters := 0 + + for _, voter := range voters { + if _, ok := u.failedStores[voter.StoreId]; ok { + numFailedVoters += 1 } else { - return false + numLiveVoters += 1 } } + return numFailedVoters < numLiveVoters } - return numFinished == len(targetRegions) -} -type regionItem struct { - region *metapb.Region -} + // consider joint consensus + var incomingVoters []*metapb.Peer + var outgoingVoters []*metapb.Peer -func (r regionItem) Less(other btree.Item) bool { - return bytes.Compare(r.region.StartKey, other.(regionItem).region.StartKey) < 0 + for _, peer := range region.Peers { + if peer.Role == metapb.PeerRole_Voter || peer.Role == metapb.PeerRole_IncomingVoter { + incomingVoters = append(incomingVoters, peer) + } + if peer.Role == metapb.PeerRole_Voter || peer.Role == metapb.PeerRole_DemotingVoter { + outgoingVoters = append(outgoingVoters, peer) + } + } + + return hasQuorum(incomingVoters) && (onlyIncoming || hasQuorum(outgoingVoters)) } -func (u *unsafeRecoveryController) canElectLeader(region *metapb.Region) bool { - numFailedVoters := 0 - numLiveVoters := 0 +func (u *unsafeRecoveryController) getFailedPeers(region *metapb.Region) []*metapb.Peer { + // if it can form a quorum after exiting the joint state, then no need to demotes any peer + if u.canElectLeader(region, true) { + return nil + } + + var failedPeers []*metapb.Peer for _, peer := range region.Peers { - if peer.Role != metapb.PeerRole_Voter && peer.Role != metapb.PeerRole_IncomingVoter { + if peer.Role == metapb.PeerRole_Learner || peer.Role == metapb.PeerRole_DemotingVoter { continue } if _, ok := u.failedStores[peer.StoreId]; ok { - numFailedVoters += 1 - } else { - numLiveVoters += 1 + failedPeers = append(failedPeers, peer) } } - return numFailedVoters < numLiveVoters + return failedPeers } -func (u *unsafeRecoveryController) containsFailedPeers(region *metapb.Region) bool { - for _, peer := range region.Peers { - if _, ok := u.failedStores[peer.StoreId]; ok { +var _ btree.Item = ®ionItem{} + +type regionItem struct { + report *pdpb.PeerReport + storeID uint64 +} + +// Less returns true if the region start key is less than the other. 
+func (r *regionItem) Less(other btree.Item) bool { + left := r.Region().GetStartKey() + right := other.(*regionItem).Region().GetStartKey() + return bytes.Compare(left, right) < 0 +} + +func (r *regionItem) Contains(key []byte) bool { + start, end := r.Region().GetStartKey(), r.Region().GetEndKey() + return bytes.Compare(key, start) >= 0 && (len(end) == 0 || bytes.Compare(key, end) < 0) +} + +func (r *regionItem) Region() *metapb.Region { + return r.report.GetRegionState().GetRegion() +} + +func (r *regionItem) IsInitialized() bool { + return len(r.Region().Peers) != 0 +} + +func (r *regionItem) IsEpochStale(other *regionItem) bool { + re := r.Region().GetRegionEpoch() + oe := other.Region().GetRegionEpoch() + return re.GetVersion() < oe.GetVersion() || re.GetConfVer() < oe.GetConfVer() +} + +func (r *regionItem) IsRaftStale(origin *regionItem) bool { + rs := r.report.GetRaftState() + os := origin.report.GetRaftState() + if rs.GetHardState().GetTerm() < os.GetHardState().GetTerm() { + return true + } else if rs.GetHardState().GetTerm() == os.GetHardState().GetTerm() { + if rs.GetLastIndex() < os.GetLastIndex() { return true + } else if rs.GetLastIndex() == os.GetLastIndex() { + if rs.GetHardState().GetCommit() < os.GetHardState().GetCommit() { + return true + } else if rs.GetHardState().GetCommit() == os.GetHardState().GetCommit() { + // better use voter rather than learner + for _, peer := range r.Region().GetPeers() { + if peer.StoreId == r.storeID { + if peer.Role == metapb.PeerRole_DemotingVoter || peer.Role == metapb.PeerRole_Learner { + return true + } + } + } + } } } return false } -func keepOneReplica(storeID uint64, region *metapb.Region) { - var newPeerList []*metapb.Peer - for _, peer := range region.Peers { - if peer.StoreId == storeID { - if peer.Role != metapb.PeerRole_Voter { - peer.Role = metapb.PeerRole_Voter - } - newPeerList = append(newPeerList, peer) - } +const ( + defaultBTreeDegree = 64 +) + +type regionTree struct { + regions map[uint64]*regionItem + tree *btree.BTree +} + +func newRegionTree() *regionTree { + return ®ionTree{ + regions: make(map[uint64]*regionItem), + tree: btree.New(defaultBTreeDegree), } - region.Peers = newPeerList } -type peerStorePair struct { - peer *pdpb.PeerReport - storeID uint64 +func (t *regionTree) size() int { + return t.tree.Len() } -func getOverlapRanges(tree *btree.BTree, region *metapb.Region) []*metapb.Region { - var overlapRanges []*metapb.Region - tree.DescendLessOrEqual(regionItem{region}, func(item btree.Item) bool { - if bytes.Compare(item.(regionItem).region.StartKey, region.StartKey) < 0 && bytes.Compare(item.(regionItem).region.EndKey, region.StartKey) > 0 { - overlapRanges = append(overlapRanges, item.(regionItem).region) - } - return false - }) +func (t *regionTree) contains(regionID uint64) bool { + _, ok := t.regions[regionID] + return ok +} + +// getOverlaps gets the regions which are overlapped with the specified region range. +func (t *regionTree) getOverlaps(item *regionItem) []*regionItem { + // note that find() gets the last item that is less or equal than the region. + // in the case: |_______a_______|_____b_____|___c___| + // new region is |______d______| + // find() will return regionItem of region_a + // and both startKey of region_a and region_b are less than endKey of region_d, + // thus they are regarded as overlapped regions. 
+ result := t.find(item) + if result == nil { + result = item + } - tree.AscendGreaterOrEqual(regionItem{region}, func(item btree.Item) bool { - if len(region.EndKey) != 0 && bytes.Compare(item.(regionItem).region.StartKey, region.EndKey) > 0 { + end := item.Region().GetEndKey() + var overlaps []*regionItem + t.tree.AscendGreaterOrEqual(result, func(i btree.Item) bool { + over := i.(*regionItem) + if len(end) > 0 && bytes.Compare(end, over.Region().GetStartKey()) <= 0 { return false } - overlapRanges = append(overlapRanges, item.(regionItem).region) + overlaps = append(overlaps, over) return true }) - return overlapRanges + return overlaps } -func (u *unsafeRecoveryController) generateRecoveryPlan() { - u.Lock() - defer u.Unlock() - newestRegionReports := make(map[uint64]*pdpb.PeerReport) - var allPeerReports []*peerStorePair +// find is a helper function to find an item that contains the regions start key. +func (t *regionTree) find(item *regionItem) *regionItem { + var result *regionItem + t.tree.DescendLessOrEqual(item, func(i btree.Item) bool { + result = i.(*regionItem) + return false + }) + + if result == nil || !result.Contains(item.Region().GetStartKey()) { + return nil + } + + return result +} + +// Insert the peer report of one region int the tree. +// It finds and deletes all the overlapped regions first, and then +// insert the new region. +func (t *regionTree) insert(item *regionItem) (bool, error) { + overlaps := t.getOverlaps(item) + + if t.contains(item.Region().GetId()) { + // it's ensured by the `buildUpFromReports` that only insert the latest peer of one region. + return false, errors.Errorf("region %v shouldn't be updated twice", item.Region().GetId()) + } + + for _, old := range overlaps { + // it's ensured by the `buildUpFromReports` that peers are inserted in epoch descending order. 
+ if old.IsEpochStale(item) { + return false, errors.Errorf("region %v's epoch shouldn't be staler than old ones %v", item, old) + } + } + if len(overlaps) != 0 { + return false, nil + } + + t.regions[item.Region().GetId()] = item + t.tree.ReplaceOrInsert(item) + return true, nil +} + +func (u *unsafeRecoveryController) getRecoveryPlan(storeID uint64) *pdpb.RecoveryPlan { + if _, exists := u.storeRecoveryPlans[storeID]; !exists { + u.storeRecoveryPlans[storeID] = &pdpb.RecoveryPlan{} + } + return u.storeRecoveryPlans[storeID] +} + +func (u *unsafeRecoveryController) buildUpFromReports() (*regionTree, map[uint64][]*regionItem, error) { + peersMap := make(map[uint64][]*regionItem) + // Go through all the peer reports to build up the newest region tree for storeID, storeReport := range u.storeReports { for _, peerReport := range storeReport.PeerReports { - allPeerReports = append(allPeerReports, &peerStorePair{peerReport, storeID}) - regionID := peerReport.RegionState.Region.Id - if existing, ok := newestRegionReports[regionID]; ok { - if existing.RegionState.Region.RegionEpoch.Version >= peerReport.RegionState.Region.RegionEpoch.Version && - existing.RegionState.Region.RegionEpoch.ConfVer >= peerReport.RegionState.Region.RegionEpoch.Version && - existing.RaftState.LastIndex >= peerReport.RaftState.LastIndex { - continue - } - } - newestRegionReports[regionID] = peerReport + item := ®ionItem{report: peerReport, storeID: storeID} + peersMap[item.Region().GetId()] = append(peersMap[item.Region().GetId()], item) } } - recoveredRanges := btree.New(2) - healthyRegions := make(map[uint64]*pdpb.PeerReport) - inUseRegions := make(map[uint64]bool) - for _, report := range newestRegionReports { - region := report.RegionState.Region - // TODO(v01dstar): Whether the group can elect a leader should not merely rely on failed stores / peers, since it is possible that all reported peers are stale. 
- if u.canElectLeader(report.RegionState.Region) { - healthyRegions[region.Id] = report - inUseRegions[region.Id] = true - recoveredRanges.ReplaceOrInsert(regionItem{report.RegionState.Region}) + + // find the report of the leader + newestPeerReports := make([]*regionItem, 0, len(peersMap)) + for _, peers := range peersMap { + var latest *regionItem + for _, peer := range peers { + if latest == nil || latest.IsEpochStale(peer) { + latest = peer + } + } + if !latest.IsInitialized() { + // ignore the uninitialized peer + continue } + newestPeerReports = append(newestPeerReports, latest) } - sort.SliceStable(allPeerReports, func(i, j int) bool { - return allPeerReports[i].peer.RegionState.Region.RegionEpoch.Version > allPeerReports[j].peer.RegionState.Region.RegionEpoch.Version + + // sort in descending order of epoch + sort.SliceStable(newestPeerReports, func(i, j int) bool { + return newestPeerReports[j].IsEpochStale(newestPeerReports[i]) }) - for _, peerStorePair := range allPeerReports { - region := peerStorePair.peer.RegionState.Region - storeID := peerStorePair.storeID - lastEnd := region.StartKey - reachedTheEnd := false - var creates []*metapb.Region - var update *metapb.Region - for _, overlapRegion := range getOverlapRanges(recoveredRanges, region) { - if bytes.Compare(lastEnd, overlapRegion.StartKey) < 0 { - newRegion := proto.Clone(region).(*metapb.Region) - keepOneReplica(storeID, newRegion) - newRegion.StartKey = lastEnd - newRegion.EndKey = overlapRegion.StartKey - if _, inUse := inUseRegions[region.Id]; inUse { - newRegion.Id, _ = u.cluster.GetAllocator().Alloc() - creates = append(creates, newRegion) - } else { - inUseRegions[region.Id] = true - update = newRegion - } - recoveredRanges.ReplaceOrInsert(regionItem{newRegion}) - if len(overlapRegion.EndKey) == 0 { - reachedTheEnd = true - break - } - lastEnd = overlapRegion.EndKey - } else if len(overlapRegion.EndKey) == 0 { - reachedTheEnd = true - break - } else if bytes.Compare(overlapRegion.EndKey, lastEnd) > 0 { - lastEnd = overlapRegion.EndKey + + newestRegionTree := newRegionTree() + for _, peer := range newestPeerReports { + _, err := newestRegionTree.insert(peer) + if err != nil { + return nil, nil, err + } + } + return newestRegionTree, peersMap, nil +} + +func (u *unsafeRecoveryController) generateForceLeaderPlan(newestRegionTree *regionTree, peersMap map[uint64][]*regionItem, forCommitMerge bool) bool { + if u.err != nil { + return false + } + hasPlan := false + + selectLeader := func(region *metapb.Region) *regionItem { + var leader *regionItem + for _, peer := range peersMap[region.GetId()] { + if leader == nil || leader.IsRaftStale(peer) { + leader = peer } } - if !reachedTheEnd && (bytes.Compare(lastEnd, region.EndKey) < 0 || len(region.EndKey) == 0) { - newRegion := proto.Clone(region).(*metapb.Region) - keepOneReplica(storeID, newRegion) - newRegion.StartKey = lastEnd - newRegion.EndKey = region.EndKey - if _, inUse := inUseRegions[region.Id]; inUse { - newRegion.Id, _ = u.cluster.GetAllocator().Alloc() - creates = append(creates, newRegion) - } else { - inUseRegions[region.Id] = true - update = newRegion + return leader + } + + hasForceLeader := func(region *metapb.Region) bool { + for _, peer := range peersMap[region.GetId()] { + if peer.report.IsForceLeader { + return true } - recoveredRanges.ReplaceOrInsert(regionItem{newRegion}) } - if len(creates) != 0 || update != nil { - storeRecoveryPlan, exists := u.storeRecoveryPlans[storeID] - if !exists { - u.storeRecoveryPlans[storeID] = &pdpb.RecoveryPlan{} - 
storeRecoveryPlan = u.storeRecoveryPlans[storeID] + return false + } + + // Check the regions in newest Region Tree to see if it can still elect leader + // considering the failed stores + newestRegionTree.tree.Ascend(func(item btree.Item) bool { + report := item.(*regionItem).report + region := item.(*regionItem).Region() + if !u.canElectLeader(region, false) { + if hasForceLeader(region) { + // already is a force leader, skip + return true } - storeRecoveryPlan.Creates = append(storeRecoveryPlan.Creates, creates...) - if update != nil { - storeRecoveryPlan.Updates = append(storeRecoveryPlan.Updates, update) + if forCommitMerge && !report.HasCommitMerge { + // check force leader only for ones has commit merge to avoid the case that + // target region can't catch up log for the source region due to force leader + // propose an empty raft log on being leader + return true + } else if !forCommitMerge && report.HasCommitMerge { + u.err = errors.Errorf("unexpected commit merge state for report %v", report) + return false } - } else if _, healthy := healthyRegions[region.Id]; !healthy { - // If this peer contributes nothing to the recovered ranges, and it does not belong to a healthy region, delete it. - storeRecoveryPlan, exists := u.storeRecoveryPlans[storeID] - if !exists { - u.storeRecoveryPlans[storeID] = &pdpb.RecoveryPlan{} - storeRecoveryPlan = u.storeRecoveryPlans[storeID] + // the peer with largest log index/term may have lower commit/apply index, namely, lower epoch version + // so find which peer should to be the leader instead of using peer info in the region tree. + leader := selectLeader(region) + if leader == nil { + u.err = errors.Errorf("can't select leader for region %v", region) + return false } - storeRecoveryPlan.Deletes = append(storeRecoveryPlan.Deletes, region.Id) - } - } - // There may be ranges that are covered by no one. Find these empty ranges, create new regions that cover them and evenly distribute newly created regions among all stores. 
- lastEnd := []byte("") - var creates []*metapb.Region - recoveredRanges.Ascend(func(item btree.Item) bool { - region := item.(regionItem).region - if !bytes.Equal(region.StartKey, lastEnd) { - newRegion := &metapb.Region{} - newRegion.StartKey = lastEnd - newRegion.EndKey = region.StartKey - newRegion.Id, _ = u.cluster.GetAllocator().Alloc() - newRegion.RegionEpoch = &metapb.RegionEpoch{ConfVer: 1, Version: 1} - creates = append(creates, newRegion) + storeRecoveryPlan := u.getRecoveryPlan(leader.storeID) + if storeRecoveryPlan.ForceLeader == nil { + storeRecoveryPlan.ForceLeader = &pdpb.ForceLeader{} + for store := range u.failedStores { + storeRecoveryPlan.ForceLeader.FailedStores = append(storeRecoveryPlan.ForceLeader.FailedStores, store) + } + } + storeRecoveryPlan.ForceLeader.EnterForceLeaders = append(storeRecoveryPlan.ForceLeader.EnterForceLeaders, region.GetId()) + u.recordAffectedRegion(leader.Region()) + hasPlan = true } - lastEnd = region.EndKey return true }) - if !bytes.Equal(lastEnd, []byte("")) { - newRegion := &metapb.Region{} - newRegion.StartKey = lastEnd - newRegion.Id, _ = u.cluster.GetAllocator().Alloc() - creates = append(creates, newRegion) - } - var allStores []uint64 - for storeID := range u.storeReports { - allStores = append(allStores, storeID) - } - for idx, create := range creates { - storeID := allStores[idx%len(allStores)] - peerID, _ := u.cluster.GetAllocator().Alloc() - create.Peers = []*metapb.Peer{{Id: peerID, StoreId: storeID, Role: metapb.PeerRole_Voter}} - storeRecoveryPlan, exists := u.storeRecoveryPlans[storeID] - if !exists { - u.storeRecoveryPlans[storeID] = &pdpb.RecoveryPlan{} - storeRecoveryPlan = u.storeRecoveryPlans[storeID] - } - storeRecoveryPlan.Creates = append(storeRecoveryPlan.Creates, create) - } - log.Info("Plan generated") - if len(u.storeRecoveryPlans) == 0 { - log.Info("Nothing to do") - u.stage = finished - return - } - for store, plan := range u.storeRecoveryPlans { - log.Info("Store plan", zap.String("store", strconv.FormatUint(store, 10)), zap.String("plan", proto.MarshalTextString(plan))) - } - u.stage = recovering -} -func getPeerDigest(peer *metapb.Peer) string { - return strconv.FormatUint(peer.Id, 10) + ", " + strconv.FormatUint(peer.StoreId, 10) + ", " + peer.Role.String() -} + // TODO: need to resolve the case 2 + // it's hard to distinguish it with unfinished split region + // and it's rare, so won't do it now -func getRegionDigest(region *metapb.Region) string { - if region == nil { - return "nil" - } - regionID := strconv.FormatUint(region.Id, 10) - regionStartKey := core.HexRegionKeyStr(region.StartKey) - regionEndKey := core.HexRegionKeyStr(region.EndKey) - var peers string - for _, peer := range region.Peers { - peers += "(" + getPeerDigest(peer) + "), " - } - return fmt.Sprintf("region %s [%s, %s) {%s}", regionID, regionStartKey, regionEndKey, peers) + return hasPlan } -func getStoreDigest(storeReport *pdpb.StoreReport) string { - if storeReport == nil { - return "nil" - } - var result string - for _, peerReport := range storeReport.PeerReports { - result += getRegionDigest(peerReport.RegionState.Region) + ", " +func (u *unsafeRecoveryController) generateDemoteFailedVoterPlan(newestRegionTree *regionTree, peersMap map[uint64][]*regionItem) bool { + if u.err != nil { + return false } - return result -} + hasPlan := false -// Show returns the current status of ongoing unsafe recover operation. 
-func (u *unsafeRecoveryController) Show() []string { - u.RLock() - defer u.RUnlock() - switch u.stage { - case ready: - return []string{"No on-going operation."} - case collectingClusterInfo: - var status []string - status = append(status, fmt.Sprintf("Collecting cluster info from all alive stores, %d/%d.", u.numStoresReported, len(u.storeReports))) - var reported, unreported string - for storeID, report := range u.storeReports { - if report == nil { - unreported += strconv.FormatUint(storeID, 10) + "," - } else { - reported += strconv.FormatUint(storeID, 10) + "," + findForceLeader := func(peersMap map[uint64][]*regionItem, region *metapb.Region) *regionItem { + var leader *regionItem + for _, peer := range peersMap[region.GetId()] { + if peer.report.IsForceLeader { + leader = peer + break } } - status = append(status, "Stores that have reported to PD: "+reported) - status = append(status, "Stores that have not reported to PD: "+unreported) - return status - case recovering: - var status []string - status = append(status, fmt.Sprintf("Waiting for recover commands being applied, %d/%d", u.numStoresPlanExecuted, len(u.storeRecoveryPlans))) - status = append(status, "Recovery plan:") - for storeID, plan := range u.storeRecoveryPlans { - planDigest := "Store " + strconv.FormatUint(storeID, 10) + ", creates: " - for _, create := range plan.Creates { - planDigest += getRegionDigest(create) + ", " - } - planDigest += "; updates: " - for _, update := range plan.Updates { - planDigest += getRegionDigest(update) + ", " - } - planDigest += "; deletes: " - for _, deletion := range plan.Deletes { - planDigest += strconv.FormatUint(deletion, 10) + ", " + return leader + } + + // Check the regions in newest Region Tree to see if it can still elect leader + // considering the failed stores + newestRegionTree.tree.Ascend(func(item btree.Item) bool { + region := item.(*regionItem).Region() + if !u.canElectLeader(region, false) { + leader := findForceLeader(peersMap, region) + if leader == nil { + // can't find the force leader, maybe a newly split region, skip + return true } - status = append(status, planDigest) + storeRecoveryPlan := u.getRecoveryPlan(leader.storeID) + storeRecoveryPlan.Demotes = append(storeRecoveryPlan.Demotes, + &pdpb.DemoteFailedVoters{ + RegionId: region.GetId(), + FailedVoters: u.getFailedPeers(leader.Region()), + }, + ) + u.recordAffectedRegion(leader.Region()) + hasPlan = true } - status = append(status, "Execution progess:") - for storeID, applied := range u.executionResults { - if !applied { - status = append(status, strconv.FormatUint(storeID, 10)+"not yet applied, last report: "+getStoreDigest(u.executionReports[storeID])) + return true + }) + + // Tombstone the peers of region not presented in the newest region tree + for storeID, storeReport := range u.storeReports { + for _, peerReport := range storeReport.PeerReports { + region := peerReport.GetRegionState().Region + if !newestRegionTree.contains(region.GetId()) { + if !u.canElectLeader(region, false) { + // the peer is not in the valid regions, should be deleted directly + storeRecoveryPlan := u.getRecoveryPlan(storeID) + storeRecoveryPlan.Tombstones = append(storeRecoveryPlan.Tombstones, region.GetId()) + u.recordAffectedRegion(region) + hasPlan = true + } } } - return status - case finished: - return []string{"Last recovery has finished."} } - return []string{"Undefined status"} + return hasPlan } -// History returns the history logs of the current unsafe recover operation. 
-func (u *unsafeRecoveryController) History() []string { - u.RLock() - defer u.RUnlock() - if u.stage <= ready { - return []string{"No unasfe recover has been triggered since PD restarted."} +func (u *unsafeRecoveryController) generateCreateEmptyRegionPlan(newestRegionTree *regionTree, peersMap map[uint64][]*regionItem) bool { + if u.err != nil { + return false } - var history []string - if u.stage >= collectingClusterInfo { - history = append(history, "Store reports collection:") - for storeID, report := range u.storeReports { - if report == nil { - history = append(history, "Store "+strconv.FormatUint(storeID, 10)+": waiting for report.") - } else { - history = append(history, "Store "+strconv.FormatUint(storeID, 10)+": "+getStoreDigest(report)) - } + hasPlan := false + + createRegion := func(startKey, endKey []byte, storeID uint64) (*metapb.Region, error) { + regionID, err := u.cluster.GetAllocator().Alloc() + if err != nil { + return nil, err } + peerID, err := u.cluster.GetAllocator().Alloc() + if err != nil { + return nil, err + } + return &metapb.Region{ + Id: regionID, + StartKey: startKey, + EndKey: endKey, + RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, + Peers: []*metapb.Peer{{Id: peerID, StoreId: storeID, Role: metapb.PeerRole_Voter}}, + }, nil } - if u.stage >= recovering { - history = append(history, "Recovery plan:") - for storeID, plan := range u.storeRecoveryPlans { - planDigest := "Store " + strconv.FormatUint(storeID, 10) + ", creates: " - for _, create := range plan.Creates { - planDigest += getRegionDigest(create) + ", " + + // There may be ranges that are covered by no one. Find these empty ranges, create new + // regions that cover them and evenly distribute newly created regions among all stores. + lastEnd := []byte("") + var lastStoreID uint64 + newestRegionTree.tree.Ascend(func(item btree.Item) bool { + region := item.(*regionItem).Region() + storeID := item.(*regionItem).storeID + if !bytes.Equal(region.StartKey, lastEnd) { + newRegion, err := createRegion(lastEnd, region.StartKey, storeID) + if err != nil { + u.err = err + return false } - planDigest += "; updates: " - for _, update := range plan.Updates { - planDigest += getRegionDigest(update) + ", " + // paranoid check: shouldn't overlap with any of the peers + for _, peers := range peersMap { + for _, peer := range peers { + if !peer.IsInitialized() { + continue + } + if (bytes.Compare(newRegion.StartKey, peer.Region().StartKey) <= 0 && + (len(newRegion.EndKey) == 0 || bytes.Compare(peer.Region().StartKey, newRegion.EndKey) < 0)) || + ((len(peer.Region().EndKey) == 0 || bytes.Compare(newRegion.StartKey, peer.Region().EndKey) < 0) && + (len(newRegion.EndKey) == 0 || (len(peer.Region().EndKey) != 0 && bytes.Compare(peer.Region().EndKey, newRegion.EndKey) <= 0))) { + u.err = errors.Errorf("Find overlap peer %v with newly created empty region %v", core.RegionToHexMeta(peer.Region()), core.RegionToHexMeta(newRegion)) + return false + } + } } - planDigest += "; deletes: " - for _, deletion := range plan.Deletes { - planDigest += strconv.FormatUint(deletion, 10) + ", " + storeRecoveryPlan := u.getRecoveryPlan(storeID) + storeRecoveryPlan.Creates = append(storeRecoveryPlan.Creates, newRegion) + u.recordAffectedRegion(newRegion) + hasPlan = true + } + lastEnd = region.EndKey + lastStoreID = storeID + return true + }) + if u.err != nil { + return false + } + + if !bytes.Equal(lastEnd, []byte("")) || newestRegionTree.size() == 0 { + if lastStoreID == 0 { + // the last store id is invalid, so choose a random 
one + for storeID := range u.storeReports { + lastStoreID = storeID + break } - history = append(history, planDigest) } - history = append(history, "Execution progress:") - for storeID, applied := range u.executionResults { - executionDigest := "Store " + strconv.FormatUint(storeID, 10) - if !applied { - executionDigest += "not yet finished, " - } else { - executionDigest += "finished, " + newRegion, err := createRegion(lastEnd, []byte(""), lastStoreID) + if err != nil { + u.err = err + return false + } + storeRecoveryPlan := u.getRecoveryPlan(lastStoreID) + storeRecoveryPlan.Creates = append(storeRecoveryPlan.Creates, newRegion) + u.recordAffectedRegion(newRegion) + hasPlan = true + } + return hasPlan +} + +func (u *unsafeRecoveryController) generateExitForceLeaderPlan() bool { + for storeID, storeReport := range u.storeReports { + for _, peerReport := range storeReport.PeerReports { + if peerReport.IsForceLeader { + _ = u.getRecoveryPlan(storeID) + break } - executionDigest += getStoreDigest(u.executionReports[storeID]) - history = append(history, executionDigest) } } - return history + return false } diff --git a/server/cluster/unsafe_recovery_controller_test.go b/server/cluster/unsafe_recovery_controller_test.go index 8455b201c98..4b38f9499ba 100644 --- a/server/cluster/unsafe_recovery_controller_test.go +++ b/server/cluster/unsafe_recovery_controller_test.go @@ -15,11 +15,11 @@ package cluster import ( - "bytes" "context" "time" . "github.com/pingcap/check" + "github.com/pingcap/kvproto/pkg/eraftpb" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/kvproto/pkg/raft_serverpb" @@ -29,604 +29,1201 @@ import ( "github.com/tikv/pd/server/storage" ) -var _ = Suite(&testUnsafeRecoverSuite{}) +var _ = Suite(&testUnsafeRecoverySuite{}) -type testUnsafeRecoverSuite struct { +type testUnsafeRecoverySuite struct { ctx context.Context cancel context.CancelFunc } -func (s *testUnsafeRecoverSuite) TearDownTest(c *C) { +func (s *testUnsafeRecoverySuite) TearDownTest(c *C) { s.cancel() } -func (s *testUnsafeRecoverSuite) SetUpTest(c *C) { +func (s *testUnsafeRecoverySuite) SetUpTest(c *C) { s.ctx, s.cancel = context.WithCancel(context.Background()) } -func (s *testUnsafeRecoverSuite) TestPlanGenerationOneHealthyRegion(c *C) { +func newStoreHeartbeat(storeID uint64, report *pdpb.StoreReport) *pdpb.StoreHeartbeatRequest { + return &pdpb.StoreHeartbeatRequest{ + Stats: &pdpb.StoreStats{ + StoreId: storeID, + }, + StoreReport: report, + } +} + +func applyRecoveryPlan(c *C, storeID uint64, storeReports map[uint64]*pdpb.StoreReport, resp *pdpb.StoreHeartbeatResponse) { + plan := resp.GetRecoveryPlan() + if plan == nil { + return + } + + reports := storeReports[storeID] + reports.Step = plan.GetStep() + + forceLeaders := plan.GetForceLeader() + if forceLeaders != nil { + for _, forceLeader := range forceLeaders.GetEnterForceLeaders() { + for _, report := range reports.PeerReports { + region := report.GetRegionState().GetRegion() + if region.GetId() == forceLeader { + report.IsForceLeader = true + break + } + } + } + return + } + + for _, create := range plan.GetCreates() { + reports.PeerReports = append(reports.PeerReports, &pdpb.PeerReport{ + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: create, + }, + }) + } + + for _, tombstone := range plan.GetTombstones() { + for i, report := range reports.PeerReports { + if 
report.GetRegionState().GetRegion().GetId() == tombstone { + reports.PeerReports = append(reports.PeerReports[:i], reports.PeerReports[i+1:]...) + break + } + } + } + + for _, demote := range plan.GetDemotes() { + for store, storeReport := range storeReports { + for _, report := range storeReport.PeerReports { + region := report.GetRegionState().GetRegion() + if region.GetId() == demote.GetRegionId() { + for _, peer := range region.GetPeers() { + // promote learner + if peer.StoreId == storeID && peer.Role == metapb.PeerRole_Learner { + peer.Role = metapb.PeerRole_Voter + } + // exit joint state + if peer.Role == metapb.PeerRole_DemotingVoter { + peer.Role = metapb.PeerRole_Learner + } else if peer.Role == metapb.PeerRole_IncomingVoter { + peer.Role = metapb.PeerRole_Voter + } + } + for _, failedVoter := range demote.GetFailedVoters() { + for _, peer := range region.GetPeers() { + if failedVoter.GetId() == peer.GetId() { + peer.Role = metapb.PeerRole_Learner + break + } + } + } + region.RegionEpoch.ConfVer += 1 + if store == storeID { + c.Assert(report.IsForceLeader, IsTrue) + } + break + } + } + } + } + + for _, report := range reports.PeerReports { + report.IsForceLeader = false + } +} + +func advanceUntilFinished(c *C, recoveryController *unsafeRecoveryController, reports map[uint64]*pdpb.StoreReport) { + retry := 0 + + for { + for storeID, report := range reports { + req := newStoreHeartbeat(storeID, report) + req.StoreReport = report + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + applyRecoveryPlan(c, storeID, reports, resp) + } + if recoveryController.GetStage() == finished { + break + } else if recoveryController.GetStage() == failed { + panic("failed to recovery") + } else if retry >= 10 { + panic("retry timeout") + } + retry += 1 + } +} + +func (s *testUnsafeRecoverySuite) TestFinished(c *C) { _, opt, _ := newTestScheduleConfig() cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) - recoveryController := newUnsafeRecoveryController(cluster) - recoveryController.failedStores = map[uint64]string{ - 3: "", + cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(3, "6.0.0") { + c.Assert(cluster.PutStore(store.GetMeta()), IsNil) } - recoveryController.storeReports = map[uint64]*pdpb.StoreReport{ + recoveryController := newUnsafeRecoveryController(cluster) + c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 2: {}, + 3: {}, + }, 60), IsNil) + + reports := map[uint64]*pdpb.StoreReport{ 1: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, - RegionState: &raft_serverpb.RegionLocalState{ - Region: &metapb.Region{ - Id: 1, - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, - Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, - }}, - 2: {PeerReports: []*pdpb.PeerReport{ - { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 1, + Id: 1001, RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, Peers: []*metapb.Peer{ {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, }}, } - 
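// A reading aid, not part of the patch: the test helpers above (applyRecoveryPlan,
// advanceUntilFinished) drive the new recovery flow purely through store heartbeats,
// and the tests that follow assert the stage transitions via GetStage(). Below is a
// minimal standalone sketch of the stage ordering those tests exercise; happyPath is
// an illustrative name only, and the exitForceLeader/failed abort path is left out.
package main

import "fmt"

func happyPath(hasCommitMerge bool) []string {
	stages := []string{"collectReport"}
	if hasCommitMerge {
		// regions that report HasCommitMerge get their own force-leader round first
		stages = append(stages, "forceLeaderForCommitMerge")
	}
	return append(stages, "forceLeader", "demoteFailedVoter", "finished")
}

func main() {
	fmt.Println(happyPath(false)) // [collectReport forceLeader demoteFailedVoter finished]
	fmt.Println(happyPath(true))  // an extra forceLeaderForCommitMerge stage runs first
}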
recoveryController.generateRecoveryPlan() - // Rely on PD replica checker to remove failed stores. - c.Assert(len(recoveryController.storeRecoveryPlans), Equals, 0) + c.Assert(recoveryController.GetStage(), Equals, collectReport) + for storeID := range reports { + req := newStoreHeartbeat(storeID, nil) + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + // require peer report by empty plan + c.Assert(resp.RecoveryPlan, NotNil) + c.Assert(len(resp.RecoveryPlan.Creates), Equals, 0) + c.Assert(len(resp.RecoveryPlan.Demotes), Equals, 0) + c.Assert(resp.RecoveryPlan.ForceLeader, IsNil) + c.Assert(resp.RecoveryPlan.Step, Equals, uint64(1)) + applyRecoveryPlan(c, storeID, reports, resp) + } + + // receive all reports and dispatch plan + for storeID, report := range reports { + req := newStoreHeartbeat(storeID, report) + req.StoreReport = report + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(resp.RecoveryPlan, NotNil) + c.Assert(resp.RecoveryPlan.ForceLeader, NotNil) + c.Assert(len(resp.RecoveryPlan.ForceLeader.EnterForceLeaders), Equals, 1) + c.Assert(resp.RecoveryPlan.ForceLeader.FailedStores, NotNil) + applyRecoveryPlan(c, storeID, reports, resp) + } + c.Assert(recoveryController.GetStage(), Equals, forceLeader) + + for storeID, report := range reports { + req := newStoreHeartbeat(storeID, report) + req.StoreReport = report + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(resp.RecoveryPlan, NotNil) + c.Assert(len(resp.RecoveryPlan.Demotes), Equals, 1) + applyRecoveryPlan(c, storeID, reports, resp) + } + c.Assert(recoveryController.GetStage(), Equals, demoteFailedVoter) + for storeID, report := range reports { + req := newStoreHeartbeat(storeID, report) + req.StoreReport = report + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(resp.RecoveryPlan, IsNil) + // remove the two failed peers + applyRecoveryPlan(c, storeID, reports, resp) + } + c.Assert(recoveryController.GetStage(), Equals, finished) } -func (s *testUnsafeRecoverSuite) TestPlanGenerationOneUnhealthyRegion(c *C) { +func (s *testUnsafeRecoverySuite) TestFailed(c *C) { _, opt, _ := newTestScheduleConfig() cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) - recoveryController := newUnsafeRecoveryController(cluster) - recoveryController.failedStores = map[uint64]string{ - 2: "", - 3: "", + cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(3, "6.0.0") { + c.Assert(cluster.PutStore(store.GetMeta()), IsNil) } - recoveryController.storeReports = map[uint64]*pdpb.StoreReport{ + recoveryController := newUnsafeRecoveryController(cluster) + c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 2: {}, + 3: {}, + }, 60), IsNil) + + reports := map[uint64]*pdpb.StoreReport{ 1: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 1, + Id: 1001, RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, Peers: []*metapb.Peer{ {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, 
StoreId: 3}}}}}, }}, } - recoveryController.generateRecoveryPlan() - c.Assert(len(recoveryController.storeRecoveryPlans), Equals, 1) - store1Plan, ok := recoveryController.storeRecoveryPlans[1] - c.Assert(ok, IsTrue) - c.Assert(len(store1Plan.Updates), Equals, 1) - update := store1Plan.Updates[0] - c.Assert(bytes.Compare(update.StartKey, []byte("")), Equals, 0) - c.Assert(bytes.Compare(update.EndKey, []byte("")), Equals, 0) - c.Assert(len(update.Peers), Equals, 1) - c.Assert(update.Peers[0].StoreId, Equals, uint64(1)) + c.Assert(recoveryController.GetStage(), Equals, collectReport) + // require peer report + for storeID := range reports { + req := newStoreHeartbeat(storeID, nil) + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(resp.RecoveryPlan, NotNil) + c.Assert(len(resp.RecoveryPlan.Creates), Equals, 0) + c.Assert(len(resp.RecoveryPlan.Demotes), Equals, 0) + c.Assert(resp.RecoveryPlan.ForceLeader, IsNil) + applyRecoveryPlan(c, storeID, reports, resp) + } + + // receive all reports and dispatch plan + for storeID, report := range reports { + req := newStoreHeartbeat(storeID, report) + req.StoreReport = report + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(resp.RecoveryPlan, NotNil) + c.Assert(resp.RecoveryPlan.ForceLeader, NotNil) + c.Assert(len(resp.RecoveryPlan.ForceLeader.EnterForceLeaders), Equals, 1) + c.Assert(resp.RecoveryPlan.ForceLeader.FailedStores, NotNil) + applyRecoveryPlan(c, storeID, reports, resp) + } + c.Assert(recoveryController.GetStage(), Equals, forceLeader) + + for storeID, report := range reports { + req := newStoreHeartbeat(storeID, report) + req.StoreReport = report + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(resp.RecoveryPlan, NotNil) + c.Assert(len(resp.RecoveryPlan.Demotes), Equals, 1) + applyRecoveryPlan(c, storeID, reports, resp) + } + c.Assert(recoveryController.GetStage(), Equals, demoteFailedVoter) + + // received heartbeat from failed store, abort + req := newStoreHeartbeat(2, nil) + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(resp.RecoveryPlan, IsNil) + c.Assert(recoveryController.GetStage(), Equals, exitForceLeader) + + for storeID, report := range reports { + req := newStoreHeartbeat(storeID, report) + req.StoreReport = report + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(resp.RecoveryPlan, NotNil) + applyRecoveryPlan(c, storeID, reports, resp) + } + + for storeID, report := range reports { + req := newStoreHeartbeat(storeID, report) + req.StoreReport = report + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + applyRecoveryPlan(c, storeID, reports, resp) + } + c.Assert(recoveryController.GetStage(), Equals, failed) +} + +func (s *testUnsafeRecoverySuite) TestForceLeaderFail(c *C) { + _, opt, _ := newTestScheduleConfig() + cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(3, "6.0.0") { + c.Assert(cluster.PutStore(store.GetMeta()), IsNil) + } + recoveryController := newUnsafeRecoveryController(cluster) + 
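// A reading aid, not part of the patch: TestFailed above aborts once a store that was
// declared failed sends a heartbeat again, moving the operation to exitForceLeader and
// then failed. A minimal sketch of that abort condition (shouldAbort is an illustrative
// name, not the controller's code):
package main

import "fmt"

func shouldAbort(heartbeatFrom uint64, failedStores map[uint64]struct{}) bool {
	_, declaredFailed := failedStores[heartbeatFrom]
	return declaredFailed
}

func main() {
	failedStores := map[uint64]struct{}{2: {}, 3: {}}
	fmt.Println(shouldAbort(1, failedStores)) // false: a surviving store, recovery continues
	fmt.Println(shouldAbort(2, failedStores)) // true: a "failed" store is alive again, abort
}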
c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 2: {}, + 3: {}, + }, 1), IsNil) + + reports := map[uint64]*pdpb.StoreReport{ + 1: { + PeerReports: []*pdpb.PeerReport{ + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1001, + RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, + }, + }, + } + + req := newStoreHeartbeat(1, reports[1]) + resp := &pdpb.StoreHeartbeatResponse{} + req.StoreReport.Step = 1 + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(recoveryController.GetStage(), Equals, forceLeader) + + applyRecoveryPlan(c, 1, reports, resp) + // force leader doesn't succeed + reports[1].PeerReports[0].IsForceLeader = false + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(recoveryController.GetStage(), Equals, forceLeader) + + // force leader succeed this time + applyRecoveryPlan(c, 1, reports, resp) + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(recoveryController.GetStage(), Equals, demoteFailedVoter) } -func (s *testUnsafeRecoverSuite) TestPlanGenerationEmptyRange(c *C) { +func (s *testUnsafeRecoverySuite) TestForceLeaderForCommitMerge(c *C) { _, opt, _ := newTestScheduleConfig() cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(3, "6.0.0") { + c.Assert(cluster.PutStore(store.GetMeta()), IsNil) + } recoveryController := newUnsafeRecoveryController(cluster) - recoveryController.failedStores = map[uint64]string{ - 3: "", + c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 2: {}, + 3: {}, + }, 1), IsNil) + + reports := map[uint64]*pdpb.StoreReport{ + 1: { + PeerReports: []*pdpb.PeerReport{ + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1001, + StartKey: []byte(""), + EndKey: []byte("x"), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1002, + StartKey: []byte("x"), + EndKey: []byte(""), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}, + HasCommitMerge: true, + }, + }, + }, } - recoveryController.storeReports = map[uint64]*pdpb.StoreReport{ + + req := newStoreHeartbeat(1, reports[1]) + resp := &pdpb.StoreHeartbeatResponse{} + req.StoreReport.Step = 1 + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(recoveryController.GetStage(), Equals, forceLeaderForCommitMerge) + + // force leader on regions of commit merge first + c.Assert(resp.RecoveryPlan, NotNil) + c.Assert(resp.RecoveryPlan.ForceLeader, NotNil) + c.Assert(len(resp.RecoveryPlan.ForceLeader.EnterForceLeaders), Equals, 1) + 
c.Assert(resp.RecoveryPlan.ForceLeader.EnterForceLeaders[0], Equals, uint64(1002)) + c.Assert(resp.RecoveryPlan.ForceLeader.FailedStores, NotNil) + applyRecoveryPlan(c, 1, reports, resp) + + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(recoveryController.GetStage(), Equals, forceLeader) + + // force leader on the rest regions + c.Assert(resp.RecoveryPlan, NotNil) + c.Assert(resp.RecoveryPlan.ForceLeader, NotNil) + c.Assert(len(resp.RecoveryPlan.ForceLeader.EnterForceLeaders), Equals, 1) + c.Assert(resp.RecoveryPlan.ForceLeader.EnterForceLeaders[0], Equals, uint64(1001)) + c.Assert(resp.RecoveryPlan.ForceLeader.FailedStores, NotNil) + applyRecoveryPlan(c, 1, reports, resp) + + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(recoveryController.GetStage(), Equals, demoteFailedVoter) +} + +func (s *testUnsafeRecoverySuite) TestOneLearner(c *C) { + _, opt, _ := newTestScheduleConfig() + cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(3, "6.0.0") { + c.Assert(cluster.PutStore(store.GetMeta()), IsNil) + } + recoveryController := newUnsafeRecoveryController(cluster) + c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 2: {}, + 3: {}, + }, 60), IsNil) + + reports := map[uint64]*pdpb.StoreReport{ 1: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 1, - EndKey: []byte("c"), - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 2}, + Id: 1001, + RegionEpoch: &metapb.RegionEpoch{ConfVer: 7, Version: 10}, Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, + {Id: 11, StoreId: 1, Role: metapb.PeerRole_Learner}, {Id: 12, StoreId: 2}, {Id: 13, StoreId: 3}}}}}, }}, - 2: {PeerReports: []*pdpb.PeerReport{ + } + + advanceUntilFinished(c, recoveryController, reports) + + expects := map[uint64]*pdpb.StoreReport{ + 1: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 2, - StartKey: []byte("d"), - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 2}, + Id: 1001, + RegionEpoch: &metapb.RegionEpoch{ConfVer: 8, Version: 10}, Peers: []*metapb.Peer{ - {Id: 12, StoreId: 1}, {Id: 22, StoreId: 2}, {Id: 32, StoreId: 3}}}}}, + {Id: 11, StoreId: 1}, {Id: 12, StoreId: 2, Role: metapb.PeerRole_Learner}, {Id: 13, StoreId: 3, Role: metapb.PeerRole_Learner}}}}}, }}, } - recoveryController.generateRecoveryPlan() - c.Assert(len(recoveryController.storeRecoveryPlans), Equals, 1) - for storeID, plan := range recoveryController.storeRecoveryPlans { - c.Assert(len(plan.Creates), Equals, 1) - create := plan.Creates[0] - c.Assert(bytes.Compare(create.StartKey, []byte("c")), Equals, 0) - c.Assert(bytes.Compare(create.EndKey, []byte("d")), Equals, 0) - c.Assert(len(create.Peers), Equals, 1) - c.Assert(create.Peers[0].StoreId, Equals, storeID) - c.Assert(create.Peers[0].Role, Equals, metapb.PeerRole_Voter) + + 
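// A reading aid, not part of the patch: a minimal sketch of what a DemoteFailedVoters
// entry does to a region's peer list, matching the demotion half of the expects in the
// TestOneLearner case above (every voter on a failed store becomes a learner and
// ConfVer is bumped, e.g. from 7 to 8; the surviving learner's promotion to voter is
// left out for brevity). The peer struct and demoteFailedVoters helper are illustrative.
package main

import "fmt"

type peer struct {
	id, store uint64
	role      string // "Voter" or "Learner"
}

func demoteFailedVoters(peers []peer, failedStores map[uint64]struct{}) []peer {
	for i, p := range peers {
		if _, down := failedStores[p.store]; down && p.role == "Voter" {
			peers[i].role = "Learner"
		}
	}
	return peers
}

func main() {
	failedStores := map[uint64]struct{}{2: {}, 3: {}}
	peers := []peer{{11, 1, "Voter"}, {12, 2, "Voter"}, {13, 3, "Voter"}}
	fmt.Println(demoteFailedVoters(peers, failedStores))
	// [{11 1 Voter} {12 2 Learner} {13 3 Learner}]
}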
for storeID, report := range reports { + if result, ok := expects[storeID]; ok { + c.Assert(report.PeerReports, DeepEquals, result.PeerReports) + } else { + c.Assert(len(report.PeerReports), Equals, 0) + } } } -func (s *testUnsafeRecoverSuite) TestPlanGenerationEmptyRangeAtTheEnd(c *C) { +func (s *testUnsafeRecoverySuite) TestUninitializedPeer(c *C) { _, opt, _ := newTestScheduleConfig() cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) - recoveryController := newUnsafeRecoveryController(cluster) - recoveryController.failedStores = map[uint64]string{ - 3: "", + cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(3, "6.0.0") { + c.Assert(cluster.PutStore(store.GetMeta()), IsNil) } - recoveryController.storeReports = map[uint64]*pdpb.StoreReport{ + recoveryController := newUnsafeRecoveryController(cluster) + c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 2: {}, + 3: {}, + }, 60), IsNil) + + reports := map[uint64]*pdpb.StoreReport{ 1: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ + // uninitialized region that has no peer list Region: &metapb.Region{ - Id: 1, - StartKey: []byte(""), - EndKey: []byte("c"), - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 2}, - Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, + Id: 1001, + }}}, }}, - 2: {PeerReports: []*pdpb.PeerReport{ + } + + advanceUntilFinished(c, recoveryController, reports) + + expects := map[uint64]*pdpb.StoreReport{ + 1: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ Id: 1, StartKey: []byte(""), - EndKey: []byte("c"), - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 2}, + EndKey: []byte(""), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, + {Id: 2, StoreId: 1}}}}}, }}, } - recoveryController.generateRecoveryPlan() - c.Assert(len(recoveryController.storeRecoveryPlans), Equals, 1) - for storeID, plan := range recoveryController.storeRecoveryPlans { - c.Assert(len(plan.Creates), Equals, 1) - create := plan.Creates[0] - c.Assert(bytes.Compare(create.StartKey, []byte("c")), Equals, 0) - c.Assert(bytes.Compare(create.EndKey, []byte("")), Equals, 0) - c.Assert(len(create.Peers), Equals, 1) - c.Assert(create.Peers[0].StoreId, Equals, storeID) - c.Assert(create.Peers[0].Role, Equals, metapb.PeerRole_Voter) + + for storeID, report := range reports { + if result, ok := expects[storeID]; ok { + c.Assert(report.PeerReports, DeepEquals, result.PeerReports) + } else { + c.Assert(len(report.PeerReports), Equals, 0) + } } } -func (s *testUnsafeRecoverSuite) TestPlanGenerationUseNewestRanges(c *C) { +func (s *testUnsafeRecoverySuite) TestJointState(c *C) { _, opt, _ := newTestScheduleConfig() cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) 
- recoveryController := newUnsafeRecoveryController(cluster) - recoveryController.failedStores = map[uint64]string{ - 3: "", - 4: "", + cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(5, "6.0.0") { + c.Assert(cluster.PutStore(store.GetMeta()), IsNil) } - recoveryController.storeReports = map[uint64]*pdpb.StoreReport{ + recoveryController := newUnsafeRecoveryController(cluster) + c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 4: {}, + 5: {}, + }, 3600), IsNil) + + reports := map[uint64]*pdpb.StoreReport{ 1: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 1, + Id: 1001, StartKey: []byte(""), - EndKey: []byte("c"), - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 20}, + EndKey: []byte("x"), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 7, Version: 10}, Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 31, StoreId: 3}, {Id: 41, StoreId: 4}}}}}, + {Id: 11, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 12, StoreId: 4, Role: metapb.PeerRole_DemotingVoter}, + {Id: 13, StoreId: 5, Role: metapb.PeerRole_DemotingVoter}, + {Id: 14, StoreId: 2, Role: metapb.PeerRole_IncomingVoter}, + {Id: 15, StoreId: 3, Role: metapb.PeerRole_IncomingVoter}, + }}}}, { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 2, - StartKey: []byte("a"), - EndKey: []byte("c"), - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 10}, + Id: 1002, + StartKey: []byte("x"), + EndKey: []byte(""), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 3, Version: 6}, Peers: []*metapb.Peer{ - {Id: 12, StoreId: 1}, {Id: 22, StoreId: 2}, {Id: 32, StoreId: 3}}}}}, - { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, - RegionState: &raft_serverpb.RegionLocalState{ - Region: &metapb.Region{ - Id: 4, - StartKey: []byte("m"), - EndKey: []byte("p"), - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 10}, - Peers: []*metapb.Peer{ - {Id: 14, StoreId: 1}, {Id: 24, StoreId: 2}, {Id: 44, StoreId: 4}}}}}, + {Id: 21, StoreId: 1, Role: metapb.PeerRole_DemotingVoter}, + {Id: 22, StoreId: 4}, + {Id: 23, StoreId: 5}, + {Id: 24, StoreId: 2, Role: metapb.PeerRole_IncomingVoter}, + }}}}, }}, 2: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 3, - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 5}, + Id: 1001, + StartKey: []byte(""), + EndKey: []byte("x"), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 7, Version: 10}, Peers: []*metapb.Peer{ - {Id: 23, StoreId: 2}, {Id: 33, StoreId: 3}, {Id: 43, StoreId: 4}}}}}, + {Id: 11, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 12, StoreId: 4, Role: metapb.PeerRole_DemotingVoter}, + {Id: 13, StoreId: 5, Role: metapb.PeerRole_DemotingVoter}, + {Id: 14, StoreId: 2, Role: metapb.PeerRole_IncomingVoter}, + {Id: 15, StoreId: 3, Role: metapb.PeerRole_IncomingVoter}, + }}}}, { - 
RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 2, - StartKey: []byte("a"), - EndKey: []byte("c"), - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 10}, + Id: 1002, + StartKey: []byte("x"), + EndKey: []byte(""), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 3, Version: 6}, Peers: []*metapb.Peer{ - {Id: 12, StoreId: 1}, {Id: 22, StoreId: 2}, {Id: 32, StoreId: 3}}}}}, + {Id: 21, StoreId: 1, Role: metapb.PeerRole_DemotingVoter}, + {Id: 22, StoreId: 4}, + {Id: 23, StoreId: 5}, + {Id: 24, StoreId: 2, Role: metapb.PeerRole_IncomingVoter}, + }}}}, + }}, + 3: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 4, - StartKey: []byte("m"), - EndKey: []byte("p"), - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 10}, + Id: 1001, + StartKey: []byte(""), + EndKey: []byte("x"), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 7, Version: 10}, Peers: []*metapb.Peer{ - {Id: 14, StoreId: 1}, {Id: 24, StoreId: 2}, {Id: 44, StoreId: 4}}}}}, + {Id: 11, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 12, StoreId: 4, Role: metapb.PeerRole_DemotingVoter}, + {Id: 13, StoreId: 5, Role: metapb.PeerRole_DemotingVoter}, + {Id: 14, StoreId: 2, Role: metapb.PeerRole_IncomingVoter}, + {Id: 15, StoreId: 3, Role: metapb.PeerRole_IncomingVoter}, + }}}}, }}, } - recoveryController.generateRecoveryPlan() - c.Assert(len(recoveryController.storeRecoveryPlans), Equals, 2) - store1Plan, ok := recoveryController.storeRecoveryPlans[1] - c.Assert(ok, IsTrue) - updatedRegion1 := store1Plan.Updates[0] - c.Assert(updatedRegion1.Id, Equals, uint64(1)) - c.Assert(len(updatedRegion1.Peers), Equals, 1) - c.Assert(bytes.Compare(updatedRegion1.StartKey, []byte("")), Equals, 0) - c.Assert(bytes.Compare(updatedRegion1.EndKey, []byte("a")), Equals, 0) - - store2Plan := recoveryController.storeRecoveryPlans[2] - updatedRegion3 := store2Plan.Updates[0] - c.Assert(updatedRegion3.Id, Equals, uint64(3)) - c.Assert(len(updatedRegion3.Peers), Equals, 1) - c.Assert(bytes.Compare(updatedRegion3.StartKey, []byte("c")), Equals, 0) - c.Assert(bytes.Compare(updatedRegion3.EndKey, []byte("m")), Equals, 0) - create := store2Plan.Creates[0] - c.Assert(bytes.Compare(create.StartKey, []byte("p")), Equals, 0) - c.Assert(bytes.Compare(create.EndKey, []byte("")), Equals, 0) -} -func (s *testUnsafeRecoverSuite) TestPlanGenerationMembershipChange(c *C) { - _, opt, _ := newTestScheduleConfig() - cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) - recoveryController := newUnsafeRecoveryController(cluster) - recoveryController.failedStores = map[uint64]string{ - 4: "", - 5: "", - } - recoveryController.storeReports = map[uint64]*pdpb.StoreReport{ + advanceUntilFinished(c, recoveryController, reports) + + expects := map[uint64]*pdpb.StoreReport{ 1: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 1, - EndKey: []byte("c"), - RegionEpoch: 
&metapb.RegionEpoch{ConfVer: 2, Version: 2}, + Id: 1001, + StartKey: []byte(""), + EndKey: []byte("x"), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 8, Version: 10}, Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 41, StoreId: 4}, {Id: 51, StoreId: 5}}}}}, + {Id: 11, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 12, StoreId: 4, Role: metapb.PeerRole_Learner}, + {Id: 13, StoreId: 5, Role: metapb.PeerRole_Learner}, + {Id: 14, StoreId: 2, Role: metapb.PeerRole_Voter}, + {Id: 15, StoreId: 3, Role: metapb.PeerRole_Voter}, + }}}}, { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 2, - StartKey: []byte("c"), - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 2}, + Id: 1002, + StartKey: []byte("x"), + EndKey: []byte(""), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 4, Version: 6}, Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, + {Id: 21, StoreId: 1, Role: metapb.PeerRole_Learner}, + {Id: 22, StoreId: 4, Role: metapb.PeerRole_Learner}, + {Id: 23, StoreId: 5, Role: metapb.PeerRole_Learner}, + {Id: 24, StoreId: 2, Role: metapb.PeerRole_Voter}, + }}}}, }}, 2: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 1, - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, + Id: 1001, + StartKey: []byte(""), + EndKey: []byte("x"), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 8, Version: 10}, Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, + {Id: 11, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 12, StoreId: 4, Role: metapb.PeerRole_Learner}, + {Id: 13, StoreId: 5, Role: metapb.PeerRole_Learner}, + {Id: 14, StoreId: 2, Role: metapb.PeerRole_Voter}, + {Id: 15, StoreId: 3, Role: metapb.PeerRole_Voter}, + }}}}, { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 2, - StartKey: []byte("c"), - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 2}, + Id: 1002, + StartKey: []byte("x"), + EndKey: []byte(""), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 4, Version: 6}, Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, + {Id: 21, StoreId: 1, Role: metapb.PeerRole_Learner}, + {Id: 22, StoreId: 4, Role: metapb.PeerRole_Learner}, + {Id: 23, StoreId: 5, Role: metapb.PeerRole_Learner}, + {Id: 24, StoreId: 2, Role: metapb.PeerRole_Voter}, + }}}}, }}, 3: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 1, - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, - Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, - { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, - RegionState: &raft_serverpb.RegionLocalState{ - Region: &metapb.Region{ - Id: 2, - StartKey: []byte("c"), - 
RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 2}, + Id: 1001, + StartKey: []byte(""), + EndKey: []byte("x"), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 8, Version: 10}, Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, + {Id: 11, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 12, StoreId: 4, Role: metapb.PeerRole_Learner}, + {Id: 13, StoreId: 5, Role: metapb.PeerRole_Learner}, + {Id: 14, StoreId: 2, Role: metapb.PeerRole_Voter}, + {Id: 15, StoreId: 3, Role: metapb.PeerRole_Voter}, + }}}}, }}, } - recoveryController.generateRecoveryPlan() - c.Assert(len(recoveryController.storeRecoveryPlans), Equals, 3) - store1Plan, ok := recoveryController.storeRecoveryPlans[1] - c.Assert(ok, IsTrue) - updatedRegion1 := store1Plan.Updates[0] - c.Assert(updatedRegion1.Id, Equals, uint64(1)) - c.Assert(len(updatedRegion1.Peers), Equals, 1) - c.Assert(bytes.Compare(updatedRegion1.StartKey, []byte("")), Equals, 0) - c.Assert(bytes.Compare(updatedRegion1.EndKey, []byte("c")), Equals, 0) - - store2Plan := recoveryController.storeRecoveryPlans[2] - deleteStaleRegion1 := store2Plan.Deletes[0] - c.Assert(deleteStaleRegion1, Equals, uint64(1)) - - store3Plan := recoveryController.storeRecoveryPlans[3] - deleteStaleRegion1 = store3Plan.Deletes[0] - c.Assert(deleteStaleRegion1, Equals, uint64(1)) + + for storeID, report := range reports { + if result, ok := expects[storeID]; ok { + c.Assert(report.PeerReports, DeepEquals, result.PeerReports) + } else { + c.Assert(len(report.PeerReports), Equals, 0) + } + } } -func (s *testUnsafeRecoverSuite) TestPlanGenerationPromotingLearner(c *C) { +func (s *testUnsafeRecoverySuite) TestTimeout(c *C) { _, opt, _ := newTestScheduleConfig() cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(3, "6.0.0") { + c.Assert(cluster.PutStore(store.GetMeta()), IsNil) + } recoveryController := newUnsafeRecoveryController(cluster) - recoveryController.failedStores = map[uint64]string{ - 2: "", - 3: "", + c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 2: {}, + 3: {}, + }, 1), IsNil) + + time.Sleep(time.Second) + req := newStoreHeartbeat(1, nil) + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(recoveryController.GetStage(), Equals, exitForceLeader) + req.StoreReport = &pdpb.StoreReport{Step: 2} + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(recoveryController.GetStage(), Equals, failed) +} + +func (s *testUnsafeRecoverySuite) TestExitForceLeader(c *C) { + _, opt, _ := newTestScheduleConfig() + cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(3, "6.0.0") { + c.Assert(cluster.PutStore(store.GetMeta()), IsNil) } - recoveryController.storeReports = map[uint64]*pdpb.StoreReport{ - 1: {PeerReports: []*pdpb.PeerReport{ - { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, - RegionState: &raft_serverpb.RegionLocalState{ - Region: &metapb.Region{ - Id: 1, - RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, 
Version: 1}, - Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1, Role: metapb.PeerRole_Learner}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, - }}, + recoveryController := newUnsafeRecoveryController(cluster) + c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 2: {}, + 3: {}, + }, 1), IsNil) + + reports := map[uint64]*pdpb.StoreReport{ + 1: { + PeerReports: []*pdpb.PeerReport{ + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1001, + RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2, Role: metapb.PeerRole_Learner}, {Id: 31, StoreId: 3, Role: metapb.PeerRole_Learner}}}}, + IsForceLeader: true, + }, + }, + }, + } + + advanceUntilFinished(c, recoveryController, reports) + + expects := map[uint64]*pdpb.StoreReport{ + 1: { + PeerReports: []*pdpb.PeerReport{ + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1001, + RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2, Role: metapb.PeerRole_Learner}, {Id: 31, StoreId: 3, Role: metapb.PeerRole_Learner}}}}}, + }, + }, + } + + for storeID, report := range reports { + if result, ok := expects[storeID]; ok { + c.Assert(report.PeerReports, DeepEquals, result.PeerReports) + } else { + c.Assert(len(report.PeerReports), Equals, 0) + } } - recoveryController.generateRecoveryPlan() - c.Assert(len(recoveryController.storeRecoveryPlans), Equals, 1) - store1Plan, ok := recoveryController.storeRecoveryPlans[1] - c.Assert(ok, IsTrue) - c.Assert(len(store1Plan.Updates), Equals, 1) - update := store1Plan.Updates[0] - c.Assert(bytes.Compare(update.StartKey, []byte("")), Equals, 0) - c.Assert(bytes.Compare(update.EndKey, []byte("")), Equals, 0) - c.Assert(len(update.Peers), Equals, 1) - c.Assert(update.Peers[0].StoreId, Equals, uint64(1)) - c.Assert(update.Peers[0].Role, Equals, metapb.PeerRole_Voter) } -func (s *testUnsafeRecoverSuite) TestPlanGenerationKeepingOneReplica(c *C) { +func (s *testUnsafeRecoverySuite) TestStep(c *C) { _, opt, _ := newTestScheduleConfig() cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(3, "6.0.0") { + c.Assert(cluster.PutStore(store.GetMeta()), IsNil) + } recoveryController := newUnsafeRecoveryController(cluster) - recoveryController.failedStores = map[uint64]string{ - 3: "", - 4: "", + c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 2: {}, + 3: {}, + }, 1), IsNil) + + reports := map[uint64]*pdpb.StoreReport{ + 1: { + PeerReports: []*pdpb.PeerReport{ + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1001, + RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, + }, + }, + } + + req := newStoreHeartbeat(1, reports[1]) + resp := 
&pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + // step is not set, ignore + c.Assert(recoveryController.GetStage(), Equals, collectReport) + + // valid store report + req.StoreReport.Step = 1 + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(recoveryController.GetStage(), Equals, forceLeader) + + // duplicate report with same step, ignore + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(recoveryController.GetStage(), Equals, forceLeader) + applyRecoveryPlan(c, 1, reports, resp) + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(recoveryController.GetStage(), Equals, demoteFailedVoter) + applyRecoveryPlan(c, 1, reports, resp) + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(recoveryController.GetStage(), Equals, finished) +} + +func (s *testUnsafeRecoverySuite) TestOnHealthyRegions(c *C) { + _, opt, _ := newTestScheduleConfig() + cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(5, "6.0.0") { + c.Assert(cluster.PutStore(store.GetMeta()), IsNil) } - recoveryController.storeReports = map[uint64]*pdpb.StoreReport{ + recoveryController := newUnsafeRecoveryController(cluster) + c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 4: {}, + 5: {}, + }, 60), IsNil) + + reports := map[uint64]*pdpb.StoreReport{ 1: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 1, + Id: 1001, RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}, {Id: 41, StoreId: 4}}}}}, + {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, }}, 2: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 1, + Id: 1001, RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}, {Id: 41, StoreId: 4}}}}}, + {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, + }}, + 3: {PeerReports: []*pdpb.PeerReport{ + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1001, + RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}}}}}, }}, } - recoveryController.generateRecoveryPlan() - c.Assert(len(recoveryController.storeRecoveryPlans), Equals, 2) - foundUpdate := false - foundDelete := false - for storeID, plan := range recoveryController.storeRecoveryPlans { - if len(plan.Updates) == 1 { - foundUpdate = true - update := plan.Updates[0] - c.Assert(bytes.Compare(update.StartKey, []byte("")), Equals, 0) - c.Assert(bytes.Compare(update.EndKey, []byte("")), Equals, 0) - 
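// A reading aid, not part of the patch: the TestOnHealthyRegions case in this hunk
// shows that when every region can still elect a leader among its surviving voters,
// no plan is dispatched and the operation finishes directly. Below is a simplified
// majority check illustrating that idea; it is not PD's canElectLeader (which also
// handles learners and stricter modes), and the names are illustrative.
package main

import "fmt"

func canElectLeader(voterStores []uint64, failedStores map[uint64]struct{}) bool {
	alive := 0
	for _, s := range voterStores {
		if _, down := failedStores[s]; !down {
			alive++
		}
	}
	return alive > len(voterStores)/2
}

func main() {
	failedStores := map[uint64]struct{}{4: {}, 5: {}}
	fmt.Println(canElectLeader([]uint64{1, 2, 3}, failedStores)) // true: quorum intact, nothing to do
	fmt.Println(canElectLeader([]uint64{1, 4, 5}, failedStores)) // false: unsafe recovery must intervene
}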
c.Assert(len(update.Peers), Equals, 1) - c.Assert(update.Peers[0].StoreId, Equals, storeID) - } else if len(plan.Deletes) == 1 { - foundDelete = true - c.Assert(plan.Deletes[0], Equals, uint64(1)) - } + c.Assert(recoveryController.GetStage(), Equals, collectReport) + // require peer report + for storeID := range reports { + req := newStoreHeartbeat(storeID, nil) + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(resp.RecoveryPlan, NotNil) + c.Assert(len(resp.RecoveryPlan.Creates), Equals, 0) + c.Assert(len(resp.RecoveryPlan.Demotes), Equals, 0) + c.Assert(resp.RecoveryPlan.ForceLeader, IsNil) + applyRecoveryPlan(c, storeID, reports, resp) + } + + // receive all reports and dispatch no plan + for storeID, report := range reports { + req := newStoreHeartbeat(storeID, report) + req.StoreReport = report + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + c.Assert(resp.RecoveryPlan, IsNil) + applyRecoveryPlan(c, storeID, reports, resp) } - c.Assert(foundUpdate, Equals, true) - c.Assert(foundDelete, Equals, true) + // nothing to do, finish directly + c.Assert(recoveryController.GetStage(), Equals, finished) } -func (s *testUnsafeRecoverSuite) TestReportCollection(c *C) { +func (s *testUnsafeRecoverySuite) TestCreateEmptyRegion(c *C) { _, opt, _ := newTestScheduleConfig() cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(3, "6.0.0") { + c.Assert(cluster.PutStore(store.GetMeta()), IsNil) + } recoveryController := newUnsafeRecoveryController(cluster) - recoveryController.stage = collectingClusterInfo - recoveryController.failedStores = map[uint64]string{ - 3: "", - 4: "", + c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 2: {}, + 3: {}, + }, 3660), IsNil) + + reports := map[uint64]*pdpb.StoreReport{ + 1: {PeerReports: []*pdpb.PeerReport{ + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1001, + StartKey: []byte("a"), + EndKey: []byte("b"), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 7, Version: 10}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 12, StoreId: 2}, {Id: 13, StoreId: 3}}}}}, + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1002, + StartKey: []byte("e"), + EndKey: []byte(""), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 7, Version: 10}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 12, StoreId: 2}, {Id: 13, StoreId: 3}}}}}, + }}, } - recoveryController.storeReports[uint64(1)] = nil - recoveryController.storeReports[uint64(2)] = nil - store1Report := &pdpb.StoreReport{ - PeerReports: []*pdpb.PeerReport{ + + advanceUntilFinished(c, recoveryController, reports) + + expects := map[uint64]*pdpb.StoreReport{ + 1: {PeerReports: []*pdpb.PeerReport{ { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 1, - 
RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, + Id: 1001, + StartKey: []byte("a"), + EndKey: []byte("b"), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 8, Version: 10}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 12, StoreId: 2, Role: metapb.PeerRole_Learner}, {Id: 13, StoreId: 3, Role: metapb.PeerRole_Learner}}}}}, + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1002, + StartKey: []byte("e"), + EndKey: []byte(""), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 8, Version: 10}, Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}, {Id: 41, StoreId: 4}}}}}, - }} - store2Report := &pdpb.StoreReport{ - PeerReports: []*pdpb.PeerReport{ + {Id: 11, StoreId: 1}, {Id: 12, StoreId: 2, Role: metapb.PeerRole_Learner}, {Id: 13, StoreId: 3, Role: metapb.PeerRole_Learner}}}}}, { - RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ Id: 1, + StartKey: []byte(""), + EndKey: []byte("a"), RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, - Peers: []*metapb.Peer{ - {Id: 11, StoreId: 1}, {Id: 21, StoreId: 2}, {Id: 31, StoreId: 3}, {Id: 41, StoreId: 4}}}}}, - }} - heartbeat := &pdpb.StoreHeartbeatRequest{Stats: &pdpb.StoreStats{StoreId: 1}} - resp := &pdpb.StoreHeartbeatResponse{} - recoveryController.HandleStoreHeartbeat(heartbeat, resp) - c.Assert(resp.RequireDetailedReport, Equals, true) - // Second and following heartbeats in a short period of time are ignored. - resp = &pdpb.StoreHeartbeatResponse{} - recoveryController.HandleStoreHeartbeat(heartbeat, resp) - c.Assert(resp.RequireDetailedReport, Equals, false) - - heartbeat.StoreReport = store1Report - recoveryController.HandleStoreHeartbeat(heartbeat, resp) - c.Assert(recoveryController.numStoresReported, Equals, 1) - c.Assert(recoveryController.storeReports[uint64(1)], Equals, store1Report) - - heartbeat.Stats.StoreId = uint64(2) - heartbeat.StoreReport = store2Report - recoveryController.HandleStoreHeartbeat(heartbeat, resp) - c.Assert(recoveryController.numStoresReported, Equals, 2) - c.Assert(recoveryController.storeReports[uint64(2)], Equals, store2Report) + Peers: []*metapb.Peer{{Id: 2, StoreId: 1}}}}}, + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 3, + StartKey: []byte("b"), + EndKey: []byte("e"), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, + Peers: []*metapb.Peer{{Id: 4, StoreId: 1}}}}}, + }}, + } + + for storeID, report := range reports { + if expect, ok := expects[storeID]; ok { + c.Assert(report.PeerReports, DeepEquals, expect.PeerReports) + } else { + c.Assert(len(report.PeerReports), Equals, 0) + } + } } -func (s *testUnsafeRecoverSuite) TestPlanExecution(c *C) { +// TODO: can't handle this case now +// +──────────────────────────────────+───────────────────+───────────────────+───────────────────+───────────────────+──────────+──────────+ +// | | Store 1 | Store 2 | Store 3 | Store 4 | Store 5 | Store 6 | +// +──────────────────────────────────+───────────────────+───────────────────+───────────────────+───────────────────+──────────+──────────+ +// | Initial | A=[a,m), B=[m,z) | A=[a,m), 
B=[m,z) | A=[a,m), B=[m,z) | | | | +// | A merge B | isolate | A=[a,z) | A=[a,z) | | | | +// | Conf Change A: store 1 -> 4 | | A=[a,z) | A=[a,z) | A=[a,z) | | | +// | A split C | | isolate | C=[a,g), A=[g,z) | C=[a,g), A=[g,z) | | | +// | Conf Change A: store 3,4 -> 5,6 | | | C=[a,g) | C=[a,g) | A=[g,z) | A=[g,z) | +// | Store 4, 5 and 6 fail | A=[a,m), B=[m,z) | A=[a,z) | C=[a,g) | fail | fail | fail | +// +──────────────────────────────────+───────────────────+───────────────────+───────────────────+───────────────────+──────────+──────────+ + +func (s *testUnsafeRecoverySuite) TestRangeOverlap1(c *C) { _, opt, _ := newTestScheduleConfig() cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) - // Manually fill the coordinator up to allow calling on cluster.PauseOrResumeSchedulers(). cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(5, "6.0.0") { + c.Assert(cluster.PutStore(store.GetMeta()), IsNil) + } recoveryController := newUnsafeRecoveryController(cluster) - recoveryController.stage = recovering - recoveryController.failedStores = map[uint64]string{ - 3: "", - 4: "", - } - recoveryController.storeReports[uint64(1)] = nil - recoveryController.storeReports[uint64(2)] = nil - recoveryController.storeRecoveryPlans[uint64(1)] = &pdpb.RecoveryPlan{ - Creates: []*metapb.Region{ - { - Id: 4, - StartKey: []byte("f"), - Peers: []*metapb.Peer{{Id: 14, StoreId: 1}}, - }, - }, - Updates: []*metapb.Region{ + c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 4: {}, + 5: {}, + }, 60), IsNil) + + reports := map[uint64]*pdpb.StoreReport{ + 1: {PeerReports: []*pdpb.PeerReport{ { - Id: 5, - StartKey: []byte("c"), - EndKey: []byte("f"), - Peers: []*metapb.Peer{{Id: 15, StoreId: 1}}, - }, - }, - } - recoveryController.storeRecoveryPlans[uint64(2)] = &pdpb.RecoveryPlan{ - Updates: []*metapb.Region{ + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1001, + StartKey: []byte(""), + EndKey: []byte("x"), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 7, Version: 10}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 12, StoreId: 4}, {Id: 13, StoreId: 5}}}}}, + }}, + 2: {PeerReports: []*pdpb.PeerReport{ { - Id: 3, - EndKey: []byte("c"), - Peers: []*metapb.Peer{{Id: 23, StoreId: 2}}, - }, - }, - Deletes: []uint64{2}, + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1002, + StartKey: []byte(""), + EndKey: []byte(""), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 5, Version: 8}, + Peers: []*metapb.Peer{ + {Id: 21, StoreId: 1}, {Id: 22, StoreId: 4}, {Id: 23, StoreId: 5}}}}}, + }}, + 3: {PeerReports: []*pdpb.PeerReport{ + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1003, + StartKey: []byte("x"), + EndKey: []byte(""), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 4, Version: 6}, + Peers: []*metapb.Peer{ + {Id: 31, StoreId: 1}, {Id: 32, StoreId: 4}, {Id: 33, StoreId: 5}}}}}, + }}, } - store1Report := &pdpb.StoreReport{ - PeerReports: []*pdpb.PeerReport{ + + 
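// A reading aid, not part of the patch: TestRangeOverlap1 here (and TestRangeOverlap2
// after it) feed PD peers whose ranges overlap because some are stale pre-split
// snapshots. Judging by the expected reports, the newest region tree appears to keep
// regions in descending epoch order and to skip any region that overlaps one already
// kept, so the stale full-range region 1002 is dropped while 1001 and 1003 survive.
// The sketch below is that selection rule in isolation; it is inferred from the test
// outcome, not taken from the controller, and all names here are illustrative.
package main

import (
	"fmt"
	"sort"
)

type region struct {
	id      uint64
	start   string // "" sorts lowest, so it doubles as -inf here
	end     string // "" is treated as +inf
	version uint64
}

func overlaps(a, b region) bool {
	aBeforeB := a.end != "" && a.end <= b.start
	bBeforeA := b.end != "" && b.end <= a.start
	return !aBeforeB && !bBeforeA
}

func pickNewest(regions []region) []region {
	sort.Slice(regions, func(i, j int) bool { return regions[i].version > regions[j].version })
	var kept []region
	for _, r := range regions {
		overlapped := false
		for _, k := range kept {
			if overlaps(r, k) {
				overlapped = true
				break
			}
		}
		if !overlapped {
			kept = append(kept, r)
		}
	}
	return kept
}

func main() {
	for _, r := range pickNewest([]region{
		{1001, "", "x", 10}, // newest, covers (-inf, x)
		{1002, "", "", 8},   // stale, covers the whole key space
		{1003, "x", "", 6},  // covers [x, +inf)
	}) {
		fmt.Println(r.id) // 1001, then 1003; the stale 1002 never makes it in
	}
}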
advanceUntilFinished(c, recoveryController, reports) + + expects := map[uint64]*pdpb.StoreReport{ + 1: {PeerReports: []*pdpb.PeerReport{ { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 4, - StartKey: []byte("f"), - Peers: []*metapb.Peer{{Id: 14, StoreId: 1}}}}, - }, + Id: 1001, + StartKey: []byte(""), + EndKey: []byte("x"), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 8, Version: 10}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 12, StoreId: 4, Role: metapb.PeerRole_Learner}, {Id: 13, StoreId: 5, Role: metapb.PeerRole_Learner}}}}}, + }}, + 3: {PeerReports: []*pdpb.PeerReport{ { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 5, - StartKey: []byte("c"), - EndKey: []byte("f"), - Peers: []*metapb.Peer{{Id: 15, StoreId: 1}}}}, - }, - }} - heartbeat := &pdpb.StoreHeartbeatRequest{Stats: &pdpb.StoreStats{StoreId: 1}, StoreReport: store1Report} - resp := &pdpb.StoreHeartbeatResponse{} - recoveryController.HandleStoreHeartbeat(heartbeat, resp) - c.Assert(recoveryController.numStoresPlanExecuted, Equals, 1) + Id: 1003, + StartKey: []byte("x"), + EndKey: []byte(""), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 5, Version: 6}, + Peers: []*metapb.Peer{ + {Id: 31, StoreId: 1}, {Id: 32, StoreId: 4, Role: metapb.PeerRole_Learner}, {Id: 33, StoreId: 5, Role: metapb.PeerRole_Learner}}}}}, + }}, + } - store2Report := &pdpb.StoreReport{ - PeerReports: []*pdpb.PeerReport{ + for storeID, report := range reports { + if result, ok := expects[storeID]; ok { + c.Assert(report.PeerReports, DeepEquals, result.PeerReports) + } else { + c.Assert(len(report.PeerReports), Equals, 0) + } + } +} + +func (s *testUnsafeRecoverySuite) TestRangeOverlap2(c *C) { + _, opt, _ := newTestScheduleConfig() + cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(5, "6.0.0") { + c.Assert(cluster.PutStore(store.GetMeta()), IsNil) + } + recoveryController := newUnsafeRecoveryController(cluster) + c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 4: {}, + 5: {}, + }, 60), IsNil) + + reports := map[uint64]*pdpb.StoreReport{ + 1: {PeerReports: []*pdpb.PeerReport{ { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 2, - StartKey: []byte("g"), - Peers: []*metapb.Peer{{Id: 12, StoreId: 2}}}}, - }, - }} - heartbeat.Stats.StoreId = uint64(2) - heartbeat.StoreReport = store2Report - recoveryController.HandleStoreHeartbeat(heartbeat, resp) - c.Assert(recoveryController.numStoresPlanExecuted, Equals, 1) - - store2Report = &pdpb.StoreReport{ - PeerReports: []*pdpb.PeerReport{ + Id: 1001, + StartKey: []byte(""), + EndKey: []byte("x"), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 7, Version: 10}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 12, StoreId: 4}, {Id: 13, StoreId: 5}}}}}, + }}, + 2: {PeerReports: []*pdpb.PeerReport{ { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, 
RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 3, - EndKey: []byte("f"), - Peers: []*metapb.Peer{{Id: 13, StoreId: 2}}}}, - }, - }} - heartbeat.StoreReport = store2Report - recoveryController.HandleStoreHeartbeat(heartbeat, resp) - c.Assert(recoveryController.numStoresPlanExecuted, Equals, 1) + Id: 1002, + StartKey: []byte(""), + EndKey: []byte(""), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 5, Version: 8}, + Peers: []*metapb.Peer{ + {Id: 24, StoreId: 1}, {Id: 22, StoreId: 4}, {Id: 23, StoreId: 5}}}}}, + }}, + 3: {PeerReports: []*pdpb.PeerReport{ + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1002, + StartKey: []byte("x"), + EndKey: []byte(""), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 4, Version: 6}, + Peers: []*metapb.Peer{ + {Id: 21, StoreId: 1}, {Id: 22, StoreId: 4}, {Id: 23, StoreId: 5}}}}}, + }}, + } - store2Report = &pdpb.StoreReport{ - PeerReports: []*pdpb.PeerReport{ + advanceUntilFinished(c, recoveryController, reports) + + expects := map[uint64]*pdpb.StoreReport{ + 1: {PeerReports: []*pdpb.PeerReport{ { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, RegionState: &raft_serverpb.RegionLocalState{ Region: &metapb.Region{ - Id: 3, - EndKey: []byte("c"), - Peers: []*metapb.Peer{{Id: 13, StoreId: 2}}}}, - }, - }} - heartbeat.StoreReport = store2Report - recoveryController.HandleStoreHeartbeat(heartbeat, resp) - c.Assert(recoveryController.numStoresPlanExecuted, Equals, 2) - c.Assert(recoveryController.stage, Equals, finished) + Id: 1001, + StartKey: []byte(""), + EndKey: []byte("x"), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 8, Version: 10}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 12, StoreId: 4, Role: metapb.PeerRole_Learner}, {Id: 13, StoreId: 5, Role: metapb.PeerRole_Learner}}}}}, + // newly created empty region + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1, + StartKey: []byte("x"), + EndKey: []byte(""), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, + Peers: []*metapb.Peer{ + {Id: 2, StoreId: 1}}}}}, + }}, + } + + for storeID, report := range reports { + if result, ok := expects[storeID]; ok { + c.Assert(report.PeerReports, DeepEquals, result.PeerReports) + } else { + c.Assert(len(report.PeerReports), Equals, 0) + } + } } -func (s *testUnsafeRecoverSuite) TestRemoveFailedStores(c *C) { +func (s *testUnsafeRecoverySuite) TestRemoveFailedStores(c *C) { _, opt, _ := newTestScheduleConfig() cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) @@ -637,12 +1234,16 @@ func (s *testUnsafeRecoverSuite) TestRemoveFailedStores(c *C) { c.Assert(cluster.PutStore(store.GetMeta()), IsNil) } recoveryController := newUnsafeRecoveryController(cluster) - failedStores := map[uint64]string{ - 1: "", - 3: "", - } - c.Assert(recoveryController.RemoveFailedStores(failedStores), IsNil) + // Store 3 doesn't exist, reject to remove. 
+ c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 1: {}, + 3: {}, + }, 60), NotNil) + + c.Assert(recoveryController.RemoveFailedStores(map[uint64]struct{}{ + 1: {}, + }, 60), IsNil) c.Assert(cluster.GetStore(uint64(1)).IsRemoved(), IsTrue) for _, s := range cluster.GetSchedulers() { paused, err := cluster.IsSchedulerAllowed(s) @@ -651,14 +1252,13 @@ func (s *testUnsafeRecoverSuite) TestRemoveFailedStores(c *C) { } // Store 2's last heartbeat is recent, and is not allowed to be removed. - failedStores = map[uint64]string{ - 2: "", - } - - c.Assert(recoveryController.RemoveFailedStores(failedStores), NotNil) + c.Assert(recoveryController.RemoveFailedStores( + map[uint64]struct{}{ + 2: {}, + }, 60), NotNil) } -func (s *testUnsafeRecoverSuite) TestSplitPaused(c *C) { +func (s *testUnsafeRecoverySuite) TestSplitPaused(c *C) { _, opt, _ := newTestScheduleConfig() cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) cluster.coordinator = newCoordinator(s.ctx, cluster, hbstream.NewTestHeartbeatStreams(s.ctx, cluster.meta.GetId(), cluster, true)) @@ -672,10 +1272,10 @@ func (s *testUnsafeRecoverSuite) TestSplitPaused(c *C) { cluster.Lock() cluster.unsafeRecoveryController = recoveryController cluster.Unlock() - failedStores := map[uint64]string{ - 1: "", + failedStores := map[uint64]struct{}{ + 1: {}, } - c.Assert(recoveryController.RemoveFailedStores(failedStores), IsNil) + c.Assert(recoveryController.RemoveFailedStores(failedStores, 60), IsNil) askSplitReq := &pdpb.AskSplitRequest{} _, err := cluster.HandleAskSplit(askSplitReq) c.Assert(err.Error(), Equals, "[PD:unsaferecovery:ErrUnsafeRecoveryIsRunning]unsafe recovery is running") diff --git a/server/core/basic_cluster.go b/server/core/basic_cluster.go index 071847a8b89..9100636d376 100644 --- a/server/core/basic_cluster.go +++ b/server/core/basic_cluster.go @@ -431,6 +431,13 @@ func (bc *BasicCluster) RemoveRegionIfExist(id uint64) { } } +// ResetRegionCache drops all region cache. +func (bc *BasicCluster) ResetRegionCache() { + bc.Lock() + defer bc.Unlock() + bc.Regions = NewRegionsInfo() +} + // RemoveRegion removes RegionInfo from regionTree and regionMap. func (bc *BasicCluster) RemoveRegion(region *RegionInfo) { bc.Lock() diff --git a/server/grpc_service.go b/server/grpc_service.go index 0b57bf9fe6b..a3bb35bcd11 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -585,20 +585,19 @@ func (s *GrpcServer) StoreHeartbeat(ctx context.Context, request *pdpb.StoreHear return &pdpb.StoreHeartbeatResponse{Header: s.notBootstrappedHeader()}, nil } + if pberr := checkStore(rc, request.GetStats().GetStoreId()); pberr != nil { + return &pdpb.StoreHeartbeatResponse{ + Header: s.errorHeader(pberr), + }, nil + } + storeID := request.GetStats().GetStoreId() + store := rc.GetStore(storeID) + if store == nil { + return nil, errors.Errorf("store %v not found", storeID) + } + // Bypass stats handling if the store report for unsafe recover is not empty. 
if request.GetStoreReport() == nil { - if pberr := checkStore(rc, request.GetStats().GetStoreId()); pberr != nil { - return &pdpb.StoreHeartbeatResponse{ - Header: s.errorHeader(pberr), - }, nil - } - - storeID := request.GetStats().GetStoreId() - store := rc.GetStore(storeID) - if store == nil { - return nil, errors.Errorf("store %v not found", storeID) - } - storeAddress := store.GetAddress() storeLabel := strconv.FormatUint(storeID, 10) start := time.Now() @@ -622,9 +621,7 @@ func (s *GrpcServer) StoreHeartbeat(ctx context.Context, request *pdpb.StoreHear ReplicationStatus: rc.GetReplicationMode().GetReplicationStatus(), ClusterVersion: rc.GetClusterVersion(), } - if rc.GetUnsafeRecoveryController() != nil { - rc.GetUnsafeRecoveryController().HandleStoreHeartbeat(request, resp) - } + rc.GetUnsafeRecoveryController().HandleStoreHeartbeat(request, resp) return resp, nil } diff --git a/tests/client/go.mod b/tests/client/go.mod index ddd1ce30edb..93fb9d96eaa 100644 --- a/tests/client/go.mod +++ b/tests/client/go.mod @@ -7,7 +7,7 @@ require ( github.com/golang/protobuf v1.5.2 // indirect github.com/pingcap/check v0.0.0-20211026125417-57bd13f7b5f0 github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 - github.com/pingcap/kvproto v0.0.0-20220429093005-2839fa5a1ed6 + github.com/pingcap/kvproto v0.0.0-20220510035547-0e2f26c0a46a github.com/tikv/pd v0.0.0-00010101000000-000000000000 github.com/tikv/pd/client v0.0.0-00010101000000-000000000000 go.etcd.io/etcd v0.5.0-alpha.5.0.20191023171146-3cf2f69b5738 diff --git a/tests/client/go.sum b/tests/client/go.sum index d7cf3966001..0d8d9932f2b 100644 --- a/tests/client/go.sum +++ b/tests/client/go.sum @@ -408,9 +408,8 @@ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= github.com/pingcap/kvproto v0.0.0-20200411081810-b85805c9476c/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= -github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= -github.com/pingcap/kvproto v0.0.0-20220429093005-2839fa5a1ed6 h1:gT4uxwuZzTniXdzp4mPoZjhNkDNEuZBt7HESOuLRyMI= -github.com/pingcap/kvproto v0.0.0-20220429093005-2839fa5a1ed6/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20220510035547-0e2f26c0a46a h1:TxdHGOFeNa1q1mVv6TgReayf26iI4F8PQUm6RnZ/V/E= +github.com/pingcap/kvproto v0.0.0-20220510035547-0e2f26c0a46a/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= diff --git a/tests/pdctl/unsafe/unsafe_operation_test.go b/tests/pdctl/unsafe/unsafe_operation_test.go index a03ec0d811f..4bbe2309dc3 100644 --- a/tests/pdctl/unsafe/unsafe_operation_test.go +++ b/tests/pdctl/unsafe/unsafe_operation_test.go @@ -49,6 +49,12 @@ func (s *unsafeOperationTestSuite) TestRemoveFailedStores(c *C) { args := []string{"-u", pdAddr, "unsafe", "remove-failed-stores", "1,2,3"} _, err = pdctl.ExecuteCommand(cmd, args...) 
c.Assert(err, IsNil) + args = []string{"-u", pdAddr, "unsafe", "remove-failed-stores", "1,2,3", "--timeout", "3600"} + _, err = pdctl.ExecuteCommand(cmd, args...) + c.Assert(err, IsNil) + args = []string{"-u", pdAddr, "unsafe", "remove-failed-stores", "1,2,3", "--timeout", "abc"} + _, err = pdctl.ExecuteCommand(cmd, args...) + c.Assert(err, Not(IsNil)) args = []string{"-u", pdAddr, "unsafe", "remove-failed-stores", "show"} _, err = pdctl.ExecuteCommand(cmd, args...) c.Assert(err, IsNil) diff --git a/tools/pd-ctl/pdctl/command/unsafe_command.go b/tools/pd-ctl/pdctl/command/unsafe_command.go index 65b3daf62d3..513be6d6171 100644 --- a/tools/pd-ctl/pdctl/command/unsafe_command.go +++ b/tools/pd-ctl/pdctl/command/unsafe_command.go @@ -42,8 +42,8 @@ func NewRemoveFailedStoresCommand() *cobra.Command { Short: "Remove failed stores unsafely", Run: removeFailedStoresCommandFunc, } + cmd.PersistentFlags().Float64("timeout", 300, "timeout in seconds") cmd.AddCommand(NewRemoveFailedStoresShowCommand()) - cmd.AddCommand(NewRemoveFailedStoresHistoryCommand()) return cmd } @@ -56,15 +56,6 @@ func NewRemoveFailedStoresShowCommand() *cobra.Command { } } -// NewRemoveFailedStoresHistoryCommand returns the unsafe remove failed stores history command. -func NewRemoveFailedStoresHistoryCommand() *cobra.Command { - return &cobra.Command{ - Use: "history", - Short: "Show the history of failed stores removal", - Run: removeFailedStoresHistoryCommandFunc, - } -} - func removeFailedStoresCommandFunc(cmd *cobra.Command, args []string) { prefix := fmt.Sprintf("%s/remove-failed-stores", unsafePrefix) if len(args) < 1 { @@ -76,7 +67,7 @@ func removeFailedStoresCommandFunc(cmd *cobra.Command, args []string) { for _, strStore := range strStores { store, err := strconv.ParseUint(strStore, 10, 64) if err != nil { - cmd.Usage() + cmd.Println(err) return } stores = append(stores, store) @@ -84,25 +75,20 @@ func removeFailedStoresCommandFunc(cmd *cobra.Command, args []string) { postInput := map[string]interface{}{ "stores": stores, } - postJSON(cmd, prefix, postInput) -} - -func removeFailedStoresShowCommandFunc(cmd *cobra.Command, args []string) { - var resp string - var err error - prefix := fmt.Sprintf("%s/remove-failed-stores/show", unsafePrefix) - resp, err = doRequest(cmd, prefix, http.MethodGet, http.Header{}) + timeout, err := cmd.Flags().GetFloat64("timeout") if err != nil { cmd.Println(err) return + } else if timeout != 300 { + postInput["timeout"] = timeout } - cmd.Println(resp) + postJSON(cmd, prefix, postInput) } -func removeFailedStoresHistoryCommandFunc(cmd *cobra.Command, args []string) { +func removeFailedStoresShowCommandFunc(cmd *cobra.Command, args []string) { var resp string var err error - prefix := fmt.Sprintf("%s/remove-failed-stores/history", unsafePrefix) + prefix := fmt.Sprintf("%s/remove-failed-stores/show", unsafePrefix) resp, err = doRequest(cmd, prefix, http.MethodGet, http.Header{}) if err != nil { cmd.Println(err) diff --git a/tools/pd-tso-bench/go.sum b/tools/pd-tso-bench/go.sum index 1e0eaf8b258..5cd3368af61 100644 --- a/tools/pd-tso-bench/go.sum +++ b/tools/pd-tso-bench/go.sum @@ -108,8 +108,8 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTm github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZFh4N3vQ5HEtld3S+Y+StULhWVvumU0= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod 
h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= -github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748 h1:i4MBe1zGq9/r3BH6rTRunizi4T59fpNk8hvBCrB5UAY= -github.com/pingcap/kvproto v0.0.0-20220330070404-8c4cd3f93748/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20220510035547-0e2f26c0a46a h1:TxdHGOFeNa1q1mVv6TgReayf26iI4F8PQUm6RnZ/V/E= +github.com/pingcap/kvproto v0.0.0-20220510035547-0e2f26c0a46a/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee h1:VO2t6IBpfvW34TdtD/G10VvnGqjLic1jzOuHjUb5VqM= github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
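
With the pd-ctl changes above, `unsafe remove-failed-stores` posts a JSON body carrying the failed store IDs plus an optional "timeout" field (in seconds) when a non-default value is supplied via the new --timeout flag, and `unsafe remove-failed-stores show` polls the matching /show path. The Go sketch below illustrates an equivalent raw request; the PD address, the full endpoint path (/pd/api/v1/admin/unsafe/remove-failed-stores is assumed here), and the store IDs are illustrative assumptions, not values taken from this diff.

// remove_failed_stores_example.go: a minimal sketch of the request that
// `pd-ctl -u <pd-addr> unsafe remove-failed-stores 1,2,3 --timeout 3600`
// is expected to issue. The endpoint path and PD address are assumed values.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	pdAddr := "http://127.0.0.1:2379"                              // assumed PD client URL
	url := pdAddr + "/pd/api/v1/admin/unsafe/remove-failed-stores" // assumed full path

	// Mirror the updated pd-ctl behavior: always send "stores"; include
	// "timeout" (in seconds) when it differs from the CLI default.
	body := map[string]interface{}{
		"stores":  []uint64{1, 2, 3},
		"timeout": 3600,
	}
	data, err := json.Marshal(body)
	if err != nil {
		panic(err)
	}

	resp, err := http.Post(url, "application/json", bytes.NewReader(data))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}

Recovery progress can then be checked with `pd-ctl -u <pd-addr> unsafe remove-failed-stores show`, which sends a GET to the corresponding /show endpoint.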