diff --git a/storage/client_raft_test.go b/storage/client_raft_test.go
index c46708a733b2..80a36d1562fc 100644
--- a/storage/client_raft_test.go
+++ b/storage/client_raft_test.go
@@ -1557,6 +1557,23 @@ func TestReplicaRemovalCampaign(t *testing.T) {
 			}
 
 			replica2 := store0.LookupReplica(roachpb.RKey(key2), nil)
+
+			rg2 := func(s *storage.Store) client.Sender {
+				return client.Wrap(s, func(ba roachpb.BatchRequest) roachpb.BatchRequest {
+					if ba.RangeID == 0 {
+						ba.RangeID = replica2.RangeID
+					}
+					return ba
+				})
+			}
+
+			// Raft processing is initialized lazily; issue a no-op write request to
+			// ensure that the Raft group has been started.
+			incArgs := incrementArgs(key2, 0)
+			if _, err := client.SendWrapped(rg2(store0), nil, &incArgs); err != nil {
+				t.Fatal(err)
+			}
+
 			if td.remove {
 				// Simulate second replica being transferred by removing it.
 				if err := store0.RemoveReplica(replica2, *replica2.Desc(), true); err != nil {
diff --git a/storage/client_test.go b/storage/client_test.go
index 45b0721f713c..f88f255720a1 100644
--- a/storage/client_test.go
+++ b/storage/client_test.go
@@ -635,6 +635,7 @@ func (m *multiTestContext) addStore(idx int) {
 			}
 		}
 	}
+	store.AllowIdleReplicaCampaign()
 
 	ln, err := netutil.ListenAndServeGRPC(m.transportStopper, grpcServer, util.TestAddr)
 	if err != nil {
diff --git a/storage/helpers_test.go b/storage/helpers_test.go
index 44ff3c5cfa39..0cb13156fcab 100644
--- a/storage/helpers_test.go
+++ b/storage/helpers_test.go
@@ -158,6 +158,12 @@ func (s *Store) ManualReplicaGC(repl *Replica) error {
 	return s.gcQueue.process(s.Ctx(), s.Clock().Now(), repl, cfg)
 }
 
+func (s *Store) AllowIdleReplicaCampaign() {
+	s.idleReplicaElectionTime.Lock()
+	s.idleReplicaElectionTime.at = s.Clock().PhysicalTime()
+	s.idleReplicaElectionTime.Unlock()
+}
+
 func (r *Replica) RaftLock() {
 	r.raftMu.Lock()
 }
diff --git a/storage/store.go b/storage/store.go
index 492f4aedb0c8..e846444e9517 100644
--- a/storage/store.go
+++ b/storage/store.go
@@ -1504,68 +1504,7 @@ func splitTriggerPostCommit(
 	r.store.replicateQueue.MaybeAdd(r, now)
 	r.store.replicateQueue.MaybeAdd(rightRng, now)
 
-	// To avoid leaving the RHS range unavailable as it waits to elect
-	// its leader, one (and only one) of the nodes should start an
-	// election as soon as the split is processed.
-	//
-	// If there is only one replica, raft instantly makes it the leader.
-	// Otherwise, we must trigger a campaign here.
-	//
-	// TODO(bdarnell): The asynchronous campaign can cause a problem
-	// with merges, by recreating a replica that should have been
-	// destroyed. This will probably be addressed as a part of the
-	// general work to be done for merges
-	// (https://github.com/cockroachdb/cockroach/issues/2433), but for
-	// now we're safe because we only perform the asynchronous
-	// campaign when there are multiple replicas, and we only perform
-	// merges in testing scenarios with a single replica.
-	// Note that in multi-node scenarios the async campaign is safe
-	// because it has exactly the same behavior as an incoming message
-	// from another node; the problem here is only with merges.
-	rightRng.mu.Lock()
-	shouldCampaign := rightRng.mu.state.Lease.OwnedBy(r.store.StoreID())
-	rightRng.mu.Unlock()
-	if len(split.RightDesc.Replicas) > 1 && shouldCampaign {
-		// Schedule the campaign a short time in the future. As
-		// followers process the split, they destroy and recreate their
-		// raft groups, which can cause messages to be dropped. In
-		// general a shorter delay (perhaps all the way down to zero) is
-		// better in production, because the race is rare and the worst
-		// case scenario is that we simply wait for an election timeout.
-		// However, the test for this feature disables election timeouts
-		// and relies solely on this campaign trigger, so it is unacceptably
-		// flaky without a bit of a delay.
-		//
-		// Note: you must not use the context inside of this task since it may
-		// contain a finished trace by the time it runs.
-		if err := r.store.stopper.RunAsyncTask(ctx, func(ctx context.Context) {
-			time.Sleep(10 * time.Millisecond)
-			// Make sure that rightRng hasn't been removed.
-			replica, err := r.store.GetReplica(rightRng.RangeID)
-			if err != nil {
-				if _, ok := err.(*roachpb.RangeNotFoundError); ok {
-					log.Infof(ctx, "%s: RHS replica %d removed before campaigning",
-						r, r.mu.replicaID)
-				} else {
-					log.Infof(ctx, "%s: RHS replica %d unable to campaign: %s",
-						r, r.mu.replicaID, err)
-				}
-				return
-			}
-
-			if err := replica.withRaftGroup(func(raftGroup *raft.RawNode) (bool, error) {
-				if err := raftGroup.Campaign(); err != nil {
-					log.Warningf(ctx, "%s: error %v", r, err)
-				}
-				return true, nil
-			}); err != nil {
-				panic(err)
-			}
-		}); err != nil {
-			log.Warningf(ctx, "%s: error %v", r, err)
-			return
-		}
-	} else if len(split.RightDesc.Replicas) == 1 {
+	if len(split.RightDesc.Replicas) == 1 {
 		// TODO(peter): In single-node clusters, we enqueue the right-hand side of
 		// the split (the new range) for Raft processing so that the corresponding
 		// Raft group is created. This shouldn't be necessary for correctness, but