From 06f7d7664796558f4e8df1faab63e0be5f8a5fde Mon Sep 17 00:00:00 2001 From: Alex Robinson Date: Fri, 15 Dec 2017 12:51:45 -0500 Subject: [PATCH] storage: Always simulate RemoveTarget when rebalancing Skipping the simulation when raftStatus.Progress is nil can make for undesirable thrashing of replicas, as seen when testing #20241. It's better to run the simulation without properly filtering replicas than to not run it at all. Release note: None --- pkg/storage/allocator.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pkg/storage/allocator.go b/pkg/storage/allocator.go index 964a74e13d59..99e9c0245e36 100644 --- a/pkg/storage/allocator.go +++ b/pkg/storage/allocator.go @@ -518,9 +518,6 @@ func (a Allocator) RebalanceTarget( } // We could make a simulation here to verify whether we'll remove the target we'll rebalance to. for len(candidates) > 0 { - if raftStatus == nil || raftStatus.Progress == nil { - break - } newReplica := roachpb.ReplicaDescriptor{ NodeID: target.store.Node.NodeID, StoreID: target.store.StoreID, @@ -532,7 +529,15 @@ func (a Allocator) RebalanceTarget( desc.Replicas = append(desc.Replicas[:len(desc.Replicas):len(desc.Replicas)], newReplica) rangeInfo.Desc = &desc - replicaCandidates := simulateFilterUnremovableReplicas(raftStatus, desc.Replicas, newReplica.ReplicaID) + // If we can, filter replicas as we would if we were actually removing one. + // If we can't (e.g. because we're the leaseholder but not the raft leader), + // it's better to simulate the removal with the info that we do have than to + // assume that the rebalance is ok (#20241). + replicaCandidates := desc.Replicas + if raftStatus != nil && raftStatus.Progress != nil { + replicaCandidates = simulateFilterUnremovableReplicas( + raftStatus, desc.Replicas, newReplica.ReplicaID) + } removeReplica, _, err := a.simulateRemoveTarget(ctx, target.store.StoreID, constraints, replicaCandidates, rangeInfo) if err != nil {