Skip to content

Commit

Permalink
kvserver: fix bug that could lead to invalid non-voter rebalance atte…
Browse files Browse the repository at this point in the history
…mpts

A bug was introduced in cockroachdb#61239
which made it such that non-voters could, in certain edge case scenarios,
attempt to rebalance to stores that already had a voting replica for the same
range. However, this bug wouldn't actually materialize into user observable
symptoms because such rebalance decisions were rejected before the
`replicateQueue` ever tried to execute them. See:
https://github.com/cockroachdb/cockroach/blob/8bcd038ab6b76f6bf5894f2ffbbdbf10108b4c32/pkg/kv/kvserver/replicate_queue.go#L1257-L1271.
This commit cleans this up and adds a test demonstrating one such edge case.

Release note: None
  • Loading branch information
aayushshah15 committed Dec 10, 2021
1 parent dadff0d commit 6c829c2
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 1 deletion.
19 changes: 18 additions & 1 deletion pkg/kv/kvserver/allocator_scorer.go
Original file line number Diff line number Diff line change
Expand Up @@ -884,11 +884,28 @@ func rankedCandidateListForRebalancing(
)
continue
}

// `replsOnExemptedStores` is used during non-voting replica rebalancing
// to ignore stores that already have a voting replica for the same range.
// When rebalancing a voter, `replsOnExemptedStores` is empty since voters
// are allowed to "displace" non-voting replicas (we correctly turn such
// actions into non-voter promotions, see replicationChangesForRebalance()).
var exempted bool
for _, replOnExemptedStore := range replicasOnExemptedStores {
if store.StoreID == replOnExemptedStore.StoreID {
continue
log.VEventf(
ctx,
6,
"s%d is not a possible rebalance candidate for non-voters because it already has a voter of the range; ignoring",
store.StoreID,
)
exempted = true
break
}
}
if exempted {
continue
}

constraintsOK, necessary := rebalanceConstraintsChecker(store, existing.store)
maxCapacityOK := maxCapacityCheck(store)
Expand Down
63 changes: 63 additions & 0 deletions pkg/kv/kvserver/allocator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4090,6 +4090,69 @@ func TestVotersCanRebalanceToNonVoterStores(t *testing.T) {
}
}

// TestNonVotersCannotRebalanceToVoterStores ensures that non-voting replicas
// cannot rebalance to stores that already have a voting replica for the range.
func TestNonVotersCannotRebalanceToVoterStores(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
ctx, finishAndGetRecording := tracing.ContextWithRecordingSpan(
context.Background(), tracing.NewTracer(), "test",
)

stopper, g, _, a, _ := createTestAllocator(2, false /* deterministic */)
defer stopper.Stop(context.Background())
sg := gossiputil.NewStoreGossiper(g)

// Create 2 stores. Store 2 has a voting replica and store 1 has a non-voting
// replica. Make it such that store 1 has a full disk so the allocator will
// want to rebalance it away. However, the only possible candidate is store 2
// which already has a voting replica. Thus, this rebalance attempt should
// fail.
stores := []*roachpb.StoreDescriptor{
{
StoreID: 1,
Capacity: roachpb.StoreCapacity{
Capacity: 100,
Available: 0,
},
},
{
StoreID: 2,
Capacity: roachpb.StoreCapacity{
Capacity: 100,
Available: 100,
},
},
}
existingNonVoters := replicas(1)
existingVoters := replicas(2)

sg.GossipStores(stores, t)
var rangeUsageInfo RangeUsageInfo
add, remove, _, ok := a.RebalanceNonVoter(
ctx,
emptySpanConfig(),
nil,
existingVoters,
existingNonVoters,
rangeUsageInfo,
storeFilterThrottled,
a.scorerOptions(),
)

require.Falsef(
t, ok, "expected no action; got rebalance from s%d to s%d", remove.StoreID, add.StoreID,
)
trace := finishAndGetRecording().String()
require.Regexpf(
t,
"it already has a voter",
trace,
"expected the voter store to be explicitly ignored; got %s",
trace,
)
}

func TestRebalanceCandidatesNumReplicasConstraints(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand Down

0 comments on commit 6c829c2

Please sign in to comment.