-
Notifications
You must be signed in to change notification settings - Fork 3.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
storage: use atomic replication changes in RelocateRange
Touches #12768. Release justification: the previous code would enter vulnerable replication configurations when it wasn't necessary, thus undermining what we wanted to achieve in #12768. PS: the newly added test passes when manually overriding useAtomic with `false`, and I verified that it indeed uses no atomic replication changes. Release note: None
- Loading branch information
Showing
6 changed files
with
495 additions
and
175 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
// Copyright 2019 The Cockroach Authors. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.txt. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0, included in the file | ||
// licenses/APL.txt. | ||
|
||
package storage_test | ||
|
||
import ( | ||
"context" | ||
"sort" | ||
"testing" | ||
|
||
"github.com/cockroachdb/cockroach/pkg/base" | ||
"github.com/cockroachdb/cockroach/pkg/keys" | ||
"github.com/cockroachdb/cockroach/pkg/roachpb" | ||
"github.com/cockroachdb/cockroach/pkg/storage" | ||
"github.com/cockroachdb/cockroach/pkg/testutils" | ||
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster" | ||
"github.com/cockroachdb/cockroach/pkg/util/leaktest" | ||
"github.com/cockroachdb/errors" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func relocateAndCheck( | ||
t *testing.T, | ||
tc *testcluster.TestCluster, | ||
startKey roachpb.RKey, | ||
targets []roachpb.ReplicationTarget, | ||
) { | ||
require.NoError(t, tc.Servers[0].DB().AdminRelocateRange(context.Background(), startKey.AsRawKey(), targets)) | ||
desc, err := tc.Servers[0].LookupRange(startKey.AsRawKey()) | ||
require.NoError(t, err) | ||
requireDescMembers(t, desc, targets) | ||
requireLeaseAt(t, tc, desc, targets[0]) | ||
} | ||
|
||
func requireDescMembers( | ||
t *testing.T, desc roachpb.RangeDescriptor, targets []roachpb.ReplicationTarget, | ||
) { | ||
t.Helper() | ||
targets = append([]roachpb.ReplicationTarget(nil), targets...) | ||
sort.Slice(targets, func(i, j int) bool { return targets[i].StoreID < targets[j].StoreID }) | ||
|
||
have := make([]roachpb.ReplicationTarget, 0, len(targets)) | ||
for _, rDesc := range desc.Replicas().All() { | ||
have = append(have, roachpb.ReplicationTarget{ | ||
NodeID: rDesc.NodeID, | ||
StoreID: rDesc.StoreID, | ||
}) | ||
} | ||
sort.Slice(have, func(i, j int) bool { return have[i].StoreID < have[j].StoreID }) | ||
require.Equal(t, targets, have) | ||
} | ||
|
||
func requireLeaseAt( | ||
t *testing.T, | ||
tc *testcluster.TestCluster, | ||
desc roachpb.RangeDescriptor, | ||
target roachpb.ReplicationTarget, | ||
) { | ||
t.Helper() | ||
// NB: under stressrace the lease will sometimes be inactive by the time | ||
// it's returned here, so don't use FindRangeLeaseHolder which fails when | ||
// that happens. | ||
testutils.SucceedsSoon(t, func() error { | ||
lease, _, err := tc.FindRangeLease(desc, &target) | ||
if err != nil { | ||
return err | ||
} | ||
if target != (roachpb.ReplicationTarget{ | ||
NodeID: lease.Replica.NodeID, | ||
StoreID: lease.Replica.StoreID, | ||
}) { | ||
return errors.Errorf("lease %v is not held by %+v", lease, target) | ||
} | ||
return nil | ||
}) | ||
} | ||
|
||
func TestAdminRelocateRange(t *testing.T) { | ||
defer leaktest.AfterTest(t)() | ||
|
||
ctx := context.Background() | ||
|
||
type intercept struct { | ||
ops []roachpb.ReplicationChange | ||
leaseTarget *roachpb.ReplicationTarget | ||
err error | ||
} | ||
var intercepted []intercept | ||
|
||
requireNumAtomic := func(expAtomic int, expSingle int, f func()) { | ||
t.Helper() | ||
intercepted = nil | ||
f() | ||
var actAtomic, actSingle int | ||
for _, ic := range intercepted { | ||
if ic.err != nil { | ||
continue | ||
} | ||
if len(ic.ops) == 2 && ic.ops[0].ChangeType == roachpb.ADD_REPLICA && ic.ops[1].ChangeType == roachpb.REMOVE_REPLICA { | ||
actAtomic++ | ||
} else { | ||
actSingle++ | ||
} | ||
} | ||
require.Equal(t, expAtomic, actAtomic, "wrong number of atomic changes: %+v", intercepted) | ||
require.Equal(t, expSingle, actSingle, "wrong number of single changes: %+v", intercepted) | ||
} | ||
|
||
knobs := base.TestingKnobs{ | ||
Store: &storage.StoreTestingKnobs{ | ||
BeforeRelocateOne: func(ops []roachpb.ReplicationChange, leaseTarget *roachpb.ReplicationTarget, err error) { | ||
intercepted = append(intercepted, intercept{ | ||
ops: ops, | ||
leaseTarget: leaseTarget, | ||
err: err, | ||
}) | ||
}, | ||
}, | ||
} | ||
args := base.TestClusterArgs{ | ||
ServerArgs: base.TestServerArgs{Knobs: knobs}, | ||
ReplicationMode: base.ReplicationManual, | ||
} | ||
tc := testcluster.StartTestCluster(t, 6, args) | ||
defer tc.Stopper().Stop(ctx) | ||
|
||
// s1 (LH) ---> s2 (LH) s1 s3 | ||
// Pure upreplication. | ||
k := keys.MustAddr(tc.ScratchRange(t)) | ||
{ | ||
targets := tc.Targets(1, 0, 2) | ||
// Expect two single additions, and that's it. | ||
requireNumAtomic(0, 2, func() { | ||
relocateAndCheck(t, tc, k, targets) | ||
}) | ||
} | ||
|
||
// s1 (LH) s2 s3 ---> s4 (LH) s5 s6. | ||
// This is trickier because the leaseholder gets removed, and so do all | ||
// other replicas (i.e. a simple lease transfer at the beginning won't solve | ||
// the problem). | ||
{ | ||
targets := tc.Targets(3, 4, 5) | ||
// Should carry out three swaps. Note that the leaseholder gets removed | ||
// in the process (i.e. internally the lease must've been moved around | ||
// to achieve that). | ||
requireNumAtomic(3, 0, func() { | ||
relocateAndCheck(t, tc, k, targets) | ||
}) | ||
} | ||
|
||
// s4 (LH) s5 s6 ---> s5 (LH) | ||
// Pure downreplication. | ||
{ | ||
requireNumAtomic(0, 2, func() { | ||
relocateAndCheck(t, tc, k, tc.Targets(4)) | ||
}) | ||
} | ||
|
||
// s5 (LH) ---> s3 (LH) | ||
// Lateral movement while at replication factor one. In this case atomic | ||
// replication changes cannot be used; we add-then-remove instead. | ||
{ | ||
requireNumAtomic(0, 2, func() { | ||
relocateAndCheck(t, tc, k, tc.Targets(2)) | ||
}) | ||
} | ||
|
||
// s3 (LH) ---> s2 (LH) s4 s1 --> s4 (LH) s2 s6 s1 --> s3 (LH) s5 | ||
// A grab bag. | ||
{ | ||
// s3 -(add)-> s3 s2 -(swap)-> s4 s2 -(add)-> s4 s2 s1 (=s2 s4 s1) | ||
requireNumAtomic(1, 2, func() { | ||
relocateAndCheck(t, tc, k, tc.Targets(1, 3, 0)) | ||
}) | ||
// s2 s4 s1 -(add)-> s2 s4 s1 s6 (=s4 s2 s6 s1) | ||
requireNumAtomic(0, 1, func() { | ||
relocateAndCheck(t, tc, k, tc.Targets(3, 1, 5, 0)) | ||
}) | ||
// s4 s2 s6 s1 -(swap)-> s3 s2 s6 s1 -(swap)-> s3 s5 s6 s1 -(del)-> s3 s5 s6 -(del)-> s3 s5 | ||
requireNumAtomic(2, 2, func() { | ||
relocateAndCheck(t, tc, k, tc.Targets(2, 4)) | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.