Skip to content

Commit 8c12c3e

Browse files
committed
storage: indicate when a split causes a Raft snapshot
Touches cockroachdb#31409. Release note: None
1 parent 5bf312e commit 8c12c3e

File tree

3 files changed

+60
-5
lines changed

3 files changed

+60
-5
lines changed

pkg/storage/allocator_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ func replicas(storeIDs ...roachpb.StoreID) []roachpb.ReplicaDescriptor {
306306
for i, storeID := range storeIDs {
307307
res[i].NodeID = roachpb.NodeID(storeID)
308308
res[i].StoreID = storeID
309+
res[i].ReplicaID = roachpb.ReplicaID(i + 1)
309310
}
310311
return res
311312
}

pkg/storage/replica_command.go

+24-5
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,6 @@ import (
2222
"strings"
2323
"time"
2424

25-
"github.com/pkg/errors"
26-
"go.etcd.io/etcd/raft/raftpb"
27-
2825
"github.com/cockroachdb/cockroach/pkg/base"
2926
"github.com/cockroachdb/cockroach/pkg/internal/client"
3027
"github.com/cockroachdb/cockroach/pkg/keys"
@@ -40,6 +37,9 @@ import (
4037
"github.com/cockroachdb/cockroach/pkg/util/log"
4138
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
4239
"github.com/cockroachdb/cockroach/pkg/util/retry"
40+
"github.com/pkg/errors"
41+
"go.etcd.io/etcd/raft"
42+
"go.etcd.io/etcd/raft/raftpb"
4343
)
4444

4545
// evaluateCommand delegates to the eval method for the given
@@ -197,6 +197,23 @@ func maybeDescriptorChangedError(desc *roachpb.RangeDescriptor, err error) (stri
197197
return "", false
198198
}
199199

200+
func splitSnapshotWarningStr(rangeID roachpb.RangeID, status *raft.Status) string {
201+
var s string
202+
if status != nil && status.RaftState == raft.StateLeader {
203+
for replicaID, pr := range status.Progress {
204+
if replicaID == status.Lead {
205+
// TODO(tschottdorf): remove this line once we have picked up
206+
// https://github.com/etcd-io/etcd/pull/10279
207+
continue
208+
}
209+
if pr.State != raft.ProgressStateReplicate {
210+
s += fmt.Sprintf("; may cause Raft snapshot to r%d/%d: %v", rangeID, replicaID, &pr)
211+
}
212+
}
213+
}
214+
return s
215+
}
216+
200217
// adminSplitWithDescriptor divides the range into two ranges, using
201218
// either args.SplitKey (if provided) or an internally computed key that aims
202219
// to roughly equipartition the range by size. The split is done inside of a
@@ -295,8 +312,10 @@ func (r *Replica) adminSplitWithDescriptor(
295312
}
296313
leftDesc.EndKey = splitKey
297314

298-
log.Infof(ctx, "initiating a split of this range at key %s [r%d]",
299-
splitKey, rightDesc.RangeID)
315+
extra := splitSnapshotWarningStr(r.RangeID, r.RaftStatus())
316+
317+
log.Infof(ctx, "initiating a split of this range at key %s [r%d]%s",
318+
splitKey, rightDesc.RangeID, extra)
300319

301320
if err := r.store.DB().Txn(ctx, func(ctx context.Context, txn *client.Txn) error {
302321
log.Event(ctx, "split closure begins")

pkg/storage/replica_test.go

+35
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
"github.com/gogo/protobuf/proto"
3434
"github.com/kr/pretty"
3535
"github.com/pkg/errors"
36+
"github.com/stretchr/testify/assert"
3637
"github.com/stretchr/testify/require"
3738
"go.etcd.io/etcd/raft"
3839
"go.etcd.io/etcd/raft/raftpb"
@@ -134,6 +135,22 @@ func leaseExpiry(repl *Replica) int64 {
134135
return l.Expiration.WallTime + 1
135136
}
136137

138+
// Create a Raft status that shows everyone fully up to date.
139+
func upToDateRaftStatus(repls []roachpb.ReplicaDescriptor) *raft.Status {
140+
prs := make(map[uint64]raft.Progress)
141+
for _, repl := range repls {
142+
prs[uint64(repl.ReplicaID)] = raft.Progress{
143+
State: raft.ProgressStateReplicate,
144+
Match: 100,
145+
}
146+
}
147+
return &raft.Status{
148+
HardState: raftpb.HardState{Commit: 100},
149+
SoftState: raft.SoftState{Lead: 1, RaftState: raft.StateLeader},
150+
Progress: prs,
151+
}
152+
}
153+
137154
// testContext contains all the objects necessary to test a Range.
138155
// In most cases, simply call Start(t) (and later Stop()) on a zero-initialized
139156
// testContext{}. Any fields which are initialized to non-nil values
@@ -10856,3 +10873,21 @@ func TestRollbackMissingTxnRecordNoError(t *testing.T) {
1085610873
t.Errorf("expected %s; got %v", expErr, pErr)
1085710874
}
1085810875
}
10876+
10877+
func TestSplitSnapshotWarningStr(t *testing.T) {
10878+
defer leaktest.AfterTest(t)()
10879+
10880+
status := upToDateRaftStatus(replicas(1, 3, 5))
10881+
assert.Equal(t, "", splitSnapshotWarningStr(12, status))
10882+
10883+
pr := status.Progress[2]
10884+
pr.State = raft.ProgressStateProbe
10885+
status.Progress[2] = pr
10886+
10887+
assert.Equal(
10888+
t,
10889+
"; may cause Raft snapshot to r12/2: next = 0, match = 100, state = ProgressStateProbe,"+
10890+
" waiting = false, pendingSnapshot = 0",
10891+
splitSnapshotWarningStr(12, status),
10892+
)
10893+
}

0 commit comments

Comments
 (0)