Skip to content

Commit

Permalink
Merge pull request #20939 from bdarnell/cherrypick-split-atomic
Browse files Browse the repository at this point in the history
cherrypick-1.1: storage: Write HardState atomically with committing splits
  • Loading branch information
bdarnell authored Dec 20, 2017
2 parents 5fe4455 + d2e0bec commit 02bdfdf
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 13 deletions.
22 changes: 21 additions & 1 deletion pkg/storage/replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
"github.com/coreos/etcd/raft/raftpb"
"github.com/google/btree"
"github.com/kr/pretty"
"github.com/opentracing/opentracing-go"
opentracing "github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
"golang.org/x/net/context"

Expand Down Expand Up @@ -4414,6 +4414,26 @@ func (r *Replica) processRaftCommand(
raftCmd.ReplicatedEvalResult.AddSSTable = nil
}

if raftCmd.ReplicatedEvalResult.Split != nil {
// Splits require a new HardState to be written to the new RHS
// range (and this needs to be atomic with the main batch). This
// cannot be constructed at evaluation time because it differs
// on each replica (votes may have already been cast on the
// uninitialized replica). Transform the write batch to add the
// updated HardState.
// See https://github.com/cockroachdb/cockroach/issues/20629
//
// This is not the most efficient approach, but it only happens on splits,
// which are relatively infrequent and don't write much data.
tmpBatch := r.store.engine.NewBatch()
if err := tmpBatch.ApplyBatchRepr(writeBatch.Data, false); err != nil {
log.Fatal(ctx, err)
}
splitPreApply(ctx, r.store.cfg.Settings, tmpBatch, raftCmd.ReplicatedEvalResult.Split.SplitTrigger)
writeBatch.Data = tmpBatch.Repr()
tmpBatch.Close()
}

{
var err error
raftCmd.ReplicatedEvalResult.Delta, err = r.applyRaftCommand(
Expand Down
28 changes: 16 additions & 12 deletions pkg/storage/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import (
"github.com/coreos/etcd/raft"
"github.com/coreos/etcd/raft/raftpb"
"github.com/google/btree"
"github.com/opentracing/opentracing-go"
opentracing "github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
"golang.org/x/net/context"
"golang.org/x/time/rate"
Expand Down Expand Up @@ -1797,6 +1797,21 @@ func (s *Store) NewRangeDescriptor(
return desc, nil
}

// splitPreApply runs while the raft command carrying a split is being
// applied. Whatever it writes to eng is committed atomically with the split
// itself.
//
// It synthesizes the raft state for the right-hand side range
// (split.RightDesc.RangeID) into eng, so that the HardState carries the new
// Commit value now that the replica is initialized, combined with any
// existing (or default) Term and Vote. A failure to do so is fatal.
//
// st is not consulted by this function body.
func splitPreApply(
	ctx context.Context, st *cluster.Settings, eng engine.ReadWriter, split roachpb.SplitTrigger,
) {
	rhsLoader := makeReplicaStateLoader(split.RightDesc.RangeID)
	err := rhsLoader.synthesizeRaftState(ctx, eng)
	if err != nil {
		log.Fatal(ctx, err)
	}
}

// splitPostApply is the part of the split trigger which coordinates the actual
// split with the Store. Requires that Replica.raftMu is held.
//
Expand All @@ -1821,18 +1836,7 @@ func splitPostApply(
}
}

// Finish up the initialization of the RHS' RaftState now that we have
// committed the split Batch (which included the initialization of the
// ReplicaState). This will synthesize and persist the correct lastIndex and
// HardState.
if err := makeReplicaStateLoader(split.RightDesc.RangeID).synthesizeRaftState(
ctx, r.store.Engine(),
); err != nil {
log.Fatal(ctx, err)
}

// Finish initialization of the RHS.

r.mu.Lock()
rightRng.mu.Lock()
// Copy the minLeaseProposedTS from the LHS.
Expand Down

0 comments on commit 02bdfdf

Please sign in to comment.