Skip to content

Commit fe8cdcd

Browse files
committed
Puts FSM commits and restores into a single pipe so we control order.
1 parent 41b9e23 commit fe8cdcd

File tree

3 files changed

+84
-63
lines changed

3 files changed

+84
-63
lines changed

api.go

+10-8
Original file line number | Diff line number | Diff line change
@@ -65,11 +65,14 @@ type Raft struct {
6565
// FSM is the client state machine to apply commands to
6666
fsm FSM
6767

68-
// fsmCommitCh is used to trigger async application of logs to the fsm
69-
fsmCommitCh chan commitTuple
70-
71-
// fsmRestoreCh is used to trigger a restore from snapshot
72-
fsmRestoreCh chan *restoreFuture
68+
// fsmMutateCh is used to send state-changing updates to the FSM. This
69+
// receives pointers to commitTuple structures when applying logs or
70+
// pointers to restoreFuture structures when restoring a snapshot. We
71+
// need control over the order of these operations when doing user
72+
// restores so that we finish applying any old pending logs before we
73+
// take a user snapshot on the leader, otherwise we might restore the
74+
// snapshot and apply old logs to it that were in the pipe.
75+
fsmMutateCh chan interface{}
7376

7477
// fsmSnapshotCh is used to trigger a new snapshot being taken
7578
fsmSnapshotCh chan *reqSnapshotFuture
@@ -434,8 +437,7 @@ func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps Sna
434437
applyCh: make(chan *logFuture),
435438
conf: *conf,
436439
fsm: fsm,
437-
fsmCommitCh: make(chan commitTuple, 128),
438-
fsmRestoreCh: make(chan *restoreFuture),
440+
fsmMutateCh: make(chan interface{}, 128),
439441
fsmSnapshotCh: make(chan *reqSnapshotFuture),
440442
leaderCh: make(chan bool),
441443
localID: localID,
@@ -936,7 +938,7 @@ func (r *Raft) Stats() map[string]string {
936938
"last_log_term": toString(lastLogTerm),
937939
"commit_index": toString(r.getCommitIndex()),
938940
"applied_index": toString(r.getLastApplied()),
939-
"fsm_pending": toString(uint64(len(r.fsmCommitCh))),
941+
"fsm_pending": toString(uint64(len(r.fsmMutateCh))),
940942
"last_snapshot_index": toString(lastSnapIndex),
941943
"last_snapshot_term": toString(lastSnapTerm),
942944
"protocol_version": toString(uint64(r.protocolVersion)),

fsm.go

+71-52
Original file line numberDiff line numberDiff line change
@@ -48,68 +48,87 @@ type FSMSnapshot interface {
4848
// the FSM to block our internal operations.
4949
func (r *Raft) runFSM() {
5050
var lastIndex, lastTerm uint64
51-
for {
52-
select {
53-
case req := <-r.fsmRestoreCh:
54-
// Open the snapshot
55-
meta, source, err := r.snapshots.Open(req.ID)
56-
if err != nil {
57-
req.respond(fmt.Errorf("failed to open snapshot %v: %v", req.ID, err))
58-
continue
59-
}
6051

61-
// Attempt to restore
52+
commit := func(req *commitTuple) {
53+
// Apply the log if a command
54+
var resp interface{}
55+
if req.log.Type == LogCommand {
6256
start := time.Now()
63-
if err := r.fsm.Restore(source); err != nil {
64-
req.respond(fmt.Errorf("failed to restore snapshot %v: %v", req.ID, err))
65-
source.Close()
66-
continue
67-
}
57+
resp = r.fsm.Apply(req.log)
58+
metrics.MeasureSince([]string{"raft", "fsm", "apply"}, start)
59+
}
60+
61+
// Update the indexes
62+
lastIndex = req.log.Index
63+
lastTerm = req.log.Term
64+
65+
// Invoke the future if given
66+
if req.future != nil {
67+
req.future.response = resp
68+
req.future.respond(nil)
69+
}
70+
}
71+
72+
restore := func(req *restoreFuture) {
73+
// Open the snapshot
74+
meta, source, err := r.snapshots.Open(req.ID)
75+
if err != nil {
76+
req.respond(fmt.Errorf("failed to open snapshot %v: %v", req.ID, err))
77+
return
78+
}
79+
80+
// Attempt to restore
81+
start := time.Now()
82+
if err := r.fsm.Restore(source); err != nil {
83+
req.respond(fmt.Errorf("failed to restore snapshot %v: %v", req.ID, err))
6884
source.Close()
69-
metrics.MeasureSince([]string{"raft", "fsm", "restore"}, start)
85+
return
86+
}
87+
source.Close()
88+
metrics.MeasureSince([]string{"raft", "fsm", "restore"}, start)
7089

71-
// Update the last index and term
72-
lastIndex = meta.Index
73-
lastTerm = meta.Term
74-
req.respond(nil)
90+
// Update the last index and term
91+
lastIndex = meta.Index
92+
lastTerm = meta.Term
93+
req.respond(nil)
94+
}
7595

76-
case req := <-r.fsmSnapshotCh:
77-
// Is there something to snapshot?
78-
if lastIndex == 0 {
79-
req.respond(ErrNothingNewToSnapshot)
80-
continue
81-
}
96+
snapshot := func(req *reqSnapshotFuture) {
97+
// Is there something to snapshot?
98+
if lastIndex == 0 {
99+
req.respond(ErrNothingNewToSnapshot)
100+
return
101+
}
82102

83-
// Start a snapshot
84-
start := time.Now()
85-
snap, err := r.fsm.Snapshot()
86-
metrics.MeasureSince([]string{"raft", "fsm", "snapshot"}, start)
87-
88-
// Respond to the request
89-
req.index = lastIndex
90-
req.term = lastTerm
91-
req.snapshot = snap
92-
req.respond(err)
93-
94-
case commitEntry := <-r.fsmCommitCh:
95-
// Apply the log if a command
96-
var resp interface{}
97-
if commitEntry.log.Type == LogCommand {
98-
start := time.Now()
99-
resp = r.fsm.Apply(commitEntry.log)
100-
metrics.MeasureSince([]string{"raft", "fsm", "apply"}, start)
101-
}
103+
// Start a snapshot
104+
start := time.Now()
105+
snap, err := r.fsm.Snapshot()
106+
metrics.MeasureSince([]string{"raft", "fsm", "snapshot"}, start)
102107

103-
// Update the indexes
104-
lastIndex = commitEntry.log.Index
105-
lastTerm = commitEntry.log.Term
108+
// Respond to the request
109+
req.index = lastIndex
110+
req.term = lastTerm
111+
req.snapshot = snap
112+
req.respond(err)
113+
}
114+
115+
for {
116+
select {
117+
case ptr := <-r.fsmMutateCh:
118+
switch req := ptr.(type) {
119+
case *commitTuple:
120+
commit(req)
106121

107-
// Invoke the future if given
108-
if commitEntry.future != nil {
109-
commitEntry.future.response = resp
110-
commitEntry.future.respond(nil)
122+
case *restoreFuture:
123+
restore(req)
124+
125+
default:
126+
panic(fmt.Errorf("bad type passed to fsmMutateCh: %#v", ptr))
111127
}
112128

129+
case req := <-r.fsmSnapshotCh:
130+
snapshot(req)
131+
113132
case <-r.shutdownCh:
114133
return
115134
}

raft.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -757,7 +757,7 @@ func (r *Raft) restoreUserSnapshot(meta *SnapshotMeta, reader io.ReadCloser) err
757757
fsm := &restoreFuture{ID: sink.ID()}
758758
fsm.init()
759759
select {
760-
case r.fsmRestoreCh <- fsm:
760+
case r.fsmMutateCh <- fsm:
761761
case <-r.shutdownCh:
762762
return ErrRaftShutdown
763763
}
@@ -902,7 +902,7 @@ func (r *Raft) processLog(l *Log, future *logFuture) {
902902
case LogCommand:
903903
// Forward to the fsm handler
904904
select {
905-
case r.fsmCommitCh <- commitTuple{l, future}:
905+
case r.fsmMutateCh <- &commitTuple{l, future}:
906906
case <-r.shutdownCh:
907907
if future != nil {
908908
future.respond(ErrRaftShutdown)
@@ -1302,7 +1302,7 @@ func (r *Raft) installSnapshot(rpc RPC, req *InstallSnapshotRequest) {
13021302
future := &restoreFuture{ID: sink.ID()}
13031303
future.init()
13041304
select {
1305-
case r.fsmRestoreCh <- future:
1305+
case r.fsmMutateCh <- future:
13061306
case <-r.shutdownCh:
13071307
future.respond(ErrRaftShutdown)
13081308
return

0 commit comments

Comments
 (0)