@@ -3,6 +3,7 @@ package raft
3
3
import (
4
4
"errors"
5
5
"fmt"
6
+ "io"
6
7
"log"
7
8
"os"
8
9
"strconv"
@@ -64,11 +65,14 @@ type Raft struct {
64
65
// FSM is the client state machine to apply commands to
65
66
fsm FSM
66
67
67
- // fsmCommitCh is used to trigger async application of logs to the fsm
68
- fsmCommitCh chan commitTuple
69
-
70
- // fsmRestoreCh is used to trigger a restore from snapshot
71
- fsmRestoreCh chan * restoreFuture
68
+ // fsmMutateCh is used to send state-changing updates to the FSM. This
69
+ // receives pointers to commitTuple structures when applying logs or
70
+ // pointers to restoreFuture structures when restoring a snapshot. We
71
+ // need control over the order of these operations when doing user
72
+ // restores so that we finish applying any old log applies before we
73
+ // take a user snapshot on the leader, otherwise we might restore the
74
+ // snapshot and apply old logs to it that were in the pipe.
75
+ fsmMutateCh chan interface {}
72
76
73
77
// fsmSnapshotCh is used to trigger a new snapshot being taken
74
78
fsmSnapshotCh chan * reqSnapshotFuture
@@ -118,8 +122,12 @@ type Raft struct {
118
122
// snapshots is used to store and retrieve snapshots
119
123
snapshots SnapshotStore
120
124
121
- // snapshotCh is used for user triggered snapshots
122
- snapshotCh chan * snapshotFuture
125
+ // userSnapshotCh is used for user-triggered snapshots
126
+ userSnapshotCh chan * userSnapshotFuture
127
+
128
+ // userRestoreCh is used for user-triggered restores of external
129
+ // snapshots
130
+ userRestoreCh chan * userRestoreFuture
123
131
124
132
// stable is a StableStore implementation for durable state
125
133
// It provides stable storage for many fields in raftState
@@ -429,8 +437,7 @@ func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps Sna
429
437
applyCh : make (chan * logFuture ),
430
438
conf : * conf ,
431
439
fsm : fsm ,
432
- fsmCommitCh : make (chan commitTuple , 128 ),
433
- fsmRestoreCh : make (chan * restoreFuture ),
440
+ fsmMutateCh : make (chan interface {}, 128 ),
434
441
fsmSnapshotCh : make (chan * reqSnapshotFuture ),
435
442
leaderCh : make (chan bool ),
436
443
localID : localID ,
@@ -441,7 +448,8 @@ func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps Sna
441
448
configurations : configurations {},
442
449
rpcCh : trans .Consumer (),
443
450
snapshots : snaps ,
444
- snapshotCh : make (chan * snapshotFuture ),
451
+ userSnapshotCh : make (chan * userSnapshotFuture ),
452
+ userRestoreCh : make (chan * userRestoreFuture ),
445
453
shutdownCh : make (chan struct {}),
446
454
stable : stable ,
447
455
trans : trans ,
@@ -792,18 +800,78 @@ func (r *Raft) Shutdown() Future {
792
800
return & shutdownFuture {nil }
793
801
}
794
802
795
- // Snapshot is used to manually force Raft to take a snapshot.
796
- // Returns a future that can be used to block until complete.
797
- func (r * Raft ) Snapshot () Future {
798
- snapFuture := & snapshotFuture {}
799
- snapFuture .init ()
803
+ // Snapshot is used to manually force Raft to take a snapshot. Returns a future
804
+ // that can be used to block until complete, and that contains a function that
805
+ // can be used to open the snapshot.
806
+ func (r * Raft ) Snapshot () SnapshotFuture {
807
+ future := & userSnapshotFuture {}
808
+ future .init ()
809
+ select {
810
+ case r .userSnapshotCh <- future :
811
+ return future
812
+ case <- r .shutdownCh :
813
+ future .respond (ErrRaftShutdown )
814
+ return future
815
+ }
816
+ }
817
+
818
+ // Restore is used to manually force Raft to consume an external snapshot, such
819
+ // as if restoring from a backup. We will use the current Raft configuration,
820
+ // not the one from the snapshot, so that we can restore into a new cluster. We
821
+ // will also use the higher of the index of the snapshot, or the current index,
822
+ // and then add 1 to that, so we force a new state with a hole in the Raft log,
823
+ // so that the snapshot will be sent to followers and used for any new joiners.
824
+ // This can only be run on the leader, and returns a future that can be used to
825
+ // block until complete.
826
+ //
827
+ // WARNING! This operation has the leader take on the state of the snapshot and
828
+ // then sets itself up so that it replicates that to its followers through the
829
+ // install snapshot process. This involves a potentially dangerous period where
830
+ // the leader commits ahead of its followers, so should only be used for disaster
831
+ // recovery into a fresh cluster, and should not be used in normal operations.
832
+ func (r * Raft ) Restore (meta * SnapshotMeta , reader io.ReadCloser , timeout time.Duration ) Future {
833
+ metrics .IncrCounter ([]string {"raft" , "restore" }, 1 )
834
+ var timer <- chan time.Time
835
+ if timeout > 0 {
836
+ timer = time .After (timeout )
837
+ }
838
+
839
+ // Perform the restore.
840
+ restore := & userRestoreFuture {
841
+ meta : meta ,
842
+ reader : reader ,
843
+ }
844
+ restore .init ()
800
845
select {
801
- case r . snapshotCh <- snapFuture :
802
- return snapFuture
846
+ case <- timer :
847
+ return errorFuture { ErrEnqueueTimeout }
803
848
case <- r .shutdownCh :
804
849
return errorFuture {ErrRaftShutdown }
850
+ case r .userRestoreCh <- restore :
851
+ // If the restore is ingested then wait for it to complete.
852
+ if err := restore .Error (); err != nil {
853
+ return restore
854
+ }
805
855
}
806
856
857
+ // Apply a no-op log entry. Waiting for this allows us to wait until the
858
+ // followers have gotten the restore and replicated at least this new
859
+ // entry, which shows that we've also faulted and installed the
860
+ // snapshot with the contents of the restore.
861
+ noop := & logFuture {
862
+ log : Log {
863
+ Type : LogNoop ,
864
+ },
865
+ }
866
+ noop .init ()
867
+ select {
868
+ case <- timer :
869
+ return errorFuture {ErrEnqueueTimeout }
870
+ case <- r .shutdownCh :
871
+ return errorFuture {ErrRaftShutdown }
872
+ case r .applyCh <- noop :
873
+ return noop
874
+ }
807
875
}
808
876
809
877
// State is used to return the current raft state.
@@ -870,7 +938,7 @@ func (r *Raft) Stats() map[string]string {
870
938
"last_log_term" : toString (lastLogTerm ),
871
939
"commit_index" : toString (r .getCommitIndex ()),
872
940
"applied_index" : toString (r .getLastApplied ()),
873
- "fsm_pending" : toString (uint64 (len (r .fsmCommitCh ))),
941
+ "fsm_pending" : toString (uint64 (len (r .fsmMutateCh ))),
874
942
"last_snapshot_index" : toString (lastSnapIndex ),
875
943
"last_snapshot_term" : toString (lastSnapTerm ),
876
944
"protocol_version" : toString (uint64 (r .protocolVersion )),
0 commit comments