
Commit 41c5318

proof of concept for injecting faults into FSM.Apply
This changeset is a proof of concept for a fault-injection interface in the `FSM.Apply` function. It would let us introduce timeouts or errors in unit tests by adding a `LogApplier` implementation to a map of `interceptionAppliers`, similar to how we currently register LogAppliers for the enterprise FSM functions. Most interception appliers are expected to then call the normal applier directly. This was developed initially for #13407 but can't be used to reproduce that particular bug; I'm opening this PR anyway for discussion about whether it's a worthwhile tool to have for testing in general.
1 parent 5e0964e commit 41c5318
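
As a rough usage sketch (not part of this changeset): besides the delay used in the test diff below, a test could register an interception applier that returns an error at a particular raft index and otherwise delegates to the normal applier. `TestServer`, the `errors` import, and the chosen index are illustrative assumptions here, not from the commit.

```go
// Sketch only, under the assumptions above; it would sit in the `nomad`
// package next to the existing FSM tests.
func TestFSM_InterceptionApplier_InjectError(t *testing.T) {
	s1, cleanupS1 := TestServer(t, nil)
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	// Fail a single plan apply, then delegate everything else to the normal
	// applier, as most interception appliers are expected to do.
	planApplyErrFn := func(buf []byte, index uint64) interface{} {
		if index == 8 { // hypothetical raft index chosen for the fault
			return errors.New("injected apply error")
		}
		return s1.fsm.applyPlanResults(structs.MsgTypeTestSetup, buf, index)
	}

	s1.fsm.interceptionAppliers = map[structs.MessageType]LogApplier{
		structs.ApplyPlanResultsRequestType: planApplyErrFn,
	}

	// ...submit plans as in TestPlanEndpoint_ApplyConcurrent and assert that
	// the injected error surfaces through the apply future.
}
```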

2 files changed: +32 -9 lines

nomad/fsm.go (+19 -9)
@@ -92,6 +92,10 @@ type nomadFSM struct {
 	// enterpriseRestorers holds the set of enterprise only snapshot restorers
 	enterpriseRestorers SnapshotRestorers
 
+	// interceptionAppliers holds a set of test-only LogAppliers
+	// used to intercept raft messages to inject faults
+	interceptionAppliers LogAppliers
+
 	// stateLock is only used to protect outside callers to State() from
 	// racing with Restore(), which is called by Raft (it puts in a totally
 	// new state store). Everything internal here is synchronized by the
@@ -153,15 +157,16 @@ func NewFSM(config *FSMConfig) (*nomadFSM, error) {
 	}
 
 	fsm := &nomadFSM{
-		evalBroker:          config.EvalBroker,
-		periodicDispatcher:  config.Periodic,
-		blockedEvals:        config.Blocked,
-		logger:              config.Logger.Named("fsm"),
-		config:              config,
-		state:               state,
-		timetable:           NewTimeTable(timeTableGranularity, timeTableLimit),
-		enterpriseAppliers:  make(map[structs.MessageType]LogApplier, 8),
-		enterpriseRestorers: make(map[SnapshotType]SnapshotRestorer, 8),
+		evalBroker:           config.EvalBroker,
+		periodicDispatcher:   config.Periodic,
+		blockedEvals:         config.Blocked,
+		logger:               config.Logger.Named("fsm"),
+		config:               config,
+		state:                state,
+		timetable:            NewTimeTable(timeTableGranularity, timeTableLimit),
+		enterpriseAppliers:   make(map[structs.MessageType]LogApplier, 8),
+		enterpriseRestorers:  make(map[SnapshotType]SnapshotRestorer, 8),
+		interceptionAppliers: make(map[structs.MessageType]LogApplier, 8),
 	}
 
 	// Register all the log applier functions
@@ -207,6 +212,11 @@ func (n *nomadFSM) Apply(log *raft.Log) interface{} {
 		ignoreUnknown = true
 	}
 
+	// Check interception message types.
+	if applier, ok := n.interceptionAppliers[msgType]; ok {
+		return applier(buf[1:], log.Index)
+	}
+
 	switch msgType {
 	case structs.NodeRegisterRequestType:
 		return n.applyUpsertNode(msgType, buf[1:], log.Index)

nomad/plan_endpoint_test.go (+13 -0)
@@ -1,6 +1,7 @@
 package nomad
 
 import (
+	"fmt"
 	"sync"
 	"testing"
 	"time"
@@ -140,6 +141,18 @@ func TestPlanEndpoint_ApplyConcurrent(t *testing.T) {
 	defer cleanupS1()
 	testutil.WaitForLeader(t, s1.RPC)
 
+	planApplyFn := func(buf []byte, index uint64) interface{} {
+		if index == 8 {
+			fmt.Println("introducing delay")
+			time.Sleep(6000 * time.Millisecond)
+		}
+		return s1.fsm.applyPlanResults(structs.MsgTypeTestSetup, buf, index)
+	}
+
+	s1.fsm.interceptionAppliers = map[structs.MessageType]LogApplier{
+		structs.ApplyPlanResultsRequestType: planApplyFn,
+	}
+
 	plans := []*structs.Plan{}
 
 	for i := 0; i < 5; i++ {
