Commit 2f7f862

fix deadlock in plan_apply
The plan applier has to get a state snapshot with a minimum index for the plan it's working on in order to ensure consistency. Under heavy raft load, we can exceed the timeout for that snapshot. When this happens, we hit a bug where the plan applier blocks waiting on the `indexCh` forever, and all schedulers then block in `Plan.Submit`. Closing the `indexCh` when `asyncPlanWait` is done with it prevents the deadlock without impacting the correctness of the previous snapshot index.

This changeset includes a PoC failing test that works by injecting a large timeout into the state store. We need to turn this into a test we can run normally, without breaking the state store, before we can merge this PR.
1 parent 37ee500 commit 2f7f862
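For context (not part of the commit), the failure mode is the classic Go pattern of a receiver blocking on a channel whose sender can return early without sending. A minimal standalone sketch of the bug and the `defer close` fix, using illustrative names rather than Nomad's actual types:

```go
package main

import "fmt"

// doWork stands in for asyncPlanWait: it should deliver a raft index on
// ch, but an early error return can leave the receiver blocked forever.
// Illustrative names only; this is not Nomad's actual code.
func doWork(ch chan<- uint64, fail bool) {
	// The fix: close the channel on every return path so the receiver
	// always unblocks. A receive from a closed channel yields the zero
	// value immediately instead of blocking.
	defer close(ch)

	if fail {
		return // without the defer, the receive below would block forever
	}
	ch <- 42
}

func main() {
	ch := make(chan uint64)
	go doWork(ch, true)

	idx := <-ch // unblocks with 0 once doWork closes ch
	fmt.Println("got index", idx)
}
```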

3 files changed, +82 -3 lines changed

nomad/plan_apply.go (+4 -3)

```diff
@@ -153,12 +153,15 @@ func (p *planner) planApply() {
 		// Ensure any parallel apply is complete before starting the next one.
 		// This also limits how out of date our snapshot can be.
 		if planIndexCh != nil {
+			fmt.Println("waiting for idx...") // DEBUG
 			idx := <-planIndexCh
+			fmt.Println("got index", idx) // DEBUG
 			prevPlanResultIndex = max(prevPlanResultIndex, idx)
 			snap, err = p.snapshotMinIndex(prevPlanResultIndex, pending.plan.SnapshotIndex)
 			if err != nil {
 				p.logger.Error("failed to update snapshot state", "error", err)
 				pending.respond(nil, err)
+				planIndexCh = nil
 				continue
 			}
 		}
@@ -368,14 +371,12 @@ func updateAllocTimestamps(allocations []*structs.Allocation, timestamp int64) {
 func (p *planner) asyncPlanWait(indexCh chan<- uint64, future raft.ApplyFuture,
 	result *structs.PlanResult, pending *pendingPlan) {
 	defer metrics.MeasureSince([]string{"nomad", "plan", "apply"}, time.Now())
+	defer close(indexCh)
 
 	// Wait for the plan to apply
 	if err := future.Error(); err != nil {
 		p.logger.Error("failed to apply plan", "error", err)
 		pending.respond(nil, err)
-
-		// Close indexCh on error
-		close(indexCh)
 		return
 	}
 
```
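Two details in this hunk work together. A receive from the now-closed `planIndexCh` returns the zero value, so `max(prevPlanResultIndex, idx)` preserves the previous result index, which is why the commit message says the fix doesn't impact correctness. And nil-ing `planIndexCh` on the error path matters because the `if planIndexCh != nil` guard is what stops the next iteration from waiting on a channel whose plan has already failed. A small sketch of the channel semantics involved (standalone, assuming nothing beyond the Go spec):

```go
package main

import "fmt"

func main() {
	// A receive from a closed channel never blocks; it yields the zero
	// value and ok == false.
	ch := make(chan uint64)
	close(ch)
	idx, ok := <-ch
	fmt.Println(idx, ok) // 0 false

	// max(prev, 0) == prev, so a zero index leaves the previous
	// plan-result index untouched.
	prev := uint64(1234)
	if idx > prev {
		prev = idx
	}
	fmt.Println(prev) // 1234

	// Receiving from a nil channel blocks forever, which is why the
	// wait is guarded with a nil check and the variable is cleared
	// once the pending plan has failed.
	ch = nil
	if ch != nil {
		<-ch
	}
	fmt.Println("skipped the wait")
}
```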
nomad/plan_endpoint_test.go (+68)

```diff
@@ -1,6 +1,7 @@
 package nomad
 
 import (
+	"sync"
 	"testing"
 	"time"
 
@@ -9,6 +10,7 @@ import (
 	"github.com/hashicorp/nomad/nomad/mock"
 	"github.com/hashicorp/nomad/nomad/structs"
 	"github.com/hashicorp/nomad/testutil"
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 
@@ -128,3 +130,69 @@ func TestPlanEndpoint_Submit_Bad(t *testing.T) {
 	// Ensure no plans were enqueued
 	require.Zero(t, s1.planner.planQueue.Stats().Depth)
 }
+
+func TestPlanEndpoint_ApplyDeadlock(t *testing.T) {
+	t.Parallel()
+
+	s1, cleanupS1 := TestServer(t, func(c *Config) {
+		c.NumSchedulers = 0
+	})
+	defer cleanupS1()
+	testutil.WaitForLeader(t, s1.RPC)
+
+	plans := []*structs.Plan{}
+
+	for i := 0; i < 5; i++ {
+
+		// Create a node to place on
+		node := mock.Node()
+		store := s1.fsm.State()
+		require.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 100, node))
+
+		// Create the eval
+		eval1 := mock.Eval()
+		s1.evalBroker.Enqueue(eval1)
+		require.NoError(t, store.UpsertEvals(
+			structs.MsgTypeTestSetup, 150, []*structs.Evaluation{eval1}))
+
+		evalOut, token, err := s1.evalBroker.Dequeue([]string{eval1.Type}, time.Second)
+		require.NoError(t, err)
+		require.Equal(t, eval1, evalOut)
+
+		// Submit a plan
+		plan := mock.Plan()
+		plan.EvalID = eval1.ID
+		plan.EvalToken = token
+		plan.Job = mock.Job()
+
+		alloc := mock.Alloc()
+		alloc.JobID = plan.Job.ID
+		alloc.Job = plan.Job
+
+		plan.NodeAllocation = map[string][]*structs.Allocation{
+			node.ID: []*structs.Allocation{alloc}}
+
+		plans = append(plans, plan)
+	}
+
+	var wg sync.WaitGroup
+
+	for _, plan := range plans {
+		plan := plan
+		wg.Add(1)
+		go func() {
+
+			req := &structs.PlanRequest{
+				Plan:         plan,
+				WriteRequest: structs.WriteRequest{Region: "global"},
+			}
+			var resp structs.PlanResponse
+			err := s1.RPC("Plan.Submit", req, &resp)
+			assert.NoError(t, err)
+			assert.NotNil(t, resp.Result, "missing result")
+			wg.Done()
+		}()
+	}
+
+	wg.Wait()
+}
```
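As the commit message notes, this test only fails in combination with the one-shot timeout injected into the state store below: with that injection in place and without the `defer close(indexCh)` fix, one plan's snapshot wait times out, the applier blocks on `indexCh`, and the remaining `Plan.Submit` RPCs never return, so the test hangs at `wg.Wait()`.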

nomad/state/state_store.go (+10)

```diff
@@ -204,6 +204,9 @@ func (s *StateStore) Snapshot() (*StateSnapshot, error) {
 	return snap, nil
 }
 
+// DEBUG: this is to introduce a one-time timeout
+var stop = true
+
 // SnapshotMinIndex is used to create a state snapshot where the index is
 // guaranteed to be greater than or equal to the index parameter.
 //
@@ -222,6 +225,13 @@ func (s *StateStore) SnapshotMinIndex(ctx context.Context, index uint64) (*State
 	var retries uint
 	var retryTimer *time.Timer
 
+	// DEBUG: this is to introduce a one-time timeout
+	if index == 7 && stop {
+		stop = false
+		time.Sleep(6000 * time.Millisecond)
+		return nil, ctx.Err()
+	}
+
 	// XXX: Potential optimization is to set up a watch on the state
 	// store's index table and only unblock via a trigger rather than
 	// polling.
```
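The hard-coded `index == 7 && stop` branch is what the commit message means by "breaking the state store": it ships fault-injection logic in production code. One possible direction for making the injection test-only, sketched here with hypothetical names (this is not part of the commit), is a package-level hook that only tests assign:

```go
package state

import (
	"context"
	"time"
)

// snapshotDelayHook, if non-nil, would be called at the top of
// SnapshotMinIndex. Production code never sets it; tests can use it to
// inject latency. (Hypothetical name, not in this commit.)
var snapshotDelayHook func(ctx context.Context, index uint64) error

// installOneShotSnapshotDelay makes the next hooked call sleep past the
// caller's deadline and then return the context's error, matching what
// the DEBUG branch above simulates.
func installOneShotSnapshotDelay(d time.Duration) {
	snapshotDelayHook = func(ctx context.Context, index uint64) error {
		snapshotDelayHook = nil // fire only once
		time.Sleep(d)
		return ctx.Err()
	}
}
```

A real version would also need to synchronize access to the hook (a mutex or atomic), since the plan applier calls `SnapshotMinIndex` from a different goroutine than the test.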