Skip to content

Commit 720513f

Browse files
committed
scheduler: prevent spurious placement on reconnect
When a client reconnects it makes two independent RPC calls: - `Node.UpdateStatus` to heartbeat and set its status as `ready`. - `Node.UpdateAlloc` to update the status of its allocations. These two calls can happen in any order, and in case the allocations are updated before a heartbeat it causes the state to be the same as a node being disconnected: the node status will still be `disconnected` while the allocation `ClientStatus` is set to `running`. The current implementation did not handle this order of events properly, and the scheduler would create an unnecessary placement since it considered the allocation was being disconnected. This extra allocation would then be quickly stopped by the heartbeat eval. This commit adds a new code path to handle this order of events. If the node is `disconnected` and the allocation `ClientStatus` is `running` the scheduler will check if the allocation is actually reconnecting using its `AllocState` events.
1 parent 882904b commit 720513f

File tree

5 files changed

+131
-0
lines changed

5 files changed

+131
-0
lines changed

scheduler/reconcile_test.go

+29
Original file line numberDiff line numberDiff line change
@@ -5608,6 +5608,35 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
56085608
},
56095609
},
56105610
},
5611+
{
5612+
// This case tests when the disconnected client reconnects but
5613+
// sends a Node.UpdateAlloc RPC before a Node.UpdateStatus
5614+
// heartbeat, setting the ClientStatus for the previously
5615+
// disconnected allocs to "running" before the node status is set
5616+
// to "ready".
5617+
name: "reconnect-with-alloc-update-before-heartbeat",
5618+
allocCount: 5,
5619+
replace: true,
5620+
disconnectedAllocCount: 2,
5621+
disconnectedAllocStatus: structs.AllocClientStatusRunning,
5622+
disconnectedAllocStates: []*structs.AllocState{{
5623+
Field: structs.AllocStateFieldClientStatus,
5624+
Value: structs.AllocClientStatusUnknown,
5625+
Time: time.Now(),
5626+
}},
5627+
serverDesiredStatus: structs.AllocDesiredStatusRun,
5628+
nodeStatusDisconnected: true,
5629+
expected: &resultExpectation{
5630+
stop: 2,
5631+
reconnectUpdates: 2,
5632+
desiredTGUpdates: map[string]*structs.DesiredUpdates{
5633+
"web": {
5634+
Stop: 2, // stop the 2 replacements
5635+
Ignore: 5, // ignore other allocs since they are all still running
5636+
},
5637+
},
5638+
},
5639+
},
56115640
}
56125641

56135642
for _, tc := range testCases {

scheduler/reconcile_util.go

+9
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,15 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
262262
if supportsDisconnectedClients {
263263
// Filter running allocs on a node that is disconnected to be marked as unknown.
264264
if alloc.ClientStatus == structs.AllocClientStatusRunning {
265+
// Handle the case where the client updates its allocs
266+
// before sending a heartbeat. The alloc will be set to
267+
// running, but the client is still considered to be
268+
// disconnected.
269+
if reconnect {
270+
reconnecting[alloc.ID] = alloc
271+
continue
272+
}
273+
265274
disconnecting[alloc.ID] = alloc
266275
continue
267276
}

scheduler/reconcile_util_test.go

+63
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,69 @@ func TestAllocSet_filterByTainted(t *testing.T) {
783783
ignore: allocSet{},
784784
lost: allocSet{},
785785
},
786+
{
787+
// On reconnect the node may send a Node.UpdateAlloc RPC before it
788+
// heartbeats, so the alloc ClientStatus changes from "unknown" to
789+
// "running" before the client itself is marked as "ready".
790+
// This scenario is similar to a client disconnecting for the first
791+
// time, but we _should not_ create a replacement when the client
792+
// reconnects.
793+
name: "disco-client-no-placement-on-reconnect",
794+
supportsDisconnectedClients: true,
795+
now: time.Now(),
796+
taintedNodes: nodes,
797+
skipNilNodeTest: false,
798+
all: allocSet{
799+
"running-replacement": {
800+
ID: "running-replacement",
801+
Name: "web",
802+
ClientStatus: structs.AllocClientStatusRunning,
803+
DesiredStatus: structs.AllocDesiredStatusRun,
804+
Job: testJob,
805+
NodeID: "normal",
806+
TaskGroup: "web",
807+
PreviousAllocation: "running-original",
808+
},
809+
"running-original": {
810+
ID: "running-original",
811+
Name: "web",
812+
ClientStatus: structs.AllocClientStatusRunning,
813+
DesiredStatus: structs.AllocDesiredStatusRun,
814+
Job: testJob,
815+
NodeID: "disconnected",
816+
TaskGroup: "web",
817+
AllocStates: unknownAllocState,
818+
},
819+
},
820+
untainted: allocSet{
821+
"running-replacement": {
822+
ID: "running-replacement",
823+
Name: "web",
824+
ClientStatus: structs.AllocClientStatusRunning,
825+
DesiredStatus: structs.AllocDesiredStatusRun,
826+
Job: testJob,
827+
NodeID: "normal",
828+
TaskGroup: "web",
829+
PreviousAllocation: "running-original",
830+
},
831+
},
832+
migrate: allocSet{},
833+
disconnecting: allocSet{},
834+
reconnecting: allocSet{
835+
"running-original": {
836+
ID: "running-original",
837+
Name: "web",
838+
ClientStatus: structs.AllocClientStatusRunning,
839+
DesiredStatus: structs.AllocDesiredStatusRun,
840+
Job: testJob,
841+
NodeID: "disconnected",
842+
TaskGroup: "web",
843+
AllocStates: unknownAllocState,
844+
},
845+
},
846+
ignore: allocSet{},
847+
lost: allocSet{},
848+
},
786849
{
787850
// After an alloc is reconnected, it should be considered
788851
// "untainted" instead of "reconnecting" to allow changes such as

scheduler/util.go

+14
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,20 @@ func diffSystemAllocsForNode(
195195
node.Status == structs.NodeStatusDisconnected &&
196196
exist.ClientStatus == structs.AllocClientStatusRunning {
197197

198+
// Handle the case where the client updates its allocs before
199+
// sending a heartbeat. The alloc will be set to running, but
200+
// the client is still considered to be disconnected.
201+
if reconnect {
202+
reconnecting := exist.Copy()
203+
reconnecting.AppendState(structs.AllocStateFieldClientStatus, exist.ClientStatus)
204+
result.reconnecting = append(result.reconnecting, allocTuple{
205+
Name: name,
206+
TaskGroup: tg,
207+
Alloc: reconnecting,
208+
})
209+
continue
210+
}
211+
198212
disconnect := exist.Copy()
199213
disconnect.ClientStatus = structs.AllocClientStatusUnknown
200214
disconnect.AppendState(structs.AllocStateFieldClientStatus, structs.AllocClientStatusUnknown)

scheduler/util_test.go

+16
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,22 @@ func TestDiffSystemAllocsForNode_DisconnectedNode(t *testing.T) {
355355
reconnecting: 1,
356356
},
357357
},
358+
{
359+
name: "disconnected alloc reconnects even when AllocUpdate happens before heartbeat",
360+
node: disconnectedNode,
361+
allocFn: func(alloc *structs.Allocation) {
362+
alloc.ClientStatus = structs.AllocClientStatusRunning
363+
364+
alloc.AllocStates = []*structs.AllocState{{
365+
Field: structs.AllocStateFieldClientStatus,
366+
Value: structs.AllocClientStatusUnknown,
367+
Time: time.Now().Add(-time.Minute),
368+
}}
369+
},
370+
expect: diffResultCount{
371+
reconnecting: 1,
372+
},
373+
},
358374
{
359375
name: "alloc not reconnecting after it reconnects",
360376
node: readyNode,

0 commit comments

Comments
 (0)