@@ -297,16 +297,18 @@ func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64,
297
297
}
298
298
299
299
// If the eval is from a running "batch" job we don't want to garbage
300
- // collect its allocations. If there is a long running batch job and its
301
- // terminal allocations get GC'd the scheduler would re-run the
302
- // allocations .
300
+ // collect its most current allocations. If there is a long running batch job and its
301
+ // terminal allocations get GC'd the scheduler would re-run the allocations. However,
302
+ // we do want to GC old Evals and Allocs if there are newer ones due to an update.
303
303
if eval .Type == structs .JobTypeBatch {
304
304
// Check if the job is running
305
305
306
- // Can collect if:
307
- // Job doesn't exist
308
- // Job is Stopped and dead
309
- // allowBatch and the job is dead
306
+ // Can collect if any of the following holds:
307
+ // - Job doesn't exist
308
+ // - Job is Stopped and dead
309
+ // - allowBatch and the job is dead
310
+ //
311
+ // If we cannot collect outright, check if a partial GC may occur
310
312
collect := false
311
313
if job == nil {
312
314
collect = true
@@ -318,12 +320,9 @@ func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64,
318
320
collect = true
319
321
}
320
322
321
- // We don't want to gc anything related to a job which is not dead
322
- // If the batch job doesn't exist we can GC it regardless of allowBatch
323
323
if ! collect {
324
- // Find allocs associated with older (based on createindex) and GC them if terminal
325
- oldAllocs := olderVersionTerminalAllocs (allocs , job )
326
- return false , oldAllocs , nil
324
+ oldAllocs , gcEval := olderVersionTerminalAllocs (allocs , job , thresholdIndex )
325
+ return gcEval , oldAllocs , nil
327
326
}
328
327
}
329
328
@@ -344,16 +343,20 @@ func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64,
344
343
return gcEval , gcAllocIDs , nil
345
344
}
346
345
347
- // olderVersionTerminalAllocs returns terminal allocations whose job create index
348
- // is older than the job's create index
349
- func olderVersionTerminalAllocs (allocs []* structs.Allocation , job * structs.Job ) []string {
346
+ // olderVersionTerminalAllocs returns a tuple ([]string, bool). The first element is the list of
347
+ // terminal allocations which may be garbage collected for batch jobs. The second element indicates
348
+ // whether or not the evaluation itself may be garbage collected.
349
+ func olderVersionTerminalAllocs (allocs []* structs.Allocation , job * structs.Job , thresholdIndex uint64 ) ([]string , bool ) {
350
350
var ret []string
351
+ var mayGCEval = true
351
352
for _ , alloc := range allocs {
352
- if alloc .Job != nil && alloc .Job . CreateIndex < job . CreateIndex && alloc .TerminalStatus () {
353
+ if alloc .CreateIndex < job . JobModifyIndex && alloc .ModifyIndex < thresholdIndex && alloc .TerminalStatus () {
353
354
ret = append (ret , alloc .ID )
355
+ } else {
356
+ mayGCEval = false
354
357
}
355
358
}
356
- return ret
359
+ return ret , mayGCEval
357
360
}
358
361
359
362
// evalReap contacts the leader and issues a reap on the passed evals and
@@ -1121,4 +1124,4 @@ func (c *CoreScheduler) getOldestAllocationIndex() (uint64, error) {
1121
1124
}
1122
1125
}
1123
1126
return 0 , nil
1124
- }
1127
+ }
0 commit comments