Skip to content

Commit dd628be

Browse files
committed
[wip] CSI: unique volume per allocation
1 parent f270a44 commit dd628be

File tree

14 files changed

+70
-10
lines changed

14 files changed

+70
-10
lines changed

api/tasks.go

+1
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,7 @@ type VolumeRequest struct {
382382
Source string `hcl:"source,optional"`
383383
ReadOnly bool `hcl:"read_only,optional"`
384384
MountOptions *CSIMountOptions `hcl:"mount_options,block"`
385+
PerAlloc bool `hcl:"per_alloc,optional"`
385386
ExtraKeysHCL []string `hcl1:",unusedKeys,optional" json:"-"`
386387
}
387388

command/agent/job_endpoint.go

+1
Original file line numberDiff line numberDiff line change
@@ -944,6 +944,7 @@ func ApiTgToStructsTG(job *structs.Job, taskGroup *api.TaskGroup, tg *structs.Ta
944944
Type: v.Type,
945945
ReadOnly: v.ReadOnly,
946946
Source: v.Source,
947+
PerAlloc: v.PerAlloc,
947948
}
948949

949950
if v.MountOptions != nil {

nomad/structs/diff_test.go

+7
Original file line numberDiff line numberDiff line change
@@ -3639,6 +3639,7 @@ func TestTaskGroupDiff(t *testing.T) {
36393639
Type: "host",
36403640
Source: "foo-src",
36413641
ReadOnly: true,
3642+
PerAlloc: true,
36423643
},
36433644
},
36443645
},
@@ -3656,6 +3657,12 @@ func TestTaskGroupDiff(t *testing.T) {
36563657
Old: "",
36573658
New: "foo",
36583659
},
3660+
{
3661+
Type: DiffTypeAdded,
3662+
Name: "PerAlloc",
3663+
Old: "",
3664+
New: "true",
3665+
},
36593666
{
36603667
Type: DiffTypeAdded,
36613668
Name: "ReadOnly",

nomad/structs/funcs.go

+11
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,17 @@ func AllocName(job, group string, idx uint) string {
329329
return fmt.Sprintf("%s.%s[%d]", job, group, idx)
330330
}
331331

332+
// AllocSuffix returns the alloc index suffix that was added by the AllocName
333+
// function above: everything from the last "[" in name through the end of
// the string (for example "[3]" for "job.group[3]"). It returns the empty
// string when name contains no "[".
334+
func AllocSuffix(name string) string {
335+
// Use the last "[" so that a "[" appearing earlier in the name is not
// mistaken for the start of the index suffix.
idx := strings.LastIndex(name, "[")
336+
if idx == -1 {
337+
return ""
338+
}
339+
// The suffix keeps its surrounding brackets.
suffix := name[idx:]
340+
return suffix
341+
}
342+
332343
// ACLPolicyListHash returns a consistent hash for a set of policies.
333344
func ACLPolicyListHash(policies []*ACLPolicy) string {
334345
cacheKeyHash, err := blake2b.New256(nil)

nomad/structs/structs.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -6097,14 +6097,19 @@ func (tg *TaskGroup) Validate(j *Job) error {
60976097
mErr.Errors = append(mErr.Errors, fmt.Errorf("Only one task may be marked as leader"))
60986098
}
60996099

6100-
// Validate the Host Volumes
6100+
// Validate the volume requests
61016101
for name, decl := range tg.Volumes {
61026102
if !(decl.Type == VolumeTypeHost ||
61036103
decl.Type == VolumeTypeCSI) {
61046104
mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume %s has unrecognised type %s", name, decl.Type))
61056105
continue
61066106
}
61076107

6108+
if decl.PerAlloc && tg.Update != nil && tg.Update.Canary > 0 {
6109+
mErr.Errors = append(mErr.Errors,
6110+
fmt.Errorf("Volume %s cannot be per_alloc when canaries are in use", name))
6111+
}
6112+
61086113
if decl.Source == "" {
61096114
mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume %s has an empty source", name))
61106115
}

nomad/structs/structs_test.go

+22
Original file line numberDiff line numberDiff line change
@@ -1106,6 +1106,28 @@ func TestTaskGroup_Validate(t *testing.T) {
11061106
err = tg.Validate(&Job{})
11071107
require.Contains(t, err.Error(), `Volume foo has an empty source`)
11081108

1109+
tg = &TaskGroup{
1110+
Name: "group-a",
1111+
Update: &UpdateStrategy{
1112+
Canary: 1,
1113+
},
1114+
Volumes: map[string]*VolumeRequest{
1115+
"foo": {
1116+
Type: "csi",
1117+
PerAlloc: true,
1118+
},
1119+
},
1120+
Tasks: []*Task{
1121+
{
1122+
Name: "task-a",
1123+
Resources: &Resources{},
1124+
},
1125+
},
1126+
}
1127+
err = tg.Validate(&Job{})
1128+
require.Contains(t, err.Error(), `Volume foo has an empty source`)
1129+
require.Contains(t, err.Error(), `Volume foo cannot be per_alloc when canaries are in use`)
1130+
11091131
tg = &TaskGroup{
11101132
Volumes: map[string]*VolumeRequest{
11111133
"foo": {

nomad/structs/volumes.go

+1
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ type VolumeRequest struct {
9191
Source string
9292
ReadOnly bool
9393
MountOptions *CSIMountOptions
94+
PerAlloc bool
9495
}
9596

9697
func (v *VolumeRequest) Copy() *VolumeRequest {

scheduler/feasible.go

+11-3
Original file line numberDiff line numberDiff line change
@@ -227,15 +227,23 @@ func (c *CSIVolumeChecker) SetNamespace(namespace string) {
227227
c.namespace = namespace
228228
}
229229

230-
func (c *CSIVolumeChecker) SetVolumes(volumes map[string]*structs.VolumeRequest) {
230+
// SetVolumes replaces the checker's volume set with the CSI requests found in
// volumes, keyed by the same alias. Requests whose Type is not
// structs.VolumeTypeCSI are dropped. For a request with PerAlloc set, a copy
// is stored whose Source has structs.AllocSuffix(allocName) appended; every
// other CSI request is stored as-is.
func (c *CSIVolumeChecker) SetVolumes(allocName string, volumes map[string]*structs.VolumeRequest) {
231+
231232
xs := make(map[string]*structs.VolumeRequest)
233+
232234
// Filter to only CSI Volumes
233235
for alias, req := range volumes {
234236
if req.Type != structs.VolumeTypeCSI {
235237
continue
236238
}
237-
238-
xs[alias] = req
239+
if req.PerAlloc {
240+
// provide a unique volume source per allocation: the source name is
// extended with the allocation's index suffix taken from allocName.
// NOTE(review): the request is copied first, presumably so the
// task group's own VolumeRequest is not mutated -- confirm that
// Copy returns an independent value.
241+
copied := req.Copy()
242+
copied.Source = copied.Source + structs.AllocSuffix(allocName)
243+
xs[alias] = copied
244+
} else {
245+
xs[alias] = req
246+
}
239247
}
240248
// Swap in the filtered set, discarding whatever was set previously.
c.volumes = xs
241249
}

scheduler/feasible_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ func TestCSIVolumeChecker(t *testing.T) {
390390
}
391391

392392
for i, c := range cases {
393-
checker.SetVolumes(c.RequestedVolumes)
393+
checker.SetVolumes("group.task[0]", c.RequestedVolumes)
394394
if act := checker.Feasible(c.Node); act != c.Result {
395395
t.Fatalf("case(%d) failed: got %v; want %v", i, act, c.Result)
396396
}

scheduler/generic_sched.go

+1
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,7 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul
547547

548548
// Compute penalty nodes for rescheduled allocs
549549
selectOptions := getSelectOptions(prevAllocation, preferredNode)
550+
selectOptions.AllocName = missing.Name()
550551
option := s.selectNextOption(tg, selectOptions)
551552

552553
// Store the available nodes by datacenter

scheduler/stack.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ type SelectOptions struct {
3535
PenaltyNodeIDs map[string]struct{}
3636
PreferredNodes []*structs.Node
3737
Preempt bool
38+
AllocName string
3839
}
3940

4041
// GenericStack is the Stack used for the Generic scheduler. It is
@@ -143,7 +144,7 @@ func (s *GenericStack) Select(tg *structs.TaskGroup, options *SelectOptions) *Ra
143144
s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
144145
s.taskGroupDevices.SetTaskGroup(tg)
145146
s.taskGroupHostVolumes.SetVolumes(tg.Volumes)
146-
s.taskGroupCSIVolumes.SetVolumes(tg.Volumes)
147+
s.taskGroupCSIVolumes.SetVolumes(options.AllocName, tg.Volumes)
147148
if len(tg.Networks) > 0 {
148149
s.taskGroupNetwork.SetNetwork(tg.Networks[0])
149150
}
@@ -297,7 +298,7 @@ func (s *SystemStack) Select(tg *structs.TaskGroup, options *SelectOptions) *Ran
297298
s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
298299
s.taskGroupDevices.SetTaskGroup(tg)
299300
s.taskGroupHostVolumes.SetVolumes(tg.Volumes)
300-
s.taskGroupCSIVolumes.SetVolumes(tg.Volumes)
301+
s.taskGroupCSIVolumes.SetVolumes(options.AllocName, tg.Volumes)
301302
if len(tg.Networks) > 0 {
302303
s.taskGroupNetwork.SetNetwork(tg.Networks[0])
303304
}

scheduler/system_sched.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error {
284284
s.stack.SetNodes(nodes)
285285

286286
// Attempt to match the task group
287-
option := s.stack.Select(missing.TaskGroup, nil)
287+
option := s.stack.Select(missing.TaskGroup, &SelectOptions{AllocName: missing.Name})
288288

289289
if option == nil {
290290
// If the task can't be placed on this node, update reporting data

scheduler/util.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -695,7 +695,8 @@ func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job,
695695
ctx.Plan().AppendStoppedAlloc(update.Alloc, allocInPlace, "", "")
696696

697697
// Attempt to match the task group
698-
option := stack.Select(update.TaskGroup, nil) // This select only looks at one node so we don't pass selectOptions
698+
option := stack.Select(update.TaskGroup,
699+
&SelectOptions{AllocName: update.Alloc.Name})
699700

700701
// Pop the allocation
701702
ctx.Plan().PopUpdate(update.Alloc)
@@ -977,7 +978,7 @@ func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateTy
977978
ctx.Plan().AppendStoppedAlloc(existing, allocInPlace, "", "")
978979

979980
// Attempt to match the task group
980-
option := stack.Select(newTG, nil) // This select only looks at one node so we don't pass selectOptions
981+
option := stack.Select(newTG, &SelectOptions{AllocName: existing.Name})
981982

982983
// Pop the allocation
983984
ctx.Plan().PopUpdate(existing)

vendor/github.com/hashicorp/nomad/api/tasks.go

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)