Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

networking: Inject implicit constraints on CNI plugins when using bridge mode #15473

Merged
merged 4 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/15473.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
networking: Inject constraints on CNI plugins when using bridge networking
```
3 changes: 2 additions & 1 deletion client/allocrunner/networking_bridge_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ func buildNomadBridgeNetConfig(b bridgeNetworkConfigurator) []byte {
}

// Update website/content/docs/networking/cni.mdx when the bridge configuration
// is modified.
// is modified. If CNI plugins are added or versions need to be updated for new
// fields, add a new constraint to nomad/job_endpoint_hooks.go
const nomadCNIConfigTemplate = `{
"cniVersion": "0.4.0",
"name": "nomad",
Expand Down
68 changes: 67 additions & 1 deletion nomad/job_endpoint_hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,24 @@ import (
"golang.org/x/exp/maps"
)

// Interpolation targets for node attributes that are acquired via
// fingerprinting.
const (
	// Versions and capabilities of Nomad and its integrations.
	attrVaultVersion      = "${attr.vault.version}"
	attrConsulVersion     = "${attr.consul.version}"
	attrNomadVersion      = "${attr.nomad.version}"
	attrNomadServiceDisco = "${attr.nomad.service_discovery}"

	// Versions of the individual CNI plugins.
	attrBridgeCNI    = "${attr.plugins.cni.version.bridge}"
	attrFirewallCNI  = "${attr.plugins.cni.version.firewall}"
	attrHostLocalCNI = "${attr.plugins.cni.version.host-local}"
	attrLoopbackCNI  = "${attr.plugins.cni.version.loopback}"
	attrPortMapCNI   = "${attr.plugins.cni.version.portmap}"
)

// cniMinVersion is the version expression for the minimum CNI version supported
// for the CNI container-networking plugins. It is used as the right-hand side
// of the semver constraints placed on each fingerprinted CNI plugin version.
// Support was added at v0.4.0, so we set the minimum to that.
const cniMinVersion = ">= 0.4.0"

var (
// vaultConstraint is the implicit constraint added to jobs requesting a
// Vault token
Expand Down Expand Up @@ -78,6 +89,51 @@ var (
RTarget: "linux",
Operand: "=",
}

// cniBridgeConstraint is an implicit constraint added to jobs making use
// of bridge networking mode. It requires the node's fingerprinted "bridge"
// CNI plugin version to satisfy cniMinVersion; this is one of the CNI
// plugins used to support bridge networking.
cniBridgeConstraint = &structs.Constraint{
LTarget: attrBridgeCNI,
RTarget: cniMinVersion,
Operand: structs.ConstraintSemver,
}

// cniFirewallConstraint is an implicit constraint added to jobs making use
// of bridge networking mode. It requires the node's fingerprinted "firewall"
// CNI plugin version to satisfy cniMinVersion; this is one of the CNI
// plugins used to support bridge networking.
cniFirewallConstraint = &structs.Constraint{
LTarget: attrFirewallCNI,
RTarget: cniMinVersion,
Operand: structs.ConstraintSemver,
}

// cniHostLocalConstraint is an implicit constraint added to jobs making use
// of bridge networking mode. It requires the node's fingerprinted
// "host-local" CNI plugin version to satisfy cniMinVersion; this is one of
// the CNI plugins used to support bridge networking.
cniHostLocalConstraint = &structs.Constraint{
LTarget: attrHostLocalCNI,
RTarget: cniMinVersion,
Operand: structs.ConstraintSemver,
}

// cniLoopbackConstraint is an implicit constraint added to jobs making use
// of bridge networking mode. It requires the node's fingerprinted "loopback"
// CNI plugin version to satisfy cniMinVersion; this is one of the CNI
// plugins used to support bridge networking.
cniLoopbackConstraint = &structs.Constraint{
LTarget: attrLoopbackCNI,
RTarget: cniMinVersion,
Operand: structs.ConstraintSemver,
}

// cniPortMapConstraint is an implicit constraint added to jobs making use
// of bridge networking mode. It requires the node's fingerprinted "portmap"
// CNI plugin version to satisfy cniMinVersion; this is one of the CNI
// plugins used to support bridge networking.
cniPortMapConstraint = &structs.Constraint{
LTarget: attrPortMapCNI,
RTarget: cniMinVersion,
Operand: structs.ConstraintSemver,
}
)

type admissionController interface {
Expand Down Expand Up @@ -192,12 +248,14 @@ func (jobImpliedConstraints) Mutate(j *structs.Job) (*structs.Job, []error, erro
// Identify which task groups are utilizing NUMA resources.
numaTaskGroups := j.RequiredNUMA()

bridgeNetworkingTaskGroups := j.RequiredBridgeNetwork()

// Hot path where none of our things require constraints.
//
// [UPDATE THIS] if you are adding a new constraint thing!
if len(signals) == 0 && len(vaultBlocks) == 0 &&
nativeServiceDisco.Empty() && len(consulServiceDisco) == 0 &&
numaTaskGroups.Empty() {
numaTaskGroups.Empty() && bridgeNetworkingTaskGroups.Empty() {
return j, nil, nil
}

Expand Down Expand Up @@ -254,6 +312,14 @@ func (jobImpliedConstraints) Mutate(j *structs.Job) (*structs.Job, []error, erro
}
}
}

if bridgeNetworkingTaskGroups.Contains(tg.Name) {
mutateConstraint(constraintMatcherLeft, tg, cniBridgeConstraint)
mutateConstraint(constraintMatcherLeft, tg, cniFirewallConstraint)
mutateConstraint(constraintMatcherLeft, tg, cniHostLocalConstraint)
mutateConstraint(constraintMatcherLeft, tg, cniLoopbackConstraint)
mutateConstraint(constraintMatcherLeft, tg, cniPortMapConstraint)
}
}

return j, nil, nil
Expand Down
34 changes: 34 additions & 0 deletions nomad/job_endpoint_hooks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1160,6 +1160,40 @@ func Test_jobImpliedConstraints_Mutate(t *testing.T) {
expectedOutputWarnings: nil,
expectedOutputError: nil,
},
{
inputJob: &structs.Job{
Name: "example",
TaskGroups: []*structs.TaskGroup{
{
Name: "group-with-bridge",
Networks: []*structs.NetworkResource{
{Mode: "bridge"},
},
},
},
},
expectedOutputJob: &structs.Job{
Name: "example",
TaskGroups: []*structs.TaskGroup{
{
Name: "group-with-bridge",
Networks: []*structs.NetworkResource{
{Mode: "bridge"},
},
Constraints: []*structs.Constraint{
cniBridgeConstraint,
cniFirewallConstraint,
cniHostLocalConstraint,
cniLoopbackConstraint,
cniPortMapConstraint,
},
},
},
},
expectedOutputWarnings: nil,
expectedOutputError: nil,
name: "task group with bridge network",
},
}

for _, tc := range testCases {
Expand Down
12 changes: 12 additions & 0 deletions nomad/structs/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,15 @@ func (j *Job) RequiredNUMA() set.Collection[string] {
}
return result
}

// RequiredBridgeNetwork returns the names of the task groups in the job that
// have at least one network block whose mode is "bridge". The returned
// collection is empty when no task group requests bridge networking.
func (j *Job) RequiredBridgeNetwork() set.Collection[string] {
	groups := set.New[string](len(j.TaskGroups))
	for _, group := range j.TaskGroups {
		// Skip groups whose networks never ask for bridge mode.
		if !group.Networks.Modes().Contains("bridge") {
			continue
		}
		groups.Insert(group.Name)
	}
	return groups
}
7 changes: 7 additions & 0 deletions nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -3006,6 +3006,13 @@ func (ns Networks) NetIndex(n *NetworkResource) int {
return -1
}

// Modes collects the Mode of every NetworkResource in ns into a set, so each
// distinct network mode appears exactly once in the result.
func (ns Networks) Modes() *set.Set[string] {
	modes := set.New[string](len(ns))
	for _, nr := range ns {
		modes.Insert(nr.Mode)
	}
	return modes
}

// RequestedDevice is used to request a device for a task.
type RequestedDevice struct {
// Name is the request name. The possible values are as follows:
Expand Down
18 changes: 16 additions & 2 deletions website/content/docs/upgrade/upgrade-specific.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,26 @@ their upgrades as a result of new features or changed behavior. This page is
used to document those details separately from the standard upgrade flow.

## Nomad 1.8.0

#### Deprecated Disconnect Fields

Nomad 1.8.0 introduces a `disconnect` block meant to group all the configuration
options related to disconnected client's and server's behavior, causing the
deprecation of the fileds `stop_after_client_disconnect`, `max_client_disconnect`
options related to disconnected client's and server's behavior, causing the
deprecation of the fields `stop_after_client_disconnect`, `max_client_disconnect`
and `prevent_reschedule_on_lost`. This block also introduces new options for
allocations reconciliation if the client regains connectivity.

#### CNI Constraints

In Nomad 1.8.0, jobs with `bridge` networking will have constraints added during
job submission that require the CNI plugins to be present on the node. Nodes
have fingerprinted the available CNI plugins since Nomad 1.5.0.

If you are upgrading from Nomad 1.5.0 or later to 1.8.0 or later, there's
nothing additional for you to do. Skipping more than 2 versions of Nomad is not
recommended, but if you upgrade from a version earlier than 1.5.0 to 1.8.0 or
later, you will need to ensure that clients have been upgraded before
submitting any jobs that use `bridge` networking.

#### Removal of `raw_exec` option `no_cgroups`

Expand Down
Loading