Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

oci: cgroupsv2 namespace / mount handling #3542

Merged
merged 1 commit into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@

- Skip attempting to bind inaccessible mount points when handling the
`mount hostfs = yes` configuration option.
- In OCI mode, on a cgroups v2 system with functioning systemd cgroup
management, a cgroup namespace is created for the container, and
`/sys/fs/cgroup` is mounted. The cgroups mount is read-only by default, or
read-write if the `--keep-privs` flag is used.
- In OCI mode, a cgroup is now created for the container when possible, even
where resource limits have not been requested.

### Bug Fixes

Expand Down
8 changes: 7 additions & 1 deletion e2e/cgroups/cgroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,13 @@ func (c *ctx) actionFlagV2(t *testing.T, tt resourceFlagTest, profile e2e.Profil
shellCmd := fmt.Sprintf("cat /sys/fs/cgroup$(cat /proc/self/cgroup | grep '^0::' | cut -d ':' -f 3)/%s", tt.resourceV2)

args := tt.args
args = append(args, "-B", "/sys/fs/cgroup", imageRef, "/bin/sh", "-c", shellCmd)
// OCI-mode with v2 cgroups will have a namespaced cgroup mount in the
// container. In other flows we need to bind from the host to see the
// cgroups tree.
if !profile.OCI() {
args = append(args, "-B", "/sys/fs/cgroup")
}
args = append(args, imageRef, "/bin/sh", "-c", shellCmd)

c.env.RunSingularity(
t,
Expand Down
3 changes: 1 addition & 2 deletions internal/pkg/cgroups/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"strconv"
"strings"

"github.com/opencontainers/runc/libcontainer/cgroups"
lccgroups "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/sylabs/singularity/v4/internal/pkg/util/fs"
"github.com/sylabs/singularity/v4/internal/pkg/util/rootless"
Expand Down Expand Up @@ -149,7 +148,7 @@ func CanUseCgroups(systemd bool, warn bool) bool {

rootlessOK := true

if !cgroups.IsCgroup2UnifiedMode() {
if !lccgroups.IsCgroup2UnifiedMode() {
rootlessOK = false
if warn {
sylog.Warningf("Rootless cgroups require the system to be configured for cgroups v2 in unified mode.")
Expand Down
31 changes: 25 additions & 6 deletions internal/pkg/runtime/launcher/oci/launcher_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

"github.com/ccoveille/go-safecast"
"github.com/google/uuid"
lccgroups "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/samber/lo"
"github.com/sylabs/singularity/v4/internal/pkg/buildcfg"
Expand Down Expand Up @@ -87,6 +88,10 @@ type Launcher struct {
// defaultTmpMountIndices contains the indices of mounts added by
// addTmpMounts() within the spec.Mounts slice.
defaultTmpMountIndices []int
// cgroupsSupport indicates if cgroups management is supported
cgroupsSupport bool
// cgroupsV2 indicates if the system is running cgroups v2
cgroupsV2 bool
}

// NewLauncher returns an oci.Launcher with an initial configuration set by opts.
Expand Down Expand Up @@ -123,6 +128,9 @@ func NewLauncher(opts ...launcher.Option) (*Launcher, error) {
lo.WritableTmpfs = false
}

cgroupsv2 := lccgroups.IsCgroup2UnifiedMode()
cgroupsSupport := cgroups.CanUseCgroups(c.SystemdCgroups, true)

return &Launcher{
cfg: lo,
singularityConf: c,
Expand All @@ -131,6 +139,8 @@ func NewLauncher(opts ...launcher.Option) (*Launcher, error) {
homeDest: homeDest,
imageMountsByImagePath: make(map[string]*fuse.ImageMount),
imageMountsByMountpoint: make(map[string]*fuse.ImageMount),
cgroupsV2: cgroupsv2,
cgroupsSupport: cgroupsSupport,
}, nil
}

Expand Down Expand Up @@ -263,7 +273,7 @@ func (l *Launcher) createSpec() (spec *specs.Spec, err error) {
// inferred by default in OCI mode. See NewLauncher().
spec.Root.Readonly = !l.cfg.WritableTmpfs && !l.cfg.Writable

err = addNamespaces(spec, l.cfg.Namespaces)
err = l.addNamespaces(spec, l.cfg.Namespaces)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -827,8 +837,7 @@ func (l *Launcher) RunWrapped(ctx context.Context, containerID, bundlePath, pidF
// If singularity.conf is set to use systemd for cgroup management, but
// we cannot due to a faulty configuration / environment (e.g. no D-Bus),
// don't ask runc/crun to use systemd.
if systemdCgroups && !cgroups.CanUseCgroups(true, false) {
sylog.Infof("System configuration does not support cgroup management - starting container in current cgroup")
if systemdCgroups && !l.cgroupsSupport {
systemdCgroups = false
}

Expand All @@ -846,15 +855,25 @@ func (l *Launcher) RunWrapped(ctx context.Context, containerID, bundlePath, pidF

// getCgroup will return a cgroup path and resources for the runtime to create.
func (l *Launcher) getCgroup() (path string, resources *specs.LinuxResources, err error) {
if l.cfg.CGroupsJSON == "" {
// We can't create a cgroup, but we don't have any resource limits to apply.
// Run in the current cgroup.
if !l.cgroupsSupport && l.cfg.CGroupsJSON == "" {
sylog.Infof("System configuration does not support cgroup management - starting container in current cgroup")
return "", nil, nil
}

if !cgroups.CanUseCgroups(l.singularityConf.SystemdCgroups, true) {
// We can't create a cgroup, and we have been asked to apply resource limits.
// Fatal error - requested limits can't be applied
if !l.cgroupsSupport && l.cfg.CGroupsJSON != "" {
return "", nil, fmt.Errorf("system configuration does not support cgroup management")
}

path = cgroups.DefaultPathForPid(l.singularityConf.SystemdCgroups, -1)
resources = &specs.LinuxResources{}

if l.cfg.CGroupsJSON == "" {
return path, resources, nil
}

resources, err = cgroups.UnmarshalJSONResources(l.cfg.CGroupsJSON)
if err != nil {
return "", nil, err
Expand Down
15 changes: 15 additions & 0 deletions internal/pkg/runtime/launcher/oci/launcher_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"reflect"
"testing"

lccgroups "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/sylabs/singularity/v4/internal/pkg/cgroups"
"github.com/sylabs/singularity/v4/internal/pkg/runtime/launcher"
"github.com/sylabs/singularity/v4/internal/pkg/test"
"github.com/sylabs/singularity/v4/internal/pkg/util/fs/fuse"
Expand All @@ -31,6 +33,9 @@ func TestNewLauncher(t *testing.T) {
t.Fatalf("while getting current user: %s", err)
}

cgroupsV2 := lccgroups.IsCgroup2UnifiedMode()
cgroupsSupport := cgroups.CanUseCgroups(sc.SystemdCgroups, false)

tests := []struct {
name string
opts []launcher.Option
Expand All @@ -47,6 +52,8 @@ func TestNewLauncher(t *testing.T) {
homeDest: u.HomeDir,
imageMountsByImagePath: make(map[string]*fuse.ImageMount),
imageMountsByMountpoint: make(map[string]*fuse.ImageMount),
cgroupsV2: cgroupsV2,
cgroupsSupport: cgroupsSupport,
},
},
{
Expand All @@ -62,6 +69,8 @@ func TestNewLauncher(t *testing.T) {
homeDest: "/home/dest",
imageMountsByImagePath: make(map[string]*fuse.ImageMount),
imageMountsByMountpoint: make(map[string]*fuse.ImageMount),
cgroupsV2: cgroupsV2,
cgroupsSupport: cgroupsSupport,
},
wantErr: false,
},
Expand All @@ -78,6 +87,8 @@ func TestNewLauncher(t *testing.T) {
homeDest: "/home/dest",
imageMountsByImagePath: make(map[string]*fuse.ImageMount),
imageMountsByMountpoint: make(map[string]*fuse.ImageMount),
cgroupsV2: cgroupsV2,
cgroupsSupport: cgroupsSupport,
},
wantErr: false,
},
Expand All @@ -94,6 +105,8 @@ func TestNewLauncher(t *testing.T) {
homeDest: u.HomeDir,
imageMountsByImagePath: make(map[string]*fuse.ImageMount),
imageMountsByMountpoint: make(map[string]*fuse.ImageMount),
cgroupsV2: cgroupsV2,
cgroupsSupport: cgroupsSupport,
},
wantErr: false,
},
Expand All @@ -111,6 +124,8 @@ func TestNewLauncher(t *testing.T) {
homeDest: u.HomeDir,
imageMountsByImagePath: make(map[string]*fuse.ImageMount),
imageMountsByMountpoint: make(map[string]*fuse.ImageMount),
cgroupsV2: cgroupsV2,
cgroupsSupport: cgroupsSupport,
},
wantErr: false,
},
Expand Down
14 changes: 14 additions & 0 deletions internal/pkg/runtime/launcher/oci/mounts_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,20 @@ func (l *Launcher) addSysMount(mounts *[]specs.Mount) error {
})
}

if l.cgroupsV2 {
cgroupRORW := "ro"
if l.cfg.KeepPrivs {
cgroupRORW = "rw"
}
*mounts = append(*mounts,
specs.Mount{
Source: "cgroup",
Destination: "/sys/fs/cgroup",
Type: "cgroup",
Options: []string{"nosuid", "noexec", "nodev", cgroupRORW},
})
}

return nil
}

Expand Down
9 changes: 8 additions & 1 deletion internal/pkg/runtime/launcher/oci/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func minimalSpec() runtimespec.Spec {

// addNamespaces adds the requested namespaces, if appropriate, to an existing spec.
// It is assumed that spec contains at least the defaultNamespaces.
func addNamespaces(spec *runtimespec.Spec, ns launcher.Namespaces) error {
func (l *Launcher) addNamespaces(spec *runtimespec.Spec, ns launcher.Namespaces) error {
if ns.IPC {
sylog.Infof("--oci runtime always uses an IPC namespace, ipc flag is redundant.")
}
Expand Down Expand Up @@ -116,6 +116,13 @@ func addNamespaces(spec *runtimespec.Spec, ns launcher.Namespaces) error {
)
}

if l.cgroupsV2 && l.cgroupsSupport {
spec.Linux.Namespaces = append(
spec.Linux.Namespaces,
runtimespec.LinuxNamespace{Type: runtimespec.CgroupNamespace},
)
}

return nil
}

Expand Down
24 changes: 18 additions & 6 deletions internal/pkg/runtime/launcher/oci/spec_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
func Test_addNamespaces(t *testing.T) {
test.DropPrivilege(t)
defer test.ResetPrivilege(t)

defaultPlusPID := append(defaultNamespaces,
specs.LinuxNamespace{Type: specs.PIDNamespace})
defaultPlusNetPID := append(defaultNamespaces,
Expand All @@ -28,11 +27,14 @@ func Test_addNamespaces(t *testing.T) {
defaultPlusPIDUTS := append(defaultNamespaces,
specs.LinuxNamespace{Type: specs.PIDNamespace},
specs.LinuxNamespace{Type: specs.UTSNamespace})

defaultPlusPIDCgroups := append(defaultNamespaces,
specs.LinuxNamespace{Type: specs.PIDNamespace},
specs.LinuxNamespace{Type: specs.CgroupNamespace})
tests := []struct {
name string
ns launcher.Namespaces
wantNS []specs.LinuxNamespace
name string
ns launcher.Namespaces
cgroupsv2Support bool
wantNS []specs.LinuxNamespace
}{
{
name: "none",
Expand Down Expand Up @@ -69,13 +71,23 @@ func Test_addNamespaces(t *testing.T) {
ns: launcher.Namespaces{UTS: true},
wantNS: defaultPlusPIDUTS,
},
{
name: "cgroupsv2",
ns: launcher.Namespaces{},
cgroupsv2Support: true,
wantNS: defaultPlusPIDCgroups,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
l := &Launcher{
cgroupsV2: tt.cgroupsv2Support,
cgroupsSupport: tt.cgroupsv2Support,
}
ms := minimalSpec()
spec := &ms
err := addNamespaces(spec, tt.ns)
err := l.addNamespaces(spec, tt.ns)
if err != nil {
t.Errorf("addNamespaces() returned an unexpected error: %v", err)
}
Expand Down