Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cgroup2: exec: join the cgroup of the init process on EBUSY #2416

Merged
merged 1 commit into from
May 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
initProcessPid: state.InitProcessPid,
}, nil
}

Expand Down
24 changes: 23 additions & 1 deletion libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,22 @@ package libcontainer
import (
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strconv"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/logs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"

"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)

Expand Down Expand Up @@ -66,6 +69,7 @@ type setnsProcess struct {
fds []string
process *Process
bootstrapData io.Reader
initProcessPid int
}

func (p *setnsProcess) startTime() (uint64, error) {
Expand Down Expand Up @@ -100,7 +104,25 @@ func (p *setnsProcess) start() (err error) {
}
if len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil && !p.rootlessCgroups {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
// On cgroup v2 + nesting + domain controllers, EnterPid may fail with EBUSY.
// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
// Try to join the cgroup of InitProcessPid.
if cgroups.IsCgroup2UnifiedMode() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should it not be done only with err == EBUSY?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess it is worth trying to join pid1 even on other errors

initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
if initCgErr == nil {
if initCgPath, ok := initCg[""]; ok {
initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath)
logrus.Debugf("adding pid %d to cgroups %v failed (%v), attempting to join %q (obtained from %s)",
p.pid(), p.cgroupPaths, err, initCg, initCgDirpath)
// NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container.
err = cgroups.WriteCgroupProc(initCgDirpath, p.pid())
}
}
}
if err != nil {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
}
}
}
if p.intelRdtPath != "" {
Expand Down
46 changes: 46 additions & 0 deletions tests/integration/cgroups.bats
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,49 @@ EOF
[ "$status" -eq 0 ]
[[ ${lines[0]} == *"cgroups_exec"* ]]
}

@test "runc exec (cgroup v2 + init process in non-root cgroup) succeeds" {
requires root cgroups_v2

set_cgroups_path "$BUSYBOX_BUNDLE"
set_cgroup_mount_writable "$BUSYBOX_BUNDLE"

runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_group
[ "$status" -eq 0 ]

runc exec test_cgroups_group cat /sys/fs/cgroup/cgroup.controllers
[ "$status" -eq 0 ]
[[ ${lines[0]} == *"memory"* ]]

runc exec test_cgroups_group cat /proc/self/cgroup
[ "$status" -eq 0 ]
[[ ${lines[0]} == "0::/" ]]

runc exec test_cgroups_group mkdir /sys/fs/cgroup/foo
[ "$status" -eq 0 ]

runc exec test_cgroups_group sh -c "echo 1 > /sys/fs/cgroup/foo/cgroup.procs"
[ "$status" -eq 0 ]

# the init process is now in "/foo", but an exec process can still join "/"
# because we haven't enabled any domain controller.
runc exec test_cgroups_group cat /proc/self/cgroup
[ "$status" -eq 0 ]
[[ ${lines[0]} == "0::/" ]]

# turn on a domain controller (memory)
runc exec test_cgroups_group sh -euxc 'echo $$ > /sys/fs/cgroup/foo/cgroup.procs; echo +memory > /sys/fs/cgroup/cgroup.subtree_control'
[ "$status" -eq 0 ]

# an exec process can no longer join "/" after turning on a domain controller.
# falls back to "/foo".
runc exec test_cgroups_group cat /proc/self/cgroup
[ "$status" -eq 0 ]
[[ ${lines[0]} == "0::/foo" ]]

# teardown: remove "/foo"
runc exec test_cgroups_group sh -uxc 'echo -memory > /sys/fs/cgroup/cgroup.subtree_control; for f in $(cat /sys/fs/cgroup/foo/cgroup.procs); do echo $f > /sys/fs/cgroup/cgroup.procs; done; rmdir /sys/fs/cgroup/foo'
runc exec test_cgroups_group test ! -d /sys/fs/cgroup/foo
[ "$status" -eq 0 ]
#
}
7 changes: 1 addition & 6 deletions tests/integration/delete.bats
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,7 @@ function teardown() {
@test "runc delete --force in cgroupv2 with subcgroups" {
requires cgroups_v2 root
set_cgroups_path "$BUSYBOX_BUNDLE"

# grant `rw` priviledge to `/sys/fs/cgroup`
cat "${BUSYBOX_BUNDLE}/config.json"\
| jq '.mounts |= map((select(.type=="cgroup") | .options -= ["ro"]) // .)'\
> "${BUSYBOX_BUNDLE}/config.json.tmp"
mv "${BUSYBOX_BUNDLE}/config.json"{.tmp,}
set_cgroup_mount_writable "$BUSYBOX_BUNDLE"

# run busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
Expand Down
9 changes: 9 additions & 0 deletions tests/integration/helpers.bash
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,15 @@ function set_resources_limit() {
sed -i 's/\("linux": {\)/\1\n "resources": { "pids": { "limit": 100 } },/' "$bundle/config.json"
}

# Helper function to make /sys/fs/cgroup writable
function set_cgroup_mount_writable() {
bundle="${1:-.}"
cat "$bundle/config.json" \
| jq '.mounts |= map((select(.type == "cgroup") | .options -= ["ro"]) // .)' \
>"$bundle/config.json.tmp"
mv "$bundle/config.json"{.tmp,}
}

# Fails the current test, providing the error given.
function fail() {
echo "$@" >&2
Expand Down