Skip to content

Commit

Permalink
cgroup2: exec: join the cgroup of the init process on EBUSY
Browse files Browse the repository at this point in the history
Signed-off-by: Akihiro Suda <[email protected]>
  • Loading branch information
AkihiroSuda committed May 19, 2020
1 parent b207d57 commit 9643a87
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 1 deletion.
1 change: 1 addition & 0 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
initProcessPid: state.InitProcessPid,
}, nil
}

Expand Down
24 changes: 23 additions & 1 deletion libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,22 @@ package libcontainer
import (
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strconv"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/logs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"

"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)

Expand Down Expand Up @@ -66,6 +69,7 @@ type setnsProcess struct {
fds []string
process *Process
bootstrapData io.Reader
initProcessPid int
}

func (p *setnsProcess) startTime() (uint64, error) {
Expand Down Expand Up @@ -100,7 +104,25 @@ func (p *setnsProcess) start() (err error) {
}
if len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil && !p.rootlessCgroups {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
// On cgroup v2 + nesting + domain controllers, EnterPid may fail with EBUSY.
// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
// Try to join the cgroup of InitProcessPid.
if cgroups.IsCgroup2UnifiedMode() {
initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
if initCgErr == nil {
if initCgPath, ok := initCg[""]; ok {
initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath)
logrus.Warnf("adding pid %d to cgroups %v failed (%v), attempting to join %q (obtained from %s)",
p.pid(), p.cgroupPaths, err, initCg, initCgDirpath)
// NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container.
err = cgroups.WriteCgroupProc(initCgDirpath, p.pid())
}
}
}
if err != nil {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
}
}
}
if p.intelRdtPath != "" {
Expand Down
52 changes: 52 additions & 0 deletions tests/integration/cgroups.bats
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,55 @@ EOF
[ "$status" -eq 0 ]
[[ ${lines[0]} == *"cgroups_exec"* ]]
}

@test "runc exec (cgroup v2 + init process in non-root cgroup) succeeds" {
requires root cgroups_v2

set_cgroups_path "$BUSYBOX_BUNDLE"
# set_memory_limit is currently required to make sure "memory" appear in /sys/fs/cgroup/cgroup.controllers .
# set_memory_limit will no longer be needed after merging "Partially revert "CreateCgroupPath: only enable needed controllers" (#2395)
set_memory_limit "$BUSYBOX_BUNDLE"
set_cgroup_mount_writable "$BUSYBOX_BUNDLE"

runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_group
[ "$status" -eq 0 ]

runc exec test_cgroups_group cat /sys/fs/cgroup/cgroup.controllers
[ "$status" -eq 0 ]
[[ ${lines[0]} == *"memory"* ]]

runc exec test_cgroups_group cat /proc/self/cgroup
[ "$status" -eq 0 ]
[[ ${lines[0]} == "0::/" ]]

runc exec test_cgroups_group mkdir /sys/fs/cgroup/foo
[ "$status" -eq 0 ]

runc exec test_cgroups_group sh -c "echo 1 > /sys/fs/cgroup/foo/cgroup.procs"
[ "$status" -eq 0 ]

# the init process is now in "/foo", but an exec process can still join "/"
# because we haven't enabled any domain controller.
runc exec test_cgroups_group cat /proc/self/cgroup
[ "$status" -eq 0 ]
[[ ${lines[0]} == "0::/" ]]

# turn on a domain controller (memory)
runc exec test_cgroups_group sh -euxc 'echo $$ > /sys/fs/cgroup/foo/cgroup.procs; echo +memory > /sys/fs/cgroup/cgroup.subtree_control'
[ "$status" -eq 0 ]

# an exec process can no longer join "/" after turning on a domain controller.
# falls back to "/foo".
runc exec test_cgroups_group cat /proc/self/cgroup
[ "$status" -eq 0 ]
# warning log from runc (stderr)
[[ ${lines[0]} == *"attempting to join"* ]]
# actual container log
[[ ${lines[1]} == "0::/foo" ]]

# teardown: remove "/foo"
runc exec test_cgroups_group sh -uxc 'echo -memory > /sys/fs/cgroup/cgroup.subtree_control; for f in $(cat /sys/fs/cgroup/foo/cgroup.procs); do echo $f > /sys/fs/cgroup/cgroup.procs; done; rmdir /sys/fs/cgroup/foo'
runc exec test_cgroups_group test ! -d /sys/fs/cgroup/foo
[ "$status" -eq 0 ]
#
}
18 changes: 18 additions & 0 deletions tests/integration/helpers.bash
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,24 @@ function set_resources_limit() {
sed -i 's/\("linux": {\)/\1\n "resources": { "pids": { "limit": 100 } },/' "$bundle/config.json"
}

# Helper function to set a memory limit
function set_memory_limit(){
bundle="${1:-.}"
cat "$bundle/config.json" \
| jq '.linux.resources += {"memory": {"limit":33554432}}' \
>"$bundle/config.json.tmp"
mv "$bundle/config.json"{.tmp,}
}

# Helper function to make /sys/fs/cgroup writable
function set_cgroup_mount_writable(){
bundle="${1:-.}"
cat "$bundle/config.json" \
| jq '.mounts |= map((select(.type == "cgroup") | .options -= ["ro"]) // .)' \
>"$bundle/config.json.tmp"
mv "$bundle/config.json"{.tmp,}
}

# Fails the current test, providing the error given.
function fail() {
echo "$@" >&2
Expand Down

0 comments on commit 9643a87

Please sign in to comment.