Skip to content

Commit

Permalink
cgroup2: exec: join the cgroup of the init process on EBUSY
Browse files Browse the repository at this point in the history
Signed-off-by: Akihiro Suda <[email protected]>
  • Loading branch information
AkihiroSuda committed May 19, 2020
1 parent b207d57 commit 4ece281
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 1 deletion.
1 change: 1 addition & 0 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
initProcessPid: state.InitProcessPid,
}, nil
}

Expand Down
24 changes: 23 additions & 1 deletion libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,22 @@ package libcontainer
import (
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strconv"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/logs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"

"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)

Expand Down Expand Up @@ -66,6 +69,7 @@ type setnsProcess struct {
fds []string
process *Process
bootstrapData io.Reader
initProcessPid int
}

func (p *setnsProcess) startTime() (uint64, error) {
Expand Down Expand Up @@ -100,7 +104,25 @@ func (p *setnsProcess) start() (err error) {
}
if len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil && !p.rootlessCgroups {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
// On cgroup v2 + nesting + domain controllers, EnterPid may fail with EBUSY.
// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
// Try to join the cgroup of InitProcessPid.
if cgroups.IsCgroup2UnifiedMode() {
initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
if initCgErr == nil {
if initCgPath, ok := initCg[""]; ok {
initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath)
logrus.Warnf("adding pid %d to cgroups %v failed (%v), attempting to join %q (obtained from %s)",
p.pid(), p.cgroupPaths, err, initCg, initCgDirpath)
// NOTE: initCgDirpath is not guaranteed to exist because we didn't pause the container.
err = cgroups.WriteCgroupProc(initCgDirpath, p.pid())
}
}
}
if err != nil {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
}
}
}
if p.intelRdtPath != "" {
Expand Down
45 changes: 45 additions & 0 deletions tests/integration/cgroups.bats
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ function teardown() {
teardown_running_container test_cgroups_kmem
teardown_running_container test_cgroups_permissions
teardown_busybox
cleanup_cgroups_dir
}

function setup() {
Expand Down Expand Up @@ -125,3 +126,47 @@ EOF
[ "$status" -eq 0 ]
[[ ${lines[0]} == *"cgroups_exec"* ]]
}

# Checks that `runc exec` keeps working on cgroup v2 after the container's init
# process has been moved into a child cgroup ("/foo") and a domain controller
# (memory) has been enabled on the container's top-level cgroup. In that state
# the exec'ed process is expected to end up in "/foo".
@test "runc exec (cgroup v2 + init process in non-root cgroup) succeeds" {
requires root cgroups_v2

set_cgroups_path "$BUSYBOX_BUNDLE"
# set_memory_limit is currently required to make sure "memory" appears in /sys/fs/cgroup/cgroup.controllers.
# It will no longer be needed after merging 'Partially revert "CreateCgroupPath: only enable needed controllers"' (#2395).
set_memory_limit "$BUSYBOX_BUNDLE"
# The test creates a child cgroup and writes cgroup.procs / cgroup.subtree_control
# from inside the container, so the cgroup mount must not be read-only.
set_cgroup_mount_writable "$BUSYBOX_BUNDLE"
# Debugging aid (disabled): dump the generated config.
# cat "$BUSYBOX_BUNDLE/config.json"

runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_group
[ "$status" -eq 0 ]

# Precondition: the memory controller is available inside the container.
runc exec test_cgroups_group cat /sys/fs/cgroup/cgroup.controllers
[ "$status" -eq 0 ]
[[ ${lines[0]} == *"memory"* ]]

# Baseline: an exec'ed process reports "/" as its cgroup.
runc exec test_cgroups_group cat /proc/self/cgroup
[ "$status" -eq 0 ]
[[ ${lines[0]} == "0::/" ]]

# Create the child cgroup "/foo".
runc exec test_cgroups_group mkdir /sys/fs/cgroup/foo
[ "$status" -eq 0 ]

# Move pid 1 (the container's init process) into "/foo".
runc exec test_cgroups_group sh -c "echo 1 > /sys/fs/cgroup/foo/cgroup.procs"
[ "$status" -eq 0 ]

# the init process is now in "/foo", but an exec process can still join "/"
# because we haven't enabled any domain controller.
runc exec test_cgroups_group cat /proc/self/cgroup
[ "$status" -eq 0 ]
[[ ${lines[0]} == "0::/" ]]

# turn on a domain controller (memory)
# The exec'ed shell first moves itself ($$) into "/foo" and only then writes
# subtree_control; presumably this is needed so that "/" holds no processes
# when the controller is enabled (cgroup v2 "no internal processes" rule) -- TODO confirm.
runc exec test_cgroups_group sh -c 'echo $$ > /sys/fs/cgroup/foo/cgroup.procs; echo +memory > /sys/fs/cgroup/cgroup.subtree_control'
[ "$status" -eq 0 ]

# an exec process can no longer join "/" after turning on a domain controller.
# falls back to "/foo".
# NOTE(review): the assertion reads lines[1], not lines[0]; presumably lines[0]
# is the warning runc logs when it falls back to the init process's cgroup -- confirm.
runc exec test_cgroups_group cat /proc/self/cgroup
[ "$status" -eq 0 ]
[[ ${lines[1]} == "0::/foo" ]]
}
30 changes: 30 additions & 0 deletions tests/integration/helpers.bash
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,18 @@ function init_cgroup_paths() {
fi
}

# Helper function to clean up the cgroups dir.
# Removes every subdirectory of $CGROUP_PATH (deepest first), but does not
# remove $CGROUP_PATH itself. Removal is best effort: directories that cannot
# be removed are silently left behind and the function still returns success.
# Implemented only for CGROUP_UNIFIED.
function cleanup_cgroups_dir() {
	init_cgroup_paths
	if [[ "$CGROUP_UNIFIED" = "yes" && -d "$CGROUP_PATH" ]]; then
		# -depth makes find list children before their parents, so every
		# rmdir runs after the nested directories below it were handled.
		# -exec ... {} + hands each path to rmdir as a single argument, so
		# names containing whitespace are not split (unlike a pipe to xargs).
		# stderr is discarded for both find and rmdir; errors are expected
		# here and deliberately ignored.
		find "$CGROUP_PATH" -mindepth 1 -depth -type d -exec rmdir {} + 2> /dev/null || true
	fi
}

# Helper function to set cgroupsPath to the value of $OCI_CGROUPS_PATH
function set_cgroups_path() {
bundle="${1:-.}"
Expand Down Expand Up @@ -197,6 +209,24 @@ function set_resources_limit() {
sed -i 's/\("linux": {\)/\1\n "resources": { "pids": { "limit": 100 } },/' "$bundle/config.json"
}

# Helper function to set a memory limit.
# Adds {"memory": {"limit": 33554432}} to .linux.resources in the bundle's
# config.json.
#   $1 - bundle directory (defaults to the current directory)
# Returns jq's exit status if the rewrite fails; in that case config.json is
# left untouched.
function set_memory_limit() {
	bundle="${1:-.}"
	# Write to a temporary file first and only replace config.json when jq
	# succeeded, so a jq failure can never truncate the real config.
	jq '.linux.resources += {"memory": {"limit":33554432}}' "$bundle/config.json" \
		> "$bundle/config.json.tmp" || return
	mv "$bundle/config.json.tmp" "$bundle/config.json"
}

# Helper function to make /sys/fs/cgroup writable.
# Drops the "ro" option from every mount of type "cgroup" in the bundle's
# config.json; all other mounts are kept as they are.
#   $1 - bundle directory (defaults to the current directory)
# Returns jq's exit status if the rewrite fails; in that case config.json is
# left untouched.
function set_cgroup_mount_writable() {
	bundle="${1:-.}"
	# Write to a temporary file first and only replace config.json when jq
	# succeeded, so a jq failure can never truncate the real config.
	jq '.mounts |= map((select(.type == "cgroup") | .options -= ["ro"]) // .)' "$bundle/config.json" \
		> "$bundle/config.json.tmp" || return
	mv "$bundle/config.json.tmp" "$bundle/config.json"
}

# Fails the current test, providing the error given.
function fail() {
echo "$@" >&2
Expand Down

0 comments on commit 4ece281

Please sign in to comment.