From 1b2adcfe5620cac8ce3e52ea066161c72431f74c Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 28 Jul 2021 10:34:26 -0700 Subject: [PATCH 1/2] libct/cg/v1: workaround CPU quota period set failure As reported in issue 3084, sometimes setting CPU quota period fails when a new period is lower and a parent cgroup has CPU quota limit set. This happens as in cgroup v1 the quota and the period can not be set together (this is fixed in v2), and since the period is being set first, new_limit = old_quota/new_period may be higher than the parent cgroup limit. The fix is to retry setting the period after the quota, to cover all possible scenarios. Add a test case to cover a regression caused by an earlier version of this patch (ignoring a failure of setting invalid period when quota is not set). Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/fs/cpu.go | 24 ++++++++++++++++++++++-- tests/integration/update.bats | 9 +++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/libcontainer/cgroups/fs/cpu.go b/libcontainer/cgroups/fs/cpu.go index 04bd5bc9fb9..657570a279f 100644 --- a/libcontainer/cgroups/fs/cpu.go +++ b/libcontainer/cgroups/fs/cpu.go @@ -4,6 +4,7 @@ package fs import ( "bufio" + "errors" "fmt" "os" "strconv" @@ -11,6 +12,7 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" "github.com/opencontainers/runc/libcontainer/configs" + "golang.org/x/sys/unix" ) type CpuGroup struct{} @@ -71,15 +73,33 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error { return fmt.Errorf("the minimum allowed cpu-shares is %d", sharesRead) } } + + var period string if r.CpuPeriod != 0 { - if err := cgroups.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(r.CpuPeriod, 10)); err != nil { - return err + period = strconv.FormatUint(r.CpuPeriod, 10) + if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil { + // Sometimes when the period to be set is smaller + // than the current one, it is rejected by the kernel + // (EINVAL) as old_quota/new_period exceeds the parent + // cgroup quota limit. If this happens and the quota is + // going to be set, ignore the error for now and retry + // after setting the quota. + if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 { + return err + } + } else { + period = "" } } if r.CpuQuota != 0 { if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil { return err } + if period != "" { + if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil { + return err + } + } } return s.SetRtSched(path, r) } diff --git a/tests/integration/update.bats b/tests/integration/update.bats index 2004d9efc2e..b9a5b602cb8 100644 --- a/tests/integration/update.bats +++ b/tests/integration/update.bats @@ -345,6 +345,15 @@ EOF check_cpu_quota -1 1000000 "infinity" } +@test "set cpu period with no quota (invalid period)" { + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup + + update_config '.linux.resources.cpu |= { "period": 100 }' + + runc run -d --console-socket "$CONSOLE_SOCKET" test_update + [ "$status" -eq 1 ] +} + @test "set cpu quota with no period" { [[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup From 639445789dbb8f5f54f53a1a917d7b6a6159cd44 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 28 Jul 2021 10:37:29 -0700 Subject: [PATCH 2/2] tests/int: add a "update cpu period with pod limit set" test Add a test case for an issue fixed by the previous commit. Unfortunately, this is somewhat complicated as there's no easy way to create a transient unit, so a binary, sd-helper, had to be added. On top of that, an ability to create a parent/pod cgroup is added to helpers.bash, which might be useful for future integration tests. Signed-off-by: Kir Kolyshkin --- .gitignore | 1 + Makefile | 10 ++-- contrib/cmd/sd-helper/helper.go | 83 +++++++++++++++++++++++++++++++++ tests/integration/helpers.bash | 75 +++++++++++++++++++++++++++-- tests/integration/update.bats | 35 ++++++++++++++ 5 files changed, 195 insertions(+), 9 deletions(-) create mode 100644 contrib/cmd/sd-helper/helper.go diff --git a/.gitignore b/.gitignore index 84485cb9432..98775d60658 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ vendor/pkg /runc /runc-* contrib/cmd/recvtty/recvtty +contrib/cmd/sd-helper/sd-helper man/man8 release Vagrantfile diff --git a/Makefile b/Makefile index 4c117a9322c..f83c3ec849a 100644 --- a/Makefile +++ b/Makefile @@ -30,14 +30,15 @@ GO_BUILD_STATIC := CGO_ENABLED=1 $(GO) build -trimpath $(EXTRA_FLAGS) -tags "$(B runc: $(GO_BUILD) -o runc . -all: runc recvtty +all: runc recvtty sd-helper -recvtty: - $(GO_BUILD) -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty +recvtty sd-helper: + $(GO_BUILD) -o contrib/cmd/$@/$@ ./contrib/cmd/$@ static: $(GO_BUILD_STATIC) -o runc . $(GO_BUILD_STATIC) -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty + $(GO_BUILD_STATIC) -o contrib/cmd/sd-helper/sd-helper ./contrib/cmd/sd-helper release: script/release.sh -r release/$(VERSION) -v $(VERSION) @@ -110,6 +111,7 @@ install-man: man clean: rm -f runc runc-* rm -f contrib/cmd/recvtty/recvtty + rm -f contrib/cmd/sd-helper/sd-helper rm -rf release rm -rf man/man8 @@ -147,7 +149,7 @@ localcross: CGO_ENABLED=1 GOARCH=arm64 CC=aarch64-linux-gnu-gcc $(GO_BUILD) -o runc-arm64 . CGO_ENABLED=1 GOARCH=ppc64le CC=powerpc64le-linux-gnu-gcc $(GO_BUILD) -o runc-ppc64le . -.PHONY: runc all recvtty static release dbuild lint man runcimage \ +.PHONY: runc all recvtty sd-helper static release dbuild lint man runcimage \ test localtest unittest localunittest integration localintegration \ rootlessintegration localrootlessintegration shell install install-bash \ install-man clean cfmt shfmt shellcheck \ diff --git a/contrib/cmd/sd-helper/helper.go b/contrib/cmd/sd-helper/helper.go new file mode 100644 index 00000000000..156125248a8 --- /dev/null +++ b/contrib/cmd/sd-helper/helper.go @@ -0,0 +1,83 @@ +package main + +import ( + "flag" + "fmt" + "os" + + "github.com/sirupsen/logrus" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/systemd" + "github.com/opencontainers/runc/libcontainer/configs" +) + +func usage() { + fmt.Print(`Open Container Initiative contrib/cmd/sd-helper + +sd-helper is a tool that uses runc/libcontainer/cgroups/systemd package +functionality to communicate to systemd in order to perform various operations. +Currently this is limited to starting and stopping systemd transient slice +units. + +Usage: + sd-helper [-debug] [-parent ] {start|stop} + +Example: + sd-helper -parent system.slice start system-pod123.slice +`) + os.Exit(1) +} + +var ( + debug = flag.Bool("debug", false, "enable debug output") + parent = flag.String("parent", "", "parent unit name") +) + +func main() { + if !systemd.IsRunningSystemd() { + logrus.Fatal("systemd is required") + } + + // Set the flags. + flag.Parse() + if *debug { + logrus.SetLevel(logrus.DebugLevel) + } + if flag.NArg() != 2 { + usage() + } + + cmd := flag.Arg(0) + unit := flag.Arg(1) + + err := unitCommand(cmd, unit, *parent) + if err != nil { + logrus.Fatal(err) + } +} + +func newManager(config *configs.Cgroup) cgroups.Manager { + if cgroups.IsCgroup2UnifiedMode() { + return systemd.NewUnifiedManager(config, "", false) + } + return systemd.NewLegacyManager(config, nil) +} + +func unitCommand(cmd, name, parent string) error { + podConfig := &configs.Cgroup{ + Name: name, + Parent: parent, + Resources: &configs.Resources{}, + } + pm := newManager(podConfig) + + switch cmd { + case "start": + return pm.Apply(-1) + case "stop": + return pm.Destroy() + } + + return fmt.Errorf("unknown command: %s", cmd) +} diff --git a/tests/integration/helpers.bash b/tests/integration/helpers.bash index b4544185e4d..fde767b171d 100644 --- a/tests/integration/helpers.bash +++ b/tests/integration/helpers.bash @@ -16,6 +16,7 @@ unset IMAGES RUNC="${INTEGRATION_ROOT}/../../runc" RECVTTY="${INTEGRATION_ROOT}/../../contrib/cmd/recvtty/recvtty" +SD_HELPER="${INTEGRATION_ROOT}/../../contrib/cmd/sd-helper/sd-helper" # Test data path. # shellcheck disable=SC2034 @@ -131,24 +132,85 @@ function init_cgroup_paths() { fi } +function create_parent() { + if [ -n "$RUNC_USE_SYSTEMD" ]; then + [ -z "$SD_PARENT_NAME" ] && return + "$SD_HELPER" --parent machine.slice start "$SD_PARENT_NAME" + else + [ -z "$REL_PARENT_PATH" ] && return + if [ "$CGROUP_UNIFIED" == "yes" ]; then + mkdir "/sys/fs/cgroup$REL_PARENT_PATH" + else + local subsys + for subsys in ${CGROUP_SUBSYSTEMS}; do + # Have to ignore EEXIST (-p) as some subsystems + # are mounted together (e.g. cpu,cpuacct), so + # the path is created more than once. + mkdir -p "/sys/fs/cgroup/$subsys$REL_PARENT_PATH" + done + fi + fi +} + +function remove_parent() { + if [ -n "$RUNC_USE_SYSTEMD" ]; then + [ -z "$SD_PARENT_NAME" ] && return + "$SD_HELPER" --parent machine.slice stop "$SD_PARENT_NAME" + else + [ -z "$REL_PARENT_PATH" ] && return + if [ "$CGROUP_UNIFIED" == "yes" ]; then + rmdir "/sys/fs/cgroup/$REL_PARENT_PATH" + else + local subsys + for subsys in ${CGROUP_SUBSYSTEMS} systemd; do + rmdir "/sys/fs/cgroup/$subsys/$REL_PARENT_PATH" + done + fi + fi + unset SD_PARENT_NAME + unset REL_PARENT_PATH +} + +function set_parent_systemd_properties() { + [ -z "$SD_PARENT_NAME" ] && return + local user + [ "$(id -u)" != "0" ] && user="--user" + systemctl set-property $user "$SD_PARENT_NAME" "$@" +} + # Randomize cgroup path(s), and update cgroupsPath in config.json. # This function sets a few cgroup-related variables. +# +# Optional parameter $1 is a pod/parent name. If set, a parent/pod cgroup is +# created, and variables $REL_PARENT_PATH and $SD_PARENT_NAME can be used to +# refer to it. function set_cgroups_path() { init_cgroup_paths + local pod dash_pod slash_pod pod_slice + if [ "$#" -ne 0 ] && [ "$1" != "" ]; then + # Set up a parent/pod cgroup. + pod="$1" + dash_pod="-$pod" + slash_pod="/$pod" + SD_PARENT_NAME="machine-${pod}.slice" + pod_slice="/$SD_PARENT_NAME" + fi local rnd="$RANDOM" if [ -n "${RUNC_USE_SYSTEMD}" ]; then SD_UNIT_NAME="runc-cgroups-integration-test-${rnd}.scope" if [ "$(id -u)" = "0" ]; then - REL_CGROUPS_PATH="/machine.slice/$SD_UNIT_NAME" - OCI_CGROUPS_PATH="machine.slice:runc-cgroups:integration-test-${rnd}" + REL_PARENT_PATH="/machine.slice${pod_slice}" + OCI_CGROUPS_PATH="machine${dash_pod}.slice:runc-cgroups:integration-test-${rnd}" else - REL_CGROUPS_PATH="/user.slice/user-$(id -u).slice/user@$(id -u).service/machine.slice/$SD_UNIT_NAME" + REL_PARENT_PATH="/user.slice/user-$(id -u).slice/user@$(id -u).service/machine.slice${pod_slice}" # OCI path doesn't contain "/user.slice/user-$(id -u).slice/user@$(id -u).service/" prefix - OCI_CGROUPS_PATH="machine.slice:runc-cgroups:integration-test-${rnd}" + OCI_CGROUPS_PATH="machine${dash_pod}.slice:runc-cgroups:integration-test-${rnd}" fi + REL_CGROUPS_PATH="$REL_PARENT_PATH/$SD_UNIT_NAME" else - REL_CGROUPS_PATH="/runc-cgroups-integration-test/test-cgroup-${rnd}" + REL_PARENT_PATH="/runc-cgroups-integration-test${slash_pod}" + REL_CGROUPS_PATH="$REL_PARENT_PATH/test-cgroup-${rnd}" OCI_CGROUPS_PATH=$REL_CGROUPS_PATH fi @@ -157,6 +219,8 @@ function set_cgroups_path() { CGROUP_PATH=${CGROUP_BASE_PATH}${REL_CGROUPS_PATH} fi + [ -n "$pod" ] && create_parent + update_config '.linux.cgroupsPath |= "'"${OCI_CGROUPS_PATH}"'"' } @@ -479,4 +543,5 @@ function teardown_bundle() { __runc delete -f "$ct" done rm -rf "$ROOT" + remove_parent } diff --git a/tests/integration/update.bats b/tests/integration/update.bats index b9a5b602cb8..01c6915291a 100644 --- a/tests/integration/update.bats +++ b/tests/integration/update.bats @@ -392,6 +392,41 @@ EOF check_cpu_quota 30000 100000 "300ms" } +@test "update cpu period in a pod cgroup with pod limit set" { + requires cgroups_v1 + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup + + set_cgroups_path "pod_${RANDOM}" + + # Set parent/pod CPU quota limit to 50%. + if [ -n "${RUNC_USE_SYSTEMD}" ]; then + set_parent_systemd_properties CPUQuota="50%" + else + echo 50000 >"/sys/fs/cgroup/cpu/$REL_PARENT_PATH/cpu.cfs_quota_us" + fi + # Sanity checks. + run cat "/sys/fs/cgroup/cpu$REL_PARENT_PATH/cpu.cfs_period_us" + [ "$output" -eq 100000 ] + run cat "/sys/fs/cgroup/cpu$REL_PARENT_PATH/cpu.cfs_quota_us" + [ "$output" -eq 50000 ] + + runc run -d --console-socket "$CONSOLE_SOCKET" test_update + [ "$status" -eq 0 ] + # Get the current period. + local cur + cur=$(get_cgroup_value cpu.cfs_period_us) + + # Sanity check: as the parent cgroup sets the limit to 50%, + # setting a higher limit (e.g. 60%) is expected to fail. + runc update --cpu-quota $((cur * 6 / 10)) test_update + [ "$status" -eq 1 ] + + # Finally, the test itself: set 30% limit but with lower period. + runc update --cpu-period 10000 --cpu-quota 3000 test_update + [ "$status" -eq 0 ] + check_cpu_quota 3000 10000 "300ms" +} + @test "update cgroup v2 resources via unified map" { [[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup requires cgroups_v2