Skip to content

Commit

Permalink
runc update: implement memory.checkBeforeUpdate
Browse files Browse the repository at this point in the history
This is aimed at solving the problem of cgroup v2 memory controller
behavior which is not compatible with that of cgroup v1.

In cgroup v1, if the new memory limit being set is lower than the
current usage, setting the new limit fails.

In cgroup v2, same operation succeeds, and the container is OOM killed.

Introduce a new setting, memory.checkBeforeUpdate, and use it to mimic
cgroup v1 behavior.

Note that this is not 100% reliable because of TOCTOU, but this is the
best we can do.

Add some test cases.

Signed-off-by: Kir Kolyshkin <[email protected]>
  • Loading branch information
kolyshkin committed Sep 11, 2022
1 parent 2e8b7a1 commit 4b05d9a
Show file tree
Hide file tree
Showing 11 changed files with 112 additions and 12 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ require (
github.com/godbus/dbus/v5 v5.1.0
github.com/moby/sys/mountinfo v0.6.2
github.com/mrunalp/fileutils v0.5.0
github.com/opencontainers/runtime-spec v1.0.3-0.20220718201635-a8106e99982b
github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78
github.com/opencontainers/selinux v1.10.1
github.com/seccomp/libseccomp-golang v0.10.0
github.com/sirupsen/logrus v1.9.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vyg
github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
github.com/mrunalp/fileutils v0.5.0 h1:NKzVxiH7eSk+OQ4M+ZYW1K6h27RUV3MI6NUTsHhU6Z4=
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/runtime-spec v1.0.3-0.20220718201635-a8106e99982b h1:udwtfS44rxYE/ViMLchHQBjfE60GZSB1arY7BFbyxLs=
github.com/opencontainers/runtime-spec v1.0.3-0.20220718201635-a8106e99982b/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78 h1:R5M2qXZiK/mWPMT4VldCOiSL9HIAMuxQZWdG0CSM5+4=
github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.10.1 h1:09LIPVRP3uuZGQvgR+SgMSNBd1Eb3vlRbGqQpoHsF8w=
github.com/opencontainers/selinux v1.10.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
Expand Down
29 changes: 29 additions & 0 deletions libcontainer/cgroups/fs2/fs2.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,3 +269,32 @@ func (m *manager) OOMKillCount() (uint64, error) {

return c, err
}

func CheckMemoryUsage(dirPath string, r *configs.Resources) error {
if !r.MemoryCheckBeforeUpdate {
return nil
}

if r.Memory <= 0 && r.MemorySwap <= 0 {
return nil
}

usage, err := fscommon.GetCgroupParamUint(dirPath, "memory.current")
if err != nil {
return nil
}

if r.MemorySwap > 0 {
if uint64(r.MemorySwap) <= usage {
return fmt.Errorf("rejecting memory+swap limit %d > usage %d", r.MemorySwap, usage)
}
}

if r.Memory > 0 {
if uint64(r.Memory) <= usage {
return fmt.Errorf("rejecting memory limit %d > usage %d", r.Memory, usage)
}
}

return nil
}
5 changes: 5 additions & 0 deletions libcontainer/cgroups/fs2/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ func setMemory(dirPath string, r *configs.Resources) error {
if !isMemorySet(r) {
return nil
}

if err := CheckMemoryUsage(dirPath, r); err != nil {
return err
}

swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
if err != nil {
return err
Expand Down
8 changes: 6 additions & 2 deletions libcontainer/cgroups/systemd/v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,11 @@ func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props
return props, nil
}

func genV2ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
func genV2ResourcesProperties(dirPath string, r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
if err := fs2.CheckMemoryUsage(dirPath, r); err != nil {
return nil, err
}

var properties []systemdDbus.Property

// NOTE: This is of questionable correctness because we insert our own
Expand Down Expand Up @@ -437,7 +441,7 @@ func (m *unifiedManager) Set(r *configs.Resources) error {
if r == nil {
return nil
}
properties, err := genV2ResourcesProperties(r, m.dbus)
properties, err := genV2ResourcesProperties(m.fsMgr.Path(""), r, m.dbus)
if err != nil {
return err
}
Expand Down
5 changes: 5 additions & 0 deletions libcontainer/configs/cgroup_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,4 +155,9 @@ type Resources struct {
// during Set() to figure out whether the freeze is required. Those
// methods may be relatively slow, thus this flag.
SkipFreezeOnSet bool `json:"-"`

// MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check
// if the new memory limits (Memory and MemorySwap) being set are lower
// than the current memory usage, and reject if so.
MemoryCheckBeforeUpdate bool `json:"memory_check_before_update"`
}
3 changes: 3 additions & 0 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,9 @@ func CreateCgroupConfig(opts *CreateOpts, defaultDevs []*devices.Device) (*confi
if r.Memory.DisableOOMKiller != nil {
c.Resources.OomKillDisable = *r.Memory.DisableOOMKiller
}
if r.Memory.CheckBeforeUpdate != nil {
c.Resources.MemoryCheckBeforeUpdate = *r.Memory.CheckBeforeUpdate
}
}
if r.CPU != nil {
if r.CPU.Shares != nil {
Expand Down
40 changes: 40 additions & 0 deletions tests/integration/update.bats
Original file line number Diff line number Diff line change
Expand Up @@ -700,3 +700,43 @@ EOF
runc resume test_update
[ "$status" -eq 0 ]
}

@test "update memory vs CheckBeforeUpdate" {
requires cgroups_v2
[ $EUID -ne 0 ] && requires rootless_cgroup

runc run -d --console-socket "$CONSOLE_SOCKET" test_update
[ "$status" -eq 0 ]

# Setting memory to low value with checkBeforeUpdate=true should fail.
runc update -r - test_update <<EOF
{
"memory": {
"limit": 1024,
"checkBeforeUpdate": true
}
}
EOF
[ "$status" -ne 0 ]
[[ "$output" == *"rejecting memory limit"* ]]
testcontainer test_update running

# Setting memory+swap to low value with checkBeforeUpdate=true should fail.
runc update -r - test_update <<EOF
{
"memory": {
"limit": 1024,
"swap": 2048,
"checkBeforeUpdate": true
}
}
EOF
[ "$status" -ne 0 ]
[[ "$output" == *"rejecting memory+swap limit"* ]]
testcontainer test_update running

# The container will be OOM killed, and runc might either succeed
# or fail depending on the timing, so we don't check its exit code.
runc update test_update --memory 1024
testcontainer test_update stopped
}
16 changes: 10 additions & 6 deletions update.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
func i64Ptr(i int64) *int64 { return &i }
func u64Ptr(i uint64) *uint64 { return &i }
func u16Ptr(i uint16) *uint16 { return &i }
func boolPtr(b bool) *bool { return &b }

var updateCommand = cli.Command{
Name: "update",
Expand All @@ -37,7 +38,8 @@ The accepted format is as follow (unchanged values can be omitted):
"memory": {
"limit": 0,
"reservation": 0,
"swap": 0
"swap": 0,
"checkBeforeUpdate": true
},
"cpu": {
"shares": 0,
Expand Down Expand Up @@ -136,11 +138,12 @@ other options are ignored.

r := specs.LinuxResources{
Memory: &specs.LinuxMemory{
Limit: i64Ptr(0),
Reservation: i64Ptr(0),
Swap: i64Ptr(0),
Kernel: i64Ptr(0),
KernelTCP: i64Ptr(0),
Limit: i64Ptr(0),
Reservation: i64Ptr(0),
Swap: i64Ptr(0),
Kernel: i64Ptr(0),
KernelTCP: i64Ptr(0),
CheckBeforeUpdate: boolPtr(false),
},
CPU: &specs.LinuxCPU{
Shares: u64Ptr(0),
Expand Down Expand Up @@ -293,6 +296,7 @@ other options are ignored.
config.Cgroups.Resources.Memory = *r.Memory.Limit
config.Cgroups.Resources.MemoryReservation = *r.Memory.Reservation
config.Cgroups.Resources.MemorySwap = *r.Memory.Swap
config.Cgroups.Resources.MemoryCheckBeforeUpdate = *r.Memory.CheckBeforeUpdate
config.Cgroups.Resources.PidsLimit = r.Pids.Limit
config.Cgroups.Resources.Unified = r.Unified

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion vendor/modules.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ github.com/moby/sys/mountinfo
# github.com/mrunalp/fileutils v0.5.0
## explicit; go 1.13
github.com/mrunalp/fileutils
# github.com/opencontainers/runtime-spec v1.0.3-0.20220718201635-a8106e99982b
# github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78
## explicit
github.com/opencontainers/runtime-spec/specs-go
# github.com/opencontainers/selinux v1.10.1
Expand Down

0 comments on commit 4b05d9a

Please sign in to comment.