Skip to content

Commit

Permalink
Add initial support for rsvd accounting hugetlb cgroup
Browse files Browse the repository at this point in the history
The previous non-rsvd max/limit_in_bytes does not account for reserved
huge page memory, making it possible for a process to reserve all the
huge page memory without being able to allocate it (due to cgroup
restrictions).

In practice this makes it possible to successfully mmap more huge page
memory than allowed via the cgroup settings, but when the process starts
using that memory it will get a SIGBUS and crash. This is bad for
applications that mmap at startup: the mmap succeeds, but the program
crashes once it starts using the memory. E.g., postgres does this by default.

This also keeps writing to the old max/limit_in_bytes, to make sure that
applications reading those files do not see the wrong value.

More info can be found here: https://lkml.org/lkml/2020/2/3/1153

Signed-off-by: Odin Ugedal <[email protected]>
  • Loading branch information
odinuge committed Apr 28, 2020
1 parent 49ca1fd commit 5c84b1a
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 5 deletions.
32 changes: 29 additions & 3 deletions libcontainer/cgroups/fs/hugetlb.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,30 @@ func (s *HugetlbGroup) Apply(d *cgroupData) error {
return nil
}

// HasReservationAccountingSupport reports whether the hugetlb cgroup at path
// exposes the reservation ("rsvd") accounting files. This is supported from
// linux 5.7, see
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/hugetlb.html
//
// Only the first entry of HugePageSizes is probed, by reading its
// hugetlb.<size>.rsvd.limit_in_bytes file; any read error is treated as
// "not supported". When no huge page sizes are known the result is false.
func (s *HugetlbGroup) HasReservationAccountingSupport(path string) bool {
	if len(HugePageSizes) > 0 {
		probe := "hugetlb." + HugePageSizes[0] + ".rsvd.limit_in_bytes"
		if _, err := fscommon.ReadFile(path, probe); err == nil {
			return true
		}
	}
	return false
}

func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
supportsReservationAccounting := s.HasReservationAccountingSupport(path)
for _, hugetlb := range cgroup.Resources.HugetlbLimit {
if err := fscommon.WriteFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
return err
}

if !supportsReservationAccounting {
continue
}
if err := fscommon.WriteFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "rsvd", "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
return err
}
}

return nil
Expand All @@ -43,22 +62,29 @@ func (s *HugetlbGroup) Remove(d *cgroupData) error {

func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
hugetlbStats := cgroups.HugetlbStats{}
supportsReservationAccounting := s.HasReservationAccountingSupport(path)

for _, pageSize := range HugePageSizes {
usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".")
filenamePrefix := strings.Join([]string{"hugetlb", pageSize}, ".")

if supportsReservationAccounting {
filenamePrefix += ".rsvd"
}
usage := fmt.Sprintf("%s.usage_in_bytes", filenamePrefix)
value, err := fscommon.GetCgroupParamUint(path, usage)
if err != nil {
return fmt.Errorf("failed to parse %s - %v", usage, err)
}
hugetlbStats.Usage = value

maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".")
maxUsage := fmt.Sprintf("%s.max_usage_in_bytes", filenamePrefix)
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
if err != nil {
return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
}
hugetlbStats.MaxUsage = value

failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".")
failcnt := fmt.Sprintf("%s.failcnt", filenamePrefix)
value, err = fscommon.GetCgroupParamUint(path, failcnt)
if err != nil {
return fmt.Errorf("failed to parse %s - %v", failcnt, err)
Expand Down
57 changes: 57 additions & 0 deletions libcontainer/cgroups/fs/hugetlb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ var (
limit = "hugetlb.%s.limit_in_bytes"
maxUsage = "hugetlb.%s.max_usage_in_bytes"
failcnt = "hugetlb.%s.failcnt"

rsvdUsage = "hugetlb.%s.rsvd.usage_in_bytes"
rsvdLimit = "hugetlb.%s.rsvd.limit_in_bytes"
rsvdMaxUsage = "hugetlb.%s.rsvd.max_usage_in_bytes"
rsvdFailcnt = "hugetlb.%s.rsvd.failcnt"
)

func TestHugetlbSetHugetlb(t *testing.T) {
Expand Down Expand Up @@ -65,6 +70,58 @@ func TestHugetlbSetHugetlb(t *testing.T) {
}
}

// TestHugetlbSetHugetlbWithReservedAccounting checks that HugetlbGroup.Set
// updates both hugetlb.<size>.limit_in_bytes and
// hugetlb.<size>.rsvd.limit_in_bytes when the rsvd limit file already exists
// in the test cgroup directory.
func TestHugetlbSetHugetlbWithReservedAccounting(t *testing.T) {
	helper := NewCgroupTestUtil("hugetlb", t)
	defer helper.cleanup()

	const (
		hugetlbBefore = 256
		hugetlbAfter  = 512
	)

	// Seed both the regular and the rsvd limit files with the "before" value.
	// The rsvd file has to exist up front: Set only writes it when
	// HasReservationAccountingSupport can read it.
	for _, pageSize := range HugePageSizes {
		helper.writeFileContents(map[string]string{
			fmt.Sprintf(limit, pageSize): strconv.Itoa(hugetlbBefore),
		})
		helper.writeFileContents(map[string]string{
			fmt.Sprintf(rsvdLimit, pageSize): strconv.Itoa(hugetlbBefore),
		})
	}

	// Apply the new limit one page size at a time.
	for _, pageSize := range HugePageSizes {
		helper.CgroupData.config.Resources.HugetlbLimit = []*configs.HugepageLimit{
			{
				Pagesize: pageSize,
				Limit:    hugetlbAfter,
			},
		}
		hugetlb := &HugetlbGroup{}
		if err := hugetlb.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
			t.Fatal(err)
		}
	}

	// Both files must now hold the "after" value for every page size.
	for _, pageSize := range HugePageSizes {
		limitFile := fmt.Sprintf(limit, pageSize)
		value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, limitFile)
		if err != nil {
			t.Fatalf("Failed to parse %s - %s", limitFile, err)
		}
		if value != hugetlbAfter {
			t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value)
		}

		rsvdLimitFile := fmt.Sprintf(rsvdLimit, pageSize)
		rsvdValue, err := fscommon.GetCgroupParamUint(helper.CgroupPath, rsvdLimitFile)
		if err != nil {
			t.Fatalf("Failed to parse %s - %s", rsvdLimitFile, err)
		}
		if rsvdValue != hugetlbAfter {
			// Name the rsvd file here so a failure is not mistaken for the
			// non-rsvd check above.
			t.Fatalf("Set hugetlb.rsvd.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, rsvdValue)
		}
	}
}

func TestHugetlbStats(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
Expand Down
32 changes: 30 additions & 2 deletions libcontainer/cgroups/fs2/hugetlb.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package fs2

import (
"fmt"
"io/ioutil"
"path/filepath"
"strconv"
Expand All @@ -19,14 +20,33 @@ func isHugeTlbSet(cgroup *configs.Cgroup) bool {
return len(cgroup.Resources.HugetlbLimit) > 0
}

// HasReservationAccountingSupport reports whether the hugetlb controller in
// dirPath exposes the reservation ("rsvd") accounting files. This is
// supported from linux 5.7, see
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/hugetlb.html
//
// The huge page sizes are obtained from cgroups.GetHugePageSize and only the
// first one is probed, by reading its hugetlb.<size>.rsvd.max file. A failure
// to list the sizes, an empty list, or any read error yields false.
func HasReservationAccountingSupport(dirPath string) bool {
	sizes, err := cgroups.GetHugePageSize()
	if err != nil {
		return false
	}
	if len(sizes) == 0 {
		return false
	}

	probe := "hugetlb." + sizes[0] + ".rsvd.max"
	if _, err := fscommon.ReadFile(dirPath, probe); err != nil {
		return false
	}
	return true
}

func setHugeTlb(dirPath string, cgroup *configs.Cgroup) error {
if !isHugeTlbSet(cgroup) {
return nil
}
supportsReservationAccounting := HasReservationAccountingSupport(dirPath)
for _, hugetlb := range cgroup.Resources.HugetlbLimit {
if err := fscommon.WriteFile(dirPath, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "max"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
return err
}
if !supportsReservationAccounting {
continue
}
if err := fscommon.WriteFile(dirPath, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "rsvd", "max"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
return err
}
}

return nil
Expand All @@ -39,15 +59,23 @@ func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
}
hugetlbStats := cgroups.HugetlbStats{}

supportsReservationAccounting := HasReservationAccountingSupport(dirPath)

for _, pagesize := range hugePageSizes {
usage := strings.Join([]string{"hugetlb", pagesize, "current"}, ".")
filenamePrefix := strings.Join([]string{"hugetlb", pagesize}, ".")

if supportsReservationAccounting {
filenamePrefix += ".rsvd"
}

usage := fmt.Sprintf("%s.current", filenamePrefix)
value, err := fscommon.GetCgroupParamUint(dirPath, usage)
if err != nil {
return errors.Wrapf(err, "failed to parse hugetlb.%s.current file", pagesize)
}
hugetlbStats.Usage = value

fileName := strings.Join([]string{"hugetlb", pagesize, "events"}, ".")
fileName := fmt.Sprintf("%s.events", filenamePrefix)
filePath := filepath.Join(dirPath, fileName)
contents, err := ioutil.ReadFile(filePath)
if err != nil {
Expand Down

0 comments on commit 5c84b1a

Please sign in to comment.