From e7d9fe1cf3ed64403c2a94f284b46fdb74038e3a Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sat, 27 Oct 2018 19:22:03 +0800 Subject: [PATCH] libcontainer: intelrdt: add support for Intel RDT/MBA Software Controller in runc MBA Software Controller feature is introduced in Linux kernel v4.15. It is a software enhancement to mitigate some limitations in MBA which are described in the kernel documentation. It also makes the interface more user friendly - we could specify memory bandwidth in "MBps" (Mega Bytes per second) as well as in "percentages". The kernel underneath would use a software feedback mechanism or a "Software Controller" which reads the actual bandwidth using MBM counters and adjusts the memory bandwidth percentages to ensure: "actual memory bandwidth < user specified memory bandwidth". We could enable this feature through mount option "-o mba_MBps": mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl In runc, we handle both memory bandwidth schemata in a unified format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..." The unit of memory bandwidth is specified in "percentages" by default, and in "MBps" if MBA Software Controller is enabled. For more information about Intel RDT and MBA Software Controller: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt Signed-off-by: Xiaochen Shen --- libcontainer/SPEC.md | 17 +++++++-- libcontainer/configs/intelrdt.go | 4 ++- libcontainer/intelrdt/intelrdt.go | 50 +++++++++++++++++++++++--- libcontainer/intelrdt/intelrdt_test.go | 38 ++++++++++++++++++++ 4 files changed, 102 insertions(+), 7 deletions(-) diff --git a/libcontainer/SPEC.md b/libcontainer/SPEC.md index 18bf64704b8..8c59ac71c6d 100644 --- a/libcontainer/SPEC.md +++ b/libcontainer/SPEC.md @@ -167,7 +167,8 @@ service (CLOS) and each CLOS has a capacity bitmask (CBM). Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle over memory bandwidth for the software. 
A user controls the resource by -indicating the percentage of maximum memory bandwidth. +indicating the percentage of maximum memory bandwidth or memory bandwidth limit +in MBps unit if MBA Software Controller is enabled. It can be used to handle L3 cache and memory bandwidth resources allocation for containers if hardware and kernel support Intel RDT CAT and MBA features. @@ -236,7 +237,7 @@ set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc. Memory bandwidth schema: It has allocation values for memory bandwidth on each socket, which contains -L3 cache id and memory bandwidth percentage. +L3 cache id and memory bandwidth. ``` Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..." ``` @@ -249,6 +250,18 @@ that is allocated is also dependent on the CPU model and can be looked up at min_bw + N * bw_gran. Intermediate values are rounded to the next control step available on the hardware. +If MBA Software Controller is enabled through mount option "-o mba_MBps": +mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl +We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit +instead of "percentages". The kernel underneath would use a software feedback +mechanism or a "Software Controller" which reads the actual bandwidth using +MBM counters and adjusts the memory bandwidth percentages to ensure: +"actual memory bandwidth < user specified memory bandwidth". + +For example, on a two-socket machine, the schema line could be +"MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0 +and 7000 MBps memory bandwidth limit on socket 1. + For more information about Intel RDT kernel interface: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt diff --git a/libcontainer/configs/intelrdt.go b/libcontainer/configs/intelrdt.go index 6f47aac077d..57e9f037d97 100644 --- a/libcontainer/configs/intelrdt.go +++ b/libcontainer/configs/intelrdt.go @@ -5,7 +5,9 @@ type IntelRdt struct { // Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." 
L3CacheSchema string `json:"l3_cache_schema,omitempty"` - // The schema of memory bandwidth percentage per L3 cache id + // The schema of memory bandwidth per L3 cache id // Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..." + // The unit of memory bandwidth is specified in "percentages" by + // default, and in "MBps" if MBA Software Controller is enabled. MemBwSchema string `json:"memBwSchema,omitempty"` } diff --git a/libcontainer/intelrdt/intelrdt.go b/libcontainer/intelrdt/intelrdt.go index 4e081ae6fe8..118d497b6c1 100644 --- a/libcontainer/intelrdt/intelrdt.go +++ b/libcontainer/intelrdt/intelrdt.go @@ -28,7 +28,8 @@ import ( * * Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle * over memory bandwidth for the software. A user controls the resource by - * indicating the percentage of maximum memory bandwidth. + * indicating the percentage of maximum memory bandwidth or memory bandwidth + * limit in MBps unit if MBA Software Controller is enabled. * * More details about Intel RDT CAT and MBA can be found in the section 17.18 * of Intel Software Developer Manual: @@ -95,7 +96,7 @@ import ( * * Memory bandwidth schema: * It has allocation values for memory bandwidth on each socket, which contains - * L3 cache id and memory bandwidth percentage. + * L3 cache id and memory bandwidth. * Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..." * For example, on a two-socket machine, the schema line could be "MB:0=20;1=70" * @@ -106,6 +107,18 @@ import ( * min_bw + N * bw_gran. Intermediate values are rounded to the next control * step available on the hardware. * + * If MBA Software Controller is enabled through mount option "-o mba_MBps": + * mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl + * We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit + * instead of "percentages". 
The kernel underneath would use a software feedback + * mechanism or a "Software Controller" which reads the actual bandwidth using + * MBM counters and adjusts the memory bandwidth percentages to ensure: + * "actual memory bandwidth < user specified memory bandwidth". + * + * For example, on a two-socket machine, the schema line could be + * "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0 + * and 7000 MBps memory bandwidth limit on socket 1. + * * For more information about Intel RDT kernel interface: * https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt * @@ -165,6 +178,8 @@ var ( isCatEnabled bool // The flag to indicate if Intel RDT/MBA is enabled isMbaEnabled bool + // The flag to indicate if Intel RDT/MBA Software Controller is enabled + isMbaScEnabled bool ) type intelRdtData struct { @@ -197,7 +212,11 @@ func init() { isCatEnabled = true } } - if isMbaFlagSet { + if isMbaScEnabled { + // MBA Software Controller was confirmed enabled in step 2; MBA must + // be enabled too, because MBA Software Controller depends on MBA + isMbaEnabled = true + } else if isMbaFlagSet { if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "MB")); err == nil { isMbaEnabled = true } @@ -232,6 +251,11 @@ func findIntelRdtMountpointDir() (string, error) { return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) } + // Check if MBA Software Controller is enabled through mount option "-o mba_MBps" + if strings.Contains(postSeparatorFields[2], "mba_MBps") { + isMbaScEnabled = true + } + return fields[4], nil } } @@ -480,6 +504,11 @@ func IsMbaEnabled() bool { return isMbaEnabled } +// Check if Intel RDT/MBA Software Controller is enabled +func IsMbaScEnabled() bool { + return isMbaScEnabled +} + // Get the 'container_id' path in Intel RDT "resource control" filesystem func GetIntelRdtPath(id string) (string, error) { rootPath, err := getIntelRdtRoot() @@ -633,7 +662,7 @@ func (m *IntelRdtManager) Set(container *configs.Config) error 
{ // // About memory bandwidth schema: // It has allocation values for memory bandwidth on each socket, which - // contains L3 cache id and memory bandwidth percentage. + // contains L3 cache id and memory bandwidth. // Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..." // For example, on a two-socket machine, the schema line could be: // "MB:0=20;1=70" @@ -645,6 +674,19 @@ func (m *IntelRdtManager) Set(container *configs.Config) error { // The available bandwidth control steps are: min_bw + N * bw_gran. // Intermediate values are rounded to the next control step available // on the hardware. + // + // If MBA Software Controller is enabled through mount option + // "-o mba_MBps": mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl + // We could specify memory bandwidth in "MBps" (Mega Bytes per second) + // unit instead of "percentages". The kernel underneath would use a + // software feedback mechanism or a "Software Controller" which reads + // the actual bandwidth using MBM counters and adjusts the memory + // bandwidth percentages to ensure: + // "actual memory bandwidth < user specified memory bandwidth". + // + // For example, on a two-socket machine, the schema line could be + // "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on + // socket 0 and 7000 MBps memory bandwidth limit on socket 1. 
if container.IntelRdt != nil { path := m.GetPath() l3CacheSchema := container.IntelRdt.L3CacheSchema diff --git a/libcontainer/intelrdt/intelrdt_test.go b/libcontainer/intelrdt/intelrdt_test.go index fd487915ab6..a19b961b019 100644 --- a/libcontainer/intelrdt/intelrdt_test.go +++ b/libcontainer/intelrdt/intelrdt_test.go @@ -82,3 +82,41 @@ func TestIntelRdtSetMemBwSchema(t *testing.T) { t.Fatal("Got the wrong value, set 'schemata' failed.") } } + +func TestIntelRdtSetMemBwScSchema(t *testing.T) { + if !IsMbaScEnabled() { + return + } + + helper := NewIntelRdtTestUtil(t) + defer helper.cleanup() + + const ( + memBwScSchemaBefore = "MB:0=5000;1=7000" + memBwScSchemeAfter = "MB:0=9000;1=4000" + ) + + helper.writeFileContents(map[string]string{ + "schemata": memBwScSchemaBefore + "\n", + }) + + helper.IntelRdtData.config.IntelRdt.MemBwSchema = memBwScSchemeAfter + intelrdt := &IntelRdtManager{ + Config: helper.IntelRdtData.config, + Path: helper.IntelRdtPath, + } + if err := intelrdt.Set(helper.IntelRdtData.config); err != nil { + t.Fatal(err) + } + + tmpStrings, err := getIntelRdtParamString(helper.IntelRdtPath, "schemata") + if err != nil { + t.Fatalf("Failed to parse file 'schemata' - %s", err) + } + values := strings.Split(tmpStrings, "\n") + value := values[0] + + if value != memBwScSchemeAfter { + t.Fatal("Got the wrong value, set 'schemata' failed.") + } +}