diff --git a/libcontainer/README.md b/libcontainer/README.md index 42f3efe5639..dcff231d8fc 100644 --- a/libcontainer/README.md +++ b/libcontainer/README.md @@ -148,6 +148,7 @@ config := &configs.Config{ {Type: configs.NEWPID}, {Type: configs.NEWUSER}, {Type: configs.NEWNET}, + {Type: configs.NEWCGROUP}, }), Cgroups: &configs.Cgroup{ Name: "test-container", diff --git a/libcontainer/SPEC.md b/libcontainer/SPEC.md index 18bf64704b8..e474038a7c2 100644 --- a/libcontainer/SPEC.md +++ b/libcontainer/SPEC.md @@ -21,16 +21,17 @@ Minimum requirements: ### Namespaces -| Flag | Enabled | -| ------------ | ------- | -| CLONE_NEWPID | 1 | -| CLONE_NEWUTS | 1 | -| CLONE_NEWIPC | 1 | -| CLONE_NEWNET | 1 | -| CLONE_NEWNS | 1 | -| CLONE_NEWUSER | 1 | - -Namespaces are created for the container via the `clone` syscall. +| Flag | Enabled | +| --------------- | ------- | +| CLONE_NEWPID | 1 | +| CLONE_NEWUTS | 1 | +| CLONE_NEWIPC | 1 | +| CLONE_NEWNET | 1 | +| CLONE_NEWNS | 1 | +| CLONE_NEWUSER | 1 | +| CLONE_NEWCGROUP | 1 | + +Namespaces are created for the container via the `unshare` syscall. 
### Filesystem diff --git a/libcontainer/cgroups/utils.go b/libcontainer/cgroups/utils.go index 7c61ff13fcc..cd32a673532 100644 --- a/libcontainer/cgroups/utils.go +++ b/libcontainer/cgroups/utils.go @@ -17,7 +17,7 @@ import ( ) const ( - cgroupNamePrefix = "name=" + CgroupNamePrefix = "name=" CgroupProcesses = "cgroup.procs" ) @@ -156,8 +156,8 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, continue } ss[opt] = true - if strings.HasPrefix(opt, cgroupNamePrefix) { - opt = opt[len(cgroupNamePrefix):] + if strings.HasPrefix(opt, CgroupNamePrefix) { + opt = opt[len(CgroupNamePrefix):] } m.Subsystems = append(m.Subsystems, opt) numFound++ @@ -343,7 +343,7 @@ func getControllerPath(subsystem string, cgroups map[string]string) (string, err return p, nil } - if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok { + if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok { return p, nil } diff --git a/libcontainer/configs/namespaces_syscall.go b/libcontainer/configs/namespaces_syscall.go index f3e2dee831c..2dc7adfc966 100644 --- a/libcontainer/configs/namespaces_syscall.go +++ b/libcontainer/configs/namespaces_syscall.go @@ -8,9 +8,6 @@ func (n *Namespace) Syscall() int { return namespaceInfo[n.Type] } -// This is not yet in the Go stdlib. 
-const syscall_CLONE_NEWCGROUP = (1 << 29) - var namespaceInfo = map[NamespaceType]int{ NEWNET: unix.CLONE_NEWNET, NEWNS: unix.CLONE_NEWNS, @@ -18,7 +15,7 @@ var namespaceInfo = map[NamespaceType]int{ NEWIPC: unix.CLONE_NEWIPC, NEWUTS: unix.CLONE_NEWUTS, NEWPID: unix.CLONE_NEWPID, - NEWCGROUP: syscall_CLONE_NEWCGROUP, + NEWCGROUP: unix.CLONE_NEWCGROUP, } // CloneFlags parses the container's Namespaces options to set the correct diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go index 9c78141ebd3..3b42f30107a 100644 --- a/libcontainer/configs/validate/validator.go +++ b/libcontainer/configs/validate/validator.go @@ -38,6 +38,9 @@ func (v *ConfigValidator) Validate(config *configs.Config) error { if err := v.usernamespace(config); err != nil { return err } + if err := v.cgroupnamespace(config); err != nil { + return err + } if err := v.sysctl(config); err != nil { return err } @@ -116,6 +119,15 @@ func (v *ConfigValidator) usernamespace(config *configs.Config) error { return nil } +func (v *ConfigValidator) cgroupnamespace(config *configs.Config) error { + if config.Namespaces.Contains(configs.NEWCGROUP) { + if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) { + return fmt.Errorf("cgroup namespaces aren't enabled in the kernel") + } + } + return nil +} + // sysctl validates that the specified sysctl keys are valid or not. // /proc/sys isn't completely namespaced and depending on which namespaces // are specified, a subset of sysctls are permitted. diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index ac66de75253..2fb2d66f5bd 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -1745,7 +1745,6 @@ func (c *linuxContainer) currentState() (*State, error) { // can setns in order. 
func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) { paths := []string{} - for _, ns := range configs.NamespaceTypes() { // Remove namespaces that we don't need to join. diff --git a/libcontainer/integration/exec_test.go b/libcontainer/integration/exec_test.go index 932024d2fa2..bff6981db9f 100644 --- a/libcontainer/integration/exec_test.go +++ b/libcontainer/integration/exec_test.go @@ -1776,3 +1776,60 @@ func TestTmpfsCopyUp(t *testing.T) { t.Fatalf("/etc/passwd not copied up as expected: %v", outputLs) } } + +func TestCGROUPPrivate(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) { + t.Skip("cgroupns is unsupported") + } + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + l, err := os.Readlink("/proc/1/ns/cgroup") + ok(t, err) + + config := newTemplateConfig(rootfs) + config.Namespaces.Add(configs.NEWCGROUP, "") + buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/cgroup") + ok(t, err) + + if exitCode != 0 { + t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) + } + + if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l { + t.Fatalf("cgroup link should be private to the container but equals host %q %q", actual, l) + } +} + +func TestCGROUPHost(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) { + t.Skip("cgroupns is unsupported") + } + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + l, err := os.Readlink("/proc/1/ns/cgroup") + ok(t, err) + + config := newTemplateConfig(rootfs) + buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/cgroup") + ok(t, err) + + if exitCode != 0 { + t.Fatalf("exit code not 0. 
code %d stderr %q", exitCode, buffers.Stderr) + } + + if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l { + t.Fatalf("cgroup link not equal to host link %q %q", actual, l) + } +} diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index d7cb0af030e..d0016563db2 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -42,6 +42,12 @@ enum sync_t { SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */ }; +/* + * Synchronisation value for cgroup namespace setup. + * The same constant is defined in process_linux.go as "createCgroupns". + */ +#define CREATECGROUPNS 0x80 + /* longjmp() arguments. */ #define JUMP_PARENT 0x00 #define JUMP_CHILD 0xA0 @@ -201,7 +207,8 @@ static void update_setgroups(int pid, enum policy_t setgroup) * open(2) or write(2) will return ENOENT. This is fine. */ if (errno != ENOENT) - bail("failed to write '%s' to /proc/%d/setgroups", policy, pid); + bail("failed to write '%s' to /proc/%d/setgroups", + policy, pid); } } @@ -314,8 +321,8 @@ static int child_func(void *arg) longjmp(*ca->env, ca->jmpval); } -static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline)); -static int clone_parent(jmp_buf *env, int jmpval) +static int clone_parent(jmp_buf * env, int jmpval) __attribute__ ((noinline)); +static int clone_parent(jmp_buf * env, int jmpval) { struct clone_t ca = { .env = env, @@ -399,7 +406,8 @@ static void nl_parse(int fd, struct nlconfig_t *config) size = NLMSG_PAYLOAD(&hdr, 0); current = data = malloc(size); if (!data) - bail("failed to allocate %zu bytes of memory for nl_payload", size); + bail("failed to allocate %zu bytes of memory for nl_payload", + size); len = read(fd, data, size); if (len != size) @@ -450,7 +458,8 @@ static void nl_parse(int fd, struct nlconfig_t *config) config->is_setgroup = readint8(current); break; default: - bail("unknown netlink message type %d", nlattr->nla_type); + bail("unknown netlink message type 
%d", + nlattr->nla_type); } current += NLA_ALIGN(payload_len); @@ -488,7 +497,8 @@ void join_namespaces(char *nslist) struct namespace_t *ns; /* Resize the namespace array. */ - namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t)); + namespaces = + realloc(namespaces, ++num * sizeof(struct namespace_t)); if (!namespaces) bail("failed to reallocate namespace array"); ns = &namespaces[num - 1]; @@ -644,7 +654,8 @@ void nsexec(void) bool ready = false; /* For debugging. */ - prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0, 0, 0); + prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0, + 0, 0); /* Start the process of getting a container. */ child = clone_parent(&env, JUMP_CHILD); @@ -671,7 +682,8 @@ void nsexec(void) switch (s) { case SYNC_ERR: /* We have to mirror the error code of the child. */ - if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret)) + if (read(syncfd, &ret, sizeof(ret)) != + sizeof(ret)) bail("failed to sync with child: read(error code)"); exit(ret); @@ -687,15 +699,22 @@ void nsexec(void) * newuidmap/newgidmap shall be used. */ - if (config.is_rootless_euid && !config.is_setgroup) - update_setgroups(child, SETGROUPS_DENY); + if (config.is_rootless_euid + && !config.is_setgroup) + update_setgroups(child, + SETGROUPS_DENY); /* Set up mappings. */ - update_uidmap(config.uidmappath, child, config.uidmap, config.uidmap_len); - update_gidmap(config.gidmappath, child, config.gidmap, config.gidmap_len); + update_uidmap(config.uidmappath, child, + config.uidmap, + config.uidmap_len); + update_gidmap(config.gidmappath, child, + config.gidmap, + config.gidmap_len); s = SYNC_USERMAP_ACK; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { + if (write(syncfd, &s, sizeof(s)) != + sizeof(s)) { kill(child, SIGKILL); bail("failed to sync with child: write(SYNC_USERMAP_ACK)"); } @@ -704,18 +723,45 @@ void nsexec(void) first_child = child; /* Get the init_func pid. 
*/ - if (read(syncfd, &child, sizeof(child)) != sizeof(child)) { - kill(first_child, SIGKILL); + if (read + (syncfd, &child, + sizeof(child)) != + sizeof(child)) { + kill(first_child, + SIGKILL); bail("failed to sync with child: read(childpid)"); } /* Send ACK. */ s = SYNC_RECVPID_ACK; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - kill(first_child, SIGKILL); + if (write(syncfd, &s, sizeof(s)) + != sizeof(s)) { + kill(first_child, + SIGKILL); kill(child, SIGKILL); bail("failed to sync with child: write(SYNC_RECVPID_ACK)"); } + + /* Send the init_func pid back to our parent. + * + * Send the init_func pid and the pid of the first child back to our parent. + * We need to send both back because we can't reap the first child we created (CLONE_PARENT). + * It becomes the responsibility of our parent to reap the first child. + */ + len = + snprintf(buf, JSON_MAX, + "{\"pid\": %d, \"pid_first\": %d}\n", + child, + first_child); + if (len < 0) { + kill(child, SIGKILL); + bail("unable to generate JSON for child pid"); + } + if (write(pipenum, buf, len) != + len) { + kill(child, SIGKILL); + bail("unable to send child pid to bootstrapper"); + } } break; case SYNC_CHILD_READY: @@ -748,7 +794,8 @@ void nsexec(void) switch (s) { case SYNC_ERR: /* We have to mirror the error code of the child. */ - if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret)) + if (read(syncfd, &ret, sizeof(ret)) != + sizeof(ret)) bail("failed to sync with child: read(error code)"); exit(ret); @@ -759,23 +806,6 @@ void nsexec(void) bail("unexpected sync value: %u", s); } } - - /* - * Send the init_func pid and the pid of the first child back to our parent. - * - * We need to send both back because we can't reap the first child we created (CLONE_PARENT). - * It becomes the responsibility of our parent to reap the first child. 
- */ - len = snprintf(buf, JSON_MAX, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child); - if (len < 0) { - kill(child, SIGKILL); - bail("unable to generate JSON for child pid"); - } - if (write(pipenum, buf, len) != len) { - kill(child, SIGKILL); - bail("unable to send child pid to bootstrapper"); - } - exit(0); } @@ -797,7 +827,8 @@ void nsexec(void) close(sync_child_pipe[1]); /* For debugging. */ - prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0, 0, 0); + prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0, + 0, 0); /* * We need to setns first. We cannot do this earlier (in stage 0) @@ -839,7 +870,8 @@ void nsexec(void) /* Switching is only necessary if we joined namespaces. */ if (config.namespaces) { - if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0) + if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < + 0) bail("failed to set process as dumpable"); } s = SYNC_USERMAP_PLS; @@ -854,7 +886,8 @@ void nsexec(void) bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s); /* Switching is only necessary if we joined namespaces. */ if (config.namespaces) { - if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) + if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < + 0) bail("failed to set process as dumpable"); } @@ -862,14 +895,20 @@ void nsexec(void) if (setresuid(0, 0, 0) < 0) bail("failed to become root in user namespace"); } - - /* - * Unshare all of the namespaces. Note that we don't merge this - * with clone() because there were some old kernel versions where - * clone(CLONE_PARENT | CLONE_NEWPID) was broken, so we'll just do - * it the long way. +/* + * Unshare all of the namespaces. Now, it should be noted that this + * ordering might break in the future (especially with rootless + * containers). But for now, it's not possible to split this into + * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues. 
+ * + * Note that we don't merge this with clone() because there were + * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID) + * was broken, so we'll just do it the long way anyway. */ - if (unshare(config.cloneflags) < 0) + uint32_t flags = config.cloneflags; + if (config.cloneflags & CLONE_NEWCGROUP) + flags &= ~CLONE_NEWCGROUP; + if (unshare(flags) < 0) bail("failed to unshare namespaces"); /* @@ -891,7 +930,8 @@ void nsexec(void) kill(child, SIGKILL); bail("failed to sync with parent: write(SYNC_RECVPID_PLS)"); } - if (write(syncfd, &child, sizeof(child)) != sizeof(child)) { + if (write(syncfd, &child, sizeof(child)) != + sizeof(child)) { kill(child, SIGKILL); bail("failed to sync with parent: write(childpid)"); } @@ -937,7 +977,8 @@ void nsexec(void) close(sync_child_pipe[1]); /* For debugging. */ - prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0, 0); + prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0, + 0); if (read(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with parent: read(SYNC_GRANDCHILD)"); @@ -958,6 +999,19 @@ void nsexec(void) bail("setgroups failed"); } + /* ... wait until our topmost parent has finished cgroup setup in p.manager.Apply() ... */ + if (config.cloneflags & CLONE_NEWCGROUP) { + uint8_t value; + if (read(pipenum, &value, sizeof(value)) != + sizeof(value)) + bail("read synchronisation value failed"); + if (value == CREATECGROUPNS) { + if (unshare(CLONE_NEWCGROUP) < 0) + bail("failed to unshare cgroup namespace"); + } else + bail("received unknown synchronisation value"); + } + s = SYNC_CHILD_READY; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with patent: write(SYNC_CHILD_READY)"); diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index 217c213f8a3..4e2df593b7e 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -22,6 +22,10 @@ import ( "golang.org/x/sys/unix" ) +// Synchronisation value for cgroup namespace setup. 
+// The same constant is defined in nsexec.c as "CREATECGROUPNS". +const createCgroupns byte = (1 << 7) + type parentProcess interface { // pid returns the pid for the running process. pid() int @@ -225,12 +229,17 @@ func (p *initProcess) externalDescriptors() []string { return p.fds } -// execSetns runs the process that executes C code to perform the setns calls -// because setns support requires the C process to fork off a child and perform the setns -// before the go runtime boots, we wait on the process to die and receive the child's pid -// over the provided pipe. -// This is called by initProcess.start function -func (p *initProcess) execSetns() error { +// getChildPid receives the final child's pid over the provided pipe. +func (p *initProcess) getChildPid() (int, error) { + var pid pid + if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil { + p.cmd.Wait() + return -1, err + } + return pid.Pid, nil +} + +func (p *initProcess) waitForChildExit(childPid int) error { status, err := p.cmd.Process.Wait() if err != nil { p.cmd.Wait() @@ -240,22 +249,8 @@ func (p *initProcess) execSetns() error { p.cmd.Wait() return &exec.ExitError{ProcessState: status} } - var pid *pid - if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil { - p.cmd.Wait() - return err - } - - // Clean up the zombie parent process - firstChildProcess, err := os.FindProcess(pid.PidFirstChild) - if err != nil { - return err - } - - // Ignore the error in case the child has already been reaped for any reason - _, _ = firstChildProcess.Wait() - process, err := os.FindProcess(pid.Pid) + process, err := os.FindProcess(childPid) if err != nil { return err } @@ -297,19 +292,47 @@ func (p *initProcess) start() error { if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { return newSystemErrorWithCause(err, "copying bootstrap data to pipe") } - - if err := p.execSetns(); err != nil { - return newSystemErrorWithCause(err, "running exec setns process for init") + childPid, 
err := p.getChildPid() + if err != nil { + return newSystemErrorWithCause(err, "getting the final child's pid from pipe") } // Save the standard descriptor names before the container process // can potentially move them (e.g., via dup2()). If we don't do this now, // we won't know at checkpoint time which file descriptor to look up. - fds, err := getPipeFds(p.pid()) + fds, err := getPipeFds(childPid) if err != nil { - return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid()) + return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", childPid) } p.setExternalDescriptors(fds) + // Do this before syncing with child so that no children + // can escape the cgroup + if err := p.manager.Apply(childPid); err != nil { + return newSystemErrorWithCause(err, "applying cgroup configuration for process") + } + if p.intelRdtManager != nil { + if err := p.intelRdtManager.Apply(childPid); err != nil { + return newSystemErrorWithCause(err, "applying Intel RDT configuration for process") + } + } + // Now it's time to set up the cgroup namespace + if p.config.Config.Namespaces.Contains(configs.NEWCGROUP) && p.config.Config.Namespaces.PathOf(configs.NEWCGROUP) == "" { + if _, err := p.parentPipe.Write([]byte{createCgroupns}); err != nil { + return newSystemErrorWithCause(err, "sending synchronization value to init process") + } + } + + // Wait for our first child to exit + if err := p.waitForChildExit(childPid); err != nil { + return newSystemErrorWithCause(err, "waiting for our first child to exit") + } + + defer func() { + if err != nil { + // TODO: should not be the responsibility to call here + p.manager.Destroy() + } + }() if err := p.createNetworkInterfaces(); err != nil { return newSystemErrorWithCause(err, "creating network interfaces") } diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index 421abf64bc1..ff31eeed9c4 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -46,6 +46,7 @@ func 
prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) { return newSystemErrorWithCause(err, "preparing rootfs") } + hasCgroupns := config.Namespaces.Contains(configs.NEWCGROUP) setupDev := needsSetupDev(config) for _, m := range config.Mounts { for _, precmd := range m.PremountCmds { @@ -53,8 +54,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) { return newSystemErrorWithCause(err, "running premount command") } } - - if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil { + if err := mountToRootfs(m, config.Rootfs, config.MountLabel, hasCgroupns); err != nil { return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination) } @@ -182,7 +182,7 @@ func mountCmd(cmd configs.Command) error { return nil } -func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { +func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error { var ( dest = m.Destination ) @@ -319,12 +319,33 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { Data: "mode=755", PropagationFlags: m.PropagationFlags, } - if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil { + if err := mountToRootfs(tmpfs, rootfs, mountLabel, enableCgroupns); err != nil { return err } for _, b := range binds { - if err := mountToRootfs(b, rootfs, mountLabel); err != nil { - return err + if enableCgroupns { + subsystemPath := filepath.Join(rootfs, b.Destination) + if err := os.MkdirAll(subsystemPath, 0755); err != nil { + return err + } + flags := defaultMountFlags + if m.Flags&unix.MS_RDONLY != 0 { + flags = flags | unix.MS_RDONLY + } + cgroupmount := &configs.Mount{ + Source: "cgroup", + Device: "cgroup", + Destination: subsystemPath, + Flags: flags, + Data: filepath.Base(subsystemPath), + } + if err := mountNewCgroup(cgroupmount); err != nil { + return err + } + } else { + if err := mountToRootfs(b, rootfs, mountLabel, enableCgroupns); err != 
nil { + return err + } } } for _, mc := range merged { @@ -862,3 +883,18 @@ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error { } return nil } + +func mountNewCgroup(m *configs.Mount) error { + var ( + data = m.Data + source = m.Source + ) + if data == "systemd" { + data = cgroups.CgroupNamePrefix + data + source = "systemd" + } + if err := unix.Mount(source, m.Destination, m.Device, uintptr(m.Flags), data); err != nil { + return err + } + return nil +}