diff --git a/libcontainer/README.md b/libcontainer/README.md
index 42f3efe5639..dcff231d8fc 100644
--- a/libcontainer/README.md
+++ b/libcontainer/README.md
@@ -148,6 +148,7 @@ config := &configs.Config{
 		{Type: configs.NEWPID},
 		{Type: configs.NEWUSER},
 		{Type: configs.NEWNET},
+		{Type: configs.NEWCGROUP},
 	}),
 	Cgroups: &configs.Cgroup{
 		Name:   "test-container",
diff --git a/libcontainer/SPEC.md b/libcontainer/SPEC.md
index 18bf64704b8..e474038a7c2 100644
--- a/libcontainer/SPEC.md
+++ b/libcontainer/SPEC.md
@@ -21,16 +21,17 @@ Minimum requirements:
 
 ### Namespaces
 
-|     Flag      | Enabled | 
-| ------------  | ------- |
-| CLONE_NEWPID  |    1    |
-| CLONE_NEWUTS  |    1    |
-| CLONE_NEWIPC  |    1    |
-| CLONE_NEWNET  |    1    |
-| CLONE_NEWNS   |    1    |
-| CLONE_NEWUSER |    1    |
-
-Namespaces are created for the container via the `clone` syscall.  
+|     Flag        | Enabled |
+| --------------- | ------- |
+| CLONE_NEWPID    |    1    |
+| CLONE_NEWUTS    |    1    |
+| CLONE_NEWIPC    |    1    |
+| CLONE_NEWNET    |    1    |
+| CLONE_NEWNS     |    1    |
+| CLONE_NEWUSER   |    1    |
+| CLONE_NEWCGROUP |    1    |
+
+Namespaces are created for the container via the `unshare` syscall.
 
 
 ### Filesystem
diff --git a/libcontainer/cgroups/utils.go b/libcontainer/cgroups/utils.go
index 7c61ff13fcc..cd32a673532 100644
--- a/libcontainer/cgroups/utils.go
+++ b/libcontainer/cgroups/utils.go
@@ -17,7 +17,7 @@ import (
 )
 
 const (
-	cgroupNamePrefix = "name="
+	CgroupNamePrefix = "name="
 	CgroupProcesses  = "cgroup.procs"
 )
 
@@ -156,8 +156,8 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount,
 				continue
 			}
 			ss[opt] = true
-			if strings.HasPrefix(opt, cgroupNamePrefix) {
-				opt = opt[len(cgroupNamePrefix):]
+			if strings.HasPrefix(opt, CgroupNamePrefix) {
+				opt = opt[len(CgroupNamePrefix):]
 			}
 			m.Subsystems = append(m.Subsystems, opt)
 			numFound++
@@ -343,7 +343,7 @@ func getControllerPath(subsystem string, cgroups map[string]string) (string, err
 		return p, nil
 	}
 
-	if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok {
+	if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
 		return p, nil
 	}
 
diff --git a/libcontainer/configs/namespaces_syscall.go b/libcontainer/configs/namespaces_syscall.go
index f3e2dee831c..2dc7adfc966 100644
--- a/libcontainer/configs/namespaces_syscall.go
+++ b/libcontainer/configs/namespaces_syscall.go
@@ -8,9 +8,6 @@ func (n *Namespace) Syscall() int {
 	return namespaceInfo[n.Type]
 }
 
-// This is not yet in the Go stdlib.
-const syscall_CLONE_NEWCGROUP = (1 << 29)
-
 var namespaceInfo = map[NamespaceType]int{
 	NEWNET:    unix.CLONE_NEWNET,
 	NEWNS:     unix.CLONE_NEWNS,
@@ -18,7 +15,7 @@ var namespaceInfo = map[NamespaceType]int{
 	NEWIPC:    unix.CLONE_NEWIPC,
 	NEWUTS:    unix.CLONE_NEWUTS,
 	NEWPID:    unix.CLONE_NEWPID,
-	NEWCGROUP: syscall_CLONE_NEWCGROUP,
+	NEWCGROUP: unix.CLONE_NEWCGROUP,
 }
 
 // CloneFlags parses the container's Namespaces options to set the correct
diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go
index 9c78141ebd3..3b42f30107a 100644
--- a/libcontainer/configs/validate/validator.go
+++ b/libcontainer/configs/validate/validator.go
@@ -38,6 +38,9 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
 	if err := v.usernamespace(config); err != nil {
 		return err
 	}
+	if err := v.cgroupnamespace(config); err != nil {
+		return err
+	}
 	if err := v.sysctl(config); err != nil {
 		return err
 	}
@@ -116,6 +119,15 @@ func (v *ConfigValidator) usernamespace(config *configs.Config) error {
 	return nil
 }
 
+func (v *ConfigValidator) cgroupnamespace(config *configs.Config) error {
+	if config.Namespaces.Contains(configs.NEWCGROUP) {
+		if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
+			return fmt.Errorf("cgroup namespaces aren't enabled in the kernel")
+		}
+	}
+	return nil
+}
+
 // sysctl validates that the specified sysctl keys are valid or not.
 // /proc/sys isn't completely namespaced and depending on which namespaces
 // are specified, a subset of sysctls are permitted.
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index ac66de75253..2fb2d66f5bd 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -1745,7 +1745,6 @@ func (c *linuxContainer) currentState() (*State, error) {
 // can setns in order.
 func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) {
 	paths := []string{}
-
 	for _, ns := range configs.NamespaceTypes() {
 
 		// Remove namespaces that we don't need to join.
diff --git a/libcontainer/integration/exec_test.go b/libcontainer/integration/exec_test.go
index 932024d2fa2..bff6981db9f 100644
--- a/libcontainer/integration/exec_test.go
+++ b/libcontainer/integration/exec_test.go
@@ -1776,3 +1776,60 @@ func TestTmpfsCopyUp(t *testing.T) {
 		t.Fatalf("/etc/passwd not copied up as expected: %v", outputLs)
 	}
 }
+
+func TestCGROUPPrivate(t *testing.T) {
+	if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
+		t.Skip("cgroupns is unsupported")
+	}
+	if testing.Short() {
+		return
+	}
+
+	rootfs, err := newRootfs()
+	ok(t, err)
+	defer remove(rootfs)
+
+	l, err := os.Readlink("/proc/1/ns/cgroup")
+	ok(t, err)
+
+	config := newTemplateConfig(rootfs)
+	config.Namespaces.Add(configs.NEWCGROUP, "")
+	buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/cgroup")
+	ok(t, err)
+
+	if exitCode != 0 {
+		t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
+	}
+
+	if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l {
+		t.Fatalf("cgroup link should be private to the container but equals host %q %q", actual, l)
+	}
+}
+
+func TestCGROUPHost(t *testing.T) {
+	if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
+		t.Skip("cgroupns is unsupported")
+	}
+	if testing.Short() {
+		return
+	}
+
+	rootfs, err := newRootfs()
+	ok(t, err)
+	defer remove(rootfs)
+
+	l, err := os.Readlink("/proc/1/ns/cgroup")
+	ok(t, err)
+
+	config := newTemplateConfig(rootfs)
+	buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/cgroup")
+	ok(t, err)
+
+	if exitCode != 0 {
+		t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
+	}
+
+	if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l {
+		t.Fatalf("cgroup link not equal to host link %q %q", actual, l)
+	}
+}
diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c
index d7cb0af030e..d0016563db2 100644
--- a/libcontainer/nsenter/nsexec.c
+++ b/libcontainer/nsenter/nsexec.c
@@ -42,6 +42,12 @@ enum sync_t {
 	SYNC_ERR = 0xFF,	/* Fatal error, no turning back. The error code follows. */
 };
 
+/*
+ * Synchronisation value for cgroup namespace setup.
+ * The same constant is defined in process_linux.go as "createCgroupns".
+ */
+#define CREATECGROUPNS 0x80
+
 /* longjmp() arguments. */
 #define JUMP_PARENT 0x00
 #define JUMP_CHILD  0xA0
@@ -201,7 +207,8 @@ static void update_setgroups(int pid, enum policy_t setgroup)
 		 * open(2) or write(2) will return ENOENT. This is fine.
 		 */
 		if (errno != ENOENT)
-			bail("failed to write '%s' to /proc/%d/setgroups", policy, pid);
+			bail("failed to write '%s' to /proc/%d/setgroups",
+			     policy, pid);
 	}
 }
 
@@ -314,8 +321,8 @@ static int child_func(void *arg)
 	longjmp(*ca->env, ca->jmpval);
 }
 
-static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline));
-static int clone_parent(jmp_buf *env, int jmpval)
+static int clone_parent(jmp_buf * env, int jmpval) __attribute__ ((noinline));
+static int clone_parent(jmp_buf * env, int jmpval)
 {
 	struct clone_t ca = {
 		.env = env,
@@ -399,7 +406,8 @@ static void nl_parse(int fd, struct nlconfig_t *config)
 	size = NLMSG_PAYLOAD(&hdr, 0);
 	current = data = malloc(size);
 	if (!data)
-		bail("failed to allocate %zu bytes of memory for nl_payload", size);
+		bail("failed to allocate %zu bytes of memory for nl_payload",
+		     size);
 
 	len = read(fd, data, size);
 	if (len != size)
@@ -450,7 +458,8 @@ static void nl_parse(int fd, struct nlconfig_t *config)
 			config->is_setgroup = readint8(current);
 			break;
 		default:
-			bail("unknown netlink message type %d", nlattr->nla_type);
+			bail("unknown netlink message type %d",
+			     nlattr->nla_type);
 		}
 
 		current += NLA_ALIGN(payload_len);
@@ -488,7 +497,8 @@ void join_namespaces(char *nslist)
 		struct namespace_t *ns;
 
 		/* Resize the namespace array. */
-		namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t));
+		namespaces =
+		    realloc(namespaces, ++num * sizeof(struct namespace_t));
 		if (!namespaces)
 			bail("failed to reallocate namespace array");
 		ns = &namespaces[num - 1];
@@ -644,7 +654,8 @@ void nsexec(void)
 			bool ready = false;
 
 			/* For debugging. */
-			prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0, 0, 0);
+			prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0,
+			      0, 0);
 
 			/* Start the process of getting a container. */
 			child = clone_parent(&env, JUMP_CHILD);
@@ -671,7 +682,8 @@ void nsexec(void)
 				switch (s) {
 				case SYNC_ERR:
 					/* We have to mirror the error code of the child. */
-					if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
+					if (read(syncfd, &ret, sizeof(ret)) !=
+					    sizeof(ret))
 						bail("failed to sync with child: read(error code)");
 
 					exit(ret);
@@ -687,15 +699,22 @@ void nsexec(void)
 					 * newuidmap/newgidmap shall be used.
 					 */
 
-					if (config.is_rootless_euid && !config.is_setgroup)
-						update_setgroups(child, SETGROUPS_DENY);
+					if (config.is_rootless_euid
+					    && !config.is_setgroup)
+						update_setgroups(child,
+								 SETGROUPS_DENY);
 
 					/* Set up mappings. */
-					update_uidmap(config.uidmappath, child, config.uidmap, config.uidmap_len);
-					update_gidmap(config.gidmappath, child, config.gidmap, config.gidmap_len);
+					update_uidmap(config.uidmappath, child,
+						      config.uidmap,
+						      config.uidmap_len);
+					update_gidmap(config.gidmappath, child,
+						      config.gidmap,
+						      config.gidmap_len);
 
 					s = SYNC_USERMAP_ACK;
-					if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
+					if (write(syncfd, &s, sizeof(s)) !=
+					    sizeof(s)) {
 						kill(child, SIGKILL);
 						bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
 					}
@@ -704,18 +723,45 @@ void nsexec(void)
 						first_child = child;
 
 						/* Get the init_func pid. */
-						if (read(syncfd, &child, sizeof(child)) != sizeof(child)) {
-							kill(first_child, SIGKILL);
+						if (read
+						    (syncfd, &child,
+						     sizeof(child)) !=
+						    sizeof(child)) {
+							kill(first_child,
+							     SIGKILL);
 							bail("failed to sync with child: read(childpid)");
 						}
 
 						/* Send ACK. */
 						s = SYNC_RECVPID_ACK;
-						if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
-							kill(first_child, SIGKILL);
+						if (write(syncfd, &s, sizeof(s))
+						    != sizeof(s)) {
+							kill(first_child,
+							     SIGKILL);
 							kill(child, SIGKILL);
 							bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
 						}
+
+						/*
+						 * Send the init_func pid and the pid of the first child back to our parent.
+						 *
+						 * We need to send both back because we can't reap the first child we created (CLONE_PARENT).
+						 * It becomes the responsibility of our parent to reap the first child.
+						 */
+						len =
+						    snprintf(buf, JSON_MAX,
+							     "{\"pid\": %d, \"pid_first\": %d}\n",
+							     child,
+							     first_child);
+						if (len < 0) {
+							kill(child, SIGKILL);
+							bail("unable to generate JSON for child pid");
+						}
+						if (write(pipenum, buf, len) !=
+						    len) {
+							kill(child, SIGKILL);
+							bail("unable to send child pid to bootstrapper");
+						}
 					}
 					break;
 				case SYNC_CHILD_READY:
@@ -748,7 +794,8 @@ void nsexec(void)
 				switch (s) {
 				case SYNC_ERR:
 					/* We have to mirror the error code of the child. */
-					if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
+					if (read(syncfd, &ret, sizeof(ret)) !=
+					    sizeof(ret))
 						bail("failed to sync with child: read(error code)");
 
 					exit(ret);
@@ -759,23 +806,6 @@ void nsexec(void)
 					bail("unexpected sync value: %u", s);
 				}
 			}
-
-			/*
-			 * Send the init_func pid and the pid of the first child back to our parent.
-			 *
-			 * We need to send both back because we can't reap the first child we created (CLONE_PARENT).
-			 * It becomes the responsibility of our parent to reap the first child.
-			 */
-			len = snprintf(buf, JSON_MAX, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child);
-			if (len < 0) {
-				kill(child, SIGKILL);
-				bail("unable to generate JSON for child pid");
-			}
-			if (write(pipenum, buf, len) != len) {
-				kill(child, SIGKILL);
-				bail("unable to send child pid to bootstrapper");
-			}
-
 			exit(0);
 		}
 
@@ -797,7 +827,8 @@ void nsexec(void)
 			close(sync_child_pipe[1]);
 
 			/* For debugging. */
-			prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0, 0, 0);
+			prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0,
+			      0, 0);
 
 			/*
 			 * We need to setns first. We cannot do this earlier (in stage 0)
@@ -839,7 +870,8 @@ void nsexec(void)
 
 				/* Switching is only necessary if we joined namespaces. */
 				if (config.namespaces) {
-					if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
+					if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) <
+					    0)
 						bail("failed to set process as dumpable");
 				}
 				s = SYNC_USERMAP_PLS;
@@ -854,7 +886,8 @@ void nsexec(void)
 					bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
 				/* Switching is only necessary if we joined namespaces. */
 				if (config.namespaces) {
-					if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
+					if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) <
+					    0)
 						bail("failed to set process as dumpable");
 				}
 
@@ -862,14 +895,20 @@ void nsexec(void)
 				if (setresuid(0, 0, 0) < 0)
 					bail("failed to become root in user namespace");
 			}
-
-			/*
-			 * Unshare all of the namespaces. Note that we don't merge this
-			 * with clone() because there were some old kernel versions where
-			 * clone(CLONE_PARENT | CLONE_NEWPID) was broken, so we'll just do
-			 * it the long way.
+			/*
+			 * Unshare all of the namespaces except the cgroup namespace, which
+			 * the init stage unshares later, after cgroup setup has finished.
+			 * Now, it should be noted that this ordering might break in the
+			 * future (especially with rootless containers). But for now, it's
+			 * not possible to split this into CLONE_NEWUSER + [the rest] because
+			 * of some RHEL SELinux issues. We don't merge this with clone()
+			 * because some old kernel versions had a broken
+			 * clone(CLONE_PARENT | CLONE_NEWPID), so we do it the long way.
+			 */
-			if (unshare(config.cloneflags) < 0)
+			uint32_t flags = config.cloneflags;
+			if (config.cloneflags & CLONE_NEWCGROUP)
+				flags &= ~CLONE_NEWCGROUP;
+			if (unshare(flags) < 0)
 				bail("failed to unshare namespaces");
 
 			/*
@@ -891,7 +930,8 @@ void nsexec(void)
 				kill(child, SIGKILL);
 				bail("failed to sync with parent: write(SYNC_RECVPID_PLS)");
 			}
-			if (write(syncfd, &child, sizeof(child)) != sizeof(child)) {
+			if (write(syncfd, &child, sizeof(child)) !=
+			    sizeof(child)) {
 				kill(child, SIGKILL);
 				bail("failed to sync with parent: write(childpid)");
 			}
@@ -937,7 +977,8 @@ void nsexec(void)
 			close(sync_child_pipe[1]);
 
 			/* For debugging. */
-			prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0, 0);
+			prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0,
+			      0);
 
 			if (read(syncfd, &s, sizeof(s)) != sizeof(s))
 				bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
@@ -958,6 +999,19 @@ void nsexec(void)
 					bail("setgroups failed");
 			}
 
+			/* ... wait until our topmost parent has finished cgroup setup in p.manager.Apply() ... */
+			if (config.cloneflags & CLONE_NEWCGROUP) {
+				uint8_t value;
+				if (read(pipenum, &value, sizeof(value)) !=
+				    sizeof(value))
+					bail("read synchronisation value failed");
+				if (value == CREATECGROUPNS) {
+					if (unshare(CLONE_NEWCGROUP) < 0)
+						bail("failed to unshare cgroup namespace");
+				} else
+					bail("received unknown synchronisation value");
+			}
+
 			s = SYNC_CHILD_READY;
 			if (write(syncfd, &s, sizeof(s)) != sizeof(s))
 				bail("failed to sync with patent: write(SYNC_CHILD_READY)");
diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go
index 217c213f8a3..4e2df593b7e 100644
--- a/libcontainer/process_linux.go
+++ b/libcontainer/process_linux.go
@@ -22,6 +22,10 @@ import (
 	"golang.org/x/sys/unix"
 )
 
+// Synchronisation value for cgroup namespace setup.
+// The same constant is defined in nsexec.c as "CREATECGROUPNS".
+const createCgroupns byte = (1 << 7)
+
 type parentProcess interface {
 	// pid returns the pid for the running process.
 	pid() int
@@ -225,12 +229,17 @@ func (p *initProcess) externalDescriptors() []string {
 	return p.fds
 }
 
-// execSetns runs the process that executes C code to perform the setns calls
-// because setns support requires the C process to fork off a child and perform the setns
-// before the go runtime boots, we wait on the process to die and receive the child's pid
-// over the provided pipe.
-// This is called by initProcess.start function
-func (p *initProcess) execSetns() error {
+// getChildPid receives the final child's pid over the provided pipe.
+func (p *initProcess) getChildPid() (int, error) {
+	var pid pid
+	if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
+		p.cmd.Wait()
+		return -1, err
+	}
+	return pid.Pid, nil
+}
+
+func (p *initProcess) waitForChildExit(childPid int) error {
 	status, err := p.cmd.Process.Wait()
 	if err != nil {
 		p.cmd.Wait()
@@ -240,22 +249,8 @@ func (p *initProcess) execSetns() error {
 		p.cmd.Wait()
 		return &exec.ExitError{ProcessState: status}
 	}
-	var pid *pid
-	if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
-		p.cmd.Wait()
-		return err
-	}
-
-	// Clean up the zombie parent process
-	firstChildProcess, err := os.FindProcess(pid.PidFirstChild)
-	if err != nil {
-		return err
-	}
-
-	// Ignore the error in case the child has already been reaped for any reason
-	_, _ = firstChildProcess.Wait()
 
-	process, err := os.FindProcess(pid.Pid)
+	process, err := os.FindProcess(childPid)
 	if err != nil {
 		return err
 	}
@@ -297,19 +292,47 @@ func (p *initProcess) start() error {
 	if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
 		return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
 	}
-
-	if err := p.execSetns(); err != nil {
-		return newSystemErrorWithCause(err, "running exec setns process for init")
+	childPid, err := p.getChildPid()
+	if err != nil {
+		return newSystemErrorWithCause(err, "getting the final child's pid from pipe")
 	}
 
 	// Save the standard descriptor names before the container process
 	// can potentially move them (e.g., via dup2()).  If we don't do this now,
 	// we won't know at checkpoint time which file descriptor to look up.
-	fds, err := getPipeFds(p.pid())
+	fds, err := getPipeFds(childPid)
 	if err != nil {
-		return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
+		return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", childPid)
 	}
 	p.setExternalDescriptors(fds)
+	// Do this before syncing with child so that no children
+	// can escape the cgroup
+	if err := p.manager.Apply(childPid); err != nil {
+		return newSystemErrorWithCause(err, "applying cgroup configuration for process")
+	}
+	if p.intelRdtManager != nil {
+		if err := p.intelRdtManager.Apply(childPid); err != nil {
+			return newSystemErrorWithCause(err, "applying Intel RDT configuration for process")
+		}
+	}
+	// Now it's time to set up the cgroup namespace
+	if p.config.Config.Namespaces.Contains(configs.NEWCGROUP) && p.config.Config.Namespaces.PathOf(configs.NEWCGROUP) == "" {
+		if _, err := p.parentPipe.Write([]byte{createCgroupns}); err != nil {
+			return newSystemErrorWithCause(err, "sending synchronization value to init process")
+		}
+	}
+
+	// Wait for our first child to exit
+	if err := p.waitForChildExit(childPid); err != nil {
+		return newSystemErrorWithCause(err, "waiting for our first child to exit")
+	}
+
+	defer func() {
+		if err != nil {
+			// TODO: calling Destroy should not be the responsibility of this code
+			p.manager.Destroy()
+		}
+	}()
 	if err := p.createNetworkInterfaces(); err != nil {
 		return newSystemErrorWithCause(err, "creating network interfaces")
 	}
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
index 421abf64bc1..ff31eeed9c4 100644
--- a/libcontainer/rootfs_linux.go
+++ b/libcontainer/rootfs_linux.go
@@ -46,6 +46,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
 		return newSystemErrorWithCause(err, "preparing rootfs")
 	}
 
+	hasCgroupns := config.Namespaces.Contains(configs.NEWCGROUP)
 	setupDev := needsSetupDev(config)
 	for _, m := range config.Mounts {
 		for _, precmd := range m.PremountCmds {
@@ -53,8 +54,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
 				return newSystemErrorWithCause(err, "running premount command")
 			}
 		}
-
-		if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
+		if err := mountToRootfs(m, config.Rootfs, config.MountLabel, hasCgroupns); err != nil {
 			return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
 		}
 
@@ -182,7 +182,7 @@ func mountCmd(cmd configs.Command) error {
 	return nil
 }
 
-func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
+func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
 	var (
 		dest = m.Destination
 	)
@@ -319,12 +319,33 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 			Data:             "mode=755",
 			PropagationFlags: m.PropagationFlags,
 		}
-		if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
+		if err := mountToRootfs(tmpfs, rootfs, mountLabel, enableCgroupns); err != nil {
 			return err
 		}
 		for _, b := range binds {
-			if err := mountToRootfs(b, rootfs, mountLabel); err != nil {
-				return err
+			if enableCgroupns {
+				subsystemPath := filepath.Join(rootfs, b.Destination)
+				if err := os.MkdirAll(subsystemPath, 0755); err != nil {
+					return err
+				}
+				flags := defaultMountFlags
+				if m.Flags&unix.MS_RDONLY != 0 {
+					flags = flags | unix.MS_RDONLY
+				}
+				cgroupmount := &configs.Mount{
+					Source:      "cgroup",
+					Device:      "cgroup",
+					Destination: subsystemPath,
+					Flags:       flags,
+					Data:        filepath.Base(subsystemPath),
+				}
+				if err := mountNewCgroup(cgroupmount); err != nil {
+					return err
+				}
+			} else {
+				if err := mountToRootfs(b, rootfs, mountLabel, enableCgroupns); err != nil {
+					return err
+				}
 			}
 		}
 		for _, mc := range merged {
@@ -862,3 +883,18 @@ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
 	}
 	return nil
 }
+
+func mountNewCgroup(m *configs.Mount) error {
+	var (
+		data   = m.Data
+		source = m.Source
+	)
+	if data == "systemd" {
+		data = cgroups.CgroupNamePrefix + data
+		source = "systemd"
+	}
+	if err := unix.Mount(source, m.Destination, m.Device, uintptr(m.Flags), data); err != nil {
+		return err
+	}
+	return nil
+}