Skip to content

Commit

Permalink
Fix race condition when sync with child and grandchild
Browse files Browse the repository at this point in the history
Fixes: #1236

Signed-off-by: Qiang Huang <[email protected]>
  • Loading branch information
hqhq committed Dec 19, 2016
1 parent 27a67c9 commit 481af80
Showing 1 changed file with 73 additions and 35 deletions.
108 changes: 73 additions & 35 deletions libcontainer/nsenter/nsexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ enum sync_t {
SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */
SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
SYNC_CHILD_READY = 0x44, /* The grandchild is ready to return. */
SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */
SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */

/* XXX: This doesn't help with segfaults and other such issues. */
SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
Expand Down Expand Up @@ -413,7 +414,7 @@ void nsexec(void)
{
int pipenum;
jmp_buf env;
int syncpipe[2];
int syncChildPipe[2], syncGrandchildPipe[2];
struct nlconfig_t config = {0};

/*
Expand All @@ -428,9 +429,16 @@ void nsexec(void)
nl_parse(pipenum, &config);

/* Pipe so we can tell the child when we've finished setting up. */
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, syncpipe) < 0)
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, syncChildPipe) < 0)
bail("failed to setup sync pipe between parent and child");

/*
* We need a new socketpair to sync with grandchild so we don't have
* race condition with child.
*/
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, syncGrandchildPipe) < 0)
bail("failed to setup sync pipe between parent and grandchild");

/* TODO: Currently we aren't dealing with child deaths properly. */

/*
Expand Down Expand Up @@ -489,9 +497,10 @@ void nsexec(void)
* process.
*/
case JUMP_PARENT: {
int len, ready = 0;
int len;
pid_t child;
char buf[JSON_MAX];
bool ready = false;

/* For debugging. */
prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0);
Expand All @@ -508,26 +517,23 @@ void nsexec(void)
* ready, so we can receive all possible error codes
* generated by children.
*/
while (ready < 2) {
while (!ready) {
enum sync_t s;
int ret;

/* This doesn't need to be global, we're in the parent. */
int syncfd = syncpipe[1];
syncfd = syncChildPipe[1];
close(syncChildPipe[0]);

if (read(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with child: next state");

switch (s) {
case SYNC_ERR: {
/* We have to mirror the error code of the child. */
int ret;
case SYNC_ERR:
/* We have to mirror the error code of the child. */
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
bail("failed to sync with child: read(error code)");

if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
bail("failed to sync with child: read(error code)");

exit(ret);
}
break;
exit(ret);
case SYNC_USERMAP_PLS:
/* Enable setgroups(2) if we've been asked to. */
if (config.is_setgroup)
Expand All @@ -543,11 +549,6 @@ void nsexec(void)
bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
}
break;
case SYNC_USERMAP_ACK:
/* We should _never_ receive acks. */
kill(child, SIGKILL);
bail("failed to sync with child: unexpected SYNC_USERMAP_ACK");
break;
case SYNC_RECVPID_PLS: {
pid_t old = child;

Expand All @@ -565,20 +566,46 @@ void nsexec(void)
bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
}
}

ready++;
break;
case SYNC_RECVPID_ACK:
/* We should _never_ receive acks. */
kill(child, SIGKILL);
bail("failed to sync with child: unexpected SYNC_RECVPID_ACK");
break;
case SYNC_CHILD_READY:
ready++;
ready = true;
break;
default:
bail("unexpected sync value");
bail("unexpected sync value: %u", s);
}
}

/* Now sync with grandchild. */

ready = false;
while (!ready) {
enum sync_t s;
int ret;

syncfd = syncGrandchildPipe[1];
close(syncGrandchildPipe[0]);

s = SYNC_GRANDCHILD;
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
kill(child, SIGKILL);
bail("failed to sync with child: write(SYNC_GRANDCHILD)");
}

if (read(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with child: next state");

switch (s) {
case SYNC_ERR:
/* We have to mirror the error code of the child. */
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
bail("failed to sync with child: read(error code)");

exit(ret);
case SYNC_CHILD_READY:
ready = true;
break;
default:
bail("unexpected sync value: %u", s);
}
}

Expand Down Expand Up @@ -610,7 +637,8 @@ void nsexec(void)
enum sync_t s;

/* We're in a child and thus need to tell the parent if we die. */
syncfd = syncpipe[0];
syncfd = syncChildPipe[0];
close(syncChildPipe[1]);

/* For debugging. */
prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0);
Expand Down Expand Up @@ -695,6 +723,10 @@ void nsexec(void)
bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
}

s = SYNC_CHILD_READY;
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with patent: write(SYNC_CHILD_READY)");

/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
exit(0);
}
Expand All @@ -713,11 +745,19 @@ void nsexec(void)
enum sync_t s;

/* We're in a child and thus need to tell the parent if we die. */
syncfd = syncpipe[0];
syncfd = syncGrandchildPipe[0];
close(syncGrandchildPipe[1]);
close(syncChildPipe[0]);
close(syncChildPipe[1]);

/* For debugging. */
prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0);

if (read(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
if (s != SYNC_GRANDCHILD)
bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s);

if (setsid() < 0)
bail("setsid failed");

Expand All @@ -735,8 +775,7 @@ void nsexec(void)
bail("failed to sync with patent: write(SYNC_CHILD_READY)");

/* Close sync pipes. */
close(syncpipe[0]);
close(syncpipe[1]);
close(syncGrandchildPipe[0]);

/* Free netlink data. */
nl_free(&config);
Expand All @@ -746,7 +785,6 @@ void nsexec(void)
}
default:
bail("unexpected jump value");
break;
}

/* Should never be reached. */
Expand Down

0 comments on commit 481af80

Please sign in to comment.