Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dump and restore containers with external terminals #1355

Merged
merged 9 commits into from
May 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ RUN apt-get update && apt-get install -y \
libcap-dev \
libprotobuf-dev \
libprotobuf-c0-dev \
libnl-3-dev \
libnet-dev \
libseccomp2/jessie-backports \
libseccomp-dev/jessie-backports \
protobuf-c-compiler \
Expand All @@ -38,7 +40,7 @@ RUN cd /tmp \
&& rm -rf /tmp/bats

# install criu
ENV CRIU_VERSION 1.7
ENV CRIU_VERSION 2.12
RUN mkdir -p /usr/src/criu \
&& curl -sSL https://github.com/xemul/criu/archive/v${CRIU_VERSION}.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 \
&& cd /usr/src/criu \
Expand Down
2 changes: 1 addition & 1 deletion create.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ command(s) that get executed on start, edit the args parameter of the spec. See
if err != nil {
return err
}
status, err := startContainer(context, spec, true)
status, err := startContainer(context, spec, CT_ACT_CREATE, nil)
if err != nil {
return err
}
Expand Down
1 change: 1 addition & 0 deletions exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ func execProcess(context *cli.Context) (int, error) {
consoleSocket: context.String("console-socket"),
detach: detach,
pidFile: context.String("pid-file"),
action: CT_ACT_RUN,
}
return r.run(p)
}
Expand Down
136 changes: 81 additions & 55 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"io"
"io/ioutil"
"net"
"os"
"os/exec"
"path/filepath"
Expand All @@ -17,6 +18,8 @@ import (
"syscall"
"time"

"golang.org/x/sys/unix"

"github.com/Sirupsen/logrus"
"github.com/golang/protobuf/proto"
"github.com/opencontainers/runc/libcontainer/cgroups"
Expand Down Expand Up @@ -637,7 +640,7 @@ func (c *linuxContainer) checkCriuVersion(minVersion string) error {
c.criuVersion = x*10000 + y*100 + z

if c.criuVersion < versionReq {
return fmt.Errorf("CRIU version must be %s or higher", minVersion)
return fmt.Errorf("CRIU version %d must be %d or higher", c.criuVersion, versionReq)
}

return nil
Expand Down Expand Up @@ -727,20 +730,26 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
defer imageDir.Close()

rpcOpts := criurpc.CriuOpts{
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
WorkDirFd: proto.Int32(int32(workDir.Fd())),
LogLevel: proto.Int32(4),
LogFile: proto.String("dump.log"),
Root: proto.String(c.config.Rootfs),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
Pid: proto.Int32(int32(c.initProcess.pid())),
ShellJob: proto.Bool(criuOpts.ShellJob),
LeaveRunning: proto.Bool(criuOpts.LeaveRunning),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
FileLocks: proto.Bool(criuOpts.FileLocks),
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
WorkDirFd: proto.Int32(int32(workDir.Fd())),
LogLevel: proto.Int32(4),
LogFile: proto.String("dump.log"),
Root: proto.String(c.config.Rootfs),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
Pid: proto.Int32(int32(c.initProcess.pid())),
ShellJob: proto.Bool(criuOpts.ShellJob),
LeaveRunning: proto.Bool(criuOpts.LeaveRunning),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
FileLocks: proto.Bool(criuOpts.FileLocks),
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
OrphanPtsMaster: proto.Bool(true),
}

fcg := c.cgroupManager.GetPaths()["freezer"]
if fcg != "" {
rpcOpts.FreezeCgroup = proto.String(fcg)
}

// append optional criu opts, e.g., page-server and port
Expand Down Expand Up @@ -923,20 +932,21 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
req := &criurpc.CriuReq{
Type: &t,
Opts: &criurpc.CriuOpts{
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
WorkDirFd: proto.Int32(int32(workDir.Fd())),
EvasiveDevices: proto.Bool(true),
LogLevel: proto.Int32(4),
LogFile: proto.String("restore.log"),
RstSibling: proto.Bool(true),
Root: proto.String(root),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
ShellJob: proto.Bool(criuOpts.ShellJob),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
FileLocks: proto.Bool(criuOpts.FileLocks),
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
WorkDirFd: proto.Int32(int32(workDir.Fd())),
EvasiveDevices: proto.Bool(true),
LogLevel: proto.Int32(4),
LogFile: proto.String("restore.log"),
RstSibling: proto.Bool(true),
Root: proto.String(root),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
ShellJob: proto.Bool(criuOpts.ShellJob),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
FileLocks: proto.Bool(criuOpts.FileLocks),
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
OrphanPtsMaster: proto.Bool(true),
},
}

Expand Down Expand Up @@ -1030,15 +1040,23 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
}

func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error {
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
fds, err := unix.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
if err != nil {
return err
}

logPath := filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile())
criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
criuClientFileCon, err := net.FileConn(criuClient)
criuClient.Close()
if err != nil {
return err
}

criuClientCon := criuClientFileCon.(*net.UnixConn)
defer criuClientCon.Close()

criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
defer criuClient.Close()
defer criuServer.Close()

args := []string{"swrk", "3"}
Expand All @@ -1058,7 +1076,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
criuServer.Close()

defer func() {
criuClient.Close()
criuClientCon.Close()
_, err := cmd.Process.Wait()
if err != nil {
return
Expand Down Expand Up @@ -1101,14 +1119,15 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
if err != nil {
return err
}
_, err = criuClient.Write(data)
_, err = criuClientCon.Write(data)
if err != nil {
return err
}

buf := make([]byte, 10*4096)
oob := make([]byte, 4096)
for true {
n, err := criuClient.Read(buf)
n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob)
if err != nil {
return err
}
Expand Down Expand Up @@ -1136,7 +1155,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
criuFeatures = resp.GetFeatures()
break
case t == criurpc.CriuReqType_NOTIFY:
if err := c.criuNotifications(resp, process, opts, extFds); err != nil {
if err := c.criuNotifications(resp, process, opts, extFds, oob[:oobn]); err != nil {
return err
}
t = criurpc.CriuReqType_NOTIFY
Expand All @@ -1148,45 +1167,37 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
if err != nil {
return err
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My guess is that criuClient is only a file, so Close doesn't quite do what it should?

criuClientCon can be cast to a unix socket, so maybe calling Close, or even CloseWrite (which is really SHUT_WR, like we do below), should do the correct thing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know what syscall.Shutdown() does. It works fast and reliably. I have called it thousands of times and it always works as expected ;)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dqminh I found that net.FileConn() creates a new file descriptor, and that was the reason why criuClientCon.Close() didn't close the socket. The current version of the patches doesn't use the raw shutdown() syscall.

}
_, err = criuClient.Write(data)
_, err = criuClientCon.Write(data)
if err != nil {
return err
}
continue
case t == criurpc.CriuReqType_RESTORE:
case t == criurpc.CriuReqType_DUMP:
break
case t == criurpc.CriuReqType_PRE_DUMP:
// In pre-dump mode CRIU is in a loop and waits for
// the final DUMP command.
// The current runc pre-dump approach, however, is
// start criu in PRE_DUMP once for a single pre-dump
// and not the whole series of pre-dump, pre-dump, ...m, dump
// If we got the message CriuReqType_PRE_DUMP it means
// CRIU was successful and we need to forcefully stop CRIU
logrus.Debugf("PRE_DUMP finished. Send close signal to CRIU service")
criuClient.Close()
// Process status won't be success, because one end of sockets is closed
_, err := cmd.Process.Wait()
if err != nil {
logrus.Debugf("After PRE_DUMP CRIU exiting failed")
return err
}
return nil
default:
return fmt.Errorf("unable to parse the response %s", resp.String())
}

break
}

criuClientCon.CloseWrite()
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
// Here we want to wait only the CRIU process.
st, err := cmd.Process.Wait()
if err != nil {
return err
}
if !st.Success() {

// In pre-dump mode CRIU is in a loop and waits for
// the final DUMP command.
// The current runc pre-dump approach, however, is
// start criu in PRE_DUMP once for a single pre-dump
// and not the whole series of pre-dump, pre-dump, ...m, dump
// If we got the message CriuReqType_PRE_DUMP it means
// CRIU was successful and we need to forcefully stop CRIU
if !st.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP {
return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath)
}
return nil
Expand Down Expand Up @@ -1220,11 +1231,12 @@ func unlockNetwork(config *configs.Config) error {
return nil
}

func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string) error {
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string, oob []byte) error {
notify := resp.GetNotify()
if notify == nil {
return fmt.Errorf("invalid response: %s", resp.String())
}
logrus.Debugf("notify: %s\n", notify.GetScript())
switch {
case notify.GetScript() == "post-dump":
f, err := os.Create(filepath.Join(c.root, "checkpoint"))
Expand Down Expand Up @@ -1277,6 +1289,20 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
logrus.Error(err)
}
}
case notify.GetScript() == "orphan-pts-master":
scm, err := syscall.ParseSocketControlMessage(oob)
if err != nil {
return err
}
fds, err := syscall.ParseUnixRights(&scm[0])

master := os.NewFile(uintptr(fds[0]), "orphan-pts-master")
defer master.Close()

// While we can access console.master, using the API is a good idea.
if err := utils.SendFd(process.ConsoleSocket, master); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So it's still a two-step socket translation? What #1356 does is try to avoid this. Can the master be sent directly to process.consoleSocket in the container?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We get this master from CRIU, and CRIU doesn't have access to process.consoleSocket.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But we can make it, right?

You can do something like this in criuSwrk: https://github.com/opencontainers/runc/blob/v1.0.0-rc3/libcontainer/container_linux.go#L373-L378

In criu, you can do similar as: https://github.com/opencontainers/runc/blob/v1.0.0-rc3/libcontainer/factory_linux.go#L251-L258

Then you'll have access to process.consoleSocket in criu.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately it isn't so easy. When file descriptors are restored, we have to be sure that the restored file descriptors do not intersect with the criu service descriptors. So the number of service descriptors is limited. It is one of the reasons why we can't pass extra file descriptors to criu restore.

Another reason is that we now have a very generic interface for handling external resources, and it allows handling any number of external terminals. It is impossible to pass a separate unix socket for each of them.

I understand your point, but I'm afraid there is no way to make it more optimal.

return err
}
}
return nil
}
Expand Down
Loading