-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Dump and restore containers with external terminals #1355
Changes from all commits
d307e85
f8ca192
ffeedc4
1a8b0ac
a4fcbfb
1c43d09
fe03957
7325881
459a17b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ import ( | |
"fmt" | ||
"io" | ||
"io/ioutil" | ||
"net" | ||
"os" | ||
"os/exec" | ||
"path/filepath" | ||
|
@@ -17,6 +18,8 @@ import ( | |
"syscall" | ||
"time" | ||
|
||
"golang.org/x/sys/unix" | ||
|
||
"github.com/Sirupsen/logrus" | ||
"github.com/golang/protobuf/proto" | ||
"github.com/opencontainers/runc/libcontainer/cgroups" | ||
|
@@ -637,7 +640,7 @@ func (c *linuxContainer) checkCriuVersion(minVersion string) error { | |
c.criuVersion = x*10000 + y*100 + z | ||
|
||
if c.criuVersion < versionReq { | ||
return fmt.Errorf("CRIU version must be %s or higher", minVersion) | ||
return fmt.Errorf("CRIU version %d must be %d or higher", c.criuVersion, versionReq) | ||
} | ||
|
||
return nil | ||
|
@@ -727,20 +730,26 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { | |
defer imageDir.Close() | ||
|
||
rpcOpts := criurpc.CriuOpts{ | ||
ImagesDirFd: proto.Int32(int32(imageDir.Fd())), | ||
WorkDirFd: proto.Int32(int32(workDir.Fd())), | ||
LogLevel: proto.Int32(4), | ||
LogFile: proto.String("dump.log"), | ||
Root: proto.String(c.config.Rootfs), | ||
ManageCgroups: proto.Bool(true), | ||
NotifyScripts: proto.Bool(true), | ||
Pid: proto.Int32(int32(c.initProcess.pid())), | ||
ShellJob: proto.Bool(criuOpts.ShellJob), | ||
LeaveRunning: proto.Bool(criuOpts.LeaveRunning), | ||
TcpEstablished: proto.Bool(criuOpts.TcpEstablished), | ||
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), | ||
FileLocks: proto.Bool(criuOpts.FileLocks), | ||
EmptyNs: proto.Uint32(criuOpts.EmptyNs), | ||
ImagesDirFd: proto.Int32(int32(imageDir.Fd())), | ||
WorkDirFd: proto.Int32(int32(workDir.Fd())), | ||
LogLevel: proto.Int32(4), | ||
LogFile: proto.String("dump.log"), | ||
Root: proto.String(c.config.Rootfs), | ||
ManageCgroups: proto.Bool(true), | ||
NotifyScripts: proto.Bool(true), | ||
Pid: proto.Int32(int32(c.initProcess.pid())), | ||
ShellJob: proto.Bool(criuOpts.ShellJob), | ||
LeaveRunning: proto.Bool(criuOpts.LeaveRunning), | ||
TcpEstablished: proto.Bool(criuOpts.TcpEstablished), | ||
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), | ||
FileLocks: proto.Bool(criuOpts.FileLocks), | ||
EmptyNs: proto.Uint32(criuOpts.EmptyNs), | ||
OrphanPtsMaster: proto.Bool(true), | ||
} | ||
|
||
fcg := c.cgroupManager.GetPaths()["freezer"] | ||
if fcg != "" { | ||
rpcOpts.FreezeCgroup = proto.String(fcg) | ||
} | ||
|
||
// append optional criu opts, e.g., page-server and port | ||
|
@@ -923,20 +932,21 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { | |
req := &criurpc.CriuReq{ | ||
Type: &t, | ||
Opts: &criurpc.CriuOpts{ | ||
ImagesDirFd: proto.Int32(int32(imageDir.Fd())), | ||
WorkDirFd: proto.Int32(int32(workDir.Fd())), | ||
EvasiveDevices: proto.Bool(true), | ||
LogLevel: proto.Int32(4), | ||
LogFile: proto.String("restore.log"), | ||
RstSibling: proto.Bool(true), | ||
Root: proto.String(root), | ||
ManageCgroups: proto.Bool(true), | ||
NotifyScripts: proto.Bool(true), | ||
ShellJob: proto.Bool(criuOpts.ShellJob), | ||
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), | ||
TcpEstablished: proto.Bool(criuOpts.TcpEstablished), | ||
FileLocks: proto.Bool(criuOpts.FileLocks), | ||
EmptyNs: proto.Uint32(criuOpts.EmptyNs), | ||
ImagesDirFd: proto.Int32(int32(imageDir.Fd())), | ||
WorkDirFd: proto.Int32(int32(workDir.Fd())), | ||
EvasiveDevices: proto.Bool(true), | ||
LogLevel: proto.Int32(4), | ||
LogFile: proto.String("restore.log"), | ||
RstSibling: proto.Bool(true), | ||
Root: proto.String(root), | ||
ManageCgroups: proto.Bool(true), | ||
NotifyScripts: proto.Bool(true), | ||
ShellJob: proto.Bool(criuOpts.ShellJob), | ||
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), | ||
TcpEstablished: proto.Bool(criuOpts.TcpEstablished), | ||
FileLocks: proto.Bool(criuOpts.FileLocks), | ||
EmptyNs: proto.Uint32(criuOpts.EmptyNs), | ||
OrphanPtsMaster: proto.Bool(true), | ||
}, | ||
} | ||
|
||
|
@@ -1030,15 +1040,23 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { | |
} | ||
|
||
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error { | ||
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0) | ||
fds, err := unix.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
logPath := filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile()) | ||
criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client") | ||
criuClientFileCon, err := net.FileConn(criuClient) | ||
criuClient.Close() | ||
if err != nil { | ||
return err | ||
} | ||
|
||
criuClientCon := criuClientFileCon.(*net.UnixConn) | ||
defer criuClientCon.Close() | ||
|
||
criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server") | ||
defer criuClient.Close() | ||
defer criuServer.Close() | ||
|
||
args := []string{"swrk", "3"} | ||
|
@@ -1058,7 +1076,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * | |
criuServer.Close() | ||
|
||
defer func() { | ||
criuClient.Close() | ||
criuClientCon.Close() | ||
_, err := cmd.Process.Wait() | ||
if err != nil { | ||
return | ||
|
@@ -1101,14 +1119,15 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * | |
if err != nil { | ||
return err | ||
} | ||
_, err = criuClient.Write(data) | ||
_, err = criuClientCon.Write(data) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
buf := make([]byte, 10*4096) | ||
oob := make([]byte, 4096) | ||
for true { | ||
n, err := criuClient.Read(buf) | ||
n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob) | ||
if err != nil { | ||
return err | ||
} | ||
|
@@ -1136,7 +1155,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * | |
criuFeatures = resp.GetFeatures() | ||
break | ||
case t == criurpc.CriuReqType_NOTIFY: | ||
if err := c.criuNotifications(resp, process, opts, extFds); err != nil { | ||
if err := c.criuNotifications(resp, process, opts, extFds, oob[:oobn]); err != nil { | ||
return err | ||
} | ||
t = criurpc.CriuReqType_NOTIFY | ||
|
@@ -1148,45 +1167,37 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * | |
if err != nil { | ||
return err | ||
} | ||
_, err = criuClient.Write(data) | ||
_, err = criuClientCon.Write(data) | ||
if err != nil { | ||
return err | ||
} | ||
continue | ||
case t == criurpc.CriuReqType_RESTORE: | ||
case t == criurpc.CriuReqType_DUMP: | ||
break | ||
case t == criurpc.CriuReqType_PRE_DUMP: | ||
// In pre-dump mode CRIU is in a loop and waits for | ||
// the final DUMP command. | ||
// The current runc pre-dump approach, however, is | ||
// start criu in PRE_DUMP once for a single pre-dump | ||
// and not the whole series of pre-dump, pre-dump, ...m, dump | ||
// If we got the message CriuReqType_PRE_DUMP it means | ||
// CRIU was successful and we need to forcefully stop CRIU | ||
logrus.Debugf("PRE_DUMP finished. Send close signal to CRIU service") | ||
criuClient.Close() | ||
// Process status won't be success, because one end of sockets is closed | ||
_, err := cmd.Process.Wait() | ||
if err != nil { | ||
logrus.Debugf("After PRE_DUMP CRIU exiting failed") | ||
return err | ||
} | ||
return nil | ||
default: | ||
return fmt.Errorf("unable to parse the response %s", resp.String()) | ||
} | ||
|
||
break | ||
} | ||
|
||
criuClientCon.CloseWrite() | ||
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors. | ||
// Here we want to wait only the CRIU process. | ||
st, err := cmd.Process.Wait() | ||
if err != nil { | ||
return err | ||
} | ||
if !st.Success() { | ||
|
||
// In pre-dump mode CRIU is in a loop and waits for | ||
// the final DUMP command. | ||
// The current runc pre-dump approach, however, is | ||
// start criu in PRE_DUMP once for a single pre-dump | ||
// and not the whole series of pre-dump, pre-dump, ...m, dump | ||
// If we got the message CriuReqType_PRE_DUMP it means | ||
// CRIU was successful and we need to forcefully stop CRIU | ||
if !st.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP { | ||
return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath) | ||
} | ||
return nil | ||
|
@@ -1220,11 +1231,12 @@ func unlockNetwork(config *configs.Config) error { | |
return nil | ||
} | ||
|
||
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string) error { | ||
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string, oob []byte) error { | ||
notify := resp.GetNotify() | ||
if notify == nil { | ||
return fmt.Errorf("invalid response: %s", resp.String()) | ||
} | ||
logrus.Debugf("notify: %s\n", notify.GetScript()) | ||
switch { | ||
case notify.GetScript() == "post-dump": | ||
f, err := os.Create(filepath.Join(c.root, "checkpoint")) | ||
|
@@ -1277,6 +1289,20 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc | |
logrus.Error(err) | ||
} | ||
} | ||
case notify.GetScript() == "orphan-pts-master": | ||
scm, err := syscall.ParseSocketControlMessage(oob) | ||
if err != nil { | ||
return err | ||
} | ||
fds, err := syscall.ParseUnixRights(&scm[0]) | ||
|
||
master := os.NewFile(uintptr(fds[0]), "orphan-pts-master") | ||
defer master.Close() | ||
|
||
// While we can access console.master, using the API is a good idea. | ||
if err := utils.SendFd(process.ConsoleSocket, master); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So it's still a two-step socket translation? What #1356 did was try to avoid this; can the master be sent directly to There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We get this master from CRIU, and CRIU doesn't have access to process.consoleSocket There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. But we can make it, right? You can do something like this in In criu, you can do something similar to: https://github.com/opencontainers/runc/blob/v1.0.0-rc3/libcontainer/factory_linux.go#L251-L258 Then you'll have access to There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unfortunately it isn't so easy. When file descriptors are restored, we have to be sure that none of the restored file descriptors intersect with the criu service descriptors, so the number of service descriptors is limited. That is one of the reasons why we can't pass extra file descriptors to criu restore. Another reason is that we now have a very generic interface for handling external resources, and it allows handling any number of external terminals. It is impossible to pass a separate unix socket for each of them. I understand your point, but I'm afraid there is no way to make it more optimal. |
||
return err | ||
} | ||
} | ||
return nil | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My guess is that criuClient is only a file, so Close doesn't quite do what it should?
criuClientCon can be cast to a unix socket, so maybe calling Close, or even CloseWrite (which is really SHUT_WR, like we do below), should do the correct thing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know what syscall.Shutdown() does. It works fast and reliably. I have called it thousands of times and it always works as expected ;)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@dqminh I found that net.FileConn() creates a new file descriptor, and that was the reason why criuClientCon.Close() didn't close the socket. The current version of the patches doesn't use the raw shutdown() syscall.