Skip to content

Commit

Permalink
Merge pull request #2576 from kinvolk/alban/userns-2484-take2
Browse files Browse the repository at this point in the history
Open bind mount sources from the host userns
  • Loading branch information
AkihiroSuda authored Oct 28, 2021
2 parents af16f56 + 8542322 commit 4d17654
Show file tree
Hide file tree
Showing 11 changed files with 589 additions and 25 deletions.
6 changes: 6 additions & 0 deletions libcontainer/configs/mount.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package configs

import "golang.org/x/sys/unix"

const (
// EXT_COPYUP is a directive to copy up the contents of a directory when
// a tmpfs is mounted over it.
Expand Down Expand Up @@ -37,3 +39,7 @@ type Mount struct {
// Optional Command to be run after Source is mounted.
PostmountCmds []Command `json:"postmount_cmds"`
}

func (m *Mount) IsBind() bool {
return m.Flags&unix.MS_BIND != 0
}
83 changes: 79 additions & 4 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,33 @@ func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, chi
return cmd
}

// shouldSendMountSources says whether the child process must setup bind mounts with
// the source pre-opened (O_PATH) in the host user namespace.
// See https://github.com/opencontainers/runc/issues/2484
func (c *linuxContainer) shouldSendMountSources() bool {
// Passing the mount sources via SCM_RIGHTS is only necessary when
// both userns and mntns are active.
if !c.config.Namespaces.Contains(configs.NEWUSER) ||
!c.config.Namespaces.Contains(configs.NEWNS) {
return false
}

// nsexec.c send_mountsources() requires setns(mntns) capabilities
// CAP_SYS_CHROOT and CAP_SYS_ADMIN.
if c.config.RootlessEUID {
return false
}

// We need to send sources if there are bind-mounts.
for _, m := range c.config.Mounts {
if m.IsBind() {
return true
}
}

return false
}

func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
nsMaps := make(map[configs.NamespaceType]string)
Expand All @@ -529,10 +556,40 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPa
}
}
_, sharePidns := nsMaps[configs.NEWPID]
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps)
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, initStandard)
if err != nil {
return nil, err
}

if c.shouldSendMountSources() {
// Elements on this slice will be paired with mounts (see StartInitialization() and
// prepareRootfs()). This slice MUST have the same size as c.config.Mounts.
mountFds := make([]int, len(c.config.Mounts))
for i, m := range c.config.Mounts {
if !m.IsBind() {
// Non bind-mounts do not use an fd.
mountFds[i] = -1
continue
}

// The fd passed here will not be used: nsexec.c will overwrite it with dup3(). We just need
// to allocate a fd so that we know the number to pass in the environment variable. The fd
// must not be closed before cmd.Start(), so we reuse messageSockPair.child because the
// lifecycle of that fd is already taken care of.
cmd.ExtraFiles = append(cmd.ExtraFiles, messageSockPair.child)
mountFds[i] = stdioFdCount + len(cmd.ExtraFiles) - 1
}

mountFdsJson, err := json.Marshal(mountFds)
if err != nil {
return nil, fmt.Errorf("Error creating _LIBCONTAINER_MOUNT_FDS: %w", err)
}

cmd.Env = append(cmd.Env,
"_LIBCONTAINER_MOUNT_FDS="+string(mountFdsJson),
)
}

init := &initProcess{
cmd: cmd,
messageSockPair: messageSockPair,
Expand All @@ -557,7 +614,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
}
// for setns process, we don't have to set cloneflags as the process namespaces
// will only be set via setns syscall
data, err := c.bootstrapData(0, state.NamespacePaths)
data, err := c.bootstrapData(0, state.NamespacePaths, initSetns)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -1212,7 +1269,9 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
case "bind":
// The prepareBindMount() function checks if source
// exists. So it cannot be used for other filesystem types.
if err := prepareBindMount(m, c.config.Rootfs); err != nil {
// TODO: pass something else than nil? Not sure if criu is
// impacted by issue #2484
if err := prepareBindMount(m, c.config.Rootfs, nil); err != nil {
return err
}
default:
Expand Down Expand Up @@ -2049,7 +2108,7 @@ func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
// such as one that uses nsenter package to bootstrap the container's
// init process correctly, i.e. with correct namespaces, uid/gid
// mapping etc.
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (io.Reader, error) {
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, it initType) (io.Reader, error) {
// create the netlink message
r := nl.NewNetlinkRequest(int(InitMsg), 0)

Expand Down Expand Up @@ -2131,6 +2190,22 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
Value: c.config.RootlessEUID,
})

// Bind mount source to open.
if it == initStandard && c.shouldSendMountSources() {
var mounts []byte
for _, m := range c.config.Mounts {
if m.IsBind() {
mounts = append(mounts, []byte(m.Source)...)
}
mounts = append(mounts, byte(0))
}

r.AddData(&Bytemsg{
Type: MountSourcesAttr,
Value: mounts,
})
}

return bytes.NewReader(r.Serialize()), nil
}

Expand Down
23 changes: 22 additions & 1 deletion libcontainer/factory_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,12 @@ func (l *LinuxFactory) StartInitialization() (err error) {
return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE: %w", err)
}

// Get mount files (O_PATH).
mountFds, err := parseMountFds()
if err != nil {
return err
}

// clear the current process's environment to clean any libcontainer
// specific env vars.
os.Clearenv()
Expand All @@ -305,7 +311,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
}
}()

i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd)
i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd, mountFds)
if err != nil {
return err
}
Expand Down Expand Up @@ -359,3 +365,18 @@ func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
return nil
}
}

func parseMountFds() ([]int, error) {
fdsJson := os.Getenv("_LIBCONTAINER_MOUNT_FDS")
if fdsJson == "" {
// Always return the nil slice if no fd is present.
return nil, nil
}

var mountFds []int
if err := json.Unmarshal([]byte(fdsJson), &mountFds); err != nil {
return nil, fmt.Errorf("Error unmarshalling _LIBCONTAINER_MOUNT_FDS: %w", err)
}

return mountFds, nil
}
8 changes: 7 additions & 1 deletion libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ type initer interface {
Init() error
}

func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int) (initer, error) {
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, mountFds []int) (initer, error) {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err
Expand All @@ -85,6 +85,11 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd,
}
switch t {
case initSetns:
// mountFds must be nil in this case. We don't mount while doing runc exec.
if mountFds != nil {
return nil, errors.New("mountFds must be nil. Can't mount while doing runc exec.")
}

return &linuxSetnsInit{
pipe: pipe,
consoleSocket: consoleSocket,
Expand All @@ -99,6 +104,7 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd,
config: config,
fifoFd: fifoFd,
logFd: logFd,
mountFds: mountFds,
}, nil
}
return nil, fmt.Errorf("unknown init type %q", t)
Expand Down
64 changes: 64 additions & 0 deletions libcontainer/integration/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package integration
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"os"
"os/exec"
Expand Down Expand Up @@ -1837,3 +1838,66 @@ next_fd:
t.Fatalf("found %d extra fds after container.Run", count)
}
}

// Test that a container using user namespaces is able to bind mount a folder
// that does not have permissions for group/others.
func TestBindMountAndUser(t *testing.T) {
if _, err := os.Stat("/proc/self/ns/user"); errors.Is(err, os.ErrNotExist) {
t.Skip("userns is unsupported")
}

if testing.Short() {
return
}

temphost := t.TempDir()
dirhost := filepath.Join(temphost, "inaccessible", "dir")

err := os.MkdirAll(dirhost, 0o755)
ok(t, err)

err = ioutil.WriteFile(filepath.Join(dirhost, "foo.txt"), []byte("Hello"), 0o755)
ok(t, err)

// Make this dir inaccessible to "group,others".
err = os.Chmod(filepath.Join(temphost, "inaccessible"), 0o700)
ok(t, err)

config := newTemplateConfig(t, &tParam{
userns: true,
})

// Set HostID to 1000 to avoid DAC_OVERRIDE bypassing the purpose of this test.
config.UidMappings[0].HostID = 1000
config.GidMappings[0].HostID = 1000

// Set the owner of rootfs to the effective IDs in the host to avoid errors
// while creating the folders to perform the mounts.
err = os.Chown(config.Rootfs, 1000, 1000)
ok(t, err)

config.Mounts = append(config.Mounts, &configs.Mount{
Source: dirhost,
Destination: "/tmp/mnt1cont",
Device: "bind",
Flags: unix.MS_BIND | unix.MS_REC,
})

container, err := newContainer(t, config)
ok(t, err)
defer container.Destroy() //nolint: errcheck

var stdout bytes.Buffer

pconfig := libcontainer.Process{
Cwd: "/",
Args: []string{"sh", "-c", "stat /tmp/mnt1cont/foo.txt"},
Env: standardEnvironment,
Stdout: &stdout,
Init: true,
}
err = container.Run(&pconfig)
ok(t, err)

waitProcess(&pconfig, t)
}
43 changes: 43 additions & 0 deletions libcontainer/integration/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,52 @@ func newRootfs(t *testing.T) string {
if err := copyBusybox(dir); err != nil {
t.Fatal(err)
}

// Make sure others can read+exec, so all tests (inside userns too) can
// read the rootfs.
if err := traversePath(dir); err != nil {
t.Fatalf("Error making newRootfs path traversable by others: %v", err)
}

return dir
}

// traversePath gives read+execute permissions to others for all elements in tPath below
// os.TempDir() and errors out if elements above it don't have read+exec permissions for others.
// tPath MUST be a descendant of os.TempDir(). The path returned by testing.TempDir() usually is.
func traversePath(tPath string) error {
// Check the assumption that the argument is under os.TempDir().
tempBase := os.TempDir()
if !strings.HasPrefix(tPath, tempBase) {
return fmt.Errorf("traversePath: %q is not a descendant of %q", tPath, tempBase)
}

var path string
for _, p := range strings.SplitAfter(tPath, "/") {
path = path + p
stats, err := os.Stat(path)
if err != nil {
return err
}

perm := stats.Mode().Perm()

if perm&0o5 == 0o5 {
continue
}

if strings.HasPrefix(tempBase, path) {
return fmt.Errorf("traversePath: directory %q MUST have read+exec permissions for others", path)
}

if err := os.Chmod(path, perm|0o5); err != nil {
return err
}
}

return nil
}

func remove(dir string) {
_ = os.RemoveAll(dir)
}
Expand Down
1 change: 1 addition & 0 deletions libcontainer/message_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const (
RootlessEUIDAttr uint16 = 27287
UidmapPathAttr uint16 = 27288
GidmapPathAttr uint16 = 27289
MountSourcesAttr uint16 = 27290
)

type Int32msg struct {
Expand Down
Loading

0 comments on commit 4d17654

Please sign in to comment.