diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go index 196b431dba1..3b4e296c657 100644 --- a/libcontainer/configs/validate/validator.go +++ b/libcontainer/configs/validate/validator.go @@ -95,7 +95,7 @@ func namespaces(config *configs.Config) error { return errors.New("USER namespaces aren't enabled in the kernel") } } else { - if config.UIDMappings != nil || config.GIDMappings != nil { + if len(config.UIDMappings) != 0 || len(config.GIDMappings) != 0 { return errors.New("User namespace mappings specified, but USER namespace isn't enabled in the config") } } @@ -264,14 +264,8 @@ func checkIDMapMounts(config *configs.Config, m *configs.Mount) error { if config.RootlessEUID { return fmt.Errorf("gidMappings/uidMappings is not supported when runc is being launched with EUID != 0, needs CAP_SYS_ADMIN on the runc parent's user namespace") } - if len(config.UIDMappings) == 0 || len(config.GIDMappings) == 0 { - return fmt.Errorf("not yet supported to use gidMappings/uidMappings in a mount without also using a user namespace") - } - if !sameMapping(config.UIDMappings, m.UIDMappings) { - return fmt.Errorf("not yet supported for the mount uidMappings to be different than user namespace uidMapping") - } - if !sameMapping(config.GIDMappings, m.GIDMappings) { - return fmt.Errorf("not yet supported for the mount gidMappings to be different than user namespace gidMapping") + if len(m.UIDMappings) == 0 || len(m.GIDMappings) == 0 { + return fmt.Errorf("id map mount without UID/GID mappings") } if !filepath.IsAbs(m.Source) { return fmt.Errorf("mount source not absolute") @@ -298,27 +292,6 @@ func mounts(config *configs.Config) error { return nil } -// sameMapping checks if the mappings are the same. If the mappings are the same -// but in different order, it returns false. -func sameMapping(a, b []configs.IDMap) bool { - if len(a) != len(b) { - return false - } - - for i := range a { - if a[i].ContainerID != b[i].ContainerID { - return false - } - if a[i].HostID != b[i].HostID { - return false - } - if a[i].Size != b[i].Size { - return false - } - } - return true -} - func isHostNetNS(path string) (bool, error) { const currentProcessNetns = "/proc/self/ns/net" diff --git a/libcontainer/configs/validate/validator_test.go b/libcontainer/configs/validate/validator_test.go index 58aae7a68f8..c8232fdfc29 100644 --- a/libcontainer/configs/validate/validator_test.go +++ b/libcontainer/configs/validate/validator_test.go @@ -436,8 +436,7 @@ func TestValidateIDMapMounts(t *testing.T) { }, }, { - name: "idmap mount without userns mappings", - isErr: true, + name: "idmap mount without userns mappings", config: &configs.Config{ Mounts: []*configs.Mount{ { @@ -451,8 +450,7 @@ func TestValidateIDMapMounts(t *testing.T) { }, }, { - name: "idmap mounts with different userns and mount mappings", - isErr: true, + name: "idmap mounts with different userns and mount mappings", config: &configs.Config{ UIDMappings: mapping, GIDMappings: mapping, @@ -474,8 +472,7 @@ func TestValidateIDMapMounts(t *testing.T) { }, }, { - name: "idmap mounts with different userns and mount mappings", - isErr: true, + name: "idmap mounts with different userns and mount mappings", config: &configs.Config{ UIDMappings: mapping, GIDMappings: mapping, diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 2f0a6c64166..a48e1fb76f4 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -558,11 +558,6 @@ func (c *Container) shouldSendIdmapSources() bool { return false } - // For the time being we require userns to be in use. - if !c.config.Namespaces.Contains(configs.NEWUSER) { - return false - } - // We need to send sources if there are idmap bind-mounts. for _, m := range c.config.Mounts { if m.IsBind() && m.IsIDMapped() { @@ -2301,6 +2296,9 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa // Idmap mount sources to open. if it == initStandard && c.shouldSendIdmapSources() { var mounts []byte + var uidmaps []byte + var gidmaps []byte + for _, m := range c.config.Mounts { if m.IsBind() && m.IsIDMapped() { // While other parts of the code check this too (like @@ -2310,11 +2308,35 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa return nil, fmt.Errorf("mount source string contains null byte: %q", m.Source) } + uidBytes, err := encodeIDMapping(m.UIDMappings) + if err != nil { + return nil, err + } + gidBytes, err := encodeIDMapping(m.GIDMappings) + if err != nil { + return nil, err + } + + uidmaps = append(uidmaps, uidBytes...) + gidmaps = append(gidmaps, gidBytes...) mounts = append(mounts, []byte(m.Source)...) } + + uidmaps = append(uidmaps, byte(0)) + gidmaps = append(gidmaps, byte(0)) mounts = append(mounts, byte(0)) } + r.AddData(&Bytemsg{ + Type: IdmapUidmapAttr, + Value: uidmaps, + }) + + r.AddData(&Bytemsg{ + Type: IdmapGidmapAttr, + Value: gidmaps, + }) + r.AddData(&Bytemsg{ Type: IdmapSourcesAttr, Value: mounts, diff --git a/libcontainer/message_linux.go b/libcontainer/message_linux.go index 17db81a29f3..e2f13275543 100644 --- a/libcontainer/message_linux.go +++ b/libcontainer/message_linux.go @@ -23,6 +23,8 @@ const ( GidmapPathAttr uint16 = 27289 MountSourcesAttr uint16 = 27290 IdmapSourcesAttr uint16 = 27291 + IdmapUidmapAttr uint16 = 27292 + IdmapGidmapAttr uint16 = 27293 ) type Int32msg struct { diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index 6297276f8b2..394cc0ddca4 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -104,6 +104,14 @@ struct nlconfig_t { /* Idmap sources opened outside the container userns which will be id mapped. */ char *idmapsources; size_t idmapsources_len; + + /* UID map to id map the above idmapsources. */ + char *idmap_uidmap; + size_t idmap_uidmap_len; + + /* GID map to id map the above idmapsources. */ + char *idmap_gidmap; + size_t idmap_gidmap_len; }; /* @@ -122,6 +130,8 @@ struct nlconfig_t { #define GIDMAPPATH_ATTR 27289 #define MOUNT_SOURCES_ATTR 27290 #define IDMAP_SOURCES_ATTR 27291 +#define IDMAP_UIDMAP_ATTR 27292 +#define IDMAP_GIDMAP_ATTR 27293 /* * Use the raw syscall for versions of glibc which don't include a function for @@ -445,6 +455,14 @@ static void nl_parse(int fd, struct nlconfig_t *config) config->idmapsources = current; config->idmapsources_len = payload_len; break; + case IDMAP_UIDMAP_ATTR: + config->idmap_uidmap = current; + config->idmap_uidmap_len = payload_len; + break; + case IDMAP_GIDMAP_ATTR: + config->idmap_gidmap = current; + config->idmap_gidmap_len = payload_len; + break; default: bail("unknown netlink message type %d", nlattr->nla_type); } @@ -664,40 +682,146 @@ void try_unshare(int flags, const char *msg) bail("failed to unshare %s", msg); } -void send_idmapsources(int sockfd, pid_t pid, char *idmap_src, int idmap_src_len) +int child_mount_user_ns(void *unused) { - char proc_user_path[PATH_MAX]; - - /* Open the userns fd only once. - * Currently we only support idmap mounts that use the same mapping than - * the userns. This is validated in libcontainer/configs/validate/validator.go, - * so if we reached here, we know the mapping for the idmap is the same - * as the userns. This is why we just open the userns_fd once from the - * PID of the child process that has the userns already applied. - */ - int ret = snprintf(proc_user_path, sizeof(proc_user_path), "/proc/%d/ns/user", pid); - if (ret < 0 || (size_t)ret >= sizeof(proc_user_path)) { + sigset_t set; + pid_t pid; + int sig; + + pid = getpid(); + + if (sigemptyset(&set)) { sane_kill(pid, SIGKILL); - bail("failed to create userns path string"); + bail("failed to empty sigset"); } + if (sigaddset(&set, SIGKILL)) { + sane_kill(pid, SIGKILL); + bail("failed to add SIGKILL to sigset"); + } + + if (sigwait(&set, &sig)) { + sane_kill(pid, SIGKILL); + bail("failed to sigwait"); + } + + return 0; +} + +void send_idmapsources(int sockfd, pid_t pid, struct nlconfig_t *config) +{ + char *uidmap_src = config->idmap_uidmap; + char *gidmap_src = config->idmap_gidmap; + + char *idmap_src = config->idmapsources; + char *idmap_end = idmap_src + config->idmapsources_len; + + int nr_uidmap = 0; + int nr_gidmap = 0; + int nr_idmap = 0; + + int i; - int userns_fd = open(proc_user_path, O_RDONLY | O_CLOEXEC | O_NOCTTY); - if (userns_fd < 0) { + /* + * There should be as many UID/GID map as mount sources as done in + * bootstrapData(). + * But let's check this nonethelss. + */ + for (i = 0; i < config->idmapsources_len; i++) + if (idmap_src[i] == '\0') + nr_idmap++; + + for (i = 0; i < config->idmap_uidmap_len; i++) + if (uidmap_src[i] == '\0') + nr_uidmap++; + + for (i = 0; i < config->idmap_gidmap_len; i++) + if (gidmap_src[i] == '\0') + nr_gidmap++; + + if (nr_gidmap != nr_uidmap && nr_uidmap != nr_idmap) { sane_kill(pid, SIGKILL); - bail("failed to get user namespace fd"); + bail("there should be as many id map mount sources as UID and GID mappings: %d vs %d and %d", nr_idmap, + nr_uidmap, nr_gidmap); } - char *idmap_end = idmap_src + idmap_src_len; while (idmap_src < idmap_end) { + char proc_user_path[PATH_MAX]; + int child_stack_size = 4096; + void *child_stack; + int child_pid; + int ret; + if (idmap_src[0] == '\0') { idmap_src++; + uidmap_src++; + gidmap_src++; + continue; } + /* + * For each mount source, we have corresponding UID and GID + * mappings. + * Then, we need to apply the man user_namespace for each of + * them. + * That is to say, create a new child with CLONE_NEWUSER, write + * the corresponding UID and GID maps, then doing the ID map + * mount with the child PID. + * This could be parallelized by spawning a thread for each + * triple (mount source, UID map, GID map). + * But for the moment, let's keep it sequential. + */ + child_stack = malloc(child_stack_size); + if (child_stack == NULL) { + sane_kill(pid, SIGKILL); + bail("failed to allocate child stack"); + } + + child_pid = clone(child_mount_user_ns, child_stack + child_stack_size, CLONE_NEWUSER, NULL); + if (child_pid == -1) { + free(child_stack); + sane_kill(pid, SIGKILL); + bail("failed to clone with CLONE_NEWUSER"); + } + + update_setgroups(child_pid, SETGROUPS_DENY); + + int uidmap_len = strlen(uidmap_src); + int gidmap_len = strlen(gidmap_src); + + /* Update child mappings from the parent. */ + update_uidmap(config->uidmappath, child_pid, uidmap_src, uidmap_len); + update_gidmap(config->gidmappath, child_pid, gidmap_src, gidmap_len); + + /* Open the userns fd only once. + * Currently we only support idmap mounts that use the same mapping than + * the userns. This is validated in libcontainer/configs/validate/validator.go, + * so if we reached here, we know the mapping for the idmap is the same + * as the userns. This is why we just open the userns_fd once from the + * PID of the child process that has the userns already applied. + */ + ret = snprintf(proc_user_path, sizeof(proc_user_path), "/proc/%d/ns/user", child_pid); + if (ret < 0 || (size_t)ret >= sizeof(proc_user_path)) { + sane_kill(child_pid, SIGKILL); + free(child_stack); + sane_kill(pid, SIGKILL); + bail("failed to create userns path string"); + } + + int userns_fd = open(proc_user_path, O_RDONLY | O_CLOEXEC | O_NOCTTY); + if (userns_fd < 0) { + sane_kill(child_pid, SIGKILL); + free(child_stack); + sane_kill(pid, SIGKILL); + bail("failed to get user namespace fd"); + } + int fd_tree = sys_open_tree(-EBADF, idmap_src, OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT); if (fd_tree < 0) { + sane_kill(child_pid, SIGKILL); + free(child_stack); sane_kill(pid, SIGKILL); if (errno == EINVAL) bail("failed to use open_tree(2) with path: %s, the kernel doesn't supports ID-mapped mounts", idmap_src); @@ -712,6 +836,8 @@ void send_idmapsources(int sockfd, pid_t pid, char *idmap_src, int idmap_src_len ret = sys_mount_setattr(fd_tree, "", AT_EMPTY_PATH, &attr, sizeof(attr)); if (ret < 0) { + sane_kill(child_pid, SIGKILL); + free(child_stack); sane_kill(pid, SIGKILL); if (errno == EINVAL) bail("failed to change mount attributes, maybe the filesystem doesn't supports ID-mapped mounts"); @@ -723,16 +849,25 @@ void send_idmapsources(int sockfd, pid_t pid, char *idmap_src, int idmap_src_len send_fd(sockfd, fd_tree); if (close(fd_tree) < 0) { + sane_kill(child_pid, SIGKILL); + free(child_stack); sane_kill(pid, SIGKILL); bail("error closing fd_tree"); } - idmap_src += strlen(idmap_src) + 1; - } + if (close(userns_fd) < 0) { + sane_kill(child_pid, SIGKILL); + free(child_stack); + sane_kill(pid, SIGKILL); + bail("error closing userns fd"); + } - if (close(userns_fd) < 0) { - sane_kill(pid, SIGKILL); - bail("error closing userns fd"); + sane_kill(child_pid, SIGKILL); + free(child_stack); + + idmap_src += strlen(idmap_src) + 1; + uidmap_src += uidmap_len + 1; + gidmap_src += gidmap_len + 1; } } @@ -985,8 +1120,7 @@ void nsexec(void) break; case SYNC_MOUNT_IDMAP_PLS: write_log(DEBUG, "stage-1 requested to open idmap sources"); - send_idmapsources(syncfd, stage1_pid, config.idmapsources, - config.idmapsources_len); + send_idmapsources(syncfd, stage1_pid, &config); s = SYNC_MOUNT_IDMAP_ACK; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { sane_kill(stage1_pid, SIGKILL); diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 7fb67d8eee5..2a4ace35ac3 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -415,6 +415,10 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { return nil, err } } + + config.UIDMappings = toConfigIDMap(spec.Linux.UIDMappings) + config.GIDMappings = toConfigIDMap(spec.Linux.GIDMappings) + config.MaskPaths = spec.Linux.MaskedPaths config.ReadonlyPaths = spec.Linux.ReadonlyPaths config.MountLabel = spec.Linux.MountLabel diff --git a/tests/integration/idmap-with-userns.bats b/tests/integration/idmap-with-userns.bats new file mode 100644 index 00000000000..9d135f832e0 --- /dev/null +++ b/tests/integration/idmap-with-userns.bats @@ -0,0 +1,160 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + requires root + requires_kernel 5.12 + requires_idmap_fs /tmp + + setup_debian + + # Prepare source folders for bind mount + mkdir -p source-{1,2}/ + touch source-{1,2}/foo.txt + + # Use other owner for source-2 + chown 1:1 source-2/foo.txt + + mkdir -p rootfs/{proc,sys,tmp} + mkdir -p rootfs/tmp/mount-{1,2} + mkdir -p rootfs/mnt/bind-mount-{1,2} + + update_config ' .linux.namespaces += [{"type": "user"}] + | .linux.uidMappings += [{"hostID": 100000, "containerID": 0, "size": 65536}] + | .linux.gidMappings += [{"hostID": 100000, "containerID": 0, "size": 65536}] + | .process.args += ["-c", "stat -c =%u=%g= /tmp/mount-1/foo.txt"] + | .mounts += [ + { + "source": "source-1/", + "destination": "/tmp/mount-1", + "options": ["bind"], + "uidMappings": [ { + "containerID": 0, + "hostID": 100000, + "size": 65536 + } + ], + "gidMappings": [ { + "containerID": 0, + "hostID": 100000, + "size": 65536 + } + ] + } + ] ' +} + +function teardown() { + teardown_bundle +} + +@test "simple idmap mount" { + runc run test_debian + [ "$status" -eq 0 ] + [[ "$output" == *"=0=0="* ]] +} + +@test "write to an idmap mount" { + update_config ' .process.args = ["sh", "-c", "touch /tmp/mount-1/bar && stat -c =%u=%g= /tmp/mount-1/bar"]' + runc run test_debian + [ "$status" -eq 0 ] + [[ "$output" == *"=0=0="* ]] +} + +@test "idmap mount with propagation flag" { + update_config ' .process.args = ["sh", "-c", "findmnt -o PROPAGATION /tmp/mount-1"]' + # Add the shared option to the idmap mount + update_config ' .mounts |= map((select(.source == "source-1/") | .options += ["shared"]) // .)' + + runc run test_debian + [ "$status" -eq 0 ] + [[ "$output" == *"shared"* ]] +} + +@test "idmap mount with bind mount" { + update_config ' .mounts += [ + { + "source": "source-2/", + "destination": "/tmp/mount-2", + "options": ["bind"] + } + ] ' + + runc run test_debian + [ "$status" -eq 0 ] + [[ "$output" == *"=0=0="* ]] +} + +@test "two idmap mounts with two bind mounts" { + update_config ' .process.args = ["sh", "-c", "stat -c =%u=%g= /tmp/mount-1/foo.txt /tmp/mount-2/foo.txt"] + | .mounts += [ + { + "source": "source-1/", + "destination": "/mnt/bind-mount-1", + "options": ["bind"] + }, + { + "source": "source-2/", + "destination": "/mnt/bind-mount-2", + "options": ["bind"] + }, + { + "source": "source-2/", + "destination": "/tmp/mount-2", + "options": ["bind"], + "uidMappings": [ { + "containerID": 0, + "hostID": 100000, + "size": 65536 + } + ], + "gidMappings": [ { + "containerID": 0, + "hostID": 100000, + "size": 65536 + } + ] + } + ] ' + + runc run test_debian + [ "$status" -eq 0 ] + [[ "$output" == *"=0=0="* ]] + # source-2/ is owned by 1:1, so we expect this with the idmap mount too. + [[ "$output" == *"=1=1="* ]] +} + +@test "idmap mount without gidMappings fails" { + update_config ' .mounts |= map((select(.source == "source-1/") | del(.gidMappings) ) // .)' + + runc run test_debian + [ "$status" -eq 1 ] + [[ "${output}" == *"invalid mount"* ]] +} + +@test "idmap mount without uidMappings fails" { + update_config ' .mounts |= map((select(.source == "source-1/") | del(.uidMappings) ) // .)' + + runc run test_debian + [ "$status" -eq 1 ] + [[ "${output}" == *"invalid mount"* ]] +} + +@test "idmap mount without bind fails" { + update_config ' .mounts |= map((select(.source == "source-1/") | .options = [""]) // .)' + + runc run test_debian + [ "$status" -eq 1 ] + [[ "${output}" == *"invalid mount"* ]] +} + +@test "idmap mount with different mapping than userns" { + update_config ' .linux.uidMappings[0].hostID = 99999' + + runc run test_debian + [ "$status" -eq 0 ] + # We have 1 as UID as user ns first UID is 99999 and id map mount is 10000. + # We still have 0 as GID was did not modify it. + [[ "${output}" == *"=1=0="* ]] +} diff --git a/tests/integration/idmap.bats b/tests/integration/idmap.bats index 375191bf705..839cb8ebe44 100644 --- a/tests/integration/idmap.bats +++ b/tests/integration/idmap.bats @@ -20,10 +20,7 @@ function setup() { mkdir -p rootfs/tmp/mount-{1,2} mkdir -p rootfs/mnt/bind-mount-{1,2} - update_config ' .linux.namespaces += [{"type": "user"}] - | .linux.uidMappings += [{"hostID": 100000, "containerID": 0, "size": 65536}] - | .linux.gidMappings += [{"hostID": 100000, "containerID": 0, "size": 65536}] - | .process.args += ["-c", "stat -c =%u=%g= /tmp/mount-1/foo.txt"] + update_config ' .process.args += ["-c", "stat -c =%u=%g= /tmp/mount-1/foo.txt"] | .mounts += [ { "source": "source-1/", @@ -52,11 +49,13 @@ function teardown() { @test "simple idmap mount" { runc run test_debian [ "$status" -eq 0 ] - [[ "$output" == *"=0=0="* ]] + [[ "$output" == *"=100000=100000="* ]] } @test "write to an idmap mount" { - update_config ' .process.args = ["sh", "-c", "touch /tmp/mount-1/bar && stat -c =%u=%g= /tmp/mount-1/bar"]' + # We need to change the config to permit UID 0 to write the mount id map. + update_config ' .process.args = ["sh", "-c", "touch /tmp/mount-1/bar && stat -c =%u=%g= /tmp/mount-1/bar"] + | .mounts |= map((select(.source == "source-1/") | .uidMappings[0].hostID = 0 | .gidMappings[0].hostID = 0) // .)' runc run test_debian [ "$status" -eq 0 ] [[ "$output" == *"=0=0="* ]] @@ -83,7 +82,7 @@ function teardown() { runc run test_debian [ "$status" -eq 0 ] - [[ "$output" == *"=0=0="* ]] + [[ "$output" == *"=100000=100000="* ]] } @test "two idmap mounts with two bind mounts" { @@ -120,9 +119,9 @@ function teardown() { runc run test_debian [ "$status" -eq 0 ] - [[ "$output" == *"=0=0="* ]] + [[ "$output" == *"=100000=100000="* ]] # source-2/ is owned by 1:1, so we expect this with the idmap mount too. - [[ "$output" == *"=1=1="* ]] + [[ "$output" == *"=100001=100001="* ]] } @test "idmap mount without gidMappings fails" { @@ -148,13 +147,3 @@ function teardown() { [ "$status" -eq 1 ] [[ "${output}" == *"invalid mount"* ]] } - -@test "idmap mount with different mapping than userns fails" { - # Let's modify the containerID to be 1, instead of 0 as it is in the - # userns config. - update_config ' .mounts |= map((select(.source == "source-1/") | .uidMappings[0]["containerID"] = 1 ) // .)' - - runc run test_debian - [ "$status" -eq 1 ] - [[ "${output}" == *"invalid mount"* ]] -}