From 65d54dfec51d37c2f7edb9874ac6f1af4ff6b2f0 Mon Sep 17 00:00:00 2001 From: Francis Laniel Date: Tue, 18 Jul 2023 17:58:21 +0200 Subject: [PATCH] Support idmap mounts on volumes without userns. Commit fda12ab1014e ("Support idmap mounts on volumes") introduces support for idmap mount on volumes. At this time, it required userns to be used and idmap mount must have the same UID/GID mappings than userns. This commit removes this requirement, so it is possible to use idmap mount without userns. You can, of course, use id map mount with userns, but the UID/GID mappings can be different. Signed-off-by: Francis Laniel --- libcontainer/configs/validate/validator.go | 33 +--- .../configs/validate/validator_test.go | 9 +- libcontainer/container_linux.go | 32 ++- libcontainer/message_linux.go | 2 + libcontainer/nsenter/nsexec.c | 182 +++++++++++++++--- libcontainer/specconv/spec_linux.go | 4 + .../{idmap.bats => idmap-with-userns.bats} | 12 +- 7 files changed, 203 insertions(+), 71 deletions(-) rename tests/integration/{idmap.bats => idmap-with-userns.bats} (92%) diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go index 196b431dba1..3b4e296c657 100644 --- a/libcontainer/configs/validate/validator.go +++ b/libcontainer/configs/validate/validator.go @@ -95,7 +95,7 @@ func namespaces(config *configs.Config) error { return errors.New("USER namespaces aren't enabled in the kernel") } } else { - if config.UIDMappings != nil || config.GIDMappings != nil { + if len(config.UIDMappings) != 0 || len(config.GIDMappings) != 0 { return errors.New("User namespace mappings specified, but USER namespace isn't enabled in the config") } } @@ -264,14 +264,8 @@ func checkIDMapMounts(config *configs.Config, m *configs.Mount) error { if config.RootlessEUID { return fmt.Errorf("gidMappings/uidMappings is not supported when runc is being launched with EUID != 0, needs CAP_SYS_ADMIN on the runc parent's user namespace") } - if len(config.UIDMappings) == 0 || len(config.GIDMappings) == 0 { - return fmt.Errorf("not yet supported to use gidMappings/uidMappings in a mount without also using a user namespace") - } - if !sameMapping(config.UIDMappings, m.UIDMappings) { - return fmt.Errorf("not yet supported for the mount uidMappings to be different than user namespace uidMapping") - } - if !sameMapping(config.GIDMappings, m.GIDMappings) { - return fmt.Errorf("not yet supported for the mount gidMappings to be different than user namespace gidMapping") + if len(m.UIDMappings) == 0 || len(m.GIDMappings) == 0 { + return fmt.Errorf("id map mount without UID/GID mappings") } if !filepath.IsAbs(m.Source) { return fmt.Errorf("mount source not absolute") @@ -298,27 +292,6 @@ func mounts(config *configs.Config) error { return nil } -// sameMapping checks if the mappings are the same. If the mappings are the same -// but in different order, it returns false. -func sameMapping(a, b []configs.IDMap) bool { - if len(a) != len(b) { - return false - } - - for i := range a { - if a[i].ContainerID != b[i].ContainerID { - return false - } - if a[i].HostID != b[i].HostID { - return false - } - if a[i].Size != b[i].Size { - return false - } - } - return true -} - func isHostNetNS(path string) (bool, error) { const currentProcessNetns = "/proc/self/ns/net" diff --git a/libcontainer/configs/validate/validator_test.go b/libcontainer/configs/validate/validator_test.go index 58aae7a68f8..c8232fdfc29 100644 --- a/libcontainer/configs/validate/validator_test.go +++ b/libcontainer/configs/validate/validator_test.go @@ -436,8 +436,7 @@ func TestValidateIDMapMounts(t *testing.T) { }, }, { - name: "idmap mount without userns mappings", - isErr: true, + name: "idmap mount without userns mappings", config: &configs.Config{ Mounts: []*configs.Mount{ { @@ -451,8 +450,7 @@ func TestValidateIDMapMounts(t *testing.T) { }, }, { - name: "idmap mounts with different userns and mount mappings", - isErr: true, + name: "idmap mounts with different userns and mount mappings", config: &configs.Config{ UIDMappings: mapping, GIDMappings: mapping, @@ -474,8 +472,7 @@ func TestValidateIDMapMounts(t *testing.T) { }, }, { - name: "idmap mounts with different userns and mount mappings", - isErr: true, + name: "idmap mounts with different userns and mount mappings", config: &configs.Config{ UIDMappings: mapping, GIDMappings: mapping, diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 2f0a6c64166..a48e1fb76f4 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -558,11 +558,6 @@ func (c *Container) shouldSendIdmapSources() bool { return false } - // For the time being we require userns to be in use. - if !c.config.Namespaces.Contains(configs.NEWUSER) { - return false - } - // We need to send sources if there are idmap bind-mounts. for _, m := range c.config.Mounts { if m.IsBind() && m.IsIDMapped() { @@ -2301,6 +2296,9 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa // Idmap mount sources to open. if it == initStandard && c.shouldSendIdmapSources() { var mounts []byte + var uidmaps []byte + var gidmaps []byte + for _, m := range c.config.Mounts { if m.IsBind() && m.IsIDMapped() { // While other parts of the code check this too (like @@ -2310,11 +2308,35 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa return nil, fmt.Errorf("mount source string contains null byte: %q", m.Source) } + uidBytes, err := encodeIDMapping(m.UIDMappings) + if err != nil { + return nil, err + } + gidBytes, err := encodeIDMapping(m.GIDMappings) + if err != nil { + return nil, err + } + + uidmaps = append(uidmaps, uidBytes...) + gidmaps = append(gidmaps, gidBytes...) mounts = append(mounts, []byte(m.Source)...) } + + uidmaps = append(uidmaps, byte(0)) + gidmaps = append(gidmaps, byte(0)) mounts = append(mounts, byte(0)) } + r.AddData(&Bytemsg{ + Type: IdmapUidmapAttr, + Value: uidmaps, + }) + + r.AddData(&Bytemsg{ + Type: IdmapGidmapAttr, + Value: gidmaps, + }) + r.AddData(&Bytemsg{ Type: IdmapSourcesAttr, Value: mounts, diff --git a/libcontainer/message_linux.go b/libcontainer/message_linux.go index 17db81a29f3..e2f13275543 100644 --- a/libcontainer/message_linux.go +++ b/libcontainer/message_linux.go @@ -23,6 +23,8 @@ const ( GidmapPathAttr uint16 = 27289 MountSourcesAttr uint16 = 27290 IdmapSourcesAttr uint16 = 27291 + IdmapUidmapAttr uint16 = 27292 + IdmapGidmapAttr uint16 = 27293 ) type Int32msg struct { diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index 6297276f8b2..394cc0ddca4 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -104,6 +104,14 @@ struct nlconfig_t { /* Idmap sources opened outside the container userns which will be id mapped. */ char *idmapsources; size_t idmapsources_len; + + /* UID map to id map the above idmapsources. */ + char *idmap_uidmap; + size_t idmap_uidmap_len; + + /* GID map to id map the above idmapsources. */ + char *idmap_gidmap; + size_t idmap_gidmap_len; }; /* @@ -122,6 +130,8 @@ struct nlconfig_t { #define GIDMAPPATH_ATTR 27289 #define MOUNT_SOURCES_ATTR 27290 #define IDMAP_SOURCES_ATTR 27291 +#define IDMAP_UIDMAP_ATTR 27292 +#define IDMAP_GIDMAP_ATTR 27293 /* * Use the raw syscall for versions of glibc which don't include a function for @@ -445,6 +455,14 @@ static void nl_parse(int fd, struct nlconfig_t *config) config->idmapsources = current; config->idmapsources_len = payload_len; break; + case IDMAP_UIDMAP_ATTR: + config->idmap_uidmap = current; + config->idmap_uidmap_len = payload_len; + break; + case IDMAP_GIDMAP_ATTR: + config->idmap_gidmap = current; + config->idmap_gidmap_len = payload_len; + break; default: bail("unknown netlink message type %d", nlattr->nla_type); } @@ -664,40 +682,146 @@ void try_unshare(int flags, const char *msg) bail("failed to unshare %s", msg); } -void send_idmapsources(int sockfd, pid_t pid, char *idmap_src, int idmap_src_len) +int child_mount_user_ns(void *unused) { - char proc_user_path[PATH_MAX]; - - /* Open the userns fd only once. - * Currently we only support idmap mounts that use the same mapping than - * the userns. This is validated in libcontainer/configs/validate/validator.go, - * so if we reached here, we know the mapping for the idmap is the same - * as the userns. This is why we just open the userns_fd once from the - * PID of the child process that has the userns already applied. - */ - int ret = snprintf(proc_user_path, sizeof(proc_user_path), "/proc/%d/ns/user", pid); - if (ret < 0 || (size_t)ret >= sizeof(proc_user_path)) { + sigset_t set; + pid_t pid; + int sig; + + pid = getpid(); + + if (sigemptyset(&set)) { sane_kill(pid, SIGKILL); - bail("failed to create userns path string"); + bail("failed to empty sigset"); } + if (sigaddset(&set, SIGKILL)) { + sane_kill(pid, SIGKILL); + bail("failed to add SIGKILL to sigset"); + } + + if (sigwait(&set, &sig)) { + sane_kill(pid, SIGKILL); + bail("failed to sigwait"); + } + + return 0; +} + +void send_idmapsources(int sockfd, pid_t pid, struct nlconfig_t *config) +{ + char *uidmap_src = config->idmap_uidmap; + char *gidmap_src = config->idmap_gidmap; + + char *idmap_src = config->idmapsources; + char *idmap_end = idmap_src + config->idmapsources_len; + + int nr_uidmap = 0; + int nr_gidmap = 0; + int nr_idmap = 0; + + int i; - int userns_fd = open(proc_user_path, O_RDONLY | O_CLOEXEC | O_NOCTTY); - if (userns_fd < 0) { + /* + * There should be as many UID/GID map as mount sources as done in + * bootstrapData(). + * But let's check this nonethelss. + */ + for (i = 0; i < config->idmapsources_len; i++) + if (idmap_src[i] == '\0') + nr_idmap++; + + for (i = 0; i < config->idmap_uidmap_len; i++) + if (uidmap_src[i] == '\0') + nr_uidmap++; + + for (i = 0; i < config->idmap_gidmap_len; i++) + if (gidmap_src[i] == '\0') + nr_gidmap++; + + if (nr_gidmap != nr_uidmap && nr_uidmap != nr_idmap) { sane_kill(pid, SIGKILL); - bail("failed to get user namespace fd"); + bail("there should be as many id map mount sources as UID and GID mappings: %d vs %d and %d", nr_idmap, + nr_uidmap, nr_gidmap); } - char *idmap_end = idmap_src + idmap_src_len; while (idmap_src < idmap_end) { + char proc_user_path[PATH_MAX]; + int child_stack_size = 4096; + void *child_stack; + int child_pid; + int ret; + if (idmap_src[0] == '\0') { idmap_src++; + uidmap_src++; + gidmap_src++; + continue; } + /* + * For each mount source, we have corresponding UID and GID + * mappings. + * Then, we need to apply the man user_namespace for each of + * them. + * That is to say, create a new child with CLONE_NEWUSER, write + * the corresponding UID and GID maps, then doing the ID map + * mount with the child PID. + * This could be parallelized by spawning a thread for each + * triple (mount source, UID map, GID map). + * But for the moment, let's keep it sequential. + */ + child_stack = malloc(child_stack_size); + if (child_stack == NULL) { + sane_kill(pid, SIGKILL); + bail("failed to allocate child stack"); + } + + child_pid = clone(child_mount_user_ns, child_stack + child_stack_size, CLONE_NEWUSER, NULL); + if (child_pid == -1) { + free(child_stack); + sane_kill(pid, SIGKILL); + bail("failed to clone with CLONE_NEWUSER"); + } + + update_setgroups(child_pid, SETGROUPS_DENY); + + int uidmap_len = strlen(uidmap_src); + int gidmap_len = strlen(gidmap_src); + + /* Update child mappings from the parent. */ + update_uidmap(config->uidmappath, child_pid, uidmap_src, uidmap_len); + update_gidmap(config->gidmappath, child_pid, gidmap_src, gidmap_len); + + /* Open the userns fd only once. + * Currently we only support idmap mounts that use the same mapping than + * the userns. This is validated in libcontainer/configs/validate/validator.go, + * so if we reached here, we know the mapping for the idmap is the same + * as the userns. This is why we just open the userns_fd once from the + * PID of the child process that has the userns already applied. + */ + ret = snprintf(proc_user_path, sizeof(proc_user_path), "/proc/%d/ns/user", child_pid); + if (ret < 0 || (size_t)ret >= sizeof(proc_user_path)) { + sane_kill(child_pid, SIGKILL); + free(child_stack); + sane_kill(pid, SIGKILL); + bail("failed to create userns path string"); + } + + int userns_fd = open(proc_user_path, O_RDONLY | O_CLOEXEC | O_NOCTTY); + if (userns_fd < 0) { + sane_kill(child_pid, SIGKILL); + free(child_stack); + sane_kill(pid, SIGKILL); + bail("failed to get user namespace fd"); + } + int fd_tree = sys_open_tree(-EBADF, idmap_src, OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT); if (fd_tree < 0) { + sane_kill(child_pid, SIGKILL); + free(child_stack); sane_kill(pid, SIGKILL); if (errno == EINVAL) bail("failed to use open_tree(2) with path: %s, the kernel doesn't supports ID-mapped mounts", idmap_src); @@ -712,6 +836,8 @@ void send_idmapsources(int sockfd, pid_t pid, char *idmap_src, int idmap_src_len ret = sys_mount_setattr(fd_tree, "", AT_EMPTY_PATH, &attr, sizeof(attr)); if (ret < 0) { + sane_kill(child_pid, SIGKILL); + free(child_stack); sane_kill(pid, SIGKILL); if (errno == EINVAL) bail("failed to change mount attributes, maybe the filesystem doesn't supports ID-mapped mounts"); @@ -723,16 +849,25 @@ void send_idmapsources(int sockfd, pid_t pid, char *idmap_src, int idmap_src_len send_fd(sockfd, fd_tree); if (close(fd_tree) < 0) { + sane_kill(child_pid, SIGKILL); + free(child_stack); sane_kill(pid, SIGKILL); bail("error closing fd_tree"); } - idmap_src += strlen(idmap_src) + 1; - } + if (close(userns_fd) < 0) { + sane_kill(child_pid, SIGKILL); + free(child_stack); + sane_kill(pid, SIGKILL); + bail("error closing userns fd"); + } - if (close(userns_fd) < 0) { - sane_kill(pid, SIGKILL); - bail("error closing userns fd"); + sane_kill(child_pid, SIGKILL); + free(child_stack); + + idmap_src += strlen(idmap_src) + 1; + uidmap_src += uidmap_len + 1; + gidmap_src += gidmap_len + 1; } } @@ -985,8 +1120,7 @@ void nsexec(void) break; case SYNC_MOUNT_IDMAP_PLS: write_log(DEBUG, "stage-1 requested to open idmap sources"); - send_idmapsources(syncfd, stage1_pid, config.idmapsources, - config.idmapsources_len); + send_idmapsources(syncfd, stage1_pid, &config); s = SYNC_MOUNT_IDMAP_ACK; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { sane_kill(stage1_pid, SIGKILL); diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 7fb67d8eee5..2a4ace35ac3 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -415,6 +415,10 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { return nil, err } } + + config.UIDMappings = toConfigIDMap(spec.Linux.UIDMappings) + config.GIDMappings = toConfigIDMap(spec.Linux.GIDMappings) + config.MaskPaths = spec.Linux.MaskedPaths config.ReadonlyPaths = spec.Linux.ReadonlyPaths config.MountLabel = spec.Linux.MountLabel diff --git a/tests/integration/idmap.bats b/tests/integration/idmap-with-userns.bats similarity index 92% rename from tests/integration/idmap.bats rename to tests/integration/idmap-with-userns.bats index 375191bf705..9d135f832e0 100644 --- a/tests/integration/idmap.bats +++ b/tests/integration/idmap-with-userns.bats @@ -149,12 +149,12 @@ function teardown() { [[ "${output}" == *"invalid mount"* ]] } -@test "idmap mount with different mapping than userns fails" { - # Let's modify the containerID to be 1, instead of 0 as it is in the - # userns config. - update_config ' .mounts |= map((select(.source == "source-1/") | .uidMappings[0]["containerID"] = 1 ) // .)' +@test "idmap mount with different mapping than userns" { + update_config ' .linux.uidMappings[0].hostID = 99999' runc run test_debian - [ "$status" -eq 1 ] - [[ "${output}" == *"invalid mount"* ]] + [ "$status" -eq 0 ] + # We have 1 as UID as user ns first UID is 99999 and id map mount is 10000. + # We still have 0 as GID was did not modify it. + [[ "${output}" == *"=1=0="* ]] }