From 149437588b6adf1548ec553a53d831e87f6092b9 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Mon, 15 Nov 2021 10:34:43 +0100 Subject: [PATCH] linux: new mount option "idmap" When the "idmap" mount option is specified, create the mount outside of the container user namespace context and pass the mount fd to the container init process. Signed-off-by: Giuseppe Scrivano --- crun.1 | 8 +++++ crun.1.md | 8 +++++ src/libcrun/linux.c | 80 ++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 91 insertions(+), 5 deletions(-) diff --git a/crun.1 b/crun.1 index bedccadf0c..23d391cb5b 100644 --- a/crun.1 +++ b/crun.1 @@ -638,6 +638,14 @@ itself. .PP If the \fB\fCrro\fR option is specified then the mount is made recursively read-only. +.SH rro mount options +.PP +If the \fB\fCrro\fR option is specified then the mount is made recursively read-only. + +.SH idmap mount options +.PP +If the \fB\fCidmap\fR option is specified then the mount is ID-mapped using the container's target user namespace. + .SH Automatically create user namespace .PP When running as user different than root, an user namespace is diff --git a/crun.1.md b/crun.1.md index 7f0dfe607c..061712e719 100644 --- a/crun.1.md +++ b/crun.1.md @@ -507,6 +507,14 @@ itself. If the `rro` option is specified then the mount is made recursively read-only. +## rro mount options + +If the `rro` option is specified then the mount is made recursively read-only. + +## idmap mount options + +If the `idmap` option is specified then the mount is ID-mapped using the container's target user namespace. 
+ ## Automatically create user namespace When running as user different than root, an user namespace is diff --git a/src/libcrun/linux.c b/src/libcrun/linux.c index 1e9fdd24f3..60b5fa4129 100644 --- a/src/libcrun/linux.c +++ b/src/libcrun/linux.c @@ -291,6 +291,10 @@ struct mount_attr_s # define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ #endif +#ifndef MOUNT_ATTR_IDMAP +# define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ +#endif + static int syscall_mount_setattr (int dfd, const char *path, unsigned int flags, struct mount_attr_s *attr) @@ -359,6 +363,42 @@ make_mount_rro (const char *target, int targetfd, libcrun_error_t *err) return 0; } +static int +get_idmapped_mount (const char *src, pid_t pid, libcrun_error_t *err) +{ + cleanup_close int open_tree_fd = -1; + cleanup_close int fd = -1; + int ret; + char proc_path[64]; + struct mount_attr_s attr = { + 0, + }; + + sprintf (proc_path, "/proc/%d/ns/user", pid); + fd = open (proc_path, O_RDONLY); + if (UNLIKELY (fd < 0)) + return crun_make_error (err, errno, "open `%s`", proc_path); + + open_tree_fd = syscall_open_tree (-1, src, + AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); + if (UNLIKELY (open_tree_fd < 0)) + return crun_make_error (err, errno, "open `/%s`", src); + + attr.attr_set = MOUNT_ATTR_IDMAP; + attr.userns_fd = fd; + + ret = syscall_mount_setattr (open_tree_fd, "", AT_EMPTY_PATH, &attr); + if (UNLIKELY (ret < 0)) + return crun_make_error (err, errno, "mount_setattr `%s`", src); + + /* + ret = syscall_fsmount (open_tree_fd, FSMOUNT_CLOEXEC, 0); + if (UNLIKELY (ret < 0)) + return crun_make_error (err, errno, "fsmount `%s`", src); +*/ + return get_and_reset (&open_tree_fd); +} + int libcrun_create_keyring (const char *name, const char *label, libcrun_error_t *err) { @@ -432,8 +472,11 @@ enum { OPTION_TMPCOPYUP = (1 << 0), OPTION_RRO = (1 << 1), + OPTION_IDMAP = (1 << 2), }; +#define IDMAP "idmap" + static struct 
propagation_flags_s propagation_flags[] = { { "defaults", 0, 0, 0 }, { "bind", 0, MS_BIND, 0 }, { "rbind", 0, MS_REC | MS_BIND, 0 }, @@ -470,6 +513,7 @@ static struct propagation_flags_s propagation_flags[] = { { "defaults", 0, 0, 0 { "tmpcopyup", 0, 0, OPTION_TMPCOPYUP }, { "rro", 0, 0, OPTION_RRO }, + { IDMAP, 0, 0, OPTION_IDMAP }, { NULL, 0, 0, 0 } }; @@ -1658,7 +1702,8 @@ do_mounts (libcrun_container_t *container, int rootfsfd, const char *rootfs, con ret = fs_move_mount_to (mfd, targetfd, NULL); if (LIKELY (ret == 0)) { - ret = do_mount (container, NULL, mfd, target, NULL, flags, data, LABEL_NONE, err); + /* Force no MS_BIND flag to not attempt again the bind mount. */ + ret = do_mount (container, NULL, mfd, target, NULL, flags & ~MS_BIND, data, LABEL_NONE, err); if (UNLIKELY (ret < 0)) return ret; mounted = true; @@ -1711,7 +1756,7 @@ do_mounts (libcrun_container_t *container, int rootfsfd, const char *rootfs, con return ret; } - if (extra_flags & OPTION_RRO) + if (extra_flags & (OPTION_RRO | OPTION_IDMAP)) { cleanup_close int dfd = -1; @@ -1719,9 +1764,12 @@ do_mounts (libcrun_container_t *container, int rootfsfd, const char *rootfs, con if (UNLIKELY (dfd < 0)) return crun_make_error (err, errno, "open mount target `/%s`", target); - ret = make_mount_rro (target, dfd, err); - if (UNLIKELY (ret < 0)) - return ret; + if (extra_flags & OPTION_RRO) + { + ret = make_mount_rro (target, dfd, err); + if (UNLIKELY (ret < 0)) + return ret; + } } } return 0; @@ -3257,6 +3305,17 @@ get_fd_map (libcrun_container_t *container) return mount_fds; } +static bool +is_idmapped (runtime_spec_schema_defs_mount *mnt) +{ + size_t i; + + for (i = 0; i < mnt->options_len; i++) + if (strcmp (mnt->options[i], IDMAP) == 0) + return true; + return false; +} + static int prepare_and_send_mounts (libcrun_container_t *container, struct init_status_s *init_status, pid_t pid, int sync_socket_host, libcrun_error_t *err) @@ -3274,6 +3333,17 @@ prepare_and_send_mounts (libcrun_container_t 
*container, struct init_status_s *i for (i = 0; i < def->mounts_len; i++) { + if (is_idmapped (def->mounts[i])) + { + int fd; + + fd = get_idmapped_mount (def->mounts[i]->source, pid, err); + if (UNLIKELY (fd < 0)) + return fd; + + mount_fds->fds[i] = fd; + } + if (mount_fds->fds[i] >= 0) how_many++; }