diff --git a/Cargo.lock b/Cargo.lock index 333002fdd..6c3bd6085 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2377,8 +2377,7 @@ dependencies = [ [[package]] name = "oci-spec" version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e423c4f827362c0d8d8da4b1f571270f389ebde73bcd3240a3d23c6d6f61d0f0" +source = "git+https://github.com/lengrongfu/oci-spec-rs?branch=feat/add_idmapped_mount#da50e3817ae5ac1022ee396cfa81c64d11208198" dependencies = [ "derive_builder 0.20.0", "getset", diff --git a/crates/libcgroups/Cargo.toml b/crates/libcgroups/Cargo.toml index 868eaf471..9d0ec9789 100644 --- a/crates/libcgroups/Cargo.toml +++ b/crates/libcgroups/Cargo.toml @@ -22,7 +22,8 @@ cgroupsv2_devices = ["rbpf", "libbpf-sys", "errno", "libc", "nix/dir"] [dependencies] nix = { version = "0.28.0", features = ["signal", "user", "fs"] } procfs = "0.16.0" -oci-spec = { version = "~0.6.4", features = ["runtime"] } +#oci-spec = { version = "~0.6.4", features = ["runtime"] } +oci-spec={git="https://github.com/lengrongfu/oci-spec-rs",branch = "feat/add_idmapped_mount",features = ["runtime"] } fixedbitset = "0.5.7" serde = { version = "1.0", features = ["derive"] } rbpf = { version = "0.2.0", optional = true } @@ -34,7 +35,8 @@ tracing = { version = "0.1.40", features = ["attributes"] } [dev-dependencies] anyhow = "1.0" -oci-spec = { version = "~0.6.4", features = ["proptests", "runtime"] } +#oci-spec = { version = "~0.6.4", features = ["proptests", "runtime"] } +oci-spec={git="https://github.com/lengrongfu/oci-spec-rs",branch = "feat/add_idmapped_mount",features = ["proptests", "runtime"]} quickcheck = "1" mockall = { version = "0.12.1", features = [] } clap = "4.1.6" diff --git a/crates/libcontainer/Cargo.toml b/crates/libcontainer/Cargo.toml index a2ea5f1cb..3234df294 100644 --- a/crates/libcontainer/Cargo.toml +++ b/crates/libcontainer/Cargo.toml @@ -39,7 +39,8 @@ nix = { version = "0.28.0", features = [ "term", "hostname", ] } -oci-spec = { version 
= "~0.6.4", features = ["runtime"] } +#oci-spec = { version = "~0.6.4", features = ["runtime"] } +oci-spec={git="https://github.com/lengrongfu/oci-spec-rs",branch = "feat/add_idmapped_mount",features = ["runtime"] } once_cell = "1.19.0" procfs = "0.16.0" prctl = "1.0.0" @@ -56,7 +57,8 @@ safe-path = "0.1.0" nc = "0.8.20" [dev-dependencies] -oci-spec = { version = "~0.6.4", features = ["proptests", "runtime"] } +#oci-spec = { version = "~0.6.4", features = ["proptests", "runtime"] } +oci-spec={git="https://github.com/lengrongfu/oci-spec-rs",branch = "feat/add_idmapped_mount",features = ["proptests", "runtime"]} quickcheck = "1" serial_test = "3.1.1" tempfile = "3" diff --git a/crates/libcontainer/src/process/channel.rs b/crates/libcontainer/src/process/channel.rs index 801b2cb4b..c076b1660 100644 --- a/crates/libcontainer/src/process/channel.rs +++ b/crates/libcontainer/src/process/channel.rs @@ -4,6 +4,7 @@ use nix::unistd::Pid; use crate::channel::{channel, Receiver, Sender}; use crate::process::message::Message; +use crate::rootfs::mount::{IdMountParam, IdMountSource}; #[derive(Debug, thiserror::Error)] pub enum ChannelError { @@ -57,6 +58,13 @@ impl MainSender { Ok(()) } + // process mount place + pub fn process_mount_place(&mut self, m: IdMountParam) -> Result<(), ChannelError> { + tracing::debug!("send process mapping place request"); + self.sender.send(Message::SendConfigureMount(m))?; + Ok(()) + } + pub fn seccomp_notify_request(&mut self, fd: RawFd) -> Result<(), ChannelError> { self.sender .send_fds(Message::SeccompNotify, &[fd.as_raw_fd()])?; @@ -186,6 +194,23 @@ impl MainReceiver { } } + pub fn wait_process_mount_place(&mut self) -> Result { + let msg = self + .receiver + .recv() + .map_err(|err| ChannelError::ReceiveError { + msg: "waiting for process mount place".to_string(), + source: err, + })?; + match msg { + Message::SendConfigureMount(m) => Ok(m), + msg => Err(ChannelError::UnexpectedMessage { + expected: 
Message::SendConfigureMount(Default::default()), + received: msg, + }) + } + } + pub fn close(&self) -> Result<(), ChannelError> { self.receiver.close()?; @@ -270,6 +295,12 @@ impl InitSender { Ok(()) } + pub fn send_mount_source(&mut self, ms: &IdMountSource) -> Result<(),ChannelError> { + tracing::debug!("send mount source request"); + self.sender.send(Message::ReceiveMountFd(ms.clone()))?; + Ok(()) + } + pub fn close(&self) -> Result<(), ChannelError> { self.sender.close()?; @@ -301,6 +332,22 @@ impl InitReceiver { } } + pub fn wait_for_mount_source(&mut self) -> Result { + let msg = self.receiver.recv().map_err(|err| ChannelError::ReceiveError { + msg: "waiting for get mount source".to_string(), + source: err, + })?; + match msg { + Message::ReceiveMountFd(ms) => Ok(ms), + msg => Err( + ChannelError::UnexpectedMessage { + expected: Message::ReceiveMountFd(IdMountSource { file: 0 }), + received: msg, + } + ) + } + } + pub fn close(&self) -> Result<(), ChannelError> { self.receiver.close()?; diff --git a/crates/libcontainer/src/process/container_init_process.rs b/crates/libcontainer/src/process/container_init_process.rs index 912cf3f9a..0673d5ed8 100644 --- a/crates/libcontainer/src/process/container_init_process.rs +++ b/crates/libcontainer/src/process/container_init_process.rs @@ -323,6 +323,10 @@ pub fn container_init_process( let in_user_ns = utils::is_in_new_userns().map_err(InitProcessError::Io)?; let bind_service = namespaces.get(LinuxNamespaceType::User)?.is_some() || in_user_ns; + let mut ns_path: Option = None; + if let Some(user_namespace) = namespaces.get(LinuxNamespaceType::User)? 
{ + ns_path = Some(user_namespace.path().clone().unwrap()); + } let rootfs = RootFS::new(); rootfs .prepare_rootfs( @@ -330,6 +334,9 @@ pub fn container_init_process( rootfs_path, bind_service, namespaces.get(LinuxNamespaceType::Cgroup)?.is_some(), + ns_path, + main_sender, + init_receiver, ) .map_err(|err| { tracing::error!(?err, "failed to prepare rootfs"); diff --git a/crates/libcontainer/src/process/container_main_process.rs b/crates/libcontainer/src/process/container_main_process.rs index f6e81e3b4..e6600b34b 100644 --- a/crates/libcontainer/src/process/container_main_process.rs +++ b/crates/libcontainer/src/process/container_main_process.rs @@ -1,3 +1,6 @@ +use std::mem; +use std::fs::File; +use std::os::fd::{AsRawFd, RawFd}; use nix::sys::wait::{waitpid, WaitStatus}; use nix::unistd::Pid; @@ -5,7 +8,7 @@ use crate::process::args::ContainerArgs; use crate::process::fork::{self, CloneCb}; use crate::process::intel_rdt::setup_intel_rdt; use crate::process::{channel, container_intermediate_process}; -use crate::syscall::SyscallError; +use crate::syscall::{linux, SyscallError}; use crate::user_ns::UserNamespaceConfig; #[derive(Debug, thiserror::Error)] @@ -115,6 +118,44 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, bo // The intermediate process will send the init pid once it forks the init // process. The intermediate process should exit after this point. 
let init_pid = main_receiver.wait_for_intermediate_ready()?; + + loop { + let id_map = main_receiver.wait_process_mount_place().unwrap(); + if id_map.end { + break; + } + let mut flags = libc::OPEN_TREE_CLONE | libc::OPEN_TREE_CLOEXEC; + let id_map_flags = id_map.flags; + if id_map_flags&libc::MS_REC == libc::MS_REC { + flags |= libc::AT_RECURSIVE as std::os::raw::c_uint; + } + let mount_file = unsafe { + libc::syscall(libc::SYS_open_tree,libc::AT_FDCWD,id_map.source,flags) + }; + let mut userns_file: RawFd = -1; + if id_map.user_ns_path != "" { + let file = File::open(id_map.user_ns_path).unwrap(); + userns_file = file.as_raw_fd(); + } else { + // TODO: we need to use the uid and gid mappings to get the fd + } + + let mat = &linux::MountAttr{ + attr_set: linux::MOUNT_ATTR_IDMAP, + attr_clr: 0, + propagation: 0, + userns_fd: userns_file as u64, + }; + let mut set_attr_flags = libc::AT_EMPTY_PATH; + if id_map.recursive { + set_attr_flags |= libc::AT_RECURSIVE; + } + syscall.mount_setattr(mount_file as i32, "".as_ref(), set_attr_flags as u32, mat,mem::size_of::(),).map_err(|err| { + tracing::error!(?err, "failed to mount_setattr"); + ProcessError::SyscallOther(err) + }).unwrap(); + } + let mut need_to_clean_up_intel_rdt_subdirectory = false; if let Some(linux) = container_args.spec.linux() { diff --git a/crates/libcontainer/src/process/message.rs b/crates/libcontainer/src/process/message.rs index e74c9ce12..42dfb71aa 100644 --- a/crates/libcontainer/src/process/message.rs +++ b/crates/libcontainer/src/process/message.rs @@ -1,6 +1,7 @@ use core::fmt; use serde::{Deserialize, Serialize}; +use crate::rootfs::mount::{IdMountParam, IdMountSource}; /// Used as a wrapper for messages to be sent between child and parent processes #[derive(Debug, Serialize, Deserialize, Clone)] @@ -12,6 +13,8 @@ pub enum Message { SeccompNotify, SeccompNotifyDone, ExecFailed(String), + SendConfigureMount(IdMountParam), + ReceiveMountFd(IdMountSource), } impl fmt::Display for Message { @@ -24,6 +27,8 @@ impl
fmt::Display for Message { Message::SeccompNotify => write!(f, "SeccompNotify"), Message::SeccompNotifyDone => write!(f, "SeccompNotifyDone"), Message::ExecFailed(s) => write!(f, "ExecFailed({})", s), + Message::SendConfigureMount(_) => write!(f,"SendConfigureMount"), + Message::ReceiveMountFd(_) => write!(f,"ReceiveMountFd"), } } } diff --git a/crates/libcontainer/src/rootfs/mount.rs b/crates/libcontainer/src/rootfs/mount.rs index 73b9b2a59..c1291b7c1 100644 --- a/crates/libcontainer/src/rootfs/mount.rs +++ b/crates/libcontainer/src/rootfs/mount.rs @@ -4,24 +4,27 @@ use std::os::unix::io::AsRawFd; use std::path::{Path, PathBuf}; #[cfg(feature = "v1")] use std::{borrow::Cow, collections::HashMap}; +use std::os::fd::RawFd; use libcgroups::common::CgroupSetup::{Hybrid, Legacy, Unified}; #[cfg(feature = "v1")] use libcgroups::common::DEFAULT_CGROUP_ROOT; use nix::dir::Dir; use nix::errno::Errno; -use nix::fcntl::OFlag; +use nix::fcntl::{OFlag}; use nix::mount::MsFlags; use nix::sys::stat::Mode; use nix::NixPath; use oci_spec::runtime::{Mount as SpecMount, MountBuilder as SpecMountBuilder}; use procfs::process::{MountInfo, MountOptFields, Process}; use safe_path; +use serde::{Deserialize, Serialize}; +use crate::process::channel; #[cfg(feature = "v1")] use super::symlink::Symlink; use super::symlink::SymlinkError; -use super::utils::{parse_mount, MountOptionConfig}; +use super::utils::{parse_mount, check_idmap_mounts, MountOptionConfig}; use crate::syscall::syscall::create_syscall; use crate::syscall::{linux, Syscall, SyscallError}; use crate::utils::PathBufExt; @@ -59,6 +62,35 @@ pub struct MountOptions<'a> { pub cgroup_ns: bool, } +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize,Default)] +pub struct IdMountParam { + /// Mount Flags. + pub flags: libc::c_ulong, + + #[serde(default, skip_serializing_if = "Option::is_none")] + /// RecAttr represents mount properties to be applied recursively. 
+ pub rec_attr: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + /// Source specifies the source path of the mount. + pub source: Option, + + /// Recursive indicates if the mapping needs to be recursive. + pub recursive: bool, + /// UserNSPath is a path to a user namespace that indicates the necessary + /// id-mappings for MOUNT_ATTR_IDMAP. If set to non-"", UIDMappings and + /// GIDMappings must be set to nil. + pub user_ns_path: String, + + /// Sentinel flag telling the main process that there are no more mounts to process + pub end: bool, +} + +#[derive(Debug, Clone, PartialEq, Eq,Deserialize,Serialize,Default)] +pub struct IdMountSource { + pub file: RawFd, +} + pub struct Mount { syscall: Box, } @@ -76,10 +108,38 @@ impl Mount { } } - pub fn setup_mount(&self, mount: &SpecMount, options: &MountOptions) -> Result<()> { + pub fn setup_mount(&self, mount: &SpecMount, options: &MountOptions, + ns_ptah: Option, + main_sender: Option<&mut channel::MainSender>, + init_receiver: Option<&mut channel::InitReceiver>) -> Result<()> { tracing::debug!("mounting {:?}", mount); - let mut mount_option_config = parse_mount(mount)?; + let mut mount_option_config = parse_mount(mount,ns_ptah)?; + // check idmap mount config + check_idmap_mounts(mount_option_config.clone())?; + + let mut ms_option: Option=None; + let mc = mount_option_config.clone(); + if self.is_id_mapped(mc.clone()).is_ok() && init_receiver.is_some() && main_sender.is_some(){ + let id_map_param = IdMountParam { + flags: mc.flags.bits(), + rec_attr: mc.rec_attr.clone(), + source: mount.source().clone(), + recursive: mc.id_mapping.clone().unwrap().recursive, + user_ns_path:mc.id_mapping.clone().unwrap().user_ns_path, + end:false, + }; + main_sender.unwrap().process_mount_place(id_map_param).map_err(|err| { + tracing::error!(?err, "failed to send mount to get host file"); + MountError::Custom("failed to send mount to get host file".to_string()) + })?; + let ms: IdMountSource =
init_receiver.unwrap().wait_for_mount_source().map_err(|err| { + tracing::error!(?err, "failed to get mount source"); + MountError::Custom("failed to get mount source".to_string()) + })?; + ms_option = Some(ms); + } + // cgroup mounts do not support idmap; proc, sysfs, mqueue, tmpfs, bind and other types do match mount.typ().as_deref() { Some("cgroup") => { let cgroup_setup = libcgroups::common::get_cgroup_setup().map_err(|err| { @@ -116,22 +176,24 @@ impl Mount { options.root, &mount_option_config, options.label, + ms_option, ) - .map_err(|err| { - tracing::error!("failed to mount /dev: {}", err); - err - })?; + .map_err(|err| { + tracing::error!("failed to mount /dev: {}", err); + err + })?; } else { self.mount_into_container( mount, options.root, &mount_option_config, options.label, + ms_option, ) - .map_err(|err| { - tracing::error!("failed to mount {:?}: {}", mount, err); - err - })?; + .map_err(|err| { + tracing::error!("failed to mount {:?}: {}", mount, err); + err + })?; } } } @@ -159,7 +221,7 @@ impl Mount { err })?; - self.setup_mount(&tmpfs, options).map_err(|err| { + self.setup_mount(&tmpfs, options,None, None,None).map_err(|err| { tracing::error!("failed to mount tmpfs for cgroup: {}", err); err })?; @@ -286,6 +348,7 @@ impl Mount { flags: MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID | MsFlags::MS_NODEV, data: data.to_string(), rec_attr: None, + id_mapping: None, }; self.mount_into_container( @@ -293,11 +356,12 @@ impl Mount { options.root, &mount_options_config, options.label, + None, ) - .map_err(|err| { - tracing::error!("failed to mount {subsystem_mount:?}: {err}"); - err - }) + .map_err(|err| { + tracing::error!("failed to mount {subsystem_mount:?}: {err}"); + err + }) } #[cfg(feature = "v1")] @@ -352,7 +416,7 @@ impl Mount { .build()?; tracing::debug!("Mounting emulated cgroup subsystem: {:?}", emulated); - self.setup_mount(&emulated, options).map_err(|err| { + self.setup_mount(&emulated, options,None, None,None).map_err(|err| { tracing::error!("failed
to mount {subsystem_name} cgroup hierarchy: {}", err); err })?; @@ -386,6 +450,7 @@ impl Mount { options.root, mount_option_config, options.label, + None, ) .is_err() { @@ -432,11 +497,12 @@ impl Mount { options.root, &mount_option_config, options.label, + None, ) - .map_err(|err| { - tracing::error!("failed to bind mount cgroup hierarchy: {}", err); - err - })?; + .map_err(|err| { + tracing::error!("failed to bind mount cgroup hierarchy: {}", err); + err + })?; } Ok(()) @@ -482,6 +548,7 @@ impl Mount { rootfs: &Path, mount_option_config: &MountOptionConfig, label: Option<&str>, + ms: Option, ) -> Result<()> { let typ = m.typ().as_deref(); let mut d = mount_option_config.data.to_string(); @@ -545,33 +612,43 @@ impl Mount { PathBuf::from(source) }; - if let Err(err) = - self.syscall - .mount(Some(&*src), dest, typ, mount_option_config.flags, Some(&*d)) - { - if let SyscallError::Nix(errno) = err { - if !matches!(errno, Errno::EINVAL) { - tracing::error!("mount of {:?} failed. {}", m.destination(), errno); - return Err(err.into()); + if self.is_id_mapped(mount_option_config.clone()).is_ok() && ms.is_some(){ + let joined_path = rootfs.join(m.destination()); + let path = joined_path.as_path(); + // let path = Path::join(rootfs, m.destination()).as_path(); + let fh = Dir::open(path, OFlag::O_PATH | OFlag::O_CLOEXEC, Mode::empty())?; + let proc_fd = PathBuf::from(format!("/proc/self/fd/{}", fh.as_raw_fd())); + let src_file_fd: RawFd = ms.unwrap().file; + self.syscall.move_mount(src_file_fd, "", libc::AT_FDCWD, proc_fd.to_str().unwrap(), (libc::MOVE_MOUNT_F_EMPTY_PATH | libc::MOVE_MOUNT_T_SYMLINKS) as i32).unwrap(); + return Ok(()); + }else { + if let Err(err) = + self.syscall + .mount(Some(&*src), dest, typ, mount_option_config.flags, Some(&*d)) + { + if let SyscallError::Nix(errno) = err { + if !matches!(errno, Errno::EINVAL) { + tracing::error!("mount of {:?} failed. 
{}", m.destination(), errno); + return Err(err.into()); + } } - } - self.syscall - .mount( - Some(&*src), - dest, - typ, - mount_option_config.flags, - Some(&mount_option_config.data), - ) - .map_err(|err| { - tracing::error!("failed to mount {src:?} to {dest:?}"); - err - })?; - } + self.syscall + .mount( + Some(&*src), + dest, + typ, + mount_option_config.flags, + Some(&mount_option_config.data), + ) + .map_err(|err| { + tracing::error!("failed to mount {src:?} to {dest:?}"); + err + })?; + } - if typ == Some("bind") - && mount_option_config.flags.intersects( + if typ == Some("bind") + && mount_option_config.flags.intersects( !(MsFlags::MS_REC | MsFlags::MS_REMOUNT | MsFlags::MS_BIND @@ -579,19 +656,20 @@ impl Mount { | MsFlags::MS_SHARED | MsFlags::MS_SLAVE), ) - { - self.syscall - .mount( - Some(dest), - dest, - None, - mount_option_config.flags | MsFlags::MS_REMOUNT, - None, - ) - .map_err(|err| { - tracing::error!("failed to remount {:?}: {}", dest, err); - err - })?; + { + self.syscall + .mount( + Some(dest), + dest, + None, + mount_option_config.flags | MsFlags::MS_REMOUNT, + None, + ) + .map_err(|err| { + tracing::error!("failed to remount {:?}: {}", dest, err); + err + })?; + } } if let Some(mount_attr) = &mount_option_config.rec_attr { @@ -608,6 +686,19 @@ impl Mount { Ok(()) } + + pub fn is_id_mapped(&self, m: MountOptionConfig) -> Result { + if m.id_mapping.is_none() { + Ok(false) + } else { + let id_mapping = m.id_mapping.clone().unwrap(); + if id_mapping.gid_mappings.is_none() || id_mapping.uid_mappings.is_none(){ + Ok(false) + }else { + Ok(true) + } + } + } } /// Find parent mount of rootfs in given mount infos @@ -654,14 +745,15 @@ mod tests { "gid=5".to_string(), ]) .build()?; - let mount_option_config = parse_mount(mount)?; + let mount_option_config = parse_mount(mount,None)?; assert!(m .mount_into_container( mount, tmp_dir.path(), &mount_option_config, - Some("defaults") + Some("defaults"), + None, ) .is_ok()); @@ -691,7 +783,7 @@ mod tests { 
.source(tmp_dir.path().join("null")) .options(vec!["ro".to_string()]) .build()?; - let mount_option_config = parse_mount(mount)?; + let mount_option_config = parse_mount(mount,None)?; OpenOptions::new() .create(true) .truncate(true) @@ -699,7 +791,7 @@ mod tests { .open(tmp_dir.path().join("null"))?; assert!(m - .mount_into_container(mount, tmp_dir.path(), &mount_option_config, None) + .mount_into_container(mount, tmp_dir.path(), &mount_option_config, None, None) .is_ok()); let want = vec![ @@ -944,7 +1036,7 @@ mod tests { } else { subsystem_name.to_string() } - .to_owned(), + .to_owned(), ), }; assert_eq!(expected, act); @@ -981,6 +1073,7 @@ mod tests { flags, data: String::new(), rec_attr: None, + id_mapping: None, }; mounter .mount_cgroup_v2(&spec_cgroup_mount, &mount_opts, &mount_option_config) diff --git a/crates/libcontainer/src/rootfs/rootfs.rs b/crates/libcontainer/src/rootfs/rootfs.rs index e74138333..8c3440f10 100644 --- a/crates/libcontainer/src/rootfs/rootfs.rs +++ b/crates/libcontainer/src/rootfs/rootfs.rs @@ -1,15 +1,16 @@ use std::collections::HashSet; -use std::path::Path; +use std::path::{Path, PathBuf}; use nix::mount::MsFlags; use oci_spec::runtime::{Linux, Spec}; use super::device::Device; -use super::mount::{Mount, MountOptions}; +use super::mount::{Mount, MountOptions, IdMountParam}; use super::symlink::Symlink; use super::utils::default_devices; use super::{Result, RootfsError}; use crate::error::MissingSpecError; +use crate::process::channel; use crate::syscall::syscall::create_syscall; use crate::syscall::Syscall; @@ -37,6 +38,9 @@ impl RootFS { rootfs: &Path, bind_devices: bool, cgroup_ns: bool, + ns_ptah: Option, + main_sender: &mut channel::MainSender, + init_receiver: &mut channel::InitReceiver, ) -> Result<()> { tracing::debug!(?rootfs, "prepare rootfs"); let mut flags = MsFlags::MS_REC; @@ -89,9 +93,12 @@ impl RootFS { if let Some(mounts) = spec.mounts() { for mount in mounts { - mounter.setup_mount(mount, &global_options)?; + 
mounter.setup_mount(mount, &global_options,ns_ptah.clone(),Some(main_sender),Some(init_receiver))?; } } + let mut id_map_param= IdMountParam::default(); + id_map_param.end = true; + main_sender.process_mount_place(id_map_param).unwrap(); let symlinker = Symlink::new(); symlinker.setup_kcore_symlink(rootfs)?; diff --git a/crates/libcontainer/src/rootfs/utils.rs b/crates/libcontainer/src/rootfs/utils.rs index 37bbb7e48..543bddc9f 100644 --- a/crates/libcontainer/src/rootfs/utils.rs +++ b/crates/libcontainer/src/rootfs/utils.rs @@ -3,7 +3,8 @@ use std::str::FromStr; use nix::mount::MsFlags; use nix::sys::stat::SFlag; -use oci_spec::runtime::{LinuxDevice, LinuxDeviceBuilder, LinuxDeviceType, Mount}; +use oci_spec::runtime::{LinuxDevice, LinuxDeviceBuilder, LinuxDeviceType, LinuxIdMapping, Mount}; +use crate::rootfs::mount::MountError::Custom; use super::mount::MountError; use crate::syscall::linux::{self, MountAttrOption}; @@ -18,6 +19,37 @@ pub struct MountOptionConfig { /// RecAttr represents mount properties to be applied recursively. pub rec_attr: Option, + + /// Mapping is the MOUNT_ATTR_IDMAP configuration for the mount. If non-nil, + /// the mount is configured to use MOUNT_ATTR_IDMAP-style id mappings. + pub id_mapping: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct MountIDMapping { + /// Recursive indicates if the mapping needs to be recursive. + pub recursive: bool, + + /// UserNSPath is a path to a user namespace that indicates the necessary + /// id-mappings for MOUNT_ATTR_IDMAP. If set to non-"", UIDMappings and + /// GIDMappings must be set to nil. + pub user_ns_path: String, + + /// UIDMappings is the uid mapping set for this mount, to be used with + /// MOUNT_ATTR_IDMAP. + pub uid_mappings: Option>, + + /// GIDMappings is the gid mapping set for this mount, to be used with + /// MOUNT_ATTR_IDMAP. + pub gid_mappings: Option>, +} + +/// IDMap represents UID/GID Mappings for User Namespaces. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct IDMap { + pub container_id: u32, + pub host_id: u32, + pub size: u32, } pub fn default_devices() -> Vec { @@ -82,11 +114,16 @@ pub fn to_sflag(dev_type: LinuxDeviceType) -> SFlag { } } -pub fn parse_mount(m: &Mount) -> std::result::Result { +pub fn parse_mount(m: &Mount,ns_ptah: Option) -> std::result::Result { let mut flags = MsFlags::empty(); let mut data = Vec::new(); let mut mount_attr: Option = None; - + let mut id_mapping: MountIDMapping = MountIDMapping { + recursive: false, + user_ns_path: "".to_string(), + uid_mappings: None, + gid_mappings: None, + }; if let Some(options) = &m.options() { for option in options { if let Ok(mount_attr_option) = linux::MountAttrOption::from_str(option.as_str()) { @@ -162,10 +199,7 @@ pub fn parse_mount(m: &Mount) -> std::result::Result Some((true, MsFlags::MS_RELATIME)), "strictatime" => Some((true, MsFlags::MS_STRICTATIME)), "nostrictatime" => Some((true, MsFlags::MS_STRICTATIME)), - unknown => { - if unknown == "idmap" || unknown == "ridmap" { - return Err(MountError::UnsupportedMountOption(unknown.to_string())); - } + _unknown => { None } } { @@ -177,16 +211,83 @@ pub fn parse_mount(m: &Mount) -> std::result::Result MountIDMapping { + recursive: false, + user_ns_path: "".to_string(), + uid_mappings: None, + gid_mappings: None, + }, + "ridmap" => MountIDMapping { + recursive: true, + user_ns_path: "".to_string(), + uid_mappings: None, + gid_mappings: None, + }, + _ => id_mapping, + }; + data.push(option.as_str()); } } + + if m.gid_mappings().is_some() || m.uid_mappings().is_some() { + id_mapping.uid_mappings = to_config_idmap(m.uid_mappings()); + id_mapping.gid_mappings = to_config_idmap(m.gid_mappings()); + } + if let Some(path) = ns_ptah { + id_mapping.user_ns_path = path.to_str().unwrap().to_string(); + } Ok(MountOptionConfig { flags, data: data.join(","), rec_attr: mount_attr, + id_mapping: Some(id_mapping), }) } + +pub fn to_config_idmap(ids: &Option>) -> Option> { + 
if ids.is_none() { + return None; + } + let mut idmaps = Vec::new(); + if let Some(ids_tmp) = ids { + for id in ids_tmp { + let idmap = IDMap { + container_id: id.container_id(), + host_id: id.host_id(), + size: id.size(), + }; + idmaps.push(idmap); + } + } + return Some(idmaps); +} + +pub fn check_idmap_mounts(mo_cfg: MountOptionConfig) -> Result<(), MountError> { + if mo_cfg.id_mapping.is_none() { + return Ok(()); + } + if let Some(rec) = mo_cfg.rec_attr { + if (rec.attr_set | rec.attr_clr) & linux::MOUNT_ATTR_IDMAP != 0 { + return Err(Custom("mount configuration cannot contain rec_attr for MOUNT_ATTR_IDMAP".to_string())); + } + } + if let Some(m) = mo_cfg.id_mapping { + if m.user_ns_path == "" { + if m.gid_mappings.is_none() || m.uid_mappings.is_none() { + return Err(Custom("id-mapped mounts must have both uid and gid mappings specified".to_string())); + } + } else { + if m.gid_mappings.is_some() || m.uid_mappings.is_some() { + return Err(Custom("[internal error] id-mapped mounts cannot have both userns_path and uid and gid mappings specified".to_string())); + } + } + } + Ok(()) +} + #[cfg(test)] mod tests { use anyhow::Result; @@ -215,12 +316,14 @@ mod tests { .typ("proc") .source(PathBuf::from("proc")) .build()?, + None, )?; assert_eq!( MountOptionConfig { flags: MsFlags::empty(), data: "".to_string(), rec_attr: None, + id_mapping: None, }, mount_option_config ); @@ -237,12 +340,14 @@ mod tests { "size=65536k".to_string(), ]) .build()?, + None, )?; assert_eq!( MountOptionConfig { flags: MsFlags::MS_NOSUID, data: "mode=755,size=65536k".to_string(), rec_attr: None, + id_mapping: None, }, mount_option_config ); @@ -262,12 +367,14 @@ mod tests { ]) .build() .unwrap(), + None, )?; assert_eq!( MountOptionConfig { flags: MsFlags::MS_NOSUID | MsFlags::MS_NOEXEC, data: "newinstance,ptmxmode=0666,mode=0620,gid=5".to_string(), - rec_attr: None + rec_attr: None, + id_mapping: None, }, mount_option_config ); @@ -285,12 +392,14 @@ mod tests { "size=65536k".to_string(), ]) 
.build()?, + None, )?; assert_eq!( MountOptionConfig { flags: MsFlags::MS_NOSUID | MsFlags::MS_NOEXEC | MsFlags::MS_NODEV, data: "mode=1777,size=65536k".to_string(), - rec_attr: None + rec_attr: None, + id_mapping: None, }, mount_option_config ); @@ -307,12 +416,14 @@ mod tests { ]) .build() .unwrap(), + None, )?; assert_eq!( MountOptionConfig { flags: MsFlags::MS_NOSUID | MsFlags::MS_NOEXEC | MsFlags::MS_NODEV, data: "".to_string(), - rec_attr: None + rec_attr: None, + id_mapping: None, }, mount_option_config ); @@ -329,6 +440,7 @@ mod tests { "ro".to_string(), ]) .build()?, + None, )?; assert_eq!( MountOptionConfig { @@ -338,6 +450,7 @@ mod tests { | MsFlags::MS_RDONLY, data: "".to_string(), rec_attr: None, + id_mapping: None, }, mount_option_config ); @@ -355,6 +468,7 @@ mod tests { "ro".to_string(), ]) .build()?, + None, )?; assert_eq!( MountOptionConfig { @@ -363,7 +477,8 @@ mod tests { | MsFlags::MS_NODEV | MsFlags::MS_RDONLY, data: "".to_string(), - rec_attr: None + rec_attr: None, + id_mapping: None, }, mount_option_config, ); @@ -407,6 +522,7 @@ mod tests { "nostrictatime".to_string(), ]) .build()?, + None, )?; assert_eq!( MountOptionConfig { @@ -421,6 +537,7 @@ mod tests { | MsFlags::MS_UNBINDABLE, data: "".to_string(), rec_attr: None, + id_mapping: None, }, mount_option_config ); @@ -449,12 +566,14 @@ mod tests { "rsymfollow".to_string(), ]) .build()?, + None, )?; assert_eq!( MountOptionConfig { flags: MsFlags::empty(), data: "".to_string(), - rec_attr: Some(MountAttr::all()) + rec_attr: Some(MountAttr::all()), + id_mapping: None, }, mount_option_config ); diff --git a/crates/libcontainer/src/syscall/linux.rs b/crates/libcontainer/src/syscall/linux.rs index d681ae12e..e9c1c6d40 100644 --- a/crates/libcontainer/src/syscall/linux.rs +++ b/crates/libcontainer/src/syscall/linux.rs @@ -19,6 +19,7 @@ use nix::sched::{unshare, CloneFlags}; use nix::sys::stat::{mknod, Mode, SFlag}; use nix::unistd::{chown, chroot, fchdir, pivot_root, sethostname, Gid, Uid}; use 
oci_spec::runtime::LinuxRlimit; +use serde::{Deserialize, Serialize}; use super::{Result, Syscall, SyscallError}; use crate::{capabilities, utils}; @@ -36,6 +37,7 @@ const MOUNT_ATTR_NOATIME: u64 = 0x00000010; const MOUNT_ATTR_STRICTATIME: u64 = 0x00000020; const MOUNT_ATTR_NODIRATIME: u64 = 0x00000080; const MOUNT_ATTR_NOSYMFOLLOW: u64 = 0x00200000; +pub const MOUNT_ATTR_IDMAP: u64 = 0x00100000; /// Constants used by mount_setattr(2). pub enum MountAttrOption { @@ -124,7 +126,7 @@ impl FromStr for MountAttrOption { } #[repr(C)] -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize,Deserialize)] /// A structure used as te third argument of mount_setattr(2). pub struct MountAttr { /// Mount properties to set. @@ -480,6 +482,15 @@ impl Syscall for LinuxSyscall { Ok(()) } + fn move_mount(&self, from_dir_fd: i32, from_path_name: &str, to_dir_fd: i32, to_path_name: &str, flags: i32) -> Result<()> { + if unsafe { libc::syscall(libc::SYS_move_mount, from_dir_fd, from_path_name, to_dir_fd, to_path_name, flags, 0) } == -1 { + let err = nix::errno::Errno::last(); + tracing::error!(?err, "failed to move_mount"); + return Err(err.into()); + } + Ok(()) + } + fn symlink(&self, original: &Path, link: &Path) -> Result<()> { symlink(original, link)?; diff --git a/crates/libcontainer/src/syscall/syscall.rs b/crates/libcontainer/src/syscall/syscall.rs index 82669d5ee..0b8cd1b81 100644 --- a/crates/libcontainer/src/syscall/syscall.rs +++ b/crates/libcontainer/src/syscall/syscall.rs @@ -40,6 +40,8 @@ pub trait Syscall { flags: MsFlags, data: Option<&str>, ) -> Result<()>; + // fromDirfd int, fromPathName string, toDirfd int, toPathName string, flags int + fn move_mount(&self,from_dir_fd: i32, from_path_name: &str, to_dir_fd: i32, to_path_name: &str, flags: i32)-> Result<()>; fn symlink(&self, original: &Path, link: &Path) -> Result<()>; fn mknod(&self, path: &Path, kind: SFlag, perm: Mode, dev: u64) -> Result<()>; fn chown(&self, path: &Path, owner: 
Option, group: Option) -> Result<()>; diff --git a/crates/libcontainer/src/syscall/test.rs b/crates/libcontainer/src/syscall/test.rs index 395feb5eb..a6cb20fc5 100644 --- a/crates/libcontainer/src/syscall/test.rs +++ b/crates/libcontainer/src/syscall/test.rs @@ -259,6 +259,10 @@ impl Syscall for TestHelperSyscall { Box::new(IoPriorityArgs { class, priority }), ) } + + fn move_mount(&self, _: i32, _: &str, _: i32, _: &str, _: i32) -> Result<()> { + todo!() + } } impl TestHelperSyscall { diff --git a/tests/contest/contest/Cargo.toml b/tests/contest/contest/Cargo.toml index 81d90cca6..79879defe 100644 --- a/tests/contest/contest/Cargo.toml +++ b/tests/contest/contest/Cargo.toml @@ -11,7 +11,8 @@ libcgroups = { path = "../../../crates/libcgroups" } libcontainer = { path = "../../../crates/libcontainer" } nix = "0.28.0" num_cpus = "1.16" -oci-spec = { version = "0.6.4", features = ["runtime"] } +#oci-spec = { version = "0.6.4", features = ["runtime"] } +oci-spec={git="https://github.com/lengrongfu/oci-spec-rs",branch = "feat/add_idmapped_mount",features = ["runtime"] } once_cell = "1.19.0" pnet_datalink = "0.34.0" procfs = "0.16.0" diff --git a/tests/contest/runtimetest/Cargo.toml b/tests/contest/runtimetest/Cargo.toml index 44c9b9845..1bcabdaba 100644 --- a/tests/contest/runtimetest/Cargo.toml +++ b/tests/contest/runtimetest/Cargo.toml @@ -4,7 +4,8 @@ version = "0.0.1" edition = "2021" [dependencies] -oci-spec = { version = "0.6.4", features = ["runtime"] } +#oci-spec = { version = "0.6.4", features = ["runtime"] } +oci-spec={git="https://github.com/lengrongfu/oci-spec-rs",branch = "feat/add_idmapped_mount",features = ["runtime"]} nix = "0.28.0" anyhow = "1.0" libc = "0.2.155" # TODO (YJDoc2) upgrade to latest