Skip to content

Commit

Permalink
Fix rootless determination logic, to accomodate podman rootless behva…
Browse files Browse the repository at this point in the history
…iour
  • Loading branch information
YJDoc2 committed Aug 9, 2023
1 parent 666e18b commit 4b8ec32
Show file tree
Hide file tree
Showing 15 changed files with 120 additions and 28 deletions.
18 changes: 12 additions & 6 deletions crates/libcgroups/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,11 +316,12 @@ pub enum CreateCgroupSetupError {
Systemd(#[from] systemd::manager::SystemdManagerError),
}

#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct CgroupConfig {
pub cgroup_path: PathBuf,
pub systemd_cgroup: bool,
pub container_name: String,
pub use_system_bus: bool,
}

pub fn create_cgroup_manager(
Expand All @@ -342,7 +343,12 @@ pub fn create_cgroup_manager(
if cgroup_path.is_absolute() || !config.systemd_cgroup {
return Ok(create_v2_cgroup_manager(cgroup_path)?.any());
}
Ok(create_systemd_cgroup_manager(cgroup_path, config.container_name.as_str())?.any())
Ok(create_systemd_cgroup_manager(
cgroup_path,
config.container_name.as_str(),
config.use_system_bus,
)?
.any())
}
}
}
Expand Down Expand Up @@ -381,31 +387,31 @@ fn create_v2_cgroup_manager(
fn create_systemd_cgroup_manager(
cgroup_path: &Path,
container_name: &str,
use_system_bus: bool,
) -> Result<systemd::manager::Manager, systemd::manager::SystemdManagerError> {
if !systemd::booted() {
panic!(
"systemd cgroup flag passed, but systemd support for managing cgroups is not available"
);
}

let use_system = nix::unistd::geteuid().is_root();

tracing::info!(
"systemd cgroup manager with system bus {} will be used",
use_system
use_system_bus
);
systemd::manager::Manager::new(
DEFAULT_CGROUP_ROOT.into(),
cgroup_path.to_owned(),
container_name.into(),
use_system,
use_system_bus,
)
}

#[cfg(not(feature = "systemd"))]
fn create_systemd_cgroup_manager(
_cgroup_path: &Path,
_container_name: &str,
_use_system_bus: bool,
) -> Result<systemd::manager::Manager, systemd::manager::SystemdManagerError> {
Err(systemd::manager::SystemdManagerError::NotEnabled)
}
Expand Down
2 changes: 2 additions & 0 deletions crates/libcontainer/src/container/builder_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ impl ContainerBuilderImpl {
cgroup_path: cgroups_path,
systemd_cgroup: self.use_systemd || self.rootless.is_some(),
container_name: self.container_id.to_owned(),
use_system_bus: self.rootless.is_none(),
};
let process = self
.spec
Expand Down Expand Up @@ -191,6 +192,7 @@ impl ContainerBuilderImpl {
cgroup_path: cgroups_path,
systemd_cgroup: self.use_systemd || self.rootless.is_some(),
container_name: self.container_id.to_string(),
use_system_bus: self.rootless.is_none(),
})?;

let mut errors = Vec::new();
Expand Down
9 changes: 9 additions & 0 deletions crates/libcontainer/src/container/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,20 @@ impl Container {
self.state.use_systemd
}

pub fn is_rootless(&self) -> bool {
self.state.is_rootless
}

pub fn set_systemd(&mut self, should_use: bool) -> &mut Self {
self.state.use_systemd = should_use;
self
}

pub fn set_rootless(&mut self, is_rootless: bool) -> &mut Self {
self.state.is_rootless = is_rootless;
self
}

pub fn set_clean_up_intel_rdt_directory(&mut self, clean_up: bool) -> &mut Self {
self.state.clean_up_intel_rdt_subdirectory = Some(clean_up);
self
Expand Down
1 change: 1 addition & 0 deletions crates/libcontainer/src/container/container_delete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ impl Container {
cgroup_path: config.cgroup_path.to_owned(),
systemd_cgroup: self.systemd(),
container_name: self.id().to_string(),
use_system_bus: !self.is_rootless(),
},
)?;
cmanager.remove().map_err(|err| {
Expand Down
1 change: 1 addition & 0 deletions crates/libcontainer/src/container/container_events.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ impl Container {
cgroup_path: self.spec()?.cgroup_path,
systemd_cgroup: self.systemd(),
container_name: self.id().to_string(),
use_system_bus: !self.is_rootless(),
})?;
match stats {
true => {
Expand Down
2 changes: 2 additions & 0 deletions crates/libcontainer/src/container/container_kill.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ impl Container {
cgroup_path: self.spec()?.cgroup_path,
systemd_cgroup: self.systemd(),
container_name: self.id().to_string(),
use_system_bus: !self.is_rootless(),
},
)?;
cmanager.freeze(libcgroups::common::FreezerState::Thawed)?;
Expand All @@ -100,6 +101,7 @@ impl Container {
cgroup_path: self.spec()?.cgroup_path,
systemd_cgroup: self.systemd(),
container_name: self.id().to_string(),
use_system_bus: !self.is_rootless(),
})?;

if let Err(e) = cmanager.freeze(libcgroups::common::FreezerState::Frozen) {
Expand Down
1 change: 1 addition & 0 deletions crates/libcontainer/src/container/container_pause.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ impl Container {
cgroup_path: self.spec()?.cgroup_path,
systemd_cgroup: self.systemd(),
container_name: self.id().to_string(),
use_system_bus: self.is_rootless(),
})?;
cmanager.freeze(FreezerState::Frozen)?;

Expand Down
1 change: 1 addition & 0 deletions crates/libcontainer/src/container/container_resume.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ impl Container {
cgroup_path: self.spec()?.cgroup_path,
systemd_cgroup: self.systemd(),
container_name: self.id().to_string(),
use_system_bus: self.is_rootless(),
})?;
// resume the frozen container
cmanager.freeze(FreezerState::Thawed)?;
Expand Down
5 changes: 4 additions & 1 deletion crates/libcontainer/src/container/init_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,17 @@ impl InitContainerBuilder {
} else {
None
};

let rootless = Rootless::new(&spec)?;
let config = YoukiConfig::from_spec(&spec, container.id(), rootless.is_some())?;
config.save(&container_dir).map_err(|err| {
tracing::error!(?container_dir, "failed to save config: {}", err);
err
})?;

container.set_rootless(rootless.is_some());

container.save()?;

let mut builder_impl = ContainerBuilderImpl {
container_type: ContainerType::InitContainer,
syscall: self.base.syscall,
Expand Down
3 changes: 3 additions & 0 deletions crates/libcontainer/src/container/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ pub struct State {
pub use_systemd: bool,
// Specifies if the Intel RDT subdirectory needs be cleaned up.
pub clean_up_intel_rdt_subdirectory: Option<bool>,
// specifies if the container was rootless or not
pub is_rootless: bool,
}

impl State {
Expand All @@ -143,6 +145,7 @@ impl State {
creator: None,
use_systemd: false,
clean_up_intel_rdt_subdirectory: None,
is_rootless: false,
}
}

Expand Down
3 changes: 2 additions & 1 deletion crates/libcontainer/src/process/container_init_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,8 @@ pub fn container_init_process(
})?;
}

let bind_service = namespaces.get(LinuxNamespaceType::User)?.is_some();
let bind_service =
namespaces.get(LinuxNamespaceType::User)?.is_some() || utils::is_in_new_userns();
let rootfs = RootFS::new();
rootfs
.prepare_rootfs(
Expand Down
9 changes: 6 additions & 3 deletions crates/libcontainer/src/process/container_main_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::{
intel_rdt::setup_intel_rdt,
},
rootless::Rootless,
utils,
};
use nix::sys::wait::{waitpid, WaitStatus};
use nix::unistd::Pid;
Expand Down Expand Up @@ -91,9 +92,11 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, bo
// the main process to set up uid and gid mapping, once the intermediate
// process enters into a new user namespace.
if let Some(rootless) = &container_args.rootless {
main_receiver.wait_for_mapping_request()?;
setup_mapping(rootless, intermediate_pid)?;
inter_sender.mapping_written()?;
if !utils::is_in_new_userns() {
main_receiver.wait_for_mapping_request()?;
setup_mapping(rootless, intermediate_pid)?;
inter_sender.mapping_written()?;
}
}

// At this point, we don't need to send any message to intermediate process anymore,
Expand Down
85 changes: 68 additions & 17 deletions crates/libcontainer/src/rootless.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use crate::error::MissingSpecError;
use crate::namespaces::{NamespaceError, Namespaces};
use crate::utils;
use nix::unistd::Pid;
use oci_spec::runtime::{Linux, LinuxIdMapping, LinuxNamespace, LinuxNamespaceType, Mount, Spec};
use oci_spec::runtime::{
Linux, LinuxIdMapping, LinuxIdMappingBuilder, LinuxNamespace, LinuxNamespaceType, Mount, Spec,
};
use std::fs;
use std::path::Path;
use std::process::Command;
Expand Down Expand Up @@ -150,11 +153,13 @@ impl Rootless {

// If conditions requires us to use rootless, we must either create a new
// user namespace or enter an existing.
if rootless_required() && user_namespace.is_none() {
if rootless_required() && user_namespace.is_none() && !utils::is_in_new_userns() {
return Err(RootlessError::NoUserNamespace);
}

if user_namespace.is_some() && user_namespace.unwrap().path().is_none() {
if (user_namespace.is_some() && user_namespace.unwrap().path().is_none())
|| utils::is_in_new_userns()
{
tracing::debug!("rootless container should be created");

validate_spec_for_rootless(spec).map_err(|err| {
Expand Down Expand Up @@ -220,7 +225,7 @@ impl TryFrom<&Linux> for Rootless {
uid_mappings: linux.uid_mappings().to_owned(),
gid_mappings: linux.gid_mappings().to_owned(),
user_namespace: user_namespace.cloned(),
privileged: nix::unistd::geteuid().is_root(),
privileged: !rootless_required(),
rootless_id_mapper: RootlessIDMapper::new(),
})
}
Expand All @@ -232,6 +237,10 @@ pub fn rootless_required() -> bool {
return true;
}

if utils::is_in_new_userns() {
return true;
}

matches!(std::env::var("YOUKI_USE_ROOTLESS").as_deref(), Ok("true"))
}

Expand All @@ -255,24 +264,66 @@ pub fn unprivileged_user_ns_enabled() -> Result<bool> {
}
}

fn parse_mapping_entry(entry: &str) -> (u32, u32, u32) {
let components: Vec<_> = entry.split_ascii_whitespace().collect();
if components.len() != 3 {
panic!("invalid mapping entry {}", entry);
}
(
components[0].parse().unwrap(),
components[1].parse().unwrap(),
components[2].parse().unwrap(),
)
}

fn get_id_mapping(path: &str) -> Result<Vec<LinuxIdMapping>> {
let uid_map_str = std::fs::read_to_string(path)
.unwrap_or_else(|_| panic!("failed to read {path} for rootless container mapping entries"));
let mut entries = Vec::new();
for entry in uid_map_str.lines() {
let (container, host, size) = parse_mapping_entry(entry);
entries.push(
LinuxIdMappingBuilder::default()
.container_id(container)
.host_id(host)
.size(size)
.build()
.unwrap(),
);
}
Ok(entries)
}

/// Validates that the spec contains the required information for
/// running in rootless mode
fn validate_spec_for_rootless(spec: &Spec) -> std::result::Result<(), ValidateSpecError> {
tracing::debug!(?spec, "validating spec for rootless container");
let linux = spec.linux().as_ref().ok_or(MissingSpecError::Linux)?;
let namespaces = Namespaces::try_from(linux.namespaces().as_ref())?;
if namespaces.get(LinuxNamespaceType::User)?.is_none() {
if namespaces.get(LinuxNamespaceType::User)?.is_none() && !utils::is_in_new_userns() {
return Err(ValidateSpecError::NoUserNamespace);
}

let gid_mappings = linux
.gid_mappings()
.as_ref()
.ok_or(ValidateSpecError::NoGIDMapping)?;
let uid_mappings = linux
.uid_mappings()
.as_ref()
.ok_or(ValidateSpecError::NoUIDMappings)?;
let (uid_mappings, gid_mappings) = if !utils::is_in_new_userns() {
// if we are not already in a user namespace, then the mappings must be present
(
linux
.uid_mappings()
.as_ref()
.ok_or(ValidateSpecError::NoUIDMappings)?
.to_owned(),
linux
.gid_mappings()
.as_ref()
.ok_or(ValidateSpecError::NoGIDMapping)?
.to_owned(),
)
} else {
(
get_id_mapping("/proc/self/uid_map").unwrap(),
get_id_mapping("/proc/self/gid_map").unwrap(),
)
};

if uid_mappings.is_empty() {
return Err(ValidateSpecError::NoUIDMappings);
Expand All @@ -285,21 +336,21 @@ fn validate_spec_for_rootless(spec: &Spec) -> std::result::Result<(), ValidateSp
spec.mounts()
.as_ref()
.ok_or(ValidateSpecError::NoMountSpec)?,
uid_mappings,
gid_mappings,
&uid_mappings,
&gid_mappings,
)?;

if let Some(additional_gids) = spec
.process()
.as_ref()
.and_then(|process| process.user().additional_gids().as_ref())
{
let privileged = nix::unistd::geteuid().is_root();
let privileged = !rootless_required();

match (privileged, additional_gids.is_empty()) {
(true, false) => {
for gid in additional_gids {
if !is_id_mapped(*gid, gid_mappings) {
if !is_id_mapped(*gid, &gid_mappings) {
tracing::error!(?gid,"gid is specified as supplementary group, but is not mapped in the user namespace");
return Err(ValidateSpecError::GidNotMapped(*gid));
}
Expand Down
7 changes: 7 additions & 0 deletions crates/libcontainer/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,13 @@ pub fn get_user_home(uid: u32) -> Option<PathBuf> {
}
}

pub fn is_in_new_userns() -> bool {
let uid_map_path = "/proc/self/uid_map";
let content = std::fs::read_to_string(uid_map_path)
.unwrap_or_else(|_| panic!("failed to read {}", uid_map_path));
!content.contains("4294967295")
}

/// If None, it will generate a default path for cgroups.
pub fn get_cgroup_path(
cgroups_path: &Option<PathBuf>,
Expand Down
1 change: 1 addition & 0 deletions crates/youki/src/commands/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ fn create_cgroup_manager<P: AsRef<Path>>(
cgroup_path: container.spec()?.cgroup_path,
systemd_cgroup: container.systemd(),
container_name: container.id().to_string(),
use_system_bus: !container.is_rootless(),
},
)?)
}

0 comments on commit 4b8ec32

Please sign in to comment.