Skip to content

Commit

Permalink
Merge pull request #297 from utam0k/refactoring/init-1st
Browse files Browse the repository at this point in the history
Simply split init.rs into several files.
  • Loading branch information
utam0k authored Sep 13, 2021
2 parents 8923a98 + 47f003c commit 9844d84
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 129 deletions.
12 changes: 8 additions & 4 deletions src/container/builder_impl.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
hooks,
notify_socket::NotifyListener,
process::{channel, fork, init},
process::{args::ContainerArgs, channel, fork, intermediate},
rootless::{self, Rootless},
syscall::linux::LinuxSyscall,
utils,
Expand Down Expand Up @@ -97,10 +97,10 @@ impl<'a> ContainerBuilderImpl<'a> {
prctl::set_dumpable(false).unwrap();
}

// This init_args will be passed to the container init process,
// This intermediate_args will be passed to the container intermediate process,
// therefore we will have to move all the variables by value. Since self
// is a shared reference, we have to clone these variables here.
let init_args = init::ContainerInitArgs {
let intermediate_args = ContainerArgs {
init: self.init,
syscall: self.syscall.clone(),
spec: self.spec.clone(),
Expand All @@ -121,7 +121,11 @@ impl<'a> ContainerBuilderImpl<'a> {
.close()
.context("Failed to close unused receiver")?;

init::container_intermediate(init_args, receiver_from_main, sender_to_main)
intermediate::container_intermediate(
intermediate_args,
receiver_from_main,
sender_to_main,
)
})?;
// Close down unused fds. The corresponding fds are duplicated to the
// child process during fork.
Expand Down
27 changes: 27 additions & 0 deletions src/process/args.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
use oci_spec::runtime::Spec;
use std::os::unix::prelude::RawFd;
use std::path::PathBuf;

use crate::rootless::Rootless;
use crate::{container::Container, notify_socket::NotifyListener, syscall::linux::LinuxSyscall};

/// Arguments handed to the container intermediate process and, from there,
/// to the container init process.
pub struct ContainerArgs<'a> {
/// Flag indicating if an init or a tenant container should be created
pub init: bool,
/// Interface to operating system primitives
pub syscall: LinuxSyscall,
/// OCI compliant runtime spec
pub spec: Spec,
/// Root filesystem of the container
pub rootfs: PathBuf,
/// Socket to communicate the file descriptor of the pty
pub console_socket: Option<RawFd>,
/// The Unix Domain Socket to communicate container start
pub notify_socket: NotifyListener,
/// File descriptors preserved/passed to the container init process.
pub preserve_fds: i32,
/// Container state
pub container: Option<Container>,
/// Options for rootless containers
pub rootless: Option<Rootless<'a>>,
}
132 changes: 8 additions & 124 deletions src/process/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,20 @@ use nix::{
sys::statfs,
unistd::{self, Gid, Uid},
};
use oci_spec::runtime::{LinuxNamespaceType, Spec, User};
use oci_spec::runtime::{LinuxNamespaceType, User};
use std::collections::HashMap;
use std::{
env,
os::unix::{io::AsRawFd, prelude::RawFd},
};
use std::{env, os::unix::io::AsRawFd};
use std::{fs, path::Path, path::PathBuf};

use crate::rootless::Rootless;
use crate::{
capabilities,
container::Container,
hooks,
namespaces::Namespaces,
notify_socket::NotifyListener,
process::channel,
process::fork,
rootfs,
syscall::{linux::LinuxSyscall, Syscall},
tty, utils,
capabilities, hooks, namespaces::Namespaces, process::channel, rootfs, syscall::Syscall, tty,
utils,
};

use super::args::ContainerArgs;


// Make sure a given path is on procfs. This is to avoid the security risk that
// /proc path is mounted over. Ref: CVE-2019-16884
fn ensure_procfs(path: &Path) -> Result<()> {
Expand Down Expand Up @@ -181,116 +173,8 @@ fn masked_path(path: &str, mount_label: &Option<String>) -> Result<()> {
Ok(())
}

/// Arguments handed to the container intermediate process and, from there,
/// to the container init process.
pub struct ContainerInitArgs<'a> {
/// Flag indicating if an init or a tenant container should be created
pub init: bool,
/// Interface to operating system primitives
pub syscall: LinuxSyscall,
/// OCI compliant runtime spec
pub spec: Spec,
/// Root filesystem of the container
pub rootfs: PathBuf,
/// Socket to communicate the file descriptor of the pty
pub console_socket: Option<RawFd>,
/// The Unix Domain Socket to communicate container start
pub notify_socket: NotifyListener,
/// File descriptors preserved/passed to the container init process.
pub preserve_fds: i32,
/// Container state
pub container: Option<Container>,
/// Options for rootless containers
pub rootless: Option<Rootless<'a>>,
}

/// Runs the intermediate-process stage of container creation.
///
/// Using the namespaces listed in `args.spec`, this function:
/// 1. enters the user namespace (if one is configured); when the namespace is
///    newly created it asks the main process to write the uid/gid mappings
///    and waits for the acknowledgement, then switches to uid 0 / gid 0;
/// 2. applies the rlimits from the spec's process section;
/// 3. enters the pid namespace (if one is configured);
/// 4. forks the container init process, waits until it reports ready, and
///    then reports the init pid to the main process.
///
/// # Errors
/// Returns an error if the spec has no `linux` or `process` section, or if
/// any namespace, id, rlimit, fork or channel operation fails.
///
/// # Panics
/// Panics if toggling the dumpable flag through `prctl::set_dumpable` fails.
pub fn container_intermediate(
    args: ContainerInitArgs,
    receiver_from_main: &mut channel::ReceiverFromMain,
    sender_to_main: &mut channel::SenderIntermediateToMain,
) -> Result<()> {
    let command = &args.syscall;
    let spec = &args.spec;
    let linux = spec.linux.as_ref().context("no linux in spec")?;
    let namespaces = Namespaces::from(linux.namespaces.as_ref());

    // If a user namespace is listed in the spec, enter it first. See
    // https://man7.org/linux/man-pages/man7/user_namespaces.7.html for more
    // information.
    if let Some(user_namespace) = namespaces.get(LinuxNamespaceType::User) {
        namespaces
            .unshare_or_setns(user_namespace)
            .with_context(|| format!("Failed to enter user namespace: {:?}", user_namespace))?;
        if user_namespace.path.is_none() {
            log::debug!("creating new user namespace");
            // child needs to be dumpable, otherwise the non root parent is not
            // allowed to write the uid/gid maps
            prctl::set_dumpable(true).unwrap();
            sender_to_main.identifier_mapping_request()?;
            receiver_from_main.wait_for_mapping_ack()?;
            prctl::set_dumpable(false).unwrap();
        }

        // After UID and GID mapping is configured correctly in the Youki main
        // process, we want to make sure we continue as the root user inside
        // the new user namespace. This is required because the process of
        // configuring the container process will require root, even though the
        // root in the user namespace is likely mapped to a non-privileged user
        // on the parent user namespace.
        command.set_id(Uid::from_raw(0), Gid::from_raw(0)).context(
            "Failed to configure uid and gid root in the beginning of a new user namespace",
        )?;
    }

    // Apply the resource limits requested for the container process.
    let proc = spec.process.as_ref().context("no process in spec")?;
    if let Some(rlimits) = proc.rlimits.as_ref() {
        for rlimit in rlimits.iter() {
            command.set_rlimit(rlimit).context("failed to set rlimit")?;
        }
    }

    // Pid namespace requires an extra fork to enter, so we enter pid namespace now.
    if let Some(pid_namespace) = namespaces.get(LinuxNamespaceType::Pid) {
        namespaces
            .unshare_or_setns(pid_namespace)
            .with_context(|| format!("Failed to enter pid namespace: {:?}", pid_namespace))?;
    }

    // We only need the init process to send us the ChildReady.
    let (sender_to_intermediate, receiver_from_init) = &mut channel::init_to_intermediate()?;

    // We have to record the pid of the child (container init process), since
    // the child will be inside the pid namespace. We can't rely on child_ready
    // to send us the correct pid. The args are handed to the init process
    // as-is; only the channel differs.
    let pid = fork::container_fork(|| {
        // First thing in the child process to close the unused fds in the channel/pipe.
        receiver_from_init
            .close()
            .context("Failed to close receiver in init process")?;
        container_init(args, sender_to_intermediate)
    })?;
    // Close unused fds in the parent process.
    sender_to_intermediate
        .close()
        .context("Failed to close sender in the intermediate process")?;
    // There is no point using a pid reported by the child here, since the
    // child will be inside the pid namespace already.
    receiver_from_init
        .wait_for_init_ready()
        .context("Failed to wait for the child")?;
    // After the child (the container init process) becomes ready, we can signal
    // the parent (the main process) that we are ready.
    sender_to_main
        .intermediate_ready(pid)
        .context("Failed to send child ready from intermediate process")?;

    Ok(())
}

pub fn container_init(
args: ContainerInitArgs,
args: ContainerArgs,
sender_to_intermediate: &mut channel::SenderInitToIntermediate,
) -> Result<()> {
let command = &args.syscall;
Expand Down
92 changes: 92 additions & 0 deletions src/process/intermediate.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
use crate::{namespaces::Namespaces, process::channel, process::fork, syscall::Syscall};
use anyhow::{Context, Result};
use nix::unistd::{Gid, Uid};
use oci_spec::runtime::LinuxNamespaceType;

use super::args::ContainerArgs;
use super::init::container_init;

/// Runs the intermediate-process stage of container creation.
///
/// Using the namespaces listed in `args.spec`, this function:
/// 1. enters the user namespace (if one is configured); when the namespace is
///    newly created it asks the main process to write the uid/gid mappings
///    and waits for the acknowledgement, then switches to uid 0 / gid 0;
/// 2. applies the rlimits from the spec's process section;
/// 3. enters the pid namespace (if one is configured);
/// 4. forks the container init process, waits until it reports ready, and
///    then reports the init pid to the main process.
///
/// # Errors
/// Returns an error if the spec has no `linux` or `process` section, or if
/// any namespace, id, rlimit, fork or channel operation fails.
///
/// # Panics
/// Panics if toggling the dumpable flag through `prctl::set_dumpable` fails.
pub fn container_intermediate(
    args: ContainerArgs,
    receiver_from_main: &mut channel::ReceiverFromMain,
    sender_to_main: &mut channel::SenderIntermediateToMain,
) -> Result<()> {
    let command = &args.syscall;
    let spec = &args.spec;
    let linux = spec.linux.as_ref().context("no linux in spec")?;
    let namespaces = Namespaces::from(linux.namespaces.as_ref());

    // If a user namespace is listed in the spec, enter it first. See
    // https://man7.org/linux/man-pages/man7/user_namespaces.7.html for more
    // information.
    if let Some(user_namespace) = namespaces.get(LinuxNamespaceType::User) {
        namespaces
            .unshare_or_setns(user_namespace)
            .with_context(|| format!("Failed to enter user namespace: {:?}", user_namespace))?;
        if user_namespace.path.is_none() {
            log::debug!("creating new user namespace");
            // child needs to be dumpable, otherwise the non root parent is not
            // allowed to write the uid/gid maps
            prctl::set_dumpable(true).unwrap();
            sender_to_main.identifier_mapping_request()?;
            receiver_from_main.wait_for_mapping_ack()?;
            prctl::set_dumpable(false).unwrap();
        }

        // After UID and GID mapping is configured correctly in the Youki main
        // process, we want to make sure we continue as the root user inside
        // the new user namespace. This is required because the process of
        // configuring the container process will require root, even though the
        // root in the user namespace is likely mapped to a non-privileged user
        // on the parent user namespace.
        command.set_id(Uid::from_raw(0), Gid::from_raw(0)).context(
            "Failed to configure uid and gid root in the beginning of a new user namespace",
        )?;
    }

    // Apply the resource limits requested for the container process.
    let proc = spec.process.as_ref().context("no process in spec")?;
    if let Some(rlimits) = proc.rlimits.as_ref() {
        for rlimit in rlimits.iter() {
            command.set_rlimit(rlimit).context("failed to set rlimit")?;
        }
    }

    // Pid namespace requires an extra fork to enter, so we enter pid namespace now.
    if let Some(pid_namespace) = namespaces.get(LinuxNamespaceType::Pid) {
        namespaces
            .unshare_or_setns(pid_namespace)
            .with_context(|| format!("Failed to enter pid namespace: {:?}", pid_namespace))?;
    }

    // We only need the init process to send us the ChildReady.
    let (sender_to_intermediate, receiver_from_init) = &mut channel::init_to_intermediate()?;

    // We have to record the pid of the child (container init process), since
    // the child will be inside the pid namespace. We can't rely on child_ready
    // to send us the correct pid.
    let pid = fork::container_fork(|| {
        // First thing in the child process to close the unused fds in the channel/pipe.
        receiver_from_init
            .close()
            .context("Failed to close receiver in init process")?;
        container_init(args, sender_to_intermediate)
    })?;
    // Close unused fds in the parent process.
    sender_to_intermediate
        .close()
        .context("Failed to close sender in the intermediate process")?;
    // There is no point using a pid reported by the child here, since the
    // child will be inside the pid namespace already.
    receiver_from_init
        .wait_for_init_ready()
        .context("Failed to wait for the child")?;
    // After the child (the container init process) becomes ready, we can signal
    // the parent (the main process) that we are ready.
    sender_to_main
        .intermediate_ready(pid)
        .context("Failed to send child ready from intermediate process")?;

    Ok(())
}
4 changes: 3 additions & 1 deletion src/process/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
//! Provides a thin wrapper around fork syscall,
//! with enums and functions specific to youki implemented

pub mod args;
pub mod channel;
pub mod fork;
pub mod init;
pub(crate) mod init;
pub mod intermediate;
pub mod message;

0 comments on commit 9844d84

Please sign in to comment.