diff --git a/docs/doc-draft.md b/docs/doc-draft.md index 7103936c3..4abe5b428 100644 --- a/docs/doc-draft.md +++ b/docs/doc-draft.md @@ -76,5 +76,19 @@ The main youki process sets up the pipe and forks the child process and waits on - [user-namespace man page](https://man7.org/linux/man-pages/man7/user_namespaces.7.html) - [wait man page](https://man7.org/linux/man-pages/man3/wait.3p.html) +### Container + +This contains the structure representing a container, along with functions related to the container process and its state and status. + +### Command + +This contains a trait to wrap commonly required syscalls, so that their implementation details are abstracted away from the rest of Youki. +This also provides an implementation of the trait for Linux syscalls. + +- [pivot_root man page](https://man7.org/linux/man-pages/man2/pivot_root.2.html) +- [umount2 man page](https://man7.org/linux/man-pages/man2/umount2.2.html) +- [capabilities man page](https://man7.org/linux/man-pages/man7/capabilities.7.html) +- [unshare man page](https://man7.org/linux/man-pages/man2/unshare.2.html) + [oci runtime specification]: https://github.com/opencontainers/runtime-spec/blob/master/runtime.md [runc man pages]: (https://github.com/opencontainers/runc/blob/master/man/runc.8.md) diff --git a/src/command/command.rs b/src/command/command.rs index 517ed6aee..4ad6f3417 100644 --- a/src/command/command.rs +++ b/src/command/command.rs @@ -1,3 +1,6 @@ +//! An interface trait so that the rest of Youki can call +//! necessary functions without having to worry about their +//! 
implementation details use std::{any::Any, path::Path}; use anyhow::Result; @@ -9,6 +12,8 @@ use nix::{ use oci_spec::LinuxRlimit; +/// This specifies various kernel/other functionalities required for +/// container management pub trait Command { fn as_any(&self) -> &dyn Any; fn pivot_rootfs(&self, path: &Path) -> Result<()>; diff --git a/src/command/linux.rs b/src/command/linux.rs index 0f570e33e..71282ea4c 100644 --- a/src/command/linux.rs +++ b/src/command/linux.rs @@ -1,3 +1,4 @@ +//! Implements Command trait for Linux systems use std::{any::Any, path::Path}; use anyhow::{bail, Result}; @@ -22,36 +23,60 @@ use oci_spec::LinuxRlimit; use super::Command; use crate::capabilities; +/// Empty structure to implement Command trait for Linux #[derive(Clone)] pub struct LinuxCommand; impl Command for LinuxCommand { + /// To enable dynamic typing, + /// see https://doc.rust-lang.org/std/any/index.html for more information fn as_any(&self) -> &dyn Any { self } + /// Function to set given path as root path inside process fn pivot_rootfs(&self, path: &Path) -> Result<()> { + // open the path as directory and read only let newroot = open(path, OFlag::O_DIRECTORY | OFlag::O_RDONLY, Mode::empty())?; + // make the given path the root directory for the container + // see https://man7.org/linux/man-pages/man2/pivot_root.2.html, especially the notes + // pivot root usually changes the root directory to the first argument, and then mounts the original root + // directory at the second argument. Giving the same path for both stacks the mount of the original root directory + // above the new directory at the same path; the call to umount then unmounts the original root directory from + // this path. This is done because otherwise we would need to create a separate temporary directory under the new root path + // so we can move the original root there, and then unmount that. This approach avoids creating the temporary + // directory to hold the original root directory. 
pivot_root(path, path)?; + // Unmount the original root directory which was stacked on top of the new root directory + // MNT_DETACH makes the mount point unavailable to new accesses, but waits until the original mount point + // is free of activity before actually unmounting it + // see https://man7.org/linux/man-pages/man2/umount2.2.html for more information umount2("/", MntFlags::MNT_DETACH)?; + // Change directory to root fchdir(newroot)?; Ok(()) } + /// Set namespace for process fn set_ns(&self, rawfd: i32, nstype: CloneFlags) -> Result<()> { nix::sched::setns(rawfd, nstype)?; Ok(()) } + /// Set uid and gid for process fn set_id(&self, uid: Uid, gid: Gid) -> Result<()> { if let Err(e) = prctl::set_keep_capabilities(true) { bail!("set keep capabilities returned {}", e); }; + // args : real *id, effective *id, saved set *id respectively unistd::setresgid(gid, gid, gid)?; unistd::setresuid(uid, uid, uid)?; + // if not the root user, reset capabilities to effective capabilities, + // which are used by the kernel to perform checks + // see https://man7.org/linux/man-pages/man7/capabilities.7.html for more information if uid != Uid::from_raw(0) { capabilities::reset_effective(self)?; } @@ -61,15 +86,19 @@ impl Command for LinuxCommand { Ok(()) } + /// Disassociate parts of execution context + // see https://man7.org/linux/man-pages/man2/unshare.2.html for more information fn unshare(&self, flags: CloneFlags) -> Result<()> { unshare(flags)?; Ok(()) } + /// Set capabilities for container process fn set_capability(&self, cset: CapSet, value: &CapsHashSet) -> Result<(), CapsError> { caps::set(None, cset, value) } + /// Sets hostname for process fn set_hostname(&self, hostname: &str) -> Result<()> { if let Err(e) = sethostname(hostname) { bail!("Failed to set {} as hostname. 
{:?}", hostname, e) @@ -77,6 +106,7 @@ impl Command for LinuxCommand { Ok(()) } + /// Sets resource limit for process fn set_rlimit(&self, rlimit: &LinuxRlimit) -> Result<()> { let rlim = &libc::rlimit { rlim_cur: rlimit.soft, diff --git a/src/command/mod.rs b/src/command/mod.rs index cf4923271..fa2fc01fe 100644 --- a/src/command/mod.rs +++ b/src/command/mod.rs @@ -1,4 +1,6 @@ //! Contains a wrapper of syscalls for unit tests +//! This provides a uniform interface for the rest of Youki +//! to call syscalls required for container management #[allow(clippy::module_inception)] mod command; diff --git a/src/container/container.rs b/src/container/container.rs index 2fbce6198..378d30096 100644 --- a/src/container/container.rs +++ b/src/container/container.rs @@ -7,9 +7,12 @@ use procfs::process::Process; use crate::container::{ContainerStatus, State}; +/// Structure representing the container data #[derive(Debug)] pub struct Container { + // State of the container pub state: State, + // indicates the directory for the root path in the container pub root: PathBuf, } @@ -36,10 +39,12 @@ impl Container { pub fn status(&self) -> ContainerStatus { self.state.status } - pub fn refresh_status(&self) -> Result { let new_status = match self.pid() { Some(pid) => { + // Note that Process::new does not spawn a new process + // but instead creates a new Process structure, and fills + // it with information about the process with the given pid if let Ok(proc) = Process::new(pid.as_raw()) { use procfs::process::ProcState; match proc.stat.state().unwrap() { diff --git a/src/container/state.rs b/src/container/state.rs index c465ab215..30964c855 100644 --- a/src/container/state.rs +++ b/src/container/state.rs @@ -1,3 +1,4 @@ +//! 
Information about status and state of the container use std::collections::HashMap; use std::fs; use std::{fs::File, path::Path}; @@ -7,17 +8,17 @@ use serde::{Deserialize, Serialize}; const STATE_FILE_PATH: &str = "state.json"; +/// Indicates status of the container #[derive(Serialize, Deserialize, Debug, Copy, Clone)] #[serde(rename_all = "camelCase")] pub enum ContainerStatus { - // StateCreating indicates that the container is being created + // The container is being created Creating, - // StateCreated indicates that the runtime has finished the create operation + // The runtime has finished the create operation Created, - // StateRunning indicates that the container process has executed the - // user-specified program but has not exited + // The container process has executed the user-specified program but has not exited Running, - // StateStopped indicates that the container process has exited + // The container process has exited Stopped, } @@ -39,6 +40,7 @@ impl ContainerStatus { } } +/// Stores the state information of the container #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(rename_all = "camelCase")] pub struct State {