diff --git a/Cargo.lock b/Cargo.lock index eb68715518..392c779ef6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1013,6 +1013,7 @@ dependencies = [ "prctl", "procfs", "quickcheck", + "rust-criu", "serde", "serde_json", "serial_test", @@ -1520,6 +1521,31 @@ dependencies = [ "libc", ] +[[package]] +name = "protobuf" +version = "2.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47c327e191621a2158159df97cdbc2e7074bb4e940275e35abf38eb3d2595754" + +[[package]] +name = "protobuf-codegen" +version = "2.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3df8c98c08bd4d6653c2dbae00bd68c1d1d82a360265a5b0bbc73d48c63cb853" +dependencies = [ + "protobuf", +] + +[[package]] +name = "protobuf-codegen-pure" +version = "2.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "394a73e2a819405364df8d30042c0f1174737a763e0170497ec9d36f8a2ea8f7" +dependencies = [ + "protobuf", + "protobuf-codegen", +] + [[package]] name = "ptr_meta" version = "0.1.4" @@ -1729,6 +1755,16 @@ dependencies = [ "syn", ] +[[package]] +name = "rust-criu" +version = "0.1.0" +source = "git+https://github.com/adrianreber/rust-criu#c583dbb71b90bcf241b1a97c8a6f88052c548827" +dependencies = [ + "libc", + "protobuf", + "protobuf-codegen-pure", +] + [[package]] name = "rustc-demangle" version = "0.1.21" diff --git a/crates/libcontainer/Cargo.toml b/crates/libcontainer/Cargo.toml index 68b64d6146..51c671cc0f 100644 --- a/crates/libcontainer/Cargo.toml +++ b/crates/libcontainer/Cargo.toml @@ -31,6 +31,7 @@ libcgroups = { version = "0.0.2", path = "../libcgroups" } libseccomp = { version = "0.0.2", path = "../libseccomp" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +rust-criu = { git = "https://github.com/adrianreber/rust-criu" } wasmer = { version = "2.1.0", optional = true } wasmer-wasi = { version = "2.1.0", optional = true } diff --git a/crates/libcontainer/src/container/container.rs 
b/crates/libcontainer/src/container/container.rs
index 37af5b523f..8b33eb4806 100644
--- a/crates/libcontainer/src/container/container.rs
+++ b/crates/libcontainer/src/container/container.rs
@@ -198,6 +198,24 @@ impl Container {
     }
 }
 
+/// Options controlling how [`Container::checkpoint`] drives CRIU.
+pub struct CheckpointOptions {
+    /// Allow external unix sockets.
+    pub ext_unix_sk: bool,
+    /// Allow file locks.
+    pub file_locks: bool,
+    /// Directory in which the CRIU image files are saved.
+    pub image_path: PathBuf,
+    /// Leave the process running after checkpointing.
+    pub leave_running: bool,
+    /// Allow shell jobs.
+    pub shell_job: bool,
+    /// Allow open TCP connections.
+    pub tcp_established: bool,
+    /// Optional directory for work files and logs.
+    pub work_path: Option<PathBuf>,
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/crates/libcontainer/src/container/container_checkpoint.rs b/crates/libcontainer/src/container/container_checkpoint.rs
new file mode 100644
index 0000000000..f48d12e535
--- /dev/null
+++ b/crates/libcontainer/src/container/container_checkpoint.rs
@@ -0,0 +1,118 @@
+use super::{Container, ContainerStatus};
+use crate::container::container::CheckpointOptions;
+use anyhow::{bail, Context, Result};
+
+use oci_spec::runtime::Spec;
+use std::os::unix::io::AsRawFd;
+
+const CRIU_CHECKPOINT_LOG_FILE: &str = "dump.log";
+
+impl Container {
+    /// Checkpoints this container with CRIU.
+    ///
+    /// Every bind mount found in the bundle's `config.json` is registered with
+    /// CRIU as an external mount and `opts.image_path` is handed to CRIU as the
+    /// images directory. Unless `opts.leave_running` is set, the container
+    /// status is saved as `Stopped` once the dump has succeeded.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the container is not in a state that allows checkpointing, if
+    /// CRIU cannot be set up, if the spec or one of the directories cannot be
+    /// opened, or if the CRIU dump itself fails.
+    pub fn checkpoint(&mut self, opts: &CheckpointOptions) -> Result<()> {
+        self.refresh_status()
+            .context("failed to refresh container status")?;
+
+        // can_pause() checks if the container is running. That also works for
+        // checkpointing. is_running() would make more sense here, but let's
+        // just reuse existing functions.
+        if !self.can_pause() {
+            bail!(
+                "{} could not be checkpointed because it was {:?}",
+                self.id(),
+                self.status()
+            );
+        }
+
+        // Report a CRIU setup failure as an error instead of panicking.
+        let mut criu = match rust_criu::Criu::new() {
+            Ok(criu) => criu,
+            Err(e) => bail!("failed to set up CRIU: {:?}", e),
+        };
+
+        // We need to tell CRIU that all bind mounts are external. CRIU will fail checkpointing
+        // if it does not know that these bind mounts are coming from the outside of the container.
+        // This information is needed during restore again. The external location of the bind
+        // mounts can change and CRIU will just mount whatever we tell it to mount based on
+        // information found in 'config.json'.
+        let source_spec_path = self.bundle().join("config.json");
+        let spec = Spec::load(&source_spec_path)
+            .with_context(|| format!("failed to load spec from {:?}", source_spec_path))?;
+        // A spec without a `mounts` entry has nothing to register.
+        if let Some(mounts) = spec.mounts() {
+            for m in mounts {
+                if m.typ().as_deref() == Some("bind") {
+                    let dest = m.destination().to_str().with_context(|| {
+                        format!("mount destination {:?} is not valid UTF-8", m.destination())
+                    })?;
+                    criu.set_external_mount(dest.to_string(), dest.to_string());
+                }
+            }
+        }
+
+        let directory = std::fs::File::open(&opts.image_path)
+            .with_context(|| format!("failed to open image path {:?}", opts.image_path))?;
+        criu.set_images_dir_fd(directory.as_raw_fd());
+
+        // The handle is kept in a local so the FD stays open until CRIU uses
+        // it; CRIU is only handed the raw file descriptor.
+        let work_dir = opts
+            .work_path
+            .as_ref()
+            .map(std::fs::File::open)
+            .transpose()
+            .context("failed to open work path")?;
+        if let Some(dir) = &work_dir {
+            criu.set_work_dir_fd(dir.as_raw_fd());
+        }
+
+        let pid = self
+            .pid()
+            .with_context(|| format!("container {} has no pid", self.id()))?;
+
+        criu.set_log_file(CRIU_CHECKPOINT_LOG_FILE.to_string());
+        // NOTE(review): 4 is presumably CRIU's most verbose log level; confirm.
+        criu.set_log_level(4);
+        criu.set_pid(pid.into());
+        criu.set_leave_running(opts.leave_running);
+        criu.set_ext_unix_sk(opts.ext_unix_sk);
+        criu.set_shell_job(opts.shell_job);
+        criu.set_tcp_established(opts.tcp_established);
+        criu.set_file_locks(opts.file_locks);
+        criu.set_orphan_pts_master(true);
+        criu.set_manage_cgroups(true);
+        if let Err(e) = criu.dump() {
+            // Point at the work directory when one is given, otherwise at the
+            // image directory.
+            let log_path = opts
+                .work_path
+                .as_ref()
+                .unwrap_or(&opts.image_path)
+                .join(CRIU_CHECKPOINT_LOG_FILE);
+            bail!(
+                "Checkpointing container {} failed with {:?}. Please check CRIU logfile {:?}",
+                self.id(),
+                e,
+                log_path
+            );
+        }
+
+        if !opts.leave_running {
+            self.set_status(ContainerStatus::Stopped).save()?;
+        }
+
+        log::debug!("container {} checkpointed", self.id());
+        Ok(())
+    }
+}
diff --git a/crates/libcontainer/src/container/mod.rs b/crates/libcontainer/src/container/mod.rs
index 73f4901886..467f612315 100644
--- a/crates/libcontainer/src/container/mod.rs
+++ b/crates/libcontainer/src/container/mod.rs
@@ -8,6 +8,7 @@ pub mod builder;
 mod builder_impl;
 #[allow(clippy::module_inception)]
 mod container;
+mod container_checkpoint;
 mod container_delete;
 mod container_events;
 mod container_kill;
@@ -17,5 +18,6 @@ mod container_start;
 pub mod init_builder;
 pub mod state;
 pub mod tenant_builder;
+pub use container::CheckpointOptions;
 pub use container::Container;
 pub use state::{ContainerProcessState, ContainerStatus, State};
diff --git a/crates/liboci-cli/src/checkpoint.rs b/crates/liboci-cli/src/checkpoint.rs
new file mode 100644
index 0000000000..e044cfbde3
--- /dev/null
+++ b/crates/liboci-cli/src/checkpoint.rs
@@ -0,0 +1,30 @@
+use clap::Parser;
+use std::path::PathBuf;
+
+/// Checkpoint a running container
+#[derive(Parser, Debug)]
+pub struct Checkpoint {
+    #[clap(forbid_empty_values = true, required = true)]
+    pub container_id: String,
+    /// allow external unix sockets
+    #[clap(long)]
+    pub ext_unix_sk: bool,
+    /// allow file locks
+    #[clap(long)]
+    pub file_locks: bool,
+    /// path for saving criu image files
+    #[clap(long, default_value = "checkpoint")]
+    pub image_path: PathBuf,
+    /// leave the process running after checkpointing
+    #[clap(long)]
+    pub leave_running: bool,
+    /// allow shell jobs
+    #[clap(long)]
+    pub shell_job: bool,
+    /// allow open tcp connections
+    #[clap(long)]
+    pub tcp_established: bool,
+    /// path for saving work files and logs
+    #[clap(long)]
+    pub work_path: Option<PathBuf>,
+}
diff --git
a/crates/liboci-cli/src/lib.rs b/crates/liboci-cli/src/lib.rs
index 9254787fd4..1278888013 100644
--- a/crates/liboci-cli/src/lib.rs
+++ b/crates/liboci-cli/src/lib.rs
@@ -14,6 +14,7 @@ mod state;
 pub use {create::Create, delete::Delete, kill::Kill, start::Start, state::State};
 
 // Other common subcommands that aren't specified in the document
+mod checkpoint;
 mod events;
 mod exec;
 mod list;
@@ -25,8 +26,8 @@ mod spec;
 mod update;
 
 pub use {
-    events::Events, exec::Exec, list::List, pause::Pause, ps::Ps, resume::Resume, run::Run,
-    spec::Spec, update::Update,
+    checkpoint::Checkpoint, events::Events, exec::Exec, list::List, pause::Pause, ps::Ps,
+    resume::Resume, run::Run, spec::Spec, update::Update,
 };
 
 // Subcommands parsed by liboci-cli, based on the [OCI
@@ -48,6 +49,7 @@ pub enum StandardCmd {
 // and other runtimes.
 #[derive(Parser, Debug)]
 pub enum CommonCmd {
+    Checkpoint(Checkpoint),
     Events(Events),
     Exec(Exec),
     List(List),
diff --git a/crates/youki/src/commands/checkpoint.rs b/crates/youki/src/commands/checkpoint.rs
new file mode 100644
index 0000000000..9b12ca2393
--- /dev/null
+++ b/crates/youki/src/commands/checkpoint.rs
@@ -0,0 +1,28 @@
+//! Contains functionality of checkpoint container command
+use crate::commands::load_container;
+use std::path::PathBuf;
+
+use anyhow::{Context, Result};
+
+use liboci_cli::Checkpoint;
+
+/// Checkpoints the container named by `args.container_id`.
+///
+/// The container is loaded from `root_path`, the command line flags are copied
+/// into `CheckpointOptions` and the work is delegated to `Container::checkpoint`.
+pub fn checkpoint(args: Checkpoint, root_path: PathBuf) -> Result<()> {
+    log::debug!("start checkpointing container {}", args.container_id);
+    let mut container = load_container(root_path, &args.container_id)?;
+    let opts = libcontainer::container::CheckpointOptions {
+        ext_unix_sk: args.ext_unix_sk,
+        file_locks: args.file_locks,
+        image_path: args.image_path,
+        leave_running: args.leave_running,
+        shell_job: args.shell_job,
+        tcp_established: args.tcp_established,
+        work_path: args.work_path,
+    };
+    container
+        .checkpoint(&opts)
+        .with_context(|| format!("failed to checkpoint container {}", args.container_id))
+}
diff --git a/crates/youki/src/commands/mod.rs b/crates/youki/src/commands/mod.rs
index facd1dc188..06a1046e49 100644
--- a/crates/youki/src/commands/mod.rs
+++ b/crates/youki/src/commands/mod.rs
@@ -7,6 +7,7 @@ use std::{
 use libcgroups::common::CgroupManager;
 use libcontainer::container::Container;
 
+pub mod checkpoint;
 pub mod completion;
 pub mod create;
 pub mod delete;
diff --git a/crates/youki/src/main.rs b/crates/youki/src/main.rs
index aca93961f3..669fd7c318 100644
--- a/crates/youki/src/main.rs
+++ b/crates/youki/src/main.rs
@@ -108,6 +108,9 @@ fn main() -> Result<()> {
             StandardCmd::State(state) => commands::state::state(state, root_path),
         },
         SubCommand::Common(cmd) => match cmd {
+            CommonCmd::Checkpoint(checkpoint) => {
+                commands::checkpoint::checkpoint(checkpoint, root_path)
+            }
             CommonCmd::Events(events) => commands::events::events(events, root_path),
             CommonCmd::Exec(exec) => commands::exec::exec(exec, root_path),
             CommonCmd::List(list) => commands::list::list(list, root_path),