Skip to content

Commit

Permalink
Merge pull request #1637 from yihuaf/yihuaf/fix-exec
Browse files Browse the repository at this point in the history
Fixed container init process not re-parent to youki main process
  • Loading branch information
utam0k authored Mar 11, 2023
2 parents 8735005 + 16be642 commit ef5daba
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 39 deletions.
5 changes: 2 additions & 3 deletions crates/libcontainer/src/container/builder_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,7 @@ impl<'a> ContainerBuilderImpl<'a> {
detached: self.detached,
};

let (intermediate, init_pid) =
process::container_main_process::container_main_process(&container_args)?;
let init_pid = process::container_main_process::container_main_process(&container_args)?;

// if file to write the pid to is specified, write pid of the child
if let Some(pid_file) = &self.pid_file {
Expand All @@ -146,7 +145,7 @@ impl<'a> ContainerBuilderImpl<'a> {
.context("Failed to save container state")?;
}

Ok(intermediate)
Ok(init_pid)
}

fn cleanup_container(&self) -> Result<()> {
Expand Down
9 changes: 7 additions & 2 deletions crates/libcontainer/src/container/init_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,15 @@ impl<'a> InitContainerBuilder<'a> {
notify_path,
container: Some(container.clone()),
preserve_fds: self.base.preserve_fds,
detached: false, // TODO this should be set properly based on how the command is given
// TODO: This should be set properly based on how the command is
// given. For now, set the detached to true because this is what
// `youki create` defaults to.
detached: true,
};

builder_impl.create()?;
// TODO: Fix waiting on this pid (init process) when detached = false.
let _pid = builder_impl.create()?;

container.refresh_state()?;

Ok(container)
Expand Down
20 changes: 13 additions & 7 deletions crates/libcontainer/src/process/container_intermediate_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,18 @@ pub fn container_intermediate_process(
.with_context(|| format!("failed to enter pid namespace: {pid_namespace:?}"))?;
}

// We have to record the pid of the child (container init process), since
// the child will be inside the pid namespace. We can't rely on child_ready
// to send us the correct pid.
let pid = fork::container_fork(|| {
// We are inside the forked process here. The first thing we have to do is to close
// any unused senders, since fork will make a dup for all the socket.
// We have to record the pid of the init process. The init process will be
// inside the pid namespace, so we can't rely on the init process to send us
// the correct pid. We also want to clone the init process as a sibling
// process to the intermediate process. The intermediate process is only
// used as a jumping board to set the init process to the correct
// configuration. The youki main process can decide what to do with the init
// process and the intermediate process can just exit safely after the job
// is done.
let pid = fork::container_clone_sibling(|| {
// We are inside the forked process here. The first thing we have to do
// is to close any unused senders, since fork will make a dup for all
// the socket.
init_sender
.close()
.context("failed to close receiver in init process")?;
Expand All @@ -109,7 +115,7 @@ pub fn container_intermediate_process(
}
})?;

// close the exec_notify_fd in this process
// Close the exec_notify_fd in this process
if let ContainerType::TenantContainer { exec_notify_fd } = args.container_type {
close(exec_notify_fd)?;
}
Expand Down
49 changes: 22 additions & 27 deletions crates/libcontainer/src/process/container_main_process.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
use crate::{
container::ContainerProcessState,
process::{
args::{ContainerArgs, ContainerType},
channel, container_intermediate_process, fork,
},
process::{args::ContainerArgs, channel, container_intermediate_process, fork},
rootless::Rootless,
seccomp, utils,
};
Expand All @@ -18,38 +15,27 @@ use nix::{
use oci_spec::runtime;
use std::{io::IoSlice, path::Path};

pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, Pid)> {
pub fn container_main_process(container_args: &ContainerArgs) -> Result<Pid> {
// We use a set of channels to communicate between parent and child process.
// Each channel is uni-directional. Because we will pass these channel to
// forked process, we have to be deligent about closing any unused channel.
// cloned process, we have to be deligent about closing any unused channel.
// At minimum, we have to close down any unused senders. The corresponding
// receivers will be cleaned up once the senders are closed down.
let (main_sender, main_receiver) = &mut channel::main_channel()?;
let inter_chan = &mut channel::intermediate_channel()?;
let init_chan = &mut channel::init_channel()?;

let intermediate_pid = fork::container_fork(|| {
let container_pid = container_intermediate_process::container_intermediate_process(
container_intermediate_process::container_intermediate_process(
container_args,
inter_chan,
init_chan,
main_sender,
)?;

if matches!(
container_args.container_type,
ContainerType::TenantContainer { exec_notify_fd: _ }
) && !container_args.detached
{
match waitpid(container_pid, None)? {
WaitStatus::Exited(_, s) => Ok(s),
WaitStatus::Signaled(_, sig, _) => Ok(sig as i32),
_ => Ok(0),
}
} else {
Ok(0)
}
Ok(0)
})?;

// Close down unused fds. The corresponding fds are duplicated to the
// child process during fork.
main_sender
Expand Down Expand Up @@ -110,13 +96,22 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, Pi

log::debug!("init pid is {:?}", init_pid);

// here we send both intermediate and init pid, because :
// init pid is required for writing it to pid_file (if) given by the high-level runtime
// intermediate pid is required in the case when we call exec, as we need to wait for the
// intermediate process to exit, which itself waits for child process (the exec process) to exit
// in order to get the proper exit code. We cannot simply wait for the init_pid , that is the actual container
// process, as it is not (direct) child of our process
Ok((intermediate_pid, init_pid))
// Before the main process returns, we want to make sure the intermediate
// process is exit and reaped. By this point, the intermediate process
// should already exited successfully. If intermediate process errors out,
// the `init_ready` will not be sent.
match waitpid(intermediate_pid, None)? {
WaitStatus::Exited(_, 0) => (),
WaitStatus::Exited(_, s) => {
log::warn!("intermediate process failed with exit status: {s}");
}
WaitStatus::Signaled(_, sig, _) => {
log::warn!("intermediate process killed with signal: {sig}")
}
_ => (),
};

Ok(init_pid)
}

fn sync_seccomp(
Expand Down
2 changes: 2 additions & 0 deletions crates/youki/src/commands/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ use liboci_cli::Run;

pub fn run(args: Run, root_path: PathBuf, systemd_cgroup: bool) -> Result<()> {
let syscall = create_syscall();
// TODO: `youki run` should support passing in `detached` flags. Defaults to
// detached = true right now.
let mut container = ContainerBuilder::new(args.container_id.clone(), syscall.as_ref())
.with_pid_file(args.pid_file.as_ref())?
.with_console_socket(args.console_socket.as_ref())
Expand Down

0 comments on commit ef5daba

Please sign in to comment.