Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make exec behaviour consistent with runc's exec #1252

Merged
merged 7 commits into from
Oct 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions crates/libcontainer/src/container/builder_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ use super::{Container, ContainerStatus};
use crate::{
hooks,
notify_socket::NotifyListener,
process::{self, args::ContainerArgs},
process::{
self,
args::{ContainerArgs, ContainerType},
},
rootless::Rootless,
syscall::Syscall,
utils,
Expand All @@ -14,7 +17,7 @@ use std::{fs, io::Write, os::unix::prelude::RawFd, path::PathBuf};

pub(super) struct ContainerBuilderImpl<'a> {
/// Flag indicating if an init or a tenant container should be created
pub init: bool,
pub container_type: ContainerType,
/// Interface to operating system primitives
pub syscall: &'a dyn Syscall,
/// Flag indicating if systemd should be used for cgroup management
Expand All @@ -38,6 +41,8 @@ pub(super) struct ContainerBuilderImpl<'a> {
pub container: Option<Container>,
/// File descriptors preserved/passed to the container init process.
pub preserve_fds: i32,
/// If the container is to be run in detached mode
pub detached: bool,
}

impl<'a> ContainerBuilderImpl<'a> {
Expand Down Expand Up @@ -67,7 +72,7 @@ impl<'a> ContainerBuilderImpl<'a> {
)?;
let process = self.spec.process().as_ref().context("No process in spec")?;

if self.init {
if matches!(self.container_type, ContainerType::InitContainer) {
if let Some(hooks) = self.spec.hooks() {
hooks::run_hooks(hooks.create_runtime().as_ref(), self.container.as_ref())?
}
Expand Down Expand Up @@ -110,7 +115,7 @@ impl<'a> ContainerBuilderImpl<'a> {
// therefore we will have to move all the variable by value. Since self
// is a shared reference, we have to clone these variables here.
let container_args = ContainerArgs {
init: self.init,
container_type: self.container_type,
syscall: self.syscall,
spec: self.spec,
rootfs: &self.rootfs,
Expand All @@ -120,10 +125,11 @@ impl<'a> ContainerBuilderImpl<'a> {
container: &self.container,
rootless: &self.rootless,
cgroup_manager: cmanager,
detached: self.detached,
};

let (intermediate, init_pid) =
process::container_main_process::container_main_process(&container_args, !self.init)?;
process::container_main_process::container_main_process(&container_args)?;

// if file to write the pid to is specified, write pid of the child
if let Some(pid_file) = &self.pid_file {
Expand Down
8 changes: 6 additions & 2 deletions crates/libcontainer/src/container/init_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ use std::{
path::{Path, PathBuf},
};

use crate::{apparmor, config::YoukiConfig, notify_socket::NOTIFY_FILE, rootless, tty, utils};
use crate::{
apparmor, config::YoukiConfig, notify_socket::NOTIFY_FILE, process::args::ContainerType,
rootless, tty, utils,
};

use super::{
builder::ContainerBuilder, builder_impl::ContainerBuilderImpl, Container, ContainerStatus,
Expand Down Expand Up @@ -75,7 +78,7 @@ impl<'a> InitContainerBuilder<'a> {
.context("failed to save config")?;

let mut builder_impl = ContainerBuilderImpl {
init: true,
container_type: ContainerType::InitContainer,
syscall: self.base.syscall,
container_id: self.base.container_id,
pid_file: self.base.pid_file,
Expand All @@ -87,6 +90,7 @@ impl<'a> InitContainerBuilder<'a> {
notify_path,
container: Some(container.clone()),
preserve_fds: self.base.preserve_fds,
detached: false, // TODO this should be set properly based on how the command is given
};

builder_impl.create()?;
Expand Down
38 changes: 35 additions & 3 deletions crates/libcontainer/src/container/tenant_builder.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use anyhow::{bail, Context, Result};
use caps::Capability;
use nix::unistd::{self, Pid};
use nix::fcntl::OFlag;
use nix::unistd::{self, close, pipe2, read, Pid};
use oci_spec::runtime::{
Capabilities as SpecCapabilities, Capability as SpecCapability, LinuxBuilder,
LinuxCapabilities, LinuxCapabilitiesBuilder, LinuxNamespace, LinuxNamespaceBuilder,
Expand All @@ -18,6 +19,7 @@ use std::{
str::FromStr,
};

use crate::process::args::ContainerType;
use crate::{capabilities::CapabilityExt, container::builder_impl::ContainerBuilderImpl};
use crate::{notify_socket::NotifySocket, rootless::Rootless, tty, utils};

Expand All @@ -37,6 +39,7 @@ pub struct TenantContainerBuilder<'a> {
no_new_privs: Option<bool>,
capabilities: Vec<String>,
process: Option<PathBuf>,
detached: bool,
}

impl<'a> TenantContainerBuilder<'a> {
Expand All @@ -52,6 +55,7 @@ impl<'a> TenantContainerBuilder<'a> {
no_new_privs: None,
capabilities: Vec::new(),
process: None,
detached: false,
}
}

Expand Down Expand Up @@ -88,6 +92,11 @@ impl<'a> TenantContainerBuilder<'a> {
self
}

/// Sets whether the tenant process is to be run in detached mode.
///
/// Takes the builder by value and returns it so that calls can be chained.
pub fn with_detach(mut self, detached: bool) -> Self {
self.detached = detached;
self
}

/// Joins an existing container
pub fn build(self) -> Result<Pid> {
let container_dir = self
Expand Down Expand Up @@ -116,8 +125,12 @@ impl<'a> TenantContainerBuilder<'a> {
let use_systemd = self.should_use_systemd(&container);
let rootless = Rootless::new(&spec)?;

let (read_end, write_end) = pipe2(OFlag::O_CLOEXEC)?;

let mut builder_impl = ContainerBuilderImpl {
init: false,
container_type: ContainerType::TenantContainer {
exec_notify_fd: write_end,
},
syscall: self.base.syscall,
container_id: self.base.container_id,
pid_file: self.base.pid_file,
Expand All @@ -129,14 +142,33 @@ impl<'a> TenantContainerBuilder<'a> {
notify_path: notify_path.clone(),
container: None,
preserve_fds: self.base.preserve_fds,
detached: self.detached,
};

let pid = builder_impl.create()?;

let mut notify_socket = NotifySocket::new(notify_path);
notify_socket.notify_container_start()?;

Ok(pid)
close(write_end)?;

let mut err_str_buf = Vec::new();

loop {
let mut buf = [0; 3];
match read(read_end, &mut buf)? {
0 => {
if err_str_buf.is_empty() {
return Ok(pid);
} else {
bail!(String::from_utf8_lossy(&err_str_buf).to_string());
}
}
_ => {
err_str_buf.extend(buf.into_iter());
}
}
}
}

fn lookup_container_dir(&self) -> Result<PathBuf> {
Expand Down
12 changes: 10 additions & 2 deletions crates/libcontainer/src/process/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@ use std::path::PathBuf;
use crate::rootless::Rootless;
use crate::{container::Container, notify_socket::NotifyListener, syscall::Syscall};

/// Indicates which kind of container process is being created.
#[derive(Debug, Copy, Clone)]
pub enum ContainerType {
/// The init process of a container.
InitContainer,
/// A tenant process that joins an existing container (as done by `exec`).
///
/// `exec_notify_fd` is the write end of a pipe created by the tenant
/// builder. If the container init process function returns an error, the
/// error text is written to this fd so the builder can report the failure.
TenantContainer { exec_notify_fd: RawFd },
}

pub struct ContainerArgs<'a> {
/// Flag indicating if an init or a tenant container should be created
pub init: bool,
/// Indicates if an init or a tenant container should be created
pub container_type: ContainerType,
/// Interface to operating system primitives
pub syscall: &'a dyn Syscall,
/// OCI compliant runtime spec
Expand All @@ -27,4 +33,6 @@ pub struct ContainerArgs<'a> {
pub rootless: &'a Option<Rootless<'a>>,
/// Cgroup Manager
pub cgroup_manager: Box<dyn CgroupManager>,
/// If the container is to be run in detached mode
pub detached: bool,
}
6 changes: 6 additions & 0 deletions crates/libcontainer/src/process/channel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ impl MainSender {
Ok(())
}

/// Sends a `Message::ExecFailed` carrying the error text `err` over this sender.
///
/// Returns an error if the underlying send fails.
pub fn exec_failed(&mut self, err: String) -> Result<()> {
self.sender.send(Message::ExecFailed(err))?;
Ok(())
}

pub fn close(&self) -> Result<()> {
self.sender.close()
}
Expand All @@ -82,6 +87,7 @@ impl MainReceiver {

match msg {
Message::IntermediateReady(pid) => Ok(Pid::from_raw(pid)),
Message::ExecFailed(err) => bail!("exec process failed with error {}", err),
_ => bail!(
"receive unexpected message {:?} waiting for intermediate ready",
msg
Expand Down
6 changes: 3 additions & 3 deletions crates/libcontainer/src/process/container_init_process.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::args::ContainerArgs;
use super::args::{ContainerArgs, ContainerType};
use crate::apparmor;
use crate::syscall::Syscall;
use crate::workload::ExecutorManager;
Expand Down Expand Up @@ -186,7 +186,7 @@ pub fn container_init_process(
let _ = prctl::set_no_new_privileges(true);
}

if args.init {
if matches!(args.container_type, ContainerType::InitContainer) {
// create_container hook needs to be called after the namespace setup, but
// before pivot_root is called. This runs in the container namespaces.
if let Some(hooks) = hooks {
Expand Down Expand Up @@ -409,7 +409,7 @@ pub fn container_init_process(

// create_container hook needs to be called after the namespace setup, but
// before pivot_root is called. This runs in the container namespaces.
if args.init {
if matches!(args.container_type, ContainerType::InitContainer) {
if let Some(hooks) = hooks {
hooks::run_hooks(hooks.start_container().as_ref(), container)?
}
Expand Down
28 changes: 20 additions & 8 deletions crates/libcontainer/src/process/container_intermediate_process.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use crate::{namespaces::Namespaces, process::channel, process::fork};
use anyhow::{Context, Error, Result};
use libcgroups::common::CgroupManager;
use nix::unistd::{close, write};
use nix::unistd::{Gid, Pid, Uid};
use oci_spec::runtime::{LinuxNamespaceType, LinuxResources};
use procfs::process::Process;
use std::convert::From;

use super::args::ContainerArgs;
use super::args::{ContainerArgs, ContainerType};
use super::container_init_process::container_init_process;

pub fn container_intermediate_process(
Expand Down Expand Up @@ -35,7 +36,7 @@ pub fn container_intermediate_process(
apply_cgroups(
args.cgroup_manager.as_ref(),
linux.resources().as_ref(),
args.init,
matches!(args.container_type, ContainerType::InitContainer),
)
.context("failed to apply cgroups")?;

Expand Down Expand Up @@ -95,12 +96,24 @@ pub fn container_intermediate_process(
inter_sender
.close()
.context("failed to close sender in the intermediate process")?;
container_init_process(args, main_sender, init_receiver)?;
Ok(0)
match container_init_process(args, main_sender, init_receiver) {
Ok(_) => unreachable!("successful exec should never reach here"),
Err(e) => {
if let ContainerType::TenantContainer { exec_notify_fd } = args.container_type {
let buf = format!("{}", e);
write(exec_notify_fd, buf.as_bytes())?;
close(exec_notify_fd)?;
}
Err(e)
}
}
})?;
// Once we fork the container init process, the job for intermediate process
// is done. We notify the container main process about the pid we just
// forked for container init process.

// close the exec_notify_fd in this process
if let ContainerType::TenantContainer { exec_notify_fd } = args.container_type {
close(exec_notify_fd)?;
}

main_sender
.intermediate_ready(pid)
.context("failed to send child ready from intermediate process")?;
Expand All @@ -115,7 +128,6 @@ pub fn container_intermediate_process(
init_sender
.close()
.context("failed to close unused init sender")?;

Ok(pid)
}

Expand Down
13 changes: 10 additions & 3 deletions crates/libcontainer/src/process/container_main_process.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use crate::{
container::ContainerProcessState,
process::{args::ContainerArgs, channel, container_intermediate_process, fork},
process::{
args::{ContainerArgs, ContainerType},
channel, container_intermediate_process, fork,
},
rootless::Rootless,
seccomp, utils,
};
Expand All @@ -15,7 +18,7 @@ use nix::{
use oci_spec::runtime;
use std::{io::IoSlice, path::Path};

pub fn container_main_process(container_args: &ContainerArgs, wait: bool) -> Result<(Pid, Pid)> {
pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, Pid)> {
// We use a set of channels to communicate between parent and child process.
// Each channel is uni-directional. Because we will pass these channel to
// forked process, we have to be diligent about closing any unused channel.
Expand All @@ -33,7 +36,11 @@ pub fn container_main_process(container_args: &ContainerArgs, wait: bool) -> Res
main_sender,
)?;

if wait {
if matches!(
container_args.container_type,
ContainerType::TenantContainer { exec_notify_fd: _ }
) && !container_args.detached
{
match waitpid(container_pid, None)? {
WaitStatus::Exited(_, s) => Ok(s),
WaitStatus::Signaled(_, sig, _) => Ok(sig as i32),
Expand Down
1 change: 1 addition & 0 deletions crates/libcontainer/src/process/message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ pub enum Message {
MappingWritten,
SeccompNotify,
SeccompNotifyDone,
ExecFailed(String),
}
9 changes: 9 additions & 0 deletions crates/youki/src/commands/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,22 @@ pub fn exec(args: Exec, root_path: PathBuf) -> Result<i32> {
.with_console_socket(args.console_socket.as_ref())
.with_pid_file(args.pid_file.as_ref())?
.as_tenant()
.with_detach(args.detach)
.with_cwd(args.cwd.as_ref())
.with_env(args.env.clone().into_iter().collect())
.with_process(args.process.as_ref())
.with_no_new_privs(args.no_new_privs)
.with_container_args(args.command.clone())
.build()?;

// See https://github.com/containers/youki/pull/1252 for a detailed explanation
// basically, if there is any error in starting exec, the build above will return error
// however, if the process does start, and detach is given, we do not wait for it
// if not detached, then we wait for it using waitpid below
if args.detach {
return Ok(0);
}

match waitpid(pid, None)? {
WaitStatus::Exited(_, status) => Ok(status),
WaitStatus::Signaled(_, sig, _) => Ok(sig as i32),
Expand Down
11 changes: 7 additions & 4 deletions crates/youki/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,13 @@ fn main() -> Result<()> {
commands::checkpoint::checkpoint(checkpoint, root_path)
}
CommonCmd::Events(events) => commands::events::events(events, root_path),
CommonCmd::Exec(exec) => {
let exit_code = commands::exec::exec(exec, root_path)?;
std::process::exit(exit_code)
}
CommonCmd::Exec(exec) => match commands::exec::exec(exec, root_path) {
Ok(exit_code) => std::process::exit(exit_code),
Err(e) => {
eprintln!("exec failed : {}", e);
std::process::exit(-1);
}
},
CommonCmd::List(list) => commands::list::list(list, root_path),
CommonCmd::Pause(pause) => commands::pause::pause(pause, root_path),
CommonCmd::Ps(ps) => commands::ps::ps(ps, root_path),
Expand Down