From d978b0344c47266ec83fccff47215d565b2eba48 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Sat, 25 Sep 2021 22:46:44 +0200 Subject: [PATCH 1/2] Ensure cgroup is deleted --- cgroups/src/common.rs | 19 +++++++++++++++++++ cgroups/src/v1/manager.rs | 2 +- cgroups/src/v1/util.rs | 27 ++------------------------- cgroups/src/v2/manager.rs | 14 ++++++++++++-- 4 files changed, 34 insertions(+), 28 deletions(-) diff --git a/cgroups/src/common.rs b/cgroups/src/common.rs index 334576ad0..58ffc825c 100644 --- a/cgroups/src/common.rs +++ b/cgroups/src/common.rs @@ -3,6 +3,7 @@ use std::{ fs::{self, File}, io::{BufRead, BufReader, Write}, path::{Path, PathBuf}, + time::Duration, }; use anyhow::{bail, Context, Result}; @@ -355,3 +356,21 @@ pub(crate) fn default_devices() -> Vec { }, ] } + +pub(crate) fn delete_with_retry>(path: P) -> Result<()> { + let mut attempts = 0; + let mut delay = Duration::from_millis(10); + let path = path.as_ref(); + + while attempts < 5 { + if fs::remove_dir(path).is_ok() { + return Ok(()); + } + + std::thread::sleep(delay); + attempts += attempts; + delay *= attempts; + } + + bail!("could not delete {:?}", path) +} diff --git a/cgroups/src/v1/manager.rs b/cgroups/src/v1/manager.rs index ea3b41f2f..e2aea497d 100644 --- a/cgroups/src/v1/manager.rs +++ b/cgroups/src/v1/manager.rs @@ -161,7 +161,7 @@ impl CgroupManager for Manager { let _ = nix::sys::signal::kill(Pid::from_raw(pid), nix::sys::signal::SIGKILL); } - util::delete_with_retry(cgroup_path.1)?; + common::delete_with_retry(cgroup_path.1)?; } } diff --git a/cgroups/src/v1/util.rs b/cgroups/src/v1/util.rs index 0b95c2fe2..389bbddba 100644 --- a/cgroups/src/v1/util.rs +++ b/cgroups/src/v1/util.rs @@ -1,11 +1,6 @@ -use std::{ - collections::HashMap, - fs, - path::{Path, PathBuf}, - time::Duration, -}; +use std::{collections::HashMap, path::PathBuf}; -use anyhow::{anyhow, bail, Result}; +use anyhow::{anyhow, Result}; use procfs::process::Process; use super::{controller_type::CONTROLLERS, ControllerType}; @@ -56,21 +51,3 @@ pub fn get_subsystem_mount_point(subsystem: &ControllerType) -> Result .map(|m| m.mount_point) .ok_or_else(|| anyhow!("could not find mountpoint for {}", subsystem)) } - -pub(crate) fn delete_with_retry>(path: P) -> Result<()> { - let mut attempts = 0; - let mut delay = Duration::from_millis(10); - let path = path.as_ref(); - - while attempts < 5 { - if fs::remove_dir(path).is_ok() { - return Ok(()); - } - - std::thread::sleep(delay); - attempts += attempts; - delay *= attempts; - } - - bail!("could not delete {:?}", path) -} diff --git a/cgroups/src/v2/manager.rs b/cgroups/src/v2/manager.rs index 06c18c693..d9e4aad32 100644 --- a/cgroups/src/v2/manager.rs +++ b/cgroups/src/v2/manager.rs @@ -120,8 +120,18 @@ impl CgroupManager for Manager { } fn remove(&self) -> Result<()> { - log::debug!("remove cgroup {:?}", self.full_path); - fs::remove_dir(&self.full_path)?; + if self.full_path.exists() { + log::debug!("remove cgroup {:?}", self.full_path); + let procs_path = self.full_path.join(CGROUP_PROCS); + let procs = fs::read_to_string(&procs_path)?; + + for line in procs.lines() { + let pid: i32 = line.parse()?; + let _ = nix::sys::signal::kill(Pid::from_raw(pid), nix::sys::signal::SIGKILL); + } + + common::delete_with_retry(&self.full_path)?; + } Ok(()) } From f04c443204a5b57d38092f8da851440de3c59257 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Sat, 25 Sep 2021 22:52:07 +0200 Subject: [PATCH 2/2] Ensure cgroup error behavior is similar to runc --- cgroups/src/common.rs | 17 +++++++++++-- cgroups/src/v1/manager.rs | 3 ++- cgroups/src/v2/manager.rs | 3 ++- src/container/builder_impl.rs | 45 +++++++++++++++++++++++++++++++---- src/utils.rs | 4 ++-- 5 files changed, 62 insertions(+), 10 deletions(-) diff --git a/cgroups/src/common.rs b/cgroups/src/common.rs index 58ffc825c..ab1303a83 100644 --- a/cgroups/src/common.rs +++ b/cgroups/src/common.rs @@ -357,12 +357,22 @@ pub(crate) fn default_devices() -> Vec { ] } -pub(crate) fn delete_with_retry>(path: P) -> Result<()> { +/// Attempts to delete the path the requested number of times. +pub(crate) fn delete_with_retry, L: Into>>( + path: P, + retries: u32, + limit_backoff: L, +) -> Result<()> { let mut attempts = 0; let mut delay = Duration::from_millis(10); let path = path.as_ref(); + let limit = if let Some(backoff) = limit_backoff.into() { + backoff + } else { + Duration::MAX + }; - while attempts < 5 { + while attempts < retries { if fs::remove_dir(path).is_ok() { return Ok(()); } @@ -370,6 +380,9 @@ pub(crate) fn delete_with_retry>(path: P) -> Result<()> { std::thread::sleep(delay); attempts += attempts; delay *= attempts; + if delay > limit { + delay = limit; + } } bail!("could not delete {:?}", path) diff --git a/cgroups/src/v1/manager.rs b/cgroups/src/v1/manager.rs index e2aea497d..39b9c5670 100644 --- a/cgroups/src/v1/manager.rs +++ b/cgroups/src/v1/manager.rs @@ -1,5 +1,6 @@ use std::fs; use std::path::Path; +use std::time::Duration; use std::{collections::HashMap, path::PathBuf}; use anyhow::bail; @@ -161,7 +162,7 @@ impl CgroupManager for Manager { let _ = nix::sys::signal::kill(Pid::from_raw(pid), nix::sys::signal::SIGKILL); } - common::delete_with_retry(cgroup_path.1)?; + common::delete_with_retry(cgroup_path.1, 4, Duration::from_millis(100))?; } } diff --git a/cgroups/src/v2/manager.rs b/cgroups/src/v2/manager.rs index d9e4aad32..f4c10cc5c 100644 --- a/cgroups/src/v2/manager.rs +++ b/cgroups/src/v2/manager.rs @@ -2,6 +2,7 @@ use std::{ fs::{self}, os::unix::fs::PermissionsExt, path::{Path, PathBuf}, + time::Duration, }; use anyhow::Result; @@ -130,7 +131,7 @@ impl CgroupManager for Manager { let _ = nix::sys::signal::kill(Pid::from_raw(pid), nix::sys::signal::SIGKILL); } - common::delete_with_retry(&self.full_path)?; + common::delete_with_retry(&self.full_path, 4, Duration::from_millis(100))?; } Ok(()) diff --git a/src/container/builder_impl.rs b/src/container/builder_impl.rs index 989db60ae..056196798 100644 --- a/src/container/builder_impl.rs +++ b/src/container/builder_impl.rs @@ -6,7 +6,7 @@ use crate::{ syscall::Syscall, utils, }; -use anyhow::{Context, Result}; +use anyhow::{bail, Context, Result}; use cgroups::{self, common::CgroupManager}; use nix::unistd::Pid; use oci_spec::runtime::{LinuxResources, Spec}; @@ -44,7 +44,15 @@ pub(super) struct ContainerBuilderImpl<'a> { impl<'a> ContainerBuilderImpl<'a> { pub(super) fn create(&mut self) -> Result<()> { - self.run_container() + if let Err(outer) = self.run_container().context("failed to create container") { + if let Err(inner) = self.cleanup_container() { + return Err(outer.context(inner)); + } + + return Err(outer); + } + + Ok(()) } fn run_container(&mut self) -> Result<()> { @@ -170,6 +178,33 @@ impl<'a> ContainerBuilderImpl<'a> { Ok(()) } + + fn cleanup_container(&self) -> Result<()> { + let linux = self.spec.linux.as_ref().context("no linux in spec")?; + let cgroups_path = utils::get_cgroup_path(&linux.cgroups_path, &self.container_id); + let cmanager = cgroups::common::create_cgroup_manager(&cgroups_path, self.use_systemd)?; + + let mut errors = Vec::new(); + if let Err(e) = cmanager.remove().context("failed to remove cgroup") { + errors.push(e.to_string()); + } + + if let Some(container) = &self.container { + if container.root.exists() { + if let Err(e) = fs::remove_dir_all(&container.root) + .with_context(|| format!("could not delete {}", container.root.display())) + { + errors.push(e.to_string()); + } + } + } + + if !errors.is_empty() { + bail!("failed to cleanup container: {}", errors.join(";")); + } + + Ok(()) + } } fn setup_mapping(rootless: &Rootless, pid: Pid) -> Result<()> { @@ -201,10 +236,12 @@ fn apply_cgroups( }; cmanager .add_task(pid) - .context("Failed to add tasks to cgroup manager")?; + .with_context(|| format!("failed to add task {} to cgroup manager", pid))?; + cmanager .apply(&controller_opt) - .context("Failed to apply resource limits through cgroup")?; + .context("failed to apply resource limits to cgroup")?; + Ok(()) } diff --git a/src/utils.rs b/src/utils.rs index 9c8e5d000..75ebdaf67 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -76,7 +76,7 @@ pub fn set_name(_name: &str) -> Result<()> { pub fn get_cgroup_path(cgroups_path: &Option, container_id: &str) -> PathBuf { match cgroups_path { Some(cpath) => cpath.clone(), - None => PathBuf::from(format!("/youki/{}", container_id)), + None => PathBuf::from(container_id), } } @@ -244,7 +244,7 @@ mod tests { let cid = "sample_container_id"; assert_eq!( get_cgroup_path(&None, cid), - PathBuf::from("/youki/sample_container_id") + PathBuf::from("sample_container_id") ); assert_eq!( get_cgroup_path(&Some(PathBuf::from("/youki")), cid),