diff --git a/Cargo.lock b/Cargo.lock index 3c18993b31..dbc0b902fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1944,6 +1944,7 @@ dependencies = [ "libc", "libcgroups", "libseccomp", + "nc", "nix 0.27.1", "oci-spec", "once_cell", @@ -2213,6 +2214,15 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nc" +version = "0.8.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83c88ca23498aaa86177921d95ade67290248f9ef71f7416dc47d07cdc3c72a1" +dependencies = [ + "cc", +] + [[package]] name = "nix" version = "0.26.2" diff --git a/crates/libcontainer/Cargo.toml b/crates/libcontainer/Cargo.toml index c5e0511cfe..3bc46fc118 100644 --- a/crates/libcontainer/Cargo.toml +++ b/crates/libcontainer/Cargo.toml @@ -52,6 +52,7 @@ regex = "1.10.2" thiserror = "1.0.50" tracing = { version = "0.1.40", features = ["attributes"] } safe-path = "0.1.0" +nc = "0.8.18" [dev-dependencies] oci-spec = { version = "~0.6.4", features = ["proptests", "runtime"] } diff --git a/crates/libcontainer/src/container/tenant_builder.rs b/crates/libcontainer/src/container/tenant_builder.rs index 5bb6f9a56d..208a63bc44 100644 --- a/crates/libcontainer/src/container/tenant_builder.rs +++ b/crates/libcontainer/src/container/tenant_builder.rs @@ -4,7 +4,7 @@ use nix::unistd::{self, close, pipe2, read, Pid}; use oci_spec::runtime::{ Capabilities as SpecCapabilities, Capability as SpecCapability, LinuxBuilder, LinuxCapabilities, LinuxCapabilitiesBuilder, LinuxNamespace, LinuxNamespaceBuilder, - LinuxNamespaceType, Process, ProcessBuilder, Spec, + LinuxNamespaceType, LinuxSchedulerPolicy, Process, ProcessBuilder, Spec, }; use procfs::process::Namespace; @@ -222,6 +222,54 @@ impl TenantContainerBuilder { } } } + + if let Some(sc) = process.scheduler() { + let policy = sc.policy(); + if let Some(nice) = sc.nice() { + if *nice < -20 && *nice > 19 { + tracing::error!(?nice, "invalid scheduler.nice: '{}'", nice); + Err(ErrInvalidSpec::Scheduler)?; + } + } + if let Some(priority) = sc.priority() { + if *priority != 0 + && (*policy != LinuxSchedulerPolicy::SchedFifo + && *policy != LinuxSchedulerPolicy::SchedRr) + { + tracing::error!(?policy,"scheduler.priority can only be specified for SchedFIFO or SchedRR policy"); + Err(ErrInvalidSpec::Scheduler)?; + } + } + if *policy != LinuxSchedulerPolicy::SchedDeadline { + if let Some(runtime) = sc.runtime() { + if *runtime != 0 { + tracing::error!( + ?runtime, + "scheduler runtime can only be specified for SchedDeadline policy" + ); + Err(ErrInvalidSpec::Scheduler)?; + } + } + if let Some(deadline) = sc.deadline() { + if *deadline != 0 { + tracing::error!( + ?deadline, + "scheduler deadline can only be specified for SchedDeadline policy" + ); + Err(ErrInvalidSpec::Scheduler)?; + } + } + if let Some(period) = sc.period() { + if *period != 0 { + tracing::error!( + ?period, + "scheduler period can only be specified for SchedDeadline policy" + ); + Err(ErrInvalidSpec::Scheduler)?; + } + } + } + } } utils::validate_spec_for_new_user_ns(spec)?; diff --git a/crates/libcontainer/src/error.rs b/crates/libcontainer/src/error.rs index cda150addd..f0555c9626 100644 --- a/crates/libcontainer/src/error.rs +++ b/crates/libcontainer/src/error.rs @@ -92,4 +92,6 @@ pub enum ErrInvalidSpec { AppArmorNotEnabled, #[error("invalid io priority or class.")] IoPriority, + #[error("invalid scheduler config for process")] + Scheduler, } diff --git a/crates/libcontainer/src/process/container_init_process.rs b/crates/libcontainer/src/process/container_init_process.rs index f55c04ec8c..85ae2c8525 100644 --- a/crates/libcontainer/src/process/container_init_process.rs +++ b/crates/libcontainer/src/process/container_init_process.rs @@ -7,13 +7,18 @@ use crate::{ capabilities, hooks, namespaces::Namespaces, process::channel, rootfs::RootFS, tty, user_ns::UserNamespaceConfig, utils, }; +use nc; use nix::mount::MsFlags; use nix::sched::CloneFlags; use nix::sys::stat::Mode; use nix::unistd::setsid; use nix::unistd::{self, Gid, Uid}; -use oci_spec::runtime::{IOPriorityClass, LinuxIOPriority, LinuxNamespaceType, Spec, User}; +use oci_spec::runtime::{ + IOPriorityClass, LinuxIOPriority, LinuxNamespaceType, LinuxSchedulerFlag, LinuxSchedulerPolicy, + Scheduler, Spec, User, +}; use std::collections::HashMap; +use std::mem; use std::os::unix::io::AsRawFd; use std::{ env, fs, @@ -74,6 +79,8 @@ pub enum InitProcessError { WorkloadValidation(#[from] workload::ExecutorValidationError), #[error("invalid io priority class: {0}")] IoPriorityClass(String), + #[error("call exec sched_setattr error: {0}")] + SchedSetattr(String), } type Result = std::result::Result; @@ -288,6 +295,8 @@ pub fn container_init_process( set_io_priority(syscall.as_ref(), proc.io_priority())?; + setup_scheduler(proc.scheduler())?; + // set up tty if specified if let Some(csocketfd) = args.console_socket { tty::setup_console(&csocketfd).map_err(|err| { @@ -741,6 +750,56 @@ fn set_io_priority(syscall: &dyn Syscall, io_priority_op: &Option) -> Result<()> { + if let Some(sc) = sc_op { + let policy: u32 = match sc.policy() { + LinuxSchedulerPolicy::SchedOther => 0, + LinuxSchedulerPolicy::SchedFifo => 1, + LinuxSchedulerPolicy::SchedRr => 2, + LinuxSchedulerPolicy::SchedBatch => 3, + LinuxSchedulerPolicy::SchedIso => 4, + LinuxSchedulerPolicy::SchedIdle => 5, + LinuxSchedulerPolicy::SchedDeadline => 6, + }; + let mut flags: u64 = 0; + for flag in sc.flags().as_ref().unwrap() { + match flag { + LinuxSchedulerFlag::SchedResetOnFork => flags |= 0x01, + LinuxSchedulerFlag::SchedFlagReclaim => flags |= 0x02, + LinuxSchedulerFlag::SchedFlagDLOverrun => flags |= 0x04, + LinuxSchedulerFlag::SchedFlagKeepPolicy => flags |= 0x08, + LinuxSchedulerFlag::SchedFlagKeepParams => flags |= 0x10, + LinuxSchedulerFlag::SchedFlagUtilClampMin => flags |= 0x20, + LinuxSchedulerFlag::SchedFlagUtilClampMax => flags |= 0x40, + } + } + let mut a = nc::sched_attr_t { + size: mem::size_of::().try_into().unwrap(), + sched_policy: policy, + sched_flags: flags, + sched_nice: sc.nice().unwrap(), + sched_priority: sc.priority().unwrap() as u32, + sched_runtime: sc.runtime().unwrap(), + sched_deadline: sc.deadline().unwrap(), + sched_period: sc.period().unwrap(), + sched_util_min: 0, + sched_util_max: 0, + }; + unsafe { + let result = nc::sched_setattr(0, &mut a, 0); + match result { + Ok(_) => {} + Err(err) => { + tracing::error!(?err, "error setting scheduler"); + Err(InitProcessError::SchedSetattr(err.to_string()))?; + } + } + }; + } + Ok(()) +} + #[cfg(feature = "libseccomp")] fn sync_seccomp( fd: Option,