diff --git a/Cargo.lock b/Cargo.lock index 08dfc579d..fd959fab1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1915,6 +1915,7 @@ dependencies = [ "libc", "libcgroups", "libseccomp", + "nc", "nix 0.27.1", "oci-spec", "once_cell", @@ -2184,6 +2185,15 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nc" +version = "0.8.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83c88ca23498aaa86177921d95ade67290248f9ef71f7416dc47d07cdc3c72a1" +dependencies = [ + "cc", +] + [[package]] name = "nix" version = "0.26.4" @@ -3066,6 +3076,7 @@ name = "runtimetest" version = "0.0.1" dependencies = [ "anyhow", + "nc", "nix 0.27.1", "oci-spec", ] diff --git a/crates/libcontainer/Cargo.toml b/crates/libcontainer/Cargo.toml index 2764bfe3f..3653871a7 100644 --- a/crates/libcontainer/Cargo.toml +++ b/crates/libcontainer/Cargo.toml @@ -53,6 +53,7 @@ regex = "1.10.2" thiserror = "1.0.56" tracing = { version = "0.1.40", features = ["attributes"] } safe-path = "0.1.0" +nc = "0.8.18" [dev-dependencies] oci-spec = { version = "~0.6.4", features = ["proptests", "runtime"] } diff --git a/crates/libcontainer/src/container/tenant_builder.rs b/crates/libcontainer/src/container/tenant_builder.rs index 4adcc0e76..a503cc6ff 100644 --- a/crates/libcontainer/src/container/tenant_builder.rs +++ b/crates/libcontainer/src/container/tenant_builder.rs @@ -4,7 +4,7 @@ use nix::unistd::{self, close, pipe2, read, Pid}; use oci_spec::runtime::{ Capabilities as SpecCapabilities, Capability as SpecCapability, LinuxBuilder, LinuxCapabilities, LinuxCapabilitiesBuilder, LinuxNamespace, LinuxNamespaceBuilder, - LinuxNamespaceType, Process, ProcessBuilder, Spec, + LinuxNamespaceType, LinuxSchedulerPolicy, Process, ProcessBuilder, Spec, }; use procfs::process::Namespace; @@ -222,6 +222,62 @@ impl TenantContainerBuilder { } } } + + if let Some(sc) = process.scheduler() { + let policy = sc.policy(); + if let Some(nice) = sc.nice() { + // https://man7.org/linux/man-pages/man2/sched_setattr.2.html#top_of_page + if (*policy == LinuxSchedulerPolicy::SchedBatch + || *policy == LinuxSchedulerPolicy::SchedOther) + && (*nice < -20 || *nice > 19) + { + tracing::error!( + ?nice, + "invalid scheduler.nice: '{}', must be within -20 to 19", + nice + ); + Err(ErrInvalidSpec::Scheduler)?; + } + } + if let Some(priority) = sc.priority() { + if *priority != 0 + && (*policy != LinuxSchedulerPolicy::SchedFifo + && *policy != LinuxSchedulerPolicy::SchedRr) + { + tracing::error!(?policy,"scheduler.priority can only be specified for SchedFIFO or SchedRR policy"); + Err(ErrInvalidSpec::Scheduler)?; + } + } + if *policy != LinuxSchedulerPolicy::SchedDeadline { + if let Some(runtime) = sc.runtime() { + if *runtime != 0 { + tracing::error!( + ?runtime, + "scheduler runtime can only be specified for SchedDeadline policy" + ); + Err(ErrInvalidSpec::Scheduler)?; + } + } + if let Some(deadline) = sc.deadline() { + if *deadline != 0 { + tracing::error!( + ?deadline, + "scheduler deadline can only be specified for SchedDeadline policy" + ); + Err(ErrInvalidSpec::Scheduler)?; + } + } + if let Some(period) = sc.period() { + if *period != 0 { + tracing::error!( + ?period, + "scheduler period can only be specified for SchedDeadline policy" + ); + Err(ErrInvalidSpec::Scheduler)?; + } + } + } + } } utils::validate_spec_for_new_user_ns(spec)?; diff --git a/crates/libcontainer/src/error.rs b/crates/libcontainer/src/error.rs index cda150add..f0555c962 100644 --- a/crates/libcontainer/src/error.rs +++ b/crates/libcontainer/src/error.rs @@ -92,4 +92,6 @@ pub enum ErrInvalidSpec { AppArmorNotEnabled, #[error("invalid io priority or class.")] IoPriority, + #[error("invalid scheduler config for process")] + Scheduler, } diff --git a/crates/libcontainer/src/process/container_init_process.rs b/crates/libcontainer/src/process/container_init_process.rs index f55c04ec8..eb2e5f2eb 100644 --- a/crates/libcontainer/src/process/container_init_process.rs +++ b/crates/libcontainer/src/process/container_init_process.rs @@ -7,13 +7,18 @@ use crate::{ capabilities, hooks, namespaces::Namespaces, process::channel, rootfs::RootFS, tty, user_ns::UserNamespaceConfig, utils, }; +use nc; use nix::mount::MsFlags; use nix::sched::CloneFlags; use nix::sys::stat::Mode; use nix::unistd::setsid; use nix::unistd::{self, Gid, Uid}; -use oci_spec::runtime::{IOPriorityClass, LinuxIOPriority, LinuxNamespaceType, Spec, User}; +use oci_spec::runtime::{ + IOPriorityClass, LinuxIOPriority, LinuxNamespaceType, LinuxSchedulerFlag, LinuxSchedulerPolicy, + Scheduler, Spec, User, +}; use std::collections::HashMap; +use std::mem; use std::os::unix::io::AsRawFd; use std::{ env, fs, @@ -74,6 +79,8 @@ pub enum InitProcessError { WorkloadValidation(#[from] workload::ExecutorValidationError), #[error("invalid io priority class: {0}")] IoPriorityClass(String), + #[error("call exec sched_setattr error: {0}")] + SchedSetattr(String), } type Result = std::result::Result; @@ -288,6 +295,8 @@ pub fn container_init_process( set_io_priority(syscall.as_ref(), proc.io_priority())?; + setup_scheduler(proc.scheduler())?; + // set up tty if specified if let Some(csocketfd) = args.console_socket { tty::setup_console(&csocketfd).map_err(|err| { @@ -741,6 +750,59 @@ fn set_io_priority(syscall: &dyn Syscall, io_priority_op: &Option) -> Result<()> { + if let Some(sc) = sc_op { + let policy: u32 = match *sc.policy() { + LinuxSchedulerPolicy::SchedOther => 0, + LinuxSchedulerPolicy::SchedFifo => 1, + LinuxSchedulerPolicy::SchedRr => 2, + LinuxSchedulerPolicy::SchedBatch => 3, + LinuxSchedulerPolicy::SchedIso => 4, + LinuxSchedulerPolicy::SchedIdle => 5, + LinuxSchedulerPolicy::SchedDeadline => 6, + }; + let mut flags_value: u64 = 0; + if let Some(flags) = sc.flags() { + for flag in flags { + match *flag { + LinuxSchedulerFlag::SchedResetOnFork => flags_value |= 0x01, + LinuxSchedulerFlag::SchedFlagReclaim => flags_value |= 0x02, + LinuxSchedulerFlag::SchedFlagDLOverrun => flags_value |= 0x04, + LinuxSchedulerFlag::SchedFlagKeepPolicy => flags_value |= 0x08, + LinuxSchedulerFlag::SchedFlagKeepParams => flags_value |= 0x10, + LinuxSchedulerFlag::SchedFlagUtilClampMin => flags_value |= 0x20, + LinuxSchedulerFlag::SchedFlagUtilClampMax => flags_value |= 0x40, + } + } + } + let mut a = nc::sched_attr_t { + size: mem::size_of::().try_into().unwrap(), + sched_policy: policy, + sched_flags: flags_value, + sched_nice: sc.nice().unwrap_or(0), + sched_priority: sc.priority().unwrap_or(0) as u32, + sched_runtime: sc.runtime().unwrap_or(0), + sched_deadline: sc.deadline().unwrap_or(0), + sched_period: sc.period().unwrap_or(0), + sched_util_min: 0, + sched_util_max: 0, + }; + // TODO when nix or libc support this function, replace nx crates. + unsafe { + let result = nc::sched_setattr(0, &mut a, 0); + match result { + Ok(_) => {} + Err(err) => { + tracing::error!(?err, "error setting scheduler"); + Err(InitProcessError::SchedSetattr(err.to_string()))?; + } + } + }; + } + Ok(()) +} + #[cfg(feature = "libseccomp")] fn sync_seccomp( fd: Option, diff --git a/tests/contest/contest/src/main.rs b/tests/contest/contest/src/main.rs index 9cabe35ae..9b7373848 100644 --- a/tests/contest/contest/src/main.rs +++ b/tests/contest/contest/src/main.rs @@ -10,6 +10,7 @@ use crate::tests::linux_ns_itype::get_ns_itype_tests; use crate::tests::mounts_recursive::get_mounts_recursive_test; use crate::tests::pidfile::get_pidfile_test; use crate::tests::readonly_paths::get_ro_paths_test; +use crate::tests::scheduler::get_scheduler_test; use crate::tests::seccomp::get_seccomp_test; use crate::tests::seccomp_notify::get_seccomp_notify_test; use crate::tests::sysctl::get_sysctl_test; @@ -103,6 +104,8 @@ fn main() -> Result<()> { let mounts_recursive = get_mounts_recursive_test(); let intel_rdt = get_intel_rdt_test(); let sysctl = get_sysctl_test(); + #[allow(unused_variables)] + let scheduler = get_scheduler_test(); tm.add_test_group(Box::new(cl)); tm.add_test_group(Box::new(cc)); @@ -123,6 +126,7 @@ fn main() -> Result<()> { tm.add_test_group(Box::new(mounts_recursive)); tm.add_test_group(Box::new(intel_rdt)); tm.add_test_group(Box::new(sysctl)); + // tm.add_test_group(Box::new(scheduler)); tm.add_cleanup(Box::new(cgroups::cleanup_v1)); tm.add_cleanup(Box::new(cgroups::cleanup_v2)); diff --git a/tests/contest/contest/src/tests/mod.rs b/tests/contest/contest/src/tests/mod.rs index c0d738e22..099cf7ff4 100644 --- a/tests/contest/contest/src/tests/mod.rs +++ b/tests/contest/contest/src/tests/mod.rs @@ -8,6 +8,7 @@ pub mod linux_ns_itype; pub mod mounts_recursive; pub mod pidfile; pub mod readonly_paths; +pub mod scheduler; pub mod seccomp; pub mod seccomp_notify; pub mod sysctl; diff --git a/tests/contest/contest/src/tests/scheduler/mod.rs b/tests/contest/contest/src/tests/scheduler/mod.rs new file mode 100644 index 000000000..b3ad07cab --- /dev/null +++ b/tests/contest/contest/src/tests/scheduler/mod.rs @@ -0,0 +1,3 @@ +mod scheduler_policy; + +pub use scheduler_policy::get_scheduler_test; diff --git a/tests/contest/contest/src/tests/scheduler/scheduler_policy.rs b/tests/contest/contest/src/tests/scheduler/scheduler_policy.rs new file mode 100644 index 000000000..fbfed9c89 --- /dev/null +++ b/tests/contest/contest/src/tests/scheduler/scheduler_policy.rs @@ -0,0 +1,56 @@ +use anyhow::{Context, Result}; +use oci_spec::runtime::{ + LinuxSchedulerPolicy, ProcessBuilder, SchedulerBuilder, Spec, SpecBuilder, +}; +use test_framework::{test_result, Test, TestGroup, TestResult}; + +use crate::utils::test_inside_container; + +fn create_spec(policy: LinuxSchedulerPolicy, execute_test: &str) -> Result { + let sc = SchedulerBuilder::default() + .policy(policy) + .nice(1i32) + .build() + .unwrap(); + SpecBuilder::default() + .process( + ProcessBuilder::default() + .args( + ["runtimetest", execute_test] + .iter() + .map(|s| s.to_string()) + .collect::>(), + ) + .scheduler(sc) + .build()?, + ) + .build() + .context("failed to create spec") +} + +fn scheduler_policy_other_test() -> TestResult { + let spec = test_result!(create_spec( + LinuxSchedulerPolicy::SchedOther, + "scheduler_policy_other" + )); + test_inside_container(spec, &|_| Ok(())) +} + +fn scheduler_policy_batch_test() -> TestResult { + let spec = test_result!(create_spec( + LinuxSchedulerPolicy::SchedBatch, + "scheduler_policy_batch" + )); + test_inside_container(spec, &|_| Ok(())) +} + +pub fn get_scheduler_test() -> TestGroup { + let mut scheduler_policy_group = TestGroup::new("set_scheduler_policy"); + let policy_fifo_test = Test::new("policy_other", Box::new(scheduler_policy_other_test)); + let policy_rr_test = Test::new("policy_batch", Box::new(scheduler_policy_batch_test)); + + scheduler_policy_group.add(vec![Box::new(policy_fifo_test)]); + scheduler_policy_group.add(vec![Box::new(policy_rr_test)]); + + scheduler_policy_group +} diff --git a/tests/contest/runtimetest/Cargo.toml b/tests/contest/runtimetest/Cargo.toml index f32550467..63dab30b9 100644 --- a/tests/contest/runtimetest/Cargo.toml +++ b/tests/contest/runtimetest/Cargo.toml @@ -7,4 +7,5 @@ edition = "2021" oci-spec = { version = "0.6.4", features = ["runtime"] } nix = "0.27.1" anyhow = "1.0" +nc = "0.8.18" diff --git a/tests/contest/runtimetest/src/main.rs b/tests/contest/runtimetest/src/main.rs index cbeb53a28..65e0ed5e3 100644 --- a/tests/contest/runtimetest/src/main.rs +++ b/tests/contest/runtimetest/src/main.rs @@ -35,6 +35,8 @@ fn main() { "mounts_recursive" => tests::validate_mounts_recursive(&spec), "seccomp" => tests::validate_seccomp(&spec), "sysctl" => tests::validate_sysctl(&spec), + "scheduler_policy_other" => tests::validate_scheduler_policy(&spec), + "scheduler_policy_batch" => tests::validate_scheduler_policy(&spec), _ => eprintln!("error due to unexpected execute test name: {execute_test}"), } } diff --git a/tests/contest/runtimetest/src/tests.rs b/tests/contest/runtimetest/src/tests.rs index 4275b8af7..20c501c2d 100644 --- a/tests/contest/runtimetest/src/tests.rs +++ b/tests/contest/runtimetest/src/tests.rs @@ -2,8 +2,9 @@ use crate::utils::{self, test_read_access, test_write_access}; use anyhow::{bail, Result}; use nix::errno::Errno; use nix::unistd::getcwd; -use oci_spec::runtime::Spec; +use oci_spec::runtime::{LinuxSchedulerPolicy, Spec}; use std::fs::{self, read_dir}; +use std::mem; use std::path::Path; ////////// ANCHOR: example_hello_world @@ -296,7 +297,9 @@ pub fn validate_sysctl(spec: &Spec) { .trim() .to_string(), Err(e) => { - return eprintln!("error due to fail to read the file {key_path:?}, error: {e}") + return eprintln!( + "error due to fail to read the file {key_path:?}, error: {e}" + ); } }; if &actual_value != expected_value { @@ -307,3 +310,52 @@ pub fn validate_sysctl(spec: &Spec) { } } } + +pub fn validate_scheduler_policy(spec: &Spec) { + let proc = spec.process().as_ref().unwrap(); + let sc = proc.scheduler().as_ref().unwrap(); + println!("schedul is {:?}", spec); + let size = mem::size_of::().try_into().unwrap(); + let mut get_sched_attr = nc::sched_attr_t { + size: 0, + sched_policy: 0, + sched_flags: 0, + sched_nice: 0, + sched_priority: 0, + sched_runtime: 0, + sched_deadline: 0, + sched_period: 0, + sched_util_min: 0, + sched_util_max: 0, + }; + unsafe { + match nc::sched_getattr(0, &mut get_sched_attr, size, 0) { + Ok(_) => { + println!("sched_getattr get success"); + } + Err(e) => { + return eprintln!("error due to fail to get sched attr error: {e}"); + } + }; + } + println!("get_sched_attr is {:?}", get_sched_attr); + let sp = get_sched_attr.sched_policy; + let want_sp: u32 = match *sc.policy() { + LinuxSchedulerPolicy::SchedOther => 0, + LinuxSchedulerPolicy::SchedFifo => 1, + LinuxSchedulerPolicy::SchedRr => 2, + LinuxSchedulerPolicy::SchedBatch => 3, + LinuxSchedulerPolicy::SchedIso => 4, + LinuxSchedulerPolicy::SchedIdle => 5, + LinuxSchedulerPolicy::SchedDeadline => 6, + }; + println!("want_sp {:?}", want_sp); + if sp != want_sp { + return eprintln!("error due to sched_policy want {want_sp}, got {sp}"); + } + let sn = get_sched_attr.sched_nice; + let want_sn = sc.nice().unwrap(); + if sn != want_sn { + eprintln!("error due to sched_nice want {want_sn}, got {sn}") + } +}