From 57f9d1f01a6b249c6673e6da253c3504ce8ce4c4 Mon Sep 17 00:00:00 2001 From: Askar Safin Date: Wed, 24 Jan 2024 15:22:00 +0300 Subject: [PATCH 1/3] This commit is part of clone3 clean up. As part of clean up we will remove tests/ui/command/command-create-pidfd.rs . But it contains very useful comment, so let's move the comment to library/std/src/sys/pal/unix/rand.rs , which contains another instance of the same Docker problem --- library/std/src/sys/pal/unix/rand.rs | 13 ++++++++++++- tests/ui/command/command-create-pidfd.rs | 10 ---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/library/std/src/sys/pal/unix/rand.rs b/library/std/src/sys/pal/unix/rand.rs index cf0fe0f47c53f..1dba1ccf64e68 100644 --- a/library/std/src/sys/pal/unix/rand.rs +++ b/library/std/src/sys/pal/unix/rand.rs @@ -106,7 +106,18 @@ mod imp { // supported on the current kernel. // // Also fall back in case it is disabled by something like - // seccomp or inside of virtual machines. + // seccomp or inside of docker. + // + // If the `getrandom` syscall is not implemented in the current kernel version it should return an + // `ENOSYS` error. Docker also blocks the whole syscall inside unprivileged containers, and + // returns `EPERM` (instead of `ENOSYS`) when a program tries to invoke the syscall. Because of + // that we need to check for *both* `ENOSYS` and `EPERM`. + // + // Note that Docker's behavior is breaking other projects (notably glibc), so they're planning + // to update their filtering to return `ENOSYS` in a future release: + // + // https://github.com/moby/moby/issues/42680 + // GETRANDOM_UNAVAILABLE.store(true, Ordering::Relaxed); return false; } else if err == libc::EAGAIN { diff --git a/tests/ui/command/command-create-pidfd.rs b/tests/ui/command/command-create-pidfd.rs index 4df443c66d65b..9f9e5dd0e0174 100644 --- a/tests/ui/command/command-create-pidfd.rs +++ b/tests/ui/command/command-create-pidfd.rs @@ -16,16 +16,6 @@ fn has_clone3() -> bool { .then(|| Error::last_os_error()) .expect("probe syscall should not succeed"); - // If the `clone3` syscall is not implemented in the current kernel version it should return an - // `ENOSYS` error. Docker also blocks the whole syscall inside unprivileged containers, and - // returns `EPERM` (instead of `ENOSYS`) when a program tries to invoke the syscall. Because of - // that we need to check for *both* `ENOSYS` and `EPERM`. - // - // Note that Docker's behavior is breaking other projects (notably glibc), so they're planning - // to update their filtering to return `ENOSYS` in a future release: - // - // https://github.com/moby/moby/issues/42680 - // err.raw_os_error() != Some(libc::ENOSYS) && err.raw_os_error() != Some(libc::EPERM) } From 1ee773e2421e23a249cb007dade8286c16eb5cd8 Mon Sep 17 00:00:00 2001 From: Askar Safin Date: Wed, 24 Jan 2024 15:32:06 +0300 Subject: [PATCH 2/3] This commit is part of clone3 clean up. Merge tests from tests/ui/command/command-create-pidfd.rs to library/std/src/sys/pal/unix/process/process_unix/tests.rs to remove code duplication --- .../pal/unix/process/process_unix/tests.rs | 20 +++++++- tests/ui/command/command-create-pidfd.rs | 46 ------------------- 2 files changed, 18 insertions(+), 48 deletions(-) delete mode 100644 tests/ui/command/command-create-pidfd.rs diff --git a/library/std/src/sys/pal/unix/process/process_unix/tests.rs b/library/std/src/sys/pal/unix/process/process_unix/tests.rs index 6e952ed7c42af..0a6c6ec19fc7e 100644 --- a/library/std/src/sys/pal/unix/process/process_unix/tests.rs +++ b/library/std/src/sys/pal/unix/process/process_unix/tests.rs @@ -62,13 +62,14 @@ fn test_command_fork_no_unwind() { } #[test] -#[cfg(target_os = "linux")] +#[cfg(target_os = "linux")] // pidfds are a linux-specific concept fn test_command_pidfd() { use crate::assert_matches::assert_matches; use crate::os::fd::{AsRawFd, RawFd}; use crate::os::linux::process::{ChildExt, CommandExt}; use crate::process::Command; + // pidfds require the pidfd_open syscall let our_pid = crate::process::id(); let pidfd = unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) }; let pidfd_open_available = if pidfd >= 0 { @@ -81,7 +82,9 @@ fn test_command_pidfd() { // always exercise creation attempts let mut child = Command::new("false").create_pidfd(true).spawn().unwrap(); - // but only check if we know that the kernel supports pidfds + // but only check if we know that the kernel supports pidfds. + // We don't assert the precise value, since the standard library + // might have opened other file descriptors before our code runs. if pidfd_open_available { assert!(child.pidfd().is_ok()); } @@ -97,4 +100,17 @@ fn test_command_pidfd() { child.kill().expect("failed to kill child"); let status = child.wait().expect("error waiting on pidfd"); assert_eq!(status.signal(), Some(libc::SIGKILL)); + + let _ = Command::new("echo") + .create_pidfd(false) + .spawn() + .unwrap() + .pidfd() + .expect_err("pidfd should not have been created when create_pid(false) is set"); + + let _ = Command::new("echo") + .spawn() + .unwrap() + .pidfd() + .expect_err("pidfd should not have been created"); } diff --git a/tests/ui/command/command-create-pidfd.rs b/tests/ui/command/command-create-pidfd.rs deleted file mode 100644 index 9f9e5dd0e0174..0000000000000 --- a/tests/ui/command/command-create-pidfd.rs +++ /dev/null @@ -1,46 +0,0 @@ -// run-pass -// only-linux - pidfds are a linux-specific concept - -#![feature(linux_pidfd)] -#![feature(rustc_private)] - -extern crate libc; - -use std::io::Error; -use std::os::linux::process::{ChildExt, CommandExt}; -use std::process::Command; - -fn has_clone3() -> bool { - let res = unsafe { libc::syscall(libc::SYS_clone3, 0, 0) }; - let err = (res == -1) - .then(|| Error::last_os_error()) - .expect("probe syscall should not succeed"); - - err.raw_os_error() != Some(libc::ENOSYS) && err.raw_os_error() != Some(libc::EPERM) -} - -fn main() { - // pidfds require the clone3 syscall - if !has_clone3() { - return; - } - - // We don't assert the precise value, since the standard library - // might have opened other file descriptors before our code runs. - let _ = Command::new("echo") - .create_pidfd(true) - .spawn() - .unwrap() - .pidfd().expect("failed to obtain pidfd"); - - let _ = Command::new("echo") - .create_pidfd(false) - .spawn() - .unwrap() - .pidfd().expect_err("pidfd should not have been created when create_pid(false) is set"); - - let _ = Command::new("echo") - .spawn() - .unwrap() - .pidfd().expect_err("pidfd should not have been created"); -} From df0c9c37c1d7458d1d06b370912f6595e0295079 Mon Sep 17 00:00:00 2001 From: Askar Safin Date: Wed, 24 Jan 2024 15:36:57 +0300 Subject: [PATCH 3/3] Finishing clone3 clean up --- library/std/src/os/linux/process.rs | 3 +-- library/std/src/sys/pal/unix/process/process_unix.rs | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/library/std/src/os/linux/process.rs b/library/std/src/os/linux/process.rs index 51af432d0568d..2ba67a6dd1aa9 100644 --- a/library/std/src/os/linux/process.rs +++ b/library/std/src/os/linux/process.rs @@ -149,8 +149,7 @@ pub trait CommandExt: Sealed { /// The pidfd can be retrieved from the child with [`pidfd`] or [`take_pidfd`]. /// /// A pidfd will only be created if it is possible to do so - /// in a guaranteed race-free manner (e.g. if the `clone3` system call - /// is supported). Otherwise, [`pidfd`] will return an error. + /// in a guaranteed race-free manner. Otherwise, [`pidfd`] will return an error. /// /// If a pidfd has been successfully created and not been taken from the `Child` /// then calls to `kill()`, `wait()` and `try_wait()` will use the pidfd diff --git a/library/std/src/sys/pal/unix/process/process_unix.rs b/library/std/src/sys/pal/unix/process/process_unix.rs index fac6d92439ee8..df0fe2bb9d84a 100644 --- a/library/std/src/sys/pal/unix/process/process_unix.rs +++ b/library/std/src/sys/pal/unix/process/process_unix.rs @@ -147,8 +147,7 @@ impl Command { #[cfg(not(target_os = "linux"))] let pidfd = -1; - // Safety: We obtained the pidfd from calling `clone3` with - // `CLONE_PIDFD` so it's valid an otherwise unowned. + // Safety: We obtained the pidfd (on Linux) using SOCK_SEQPACKET, so it's valid. let mut p = unsafe { Process::new(pid, pidfd) }; let mut bytes = [0; 8];