From 5c46acac0431a5fe0fbf8f5239a7d3364c9186cf Mon Sep 17 00:00:00 2001 From: The 8472 Date: Sat, 22 Jun 2024 14:38:14 +0200 Subject: [PATCH 1/5] document the cvt methods --- library/std/src/sys/pal/unix/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/library/std/src/sys/pal/unix/mod.rs b/library/std/src/sys/pal/unix/mod.rs index b370f06e92baf..ae257cab1e50b 100644 --- a/library/std/src/sys/pal/unix/mod.rs +++ b/library/std/src/sys/pal/unix/mod.rs @@ -307,10 +307,13 @@ macro_rules! impl_is_minus_one { impl_is_minus_one! { i8 i16 i32 i64 isize } +/// Convert native return values to Result using the *-1 means error is in `errno`* convention. +/// Non-error values are `Ok`-wrapped. pub fn cvt(t: T) -> crate::io::Result { if t.is_minus_one() { Err(crate::io::Error::last_os_error()) } else { Ok(t) } } +/// `-1` → look at `errno` → retry on `EINTR`. Otherwise `Ok()`-wrap the closure return value. pub fn cvt_r(mut f: F) -> crate::io::Result where T: IsMinusOne, @@ -325,6 +328,7 @@ where } #[allow(dead_code)] // Not used on all platforms. +/// Zero means `Ok()`, all other values are treated as raw OS errors. Does not look at `errno`. pub fn cvt_nz(error: libc::c_int) -> crate::io::Result<()> { if error == 0 { Ok(()) } else { Err(crate::io::Error::from_raw_os_error(error)) } } From 6687a3f7da60a4d0f06fd84fea75bec1dd0fce2a Mon Sep 17 00:00:00 2001 From: The 8472 Date: Sat, 22 Jun 2024 14:37:12 +0200 Subject: [PATCH 2/5] use pidfd_spawn for faster process creation when pidfds are requested --- .../std/src/sys/pal/unix/linux/pidfd/tests.rs | 13 ++- .../src/sys/pal/unix/process/process_unix.rs | 99 ++++++++++++++++++- 2 files changed, 106 insertions(+), 6 deletions(-) diff --git a/library/std/src/sys/pal/unix/linux/pidfd/tests.rs b/library/std/src/sys/pal/unix/linux/pidfd/tests.rs index 6d9532f2ef1ff..672cb0efed1d1 100644 --- a/library/std/src/sys/pal/unix/linux/pidfd/tests.rs +++ b/library/std/src/sys/pal/unix/linux/pidfd/tests.rs @@ -1,7 +1,7 @@ use crate::assert_matches::assert_matches; use crate::os::fd::{AsRawFd, RawFd}; -use crate::os::linux::process::{ChildExt, CommandExt}; -use crate::os::unix::process::ExitStatusExt; +use crate::os::linux::process::{ChildExt, CommandExt as _}; +use crate::os::unix::process::{CommandExt as _, ExitStatusExt}; use crate::process::Command; #[test] @@ -42,6 +42,15 @@ fn test_command_pidfd() { .unwrap() .pidfd() .expect_err("pidfd should not have been created"); + + // exercise the fork/exec path since the earlier attempts may have used pidfd_spawnp() + let mut child = + unsafe { Command::new("false").pre_exec(|| Ok(())) }.create_pidfd(true).spawn().unwrap(); + + if pidfd_open_available { + assert!(child.pidfd().is_ok()) + } + child.wait().expect("error waiting on child"); } #[test] diff --git a/library/std/src/sys/pal/unix/process/process_unix.rs b/library/std/src/sys/pal/unix/process/process_unix.rs index 32382d9a50cf4..3de66d9789fbd 100644 --- a/library/std/src/sys/pal/unix/process/process_unix.rs +++ b/library/std/src/sys/pal/unix/process/process_unix.rs @@ -449,17 +449,70 @@ impl Command { use crate::mem::MaybeUninit; use crate::sys::weak::weak; use crate::sys::{self, cvt_nz, on_broken_pipe_flag_used}; + #[cfg(target_os = "linux")] + use core::sync::atomic::{AtomicU8, Ordering}; if self.get_gid().is_some() || self.get_uid().is_some() || (self.env_saw_path() && !self.program_is_path()) || !self.get_closures().is_empty() || self.get_groups().is_some() - || self.get_create_pidfd() { return Ok(None); } + cfg_if::cfg_if! { + if #[cfg(target_os = "linux")] { + weak! { + fn pidfd_spawnp( + *mut libc::c_int, + *const libc::c_char, + *const libc::posix_spawn_file_actions_t, + *const libc::posix_spawnattr_t, + *const *mut libc::c_char, + *const *mut libc::c_char + ) -> libc::c_int + } + + weak! { fn pidfd_getpid(libc::c_int) -> libc::c_int } + + static PIDFD_SPAWN_SUPPORTED: AtomicU8 = AtomicU8::new(0); + const UNKNOWN: u8 = 0; + const YES: u8 = 1; + // NO currently forces a fallback to fork/exec. We could be more nuanced here and keep using spawn + // if we know pidfd's aren't supported at all and the fallback would be futile. + const NO: u8 = 2; + + if self.get_create_pidfd() { + let flag = PIDFD_SPAWN_SUPPORTED.load(Ordering::Relaxed); + if flag == NO || pidfd_spawnp.get().is_none() || pidfd_getpid.get().is_none() { + return Ok(None); + } + if flag == UNKNOWN { + let mut support = NO; + let our_pid = crate::process::id(); + let pidfd = + unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as libc::c_int; + if pidfd >= 0 { + let pid = unsafe { pidfd_getpid.get().unwrap()(pidfd) } as u32; + unsafe { libc::close(pidfd) }; + if pid == our_pid { + support = YES + }; + } + PIDFD_SPAWN_SUPPORTED.store(support, Ordering::Relaxed); + if support != YES { + return Ok(None); + } + } + } + } else { + if self.get_create_pidfd() { + unreachable!("only implemented on linux") + } + } + } + // Only glibc 2.24+ posix_spawn() supports returning ENOENT directly. #[cfg(all(target_os = "linux", target_env = "gnu"))] { @@ -543,9 +596,6 @@ impl Command { let pgroup = self.get_pgroup(); - // Safety: -1 indicates we don't have a pidfd. - let mut p = unsafe { Process::new(0, -1) }; - struct PosixSpawnFileActions<'a>(&'a mut MaybeUninit); impl Drop for PosixSpawnFileActions<'_> { @@ -640,6 +690,47 @@ impl Command { #[cfg(target_os = "nto")] let spawn_fn = retrying_libc_posix_spawnp; + #[cfg(target_os = "linux")] + if self.get_create_pidfd() { + let mut pidfd: libc::c_int = -1; + let spawn_res = pidfd_spawnp.get().unwrap()( + &mut pidfd, + self.get_program_cstr().as_ptr(), + file_actions.0.as_ptr(), + attrs.0.as_ptr(), + self.get_argv().as_ptr() as *const _, + envp as *const _, + ); + + let spawn_res = cvt_nz(spawn_res); + if let Err(ref e) = spawn_res + && e.raw_os_error() == Some(libc::ENOSYS) + { + PIDFD_SPAWN_SUPPORTED.store(NO, Ordering::Relaxed); + return Ok(None); + } + spawn_res?; + + let pid = match cvt(pidfd_getpid.get().unwrap()(pidfd)) { + Ok(pid) => pid, + Err(e) => { + // The child has been spawned and we are holding its pidfd. + // But we cannot obtain its pid even though pidfd_getpid support was verified earlier. + // This might happen if libc can't open procfs because the file descriptor limit has been reached. + libc::close(pidfd); + return Err(Error::new( + e.kind(), + "pidfd_spawnp succeeded but the child's PID could not be obtained", + )); + } + }; + + return Ok(Some(Process::new(pid, pidfd))); + } + + // Safety: -1 indicates we don't have a pidfd. + let mut p = Process::new(0, -1); + let spawn_res = spawn_fn( &mut p.pid, self.get_program_cstr().as_ptr(), From 0ce361938eddf08da88e4c35e0ed63dbb204b2f2 Mon Sep 17 00:00:00 2001 From: The 8472 Date: Mon, 24 Jun 2024 23:10:17 +0200 Subject: [PATCH 3/5] document safety properties of the internal Process::new constructor --- library/std/src/sys/pal/unix/process/process_unix.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/library/std/src/sys/pal/unix/process/process_unix.rs b/library/std/src/sys/pal/unix/process/process_unix.rs index 3de66d9789fbd..23dce4ea5fb71 100644 --- a/library/std/src/sys/pal/unix/process/process_unix.rs +++ b/library/std/src/sys/pal/unix/process/process_unix.rs @@ -877,6 +877,12 @@ pub struct Process { impl Process { #[cfg(target_os = "linux")] + /// # Safety + /// + /// `pidfd` must either be -1 (representing no file descriptor) or a valid, exclusively owned file + /// descriptor (See [I/O Safety]). + /// + /// [I/O Safety]: crate::io#io-safety unsafe fn new(pid: pid_t, pidfd: pid_t) -> Self { use crate::os::unix::io::FromRawFd; use crate::sys_common::FromInner; From 3e4e31b7bf34dafa4dc3fc97e454a046886692da Mon Sep 17 00:00:00 2001 From: The 8472 Date: Tue, 25 Jun 2024 00:14:55 +0200 Subject: [PATCH 4/5] more fine-grained feature-detection for pidfd spawning we now distinguish between pidfd_spawn support, pidfd-via-fork/exec and not-supported --- .../src/sys/pal/unix/process/process_unix.rs | 54 +++++++++++-------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/library/std/src/sys/pal/unix/process/process_unix.rs b/library/std/src/sys/pal/unix/process/process_unix.rs index 23dce4ea5fb71..abd4a334783e4 100644 --- a/library/std/src/sys/pal/unix/process/process_unix.rs +++ b/library/std/src/sys/pal/unix/process/process_unix.rs @@ -476,35 +476,47 @@ impl Command { weak! { fn pidfd_getpid(libc::c_int) -> libc::c_int } - static PIDFD_SPAWN_SUPPORTED: AtomicU8 = AtomicU8::new(0); + static PIDFD_SUPPORTED: AtomicU8 = AtomicU8::new(0); const UNKNOWN: u8 = 0; - const YES: u8 = 1; - // NO currently forces a fallback to fork/exec. We could be more nuanced here and keep using spawn - // if we know pidfd's aren't supported at all and the fallback would be futile. - const NO: u8 = 2; + const SPAWN: u8 = 1; + // Obtaining a pidfd via the fork+exec path might work + const FORK_EXEC: u8 = 2; + // Neither pidfd_spawn nor fork/exec will get us a pidfd. + // Instead we'll just posix_spawn if the other preconditions are met. + const NO: u8 = 3; if self.get_create_pidfd() { - let flag = PIDFD_SPAWN_SUPPORTED.load(Ordering::Relaxed); - if flag == NO || pidfd_spawnp.get().is_none() || pidfd_getpid.get().is_none() { + let mut support = PIDFD_SUPPORTED.load(Ordering::Relaxed); + if support == FORK_EXEC { return Ok(None); } - if flag == UNKNOWN { - let mut support = NO; + if support == UNKNOWN { + support = NO; let our_pid = crate::process::id(); - let pidfd = - unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as libc::c_int; - if pidfd >= 0 { - let pid = unsafe { pidfd_getpid.get().unwrap()(pidfd) } as u32; - unsafe { libc::close(pidfd) }; - if pid == our_pid { - support = YES - }; + let pidfd = cvt(unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as c_int); + match pidfd { + Ok(pidfd) => { + support = FORK_EXEC; + if let Some(Ok(pid)) = pidfd_getpid.get().map(|f| cvt(unsafe { f(pidfd) } as i32)) { + if pidfd_spawnp.get().is_some() && pid as u32 == our_pid { + support = SPAWN + } + } + unsafe { libc::close(pidfd) }; + } + Err(e) if e.raw_os_error() == Some(libc::EMFILE) => { + // We're temporarily(?) out of file descriptors. In this case obtaining a pidfd would also fail + // Don't update the support flag so we can probe again later. + return Err(e) + } + _ => {} } - PIDFD_SPAWN_SUPPORTED.store(support, Ordering::Relaxed); - if support != YES { + PIDFD_SUPPORTED.store(support, Ordering::Relaxed); + if support == FORK_EXEC { return Ok(None); } } + core::assert_matches::debug_assert_matches!(support, SPAWN | NO); } } else { if self.get_create_pidfd() { @@ -691,7 +703,7 @@ impl Command { let spawn_fn = retrying_libc_posix_spawnp; #[cfg(target_os = "linux")] - if self.get_create_pidfd() { + if self.get_create_pidfd() && PIDFD_SUPPORTED.load(Ordering::Relaxed) == SPAWN { let mut pidfd: libc::c_int = -1; let spawn_res = pidfd_spawnp.get().unwrap()( &mut pidfd, @@ -706,7 +718,7 @@ impl Command { if let Err(ref e) = spawn_res && e.raw_os_error() == Some(libc::ENOSYS) { - PIDFD_SPAWN_SUPPORTED.store(NO, Ordering::Relaxed); + PIDFD_SUPPORTED.store(FORK_EXEC, Ordering::Relaxed); return Ok(None); } spawn_res?; From ec0c755704bba1b6c4faa0b10aa0d886cdfa309e Mon Sep 17 00:00:00 2001 From: The 8472 Date: Tue, 25 Jun 2024 00:17:31 +0200 Subject: [PATCH 5/5] Check that we get somewhat sane PIDs when spawning with pidfds --- library/std/src/sys/pal/unix/linux/pidfd/tests.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/library/std/src/sys/pal/unix/linux/pidfd/tests.rs b/library/std/src/sys/pal/unix/linux/pidfd/tests.rs index 672cb0efed1d1..fb928c76fbd04 100644 --- a/library/std/src/sys/pal/unix/linux/pidfd/tests.rs +++ b/library/std/src/sys/pal/unix/linux/pidfd/tests.rs @@ -21,6 +21,7 @@ fn test_command_pidfd() { let flags = super::cvt(unsafe { libc::fcntl(pidfd.as_raw_fd(), libc::F_GETFD) }).unwrap(); assert!(flags & libc::FD_CLOEXEC != 0); } + assert!(child.id() > 0 && child.id() < -1i32 as u32); let status = child.wait().expect("error waiting on pidfd"); assert_eq!(status.code(), Some(1)); @@ -47,6 +48,8 @@ fn test_command_pidfd() { let mut child = unsafe { Command::new("false").pre_exec(|| Ok(())) }.create_pidfd(true).spawn().unwrap(); + assert!(child.id() > 0 && child.id() < -1i32 as u32); + if pidfd_open_available { assert!(child.pidfd().is_ok()) }