From 4815f2968d4f5948465bc6e12b3b892142ed8936 Mon Sep 17 00:00:00 2001
From: The 8472 <git@infinite-source.de>
Date: Sat, 22 Jun 2024 14:38:14 +0200
Subject: [PATCH 1/5] document the cvt methods

---
 std/src/sys/pal/unix/mod.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/std/src/sys/pal/unix/mod.rs b/std/src/sys/pal/unix/mod.rs
index b370f06e92baf..ae257cab1e50b 100644
--- a/std/src/sys/pal/unix/mod.rs
+++ b/std/src/sys/pal/unix/mod.rs
@@ -307,10 +307,13 @@ macro_rules! impl_is_minus_one {
 
 impl_is_minus_one! { i8 i16 i32 i64 isize }
 
+/// Convert native return values to Result using the *-1 means error is in `errno`*  convention.
+/// Non-error values are `Ok`-wrapped.
 pub fn cvt<T: IsMinusOne>(t: T) -> crate::io::Result<T> {
     if t.is_minus_one() { Err(crate::io::Error::last_os_error()) } else { Ok(t) }
 }
 
+/// `-1` → look at `errno` → retry on `EINTR`. Otherwise `Ok()`-wrap the closure return value.
 pub fn cvt_r<T, F>(mut f: F) -> crate::io::Result<T>
 where
     T: IsMinusOne,
@@ -325,6 +328,7 @@ where
 }
 
 #[allow(dead_code)] // Not used on all platforms.
+/// Zero means `Ok()`, all other values are treated as raw OS errors. Does not look at `errno`.
 pub fn cvt_nz(error: libc::c_int) -> crate::io::Result<()> {
     if error == 0 { Ok(()) } else { Err(crate::io::Error::from_raw_os_error(error)) }
 }

From 9212236fc183bbd6cfc2a149967a2d615343b3ad Mon Sep 17 00:00:00 2001
From: The 8472 <git@infinite-source.de>
Date: Sat, 22 Jun 2024 14:37:12 +0200
Subject: [PATCH 2/5] use pidfd_spawn for faster process creation when pidfds
 are requested

---
 std/src/sys/pal/unix/linux/pidfd/tests.rs    | 13 ++-
 std/src/sys/pal/unix/process/process_unix.rs | 99 +++++++++++++++++++-
 2 files changed, 106 insertions(+), 6 deletions(-)

diff --git a/std/src/sys/pal/unix/linux/pidfd/tests.rs b/std/src/sys/pal/unix/linux/pidfd/tests.rs
index 6d9532f2ef1ff..672cb0efed1d1 100644
--- a/std/src/sys/pal/unix/linux/pidfd/tests.rs
+++ b/std/src/sys/pal/unix/linux/pidfd/tests.rs
@@ -1,7 +1,7 @@
 use crate::assert_matches::assert_matches;
 use crate::os::fd::{AsRawFd, RawFd};
-use crate::os::linux::process::{ChildExt, CommandExt};
-use crate::os::unix::process::ExitStatusExt;
+use crate::os::linux::process::{ChildExt, CommandExt as _};
+use crate::os::unix::process::{CommandExt as _, ExitStatusExt};
 use crate::process::Command;
 
 #[test]
@@ -42,6 +42,15 @@ fn test_command_pidfd() {
         .unwrap()
         .pidfd()
         .expect_err("pidfd should not have been created");
+
+    // exercise the fork/exec path since the earlier attempts may have used pidfd_spawnp()
+    let mut child =
+        unsafe { Command::new("false").pre_exec(|| Ok(())) }.create_pidfd(true).spawn().unwrap();
+
+    if pidfd_open_available {
+        assert!(child.pidfd().is_ok())
+    }
+    child.wait().expect("error waiting on child");
 }
 
 #[test]
diff --git a/std/src/sys/pal/unix/process/process_unix.rs b/std/src/sys/pal/unix/process/process_unix.rs
index 32382d9a50cf4..3de66d9789fbd 100644
--- a/std/src/sys/pal/unix/process/process_unix.rs
+++ b/std/src/sys/pal/unix/process/process_unix.rs
@@ -449,17 +449,70 @@ impl Command {
         use crate::mem::MaybeUninit;
         use crate::sys::weak::weak;
         use crate::sys::{self, cvt_nz, on_broken_pipe_flag_used};
+        #[cfg(target_os = "linux")]
+        use core::sync::atomic::{AtomicU8, Ordering};
 
         if self.get_gid().is_some()
             || self.get_uid().is_some()
             || (self.env_saw_path() && !self.program_is_path())
             || !self.get_closures().is_empty()
             || self.get_groups().is_some()
-            || self.get_create_pidfd()
         {
             return Ok(None);
         }
 
+        cfg_if::cfg_if! {
+            if #[cfg(target_os = "linux")] {
+                weak! {
+                    fn pidfd_spawnp(
+                        *mut libc::c_int,
+                        *const libc::c_char,
+                        *const libc::posix_spawn_file_actions_t,
+                        *const libc::posix_spawnattr_t,
+                        *const *mut libc::c_char,
+                        *const *mut libc::c_char
+                    ) -> libc::c_int
+                }
+
+                weak! { fn pidfd_getpid(libc::c_int) -> libc::c_int }
+
+                static PIDFD_SPAWN_SUPPORTED: AtomicU8 = AtomicU8::new(0);
+                const UNKNOWN: u8 = 0;
+                const YES: u8 = 1;
+                // NO currently forces a fallback to fork/exec. We could be more nuanced here and keep using spawn
+                // if we know pidfd's aren't supported at all and the fallback would be futile.
+                const NO: u8 = 2;
+
+                if self.get_create_pidfd() {
+                    let flag = PIDFD_SPAWN_SUPPORTED.load(Ordering::Relaxed);
+                    if flag == NO || pidfd_spawnp.get().is_none() || pidfd_getpid.get().is_none() {
+                        return Ok(None);
+                    }
+                    if flag == UNKNOWN {
+                        let mut support = NO;
+                        let our_pid = crate::process::id();
+                        let pidfd =
+                            unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as libc::c_int;
+                        if pidfd >= 0 {
+                            let pid = unsafe { pidfd_getpid.get().unwrap()(pidfd) } as u32;
+                            unsafe { libc::close(pidfd) };
+                            if pid == our_pid {
+                                support = YES
+                            };
+                        }
+                        PIDFD_SPAWN_SUPPORTED.store(support, Ordering::Relaxed);
+                        if support != YES {
+                            return Ok(None);
+                        }
+                    }
+                }
+            } else {
+                if self.get_create_pidfd() {
+                    unreachable!("only implemented on linux")
+                }
+            }
+        }
+
         // Only glibc 2.24+ posix_spawn() supports returning ENOENT directly.
         #[cfg(all(target_os = "linux", target_env = "gnu"))]
         {
@@ -543,9 +596,6 @@ impl Command {
 
         let pgroup = self.get_pgroup();
 
-        // Safety: -1 indicates we don't have a pidfd.
-        let mut p = unsafe { Process::new(0, -1) };
-
         struct PosixSpawnFileActions<'a>(&'a mut MaybeUninit<libc::posix_spawn_file_actions_t>);
 
         impl Drop for PosixSpawnFileActions<'_> {
@@ -640,6 +690,47 @@ impl Command {
             #[cfg(target_os = "nto")]
             let spawn_fn = retrying_libc_posix_spawnp;
 
+            #[cfg(target_os = "linux")]
+            if self.get_create_pidfd() {
+                let mut pidfd: libc::c_int = -1;
+                let spawn_res = pidfd_spawnp.get().unwrap()(
+                    &mut pidfd,
+                    self.get_program_cstr().as_ptr(),
+                    file_actions.0.as_ptr(),
+                    attrs.0.as_ptr(),
+                    self.get_argv().as_ptr() as *const _,
+                    envp as *const _,
+                );
+
+                let spawn_res = cvt_nz(spawn_res);
+                if let Err(ref e) = spawn_res
+                    && e.raw_os_error() == Some(libc::ENOSYS)
+                {
+                    PIDFD_SPAWN_SUPPORTED.store(NO, Ordering::Relaxed);
+                    return Ok(None);
+                }
+                spawn_res?;
+
+                let pid = match cvt(pidfd_getpid.get().unwrap()(pidfd)) {
+                    Ok(pid) => pid,
+                    Err(e) => {
+                        // The child has been spawned and we are holding its pidfd.
+                        // But we cannot obtain its pid even though pidfd_getpid support was verified earlier.
+                        // This might happen if libc can't open procfs because the file descriptor limit has been reached.
+                        libc::close(pidfd);
+                        return Err(Error::new(
+                            e.kind(),
+                            "pidfd_spawnp succeeded but the child's PID could not be obtained",
+                        ));
+                    }
+                };
+
+                return Ok(Some(Process::new(pid, pidfd)));
+            }
+
+            // Safety: -1 indicates we don't have a pidfd.
+            let mut p = Process::new(0, -1);
+
             let spawn_res = spawn_fn(
                 &mut p.pid,
                 self.get_program_cstr().as_ptr(),

From bf06e436d4a18398cb7f912d5112808982240f1e Mon Sep 17 00:00:00 2001
From: The 8472 <git@infinite-source.de>
Date: Mon, 24 Jun 2024 23:10:17 +0200
Subject: [PATCH 3/5] document safety properties of the internal Process::new
 constructor

---
 std/src/sys/pal/unix/process/process_unix.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/std/src/sys/pal/unix/process/process_unix.rs b/std/src/sys/pal/unix/process/process_unix.rs
index 3de66d9789fbd..23dce4ea5fb71 100644
--- a/std/src/sys/pal/unix/process/process_unix.rs
+++ b/std/src/sys/pal/unix/process/process_unix.rs
@@ -877,6 +877,12 @@ pub struct Process {
 
 impl Process {
     #[cfg(target_os = "linux")]
+    /// # Safety
+    ///
+    /// `pidfd` must either be -1 (representing no file descriptor) or a valid, exclusively owned file
+    /// descriptor (See [I/O Safety]).
+    ///
+    /// [I/O Safety]: crate::io#io-safety
     unsafe fn new(pid: pid_t, pidfd: pid_t) -> Self {
         use crate::os::unix::io::FromRawFd;
         use crate::sys_common::FromInner;

From 4c9a96eebda9f4fec2f12da950c3fc19db3d53c3 Mon Sep 17 00:00:00 2001
From: The 8472 <git@infinite-source.de>
Date: Tue, 25 Jun 2024 00:14:55 +0200
Subject: [PATCH 4/5] more fine-grained feature-detection for pidfd spawning

we now distinguish between pidfd_spawn support, pidfd-via-fork/exec and not-supported
---
 std/src/sys/pal/unix/process/process_unix.rs | 54 ++++++++++++--------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/std/src/sys/pal/unix/process/process_unix.rs b/std/src/sys/pal/unix/process/process_unix.rs
index 23dce4ea5fb71..abd4a334783e4 100644
--- a/std/src/sys/pal/unix/process/process_unix.rs
+++ b/std/src/sys/pal/unix/process/process_unix.rs
@@ -476,35 +476,47 @@ impl Command {
 
                 weak! { fn pidfd_getpid(libc::c_int) -> libc::c_int }
 
-                static PIDFD_SPAWN_SUPPORTED: AtomicU8 = AtomicU8::new(0);
+                static PIDFD_SUPPORTED: AtomicU8 = AtomicU8::new(0);
                 const UNKNOWN: u8 = 0;
-                const YES: u8 = 1;
-                // NO currently forces a fallback to fork/exec. We could be more nuanced here and keep using spawn
-                // if we know pidfd's aren't supported at all and the fallback would be futile.
-                const NO: u8 = 2;
+                const SPAWN: u8 = 1;
+                // Obtaining a pidfd via the fork+exec path might work
+                const FORK_EXEC: u8 = 2;
+                // Neither pidfd_spawn nor fork/exec will get us a pidfd.
+                // Instead we'll just posix_spawn if the other preconditions are met.
+                const NO: u8 = 3;
 
                 if self.get_create_pidfd() {
-                    let flag = PIDFD_SPAWN_SUPPORTED.load(Ordering::Relaxed);
-                    if flag == NO || pidfd_spawnp.get().is_none() || pidfd_getpid.get().is_none() {
+                    let mut support = PIDFD_SUPPORTED.load(Ordering::Relaxed);
+                    if support == FORK_EXEC {
                         return Ok(None);
                     }
-                    if flag == UNKNOWN {
-                        let mut support = NO;
+                    if support == UNKNOWN {
+                        support = NO;
                         let our_pid = crate::process::id();
-                        let pidfd =
-                            unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as libc::c_int;
-                        if pidfd >= 0 {
-                            let pid = unsafe { pidfd_getpid.get().unwrap()(pidfd) } as u32;
-                            unsafe { libc::close(pidfd) };
-                            if pid == our_pid {
-                                support = YES
-                            };
+                        let pidfd = cvt(unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as c_int);
+                        match pidfd {
+                            Ok(pidfd) => {
+                                support = FORK_EXEC;
+                                if let Some(Ok(pid)) = pidfd_getpid.get().map(|f| cvt(unsafe { f(pidfd) } as i32)) {
+                                    if pidfd_spawnp.get().is_some() && pid as u32 == our_pid {
+                                        support = SPAWN
+                                    }
+                                }
+                                unsafe { libc::close(pidfd) };
+                            }
+                            Err(e) if e.raw_os_error() == Some(libc::EMFILE) => {
+                                // We're temporarily(?) out of file descriptors.  In this case obtaining a pidfd would also fail
+                                // Don't update the support flag so we can probe again later.
+                                return Err(e)
+                            }
+                            _ => {}
                         }
-                        PIDFD_SPAWN_SUPPORTED.store(support, Ordering::Relaxed);
-                        if support != YES {
+                        PIDFD_SUPPORTED.store(support, Ordering::Relaxed);
+                        if support == FORK_EXEC {
                             return Ok(None);
                         }
                     }
+                    core::assert_matches::debug_assert_matches!(support, SPAWN | NO);
                 }
             } else {
                 if self.get_create_pidfd() {
@@ -691,7 +703,7 @@ impl Command {
             let spawn_fn = retrying_libc_posix_spawnp;
 
             #[cfg(target_os = "linux")]
-            if self.get_create_pidfd() {
+            if self.get_create_pidfd() && PIDFD_SUPPORTED.load(Ordering::Relaxed) == SPAWN {
                 let mut pidfd: libc::c_int = -1;
                 let spawn_res = pidfd_spawnp.get().unwrap()(
                     &mut pidfd,
@@ -706,7 +718,7 @@ impl Command {
                 if let Err(ref e) = spawn_res
                     && e.raw_os_error() == Some(libc::ENOSYS)
                 {
-                    PIDFD_SPAWN_SUPPORTED.store(NO, Ordering::Relaxed);
+                    PIDFD_SUPPORTED.store(FORK_EXEC, Ordering::Relaxed);
                     return Ok(None);
                 }
                 spawn_res?;

From 17d03b950a48a17f30aa32e02e0169b83f215ad3 Mon Sep 17 00:00:00 2001
From: The 8472 <git@infinite-source.de>
Date: Tue, 25 Jun 2024 00:17:31 +0200
Subject: [PATCH 5/5] Check that we get somewhat sane PIDs when spawning with
 pidfds

---
 std/src/sys/pal/unix/linux/pidfd/tests.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/std/src/sys/pal/unix/linux/pidfd/tests.rs b/std/src/sys/pal/unix/linux/pidfd/tests.rs
index 672cb0efed1d1..fb928c76fbd04 100644
--- a/std/src/sys/pal/unix/linux/pidfd/tests.rs
+++ b/std/src/sys/pal/unix/linux/pidfd/tests.rs
@@ -21,6 +21,7 @@ fn test_command_pidfd() {
         let flags = super::cvt(unsafe { libc::fcntl(pidfd.as_raw_fd(), libc::F_GETFD) }).unwrap();
         assert!(flags & libc::FD_CLOEXEC != 0);
     }
+    assert!(child.id() > 0 && child.id() < -1i32 as u32);
     let status = child.wait().expect("error waiting on pidfd");
     assert_eq!(status.code(), Some(1));
 
@@ -47,6 +48,8 @@ fn test_command_pidfd() {
     let mut child =
         unsafe { Command::new("false").pre_exec(|| Ok(())) }.create_pidfd(true).spawn().unwrap();
 
+    assert!(child.id() > 0 && child.id() < -1i32 as u32);
+
     if pidfd_open_available {
         assert!(child.pidfd().is_ok())
     }