| //! A library for awaiting and killing child processes from multiple threads. |
| //! |
| //! - [Docs](https://docs.rs/shared_child) |
| //! - [Crate](https://crates.io/crates/shared_child) |
| //! - [Repo](https://github.com/oconnor663/shared_child.rs) |
| //! |
| //! The |
| //! [`std::process::Child`](https://doc.rust-lang.org/std/process/struct.Child.html) |
| //! type in the standard library provides |
| //! [`wait`](https://doc.rust-lang.org/std/process/struct.Child.html#method.wait) |
| //! and |
| //! [`kill`](https://doc.rust-lang.org/std/process/struct.Child.html#method.kill) |
| //! methods that take `&mut self`, making it impossible to kill a child process |
| //! while another thread is waiting on it. That design works around a race |
| //! condition in Unix's `waitpid` function, where a PID might get reused as soon |
| //! as the wait returns, so a signal sent around the same time could |
| //! accidentally get delivered to the wrong process. |
| //! |
//! However, with the newer POSIX `waitid` function, we can wait on a child
//! without freeing its PID for reuse. That makes it safe to send signals
//! concurrently. Windows has actually always supported this, by preventing PID
//! reuse while there are still open handles to a child process. This library
//! wraps `std::process::Child` for concurrent use, backed by these APIs.
| //! |
//! Compatibility note: The `libc` crate doesn't currently support `waitid` on
//! NetBSD or OpenBSD, or on older versions of OS X. There [might also
//! be](https://bugs.python.org/msg167016) some versions of OS X where the
//! `waitid` function exists but is broken. We can add a "best effort"
//! workaround using `waitpid` for these platforms as we run into them. Please
//! [file an issue](https://github.com/oconnor663/shared_child.rs/issues/new) if
//! you hit this.
| //! |
| //! # Example |
| //! |
| //! ```rust |
| //! use shared_child::SharedChild; |
| //! use std::process::Command; |
| //! use std::sync::Arc; |
| //! |
| //! // Spawn a child that will just sleep for a long time, |
| //! // and put it in an Arc to share between threads. |
| //! let mut command = Command::new("python"); |
| //! command.arg("-c").arg("import time; time.sleep(1000000000)"); |
| //! let shared_child = SharedChild::spawn(&mut command).unwrap(); |
| //! let child_arc = Arc::new(shared_child); |
| //! |
//! // On another thread, wait on the child process.
//! let child_arc_clone = child_arc.clone();
//! let thread = std::thread::spawn(move || {
//!     child_arc_clone.wait().unwrap()
//! });
| //! |
| //! // While the other thread is waiting, kill the child process. |
| //! // This wouldn't be possible with e.g. Arc<Mutex<Child>> from |
| //! // the standard library, because the waiting thread would be |
| //! // holding the mutex. |
| //! child_arc.kill().unwrap(); |
| //! |
| //! // Join the waiting thread and get the exit status. |
| //! let exit_status = thread.join().unwrap(); |
| //! assert!(!exit_status.success()); |
| //! ``` |
| |
| use std::io; |
| use std::process::{Child, ChildStderr, ChildStdin, ChildStdout, Command, ExitStatus}; |
| use std::sync::{Condvar, Mutex}; |
| |
| mod sys; |
| |
| // Publish the Unix-only SharedChildExt trait. |
| #[cfg(unix)] |
| pub mod unix; |
| |
| #[derive(Debug)] |
| pub struct SharedChild { |
| // This lock provides shared access to kill() and wait(). We never hold it |
| // during a blocking wait, though, so that non-blocking waits and kills can |
| // go through. (Blocking waits use libc::waitid with the WNOWAIT flag.) |
| child: Mutex<Child>, |
| |
| // When there are multiple waiting threads, one of them will actually wait |
| // on the child, and the rest will block on this condvar. |
| state_lock: Mutex<ChildState>, |
| state_condvar: Condvar, |
| } |
| |
| impl SharedChild { |
| /// Spawn a new `SharedChild` from a |
| /// [`std::process::Command`](https://doc.rust-lang.org/std/process/struct.Command.html). |
| pub fn spawn(command: &mut Command) -> io::Result<Self> { |
| let child = command.spawn()?; |
| Ok(Self { |
| child: Mutex::new(child), |
| state_lock: Mutex::new(NotWaiting), |
| state_condvar: Condvar::new(), |
| }) |
| } |
| |
| /// Construct a new `SharedChild` from an already spawned |
| /// [`std::process::Child`](https://doc.rust-lang.org/std/process/struct.Child.html). |
| /// |
| /// This constructor needs to know whether `child` has already been waited on, and the only way |
| /// to find that out is to call `child.try_wait()` internally. If the child process is |
| /// currently a zombie, that call will clean it up as a side effect. The [`SharedChild::spawn`] |
| /// constructor doesn't need to do this. |
| pub fn new(mut child: Child) -> io::Result<Self> { |
| let state = match child.try_wait()? { |
| Some(status) => Exited(status), |
| None => NotWaiting, |
| }; |
| Ok(Self { |
| child: Mutex::new(child), |
| state_lock: Mutex::new(state), |
| state_condvar: Condvar::new(), |
| }) |
| } |
| |
| /// Return the child process ID. |
| pub fn id(&self) -> u32 { |
| self.child.lock().unwrap().id() |
| } |
| |
| fn get_handle(&self) -> sys::Handle { |
| sys::get_handle(&self.child.lock().unwrap()) |
| } |
| |
| /// Wait for the child to exit, blocking the current thread, and return its |
| /// exit status. |
| pub fn wait(&self) -> io::Result<ExitStatus> { |
| let mut state = self.state_lock.lock().unwrap(); |
| loop { |
| match *state { |
| NotWaiting => { |
| // Either no one is waiting on the child yet, or a previous |
| // waiter failed. That means we need to do it ourselves. |
| // Break out of this loop. |
| break; |
| } |
| Waiting => { |
| // Another thread is already waiting on the child. We'll |
| // block until it signal us on the condvar, then loop again. |
| // Spurious wakeups could bring us here multiple times |
| // though, see the Condvar docs. |
| state = self.state_condvar.wait(state).unwrap(); |
| } |
| Exited(exit_status) => return Ok(exit_status), |
| } |
| } |
| |
| // If we get here, we have the state lock, and we're the thread |
| // responsible for waiting on the child. Set the state to Waiting and |
| // then release the state lock, so that other threads can observe it |
| // while we block. Afterwards we must leave the Waiting state before |
| // this function exits, or other waiters will deadlock. |
| *state = Waiting; |
| drop(state); |
| |
| // Block until the child exits without reaping it. (On Unix, that means |
| // we need to call libc::waitid with the WNOWAIT flag. On Windows |
| // waiting never reaps.) That makes it safe for another thread to kill |
| // while we're here, without racing against some process reusing the |
| // child's PID. Having only one thread in this section is important, |
| // because POSIX doesn't guarantee much about what happens when multiple |
| // threads wait on a child at the same time: |
| // http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_13 |
| let noreap_result = sys::wait_without_reaping(self.get_handle()); |
| |
| // Now either we hit an error, or the child has exited and needs to be |
| // reaped. Retake the state lock and handle all the different exit |
| // cases. No matter what happened/happens, we'll leave the Waiting state |
| // and signal the state condvar. |
| let mut state = self.state_lock.lock().unwrap(); |
| // The child has already exited, so this wait should clean up without blocking. |
| let final_result = noreap_result.and_then(|_| self.child.lock().unwrap().wait()); |
| *state = if let Ok(exit_status) = final_result { |
| Exited(exit_status) |
| } else { |
| NotWaiting |
| }; |
| self.state_condvar.notify_all(); |
| final_result |
| } |
| |
| /// Return the child's exit status if it has already exited. If the child is |
| /// still running, return `Ok(None)`. |
| pub fn try_wait(&self) -> io::Result<Option<ExitStatus>> { |
| let mut status = self.state_lock.lock().unwrap(); |
| |
| // Unlike wait() above, we don't loop on the Condvar here. If the status |
| // is Waiting or Exited, we return immediately. However, if the status |
| // is NotWaiting, we'll do a non-blocking wait below, in case the child |
| // has already exited. |
| match *status { |
| NotWaiting => {} |
| Waiting => return Ok(None), |
| Exited(exit_status) => return Ok(Some(exit_status)), |
| }; |
| |
| // No one is waiting on the child. Check to see if it's already exited. |
| // If it has, put ourselves in the Exited state. (There can't be any |
| // other waiters to signal, because the state was NotWaiting when we |
| // started, and we're still holding the status lock.) |
| if sys::try_wait_without_reaping(self.get_handle())? { |
| // The child has exited. Reap it. This should not block. |
| let exit_status = self.child.lock().unwrap().wait()?; |
| *status = Exited(exit_status); |
| Ok(Some(exit_status)) |
| } else { |
| Ok(None) |
| } |
| } |
| |
| /// Send a kill signal to the child. On Unix this sends SIGKILL, and you |
| /// should call `wait` afterwards to avoid leaving a zombie. If the process |
| /// has already been waited on, this returns `Ok(())` and does nothing. |
| pub fn kill(&self) -> io::Result<()> { |
| let status = self.state_lock.lock().unwrap(); |
| if let Exited(_) = *status { |
| return Ok(()); |
| } |
| // The child is still running. Kill it. This assumes that the wait |
| // functions above will never hold the child lock during a blocking |
| // wait. |
| self.child.lock().unwrap().kill() |
| } |
| |
| /// Consume the `SharedChild` and return the |
| /// [`std::process::Child`](https://doc.rust-lang.org/std/process/struct.Child.html) |
| /// it contains. |
| /// |
| /// We never reap the child process except by calling `wait` or `try_wait` |
| /// on it, so the child object's inner state is correct, even if it was |
| /// waited on while it was shared. |
| pub fn into_inner(self) -> Child { |
| self.child.into_inner().unwrap() |
| } |
| |
| /// Take the child's |
| /// [`stdin`](https://doc.rust-lang.org/std/process/struct.Child.html#structfield.stdin) |
| /// handle, if any. |
| /// |
| /// This will only return `Some` the first time it's called, and then only if the `Command` |
| /// that created the child was configured with `.stdin(Stdio::piped())`. |
| pub fn take_stdin(&self) -> Option<ChildStdin> { |
| self.child.lock().unwrap().stdin.take() |
| } |
| |
| /// Take the child's |
| /// [`stdout`](https://doc.rust-lang.org/std/process/struct.Child.html#structfield.stdout) |
| /// handle, if any. |
| /// |
| /// This will only return `Some` the first time it's called, and then only if the `Command` |
| /// that created the child was configured with `.stdout(Stdio::piped())`. |
| pub fn take_stdout(&self) -> Option<ChildStdout> { |
| self.child.lock().unwrap().stdout.take() |
| } |
| |
| /// Take the child's |
| /// [`stderr`](https://doc.rust-lang.org/std/process/struct.Child.html#structfield.stderr) |
| /// handle, if any. |
| /// |
| /// This will only return `Some` the first time it's called, and then only if the `Command` |
| /// that created the child was configured with `.stderr(Stdio::piped())`. |
| pub fn take_stderr(&self) -> Option<ChildStderr> { |
| self.child.lock().unwrap().stderr.take() |
| } |
| } |
| |
/// Where a `SharedChild` stands with respect to waiting on its process.
#[derive(Debug)]
enum ChildState {
    /// No thread is currently doing the blocking wait: either nobody has
    /// waited yet, or a previous waiter hit an error.
    NotWaiting,
    /// One thread is doing the blocking wait; any other waiters block on the
    /// condvar until it finishes.
    Waiting,
    /// The child has been reaped, and this is its cached exit status.
    Exited(ExitStatus),
}
| |
| use crate::ChildState::*; |
| |
#[cfg(test)]
mod tests {
    use super::*;
    use std::error::Error;
    use std::process::{Command, Stdio};
    use std::sync::Arc;

    // Each helper below builds a small child process for the tests. Python
    // isn't available on some Unix platforms, e.g. Android, so on Unix we use
    // the standard utilities and only fall back to Python elsewhere.

    /// A command that exits immediately with success.
    #[cfg(unix)]
    pub fn true_cmd() -> Command {
        Command::new("true")
    }

    /// A command that exits immediately with success.
    #[cfg(not(unix))]
    pub fn true_cmd() -> Command {
        let mut cmd = Command::new("python");
        cmd.arg("-c").arg("");
        cmd
    }

    /// A command that sleeps for so long that it has to be killed.
    #[cfg(unix)]
    pub fn sleep_forever_cmd() -> Command {
        let mut cmd = Command::new("sleep");
        cmd.arg("1000000");
        cmd
    }

    /// A command that sleeps for so long that it has to be killed.
    #[cfg(not(unix))]
    pub fn sleep_forever_cmd() -> Command {
        let mut cmd = Command::new("python");
        cmd.arg("-c").arg("import time; time.sleep(1000000)");
        cmd
    }

    /// A command that reads its stdin until EOF and then exits.
    #[cfg(unix)]
    pub fn cat_cmd() -> Command {
        Command::new("cat")
    }

    /// A command that reads its stdin until EOF and then exits.
    #[cfg(not(unix))]
    pub fn cat_cmd() -> Command {
        // This must actually block on stdin like `cat` does. An empty script
        // would exit right away, and test_new relies on the child staying
        // alive until its stdin is closed.
        let mut cmd = Command::new("python");
        cmd.arg("-c").arg("import sys; sys.stdin.read()");
        cmd
    }

    #[test]
    fn test_wait() {
        let child = SharedChild::spawn(&mut true_cmd()).unwrap();
        // Test the id() function while we're at it.
        let id = child.id();
        assert!(id > 0);
        let status = child.wait().unwrap();
        assert_eq!(status.code().unwrap(), 0);
    }

    #[test]
    fn test_kill() {
        let child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap();
        child.kill().unwrap();
        let status = child.wait().unwrap();
        assert!(!status.success());
    }

    #[test]
    fn test_try_wait() {
        let child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap();
        let maybe_status = child.try_wait().unwrap();
        assert_eq!(maybe_status, None);
        child.kill().unwrap();
        // The child will handle that signal asynchronously, so we check it
        // repeatedly in a busy loop until a status shows up.
        let status = loop {
            if let Some(status) = child.try_wait().unwrap() {
                break status;
            }
        };
        assert!(!status.success());
    }

    #[test]
    fn test_many_waiters() {
        let child = Arc::new(SharedChild::spawn(&mut sleep_forever_cmd()).unwrap());
        let mut threads = Vec::new();
        for _ in 0..10 {
            let clone = child.clone();
            threads.push(std::thread::spawn(move || clone.wait()));
        }
        child.kill().unwrap();
        for thread in threads {
            thread.join().unwrap().unwrap();
        }
    }

    #[test]
    fn test_waitid_after_exit_doesnt_hang() {
        // There are ominous reports (https://bugs.python.org/issue10812) of a
        // broken waitid implementation on OSX, which might hang forever if it
        // tries to wait on a child that's already exited.
        let child = true_cmd().spawn().unwrap();
        sys::wait_without_reaping(sys::get_handle(&child)).unwrap();
        // At this point the child has definitely exited. Wait again to test
        // that a second wait doesn't block.
        sys::wait_without_reaping(sys::get_handle(&child)).unwrap();
    }

    #[test]
    fn test_into_inner_before_wait() {
        let shared_child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap();
        let mut child = shared_child.into_inner();
        child.kill().unwrap();
        child.wait().unwrap();
    }

    #[test]
    fn test_into_inner_after_wait() {
        // This makes sure the child's inner state is valid. If we used waitpid
        // on the side, the inner child would try to wait again and cause an
        // error.
        let shared_child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap();
        shared_child.kill().unwrap();
        shared_child.wait().unwrap();
        let mut child = shared_child.into_inner();
        // The child has already been waited on. What Child::kill does in that
        // case depends on the standard library version: older versions return
        // an error, while newer ones are documented to return Ok(()). Accept
        // success, and pin the error kind whenever an error does come back.
        if let Err(kill_err) = child.kill() {
            let expected_kind = if cfg!(windows) {
                std::io::ErrorKind::PermissionDenied
            } else {
                std::io::ErrorKind::InvalidInput
            };
            assert_eq!(expected_kind, kill_err.kind());
        }
        // Either way, wait should succeed.
        child.wait().unwrap();
    }

    #[test]
    fn test_new() -> Result<(), Box<dyn Error>> {
        // Spawn a child that lives until its stdin is closed.
        let mut command = cat_cmd();
        command.stdin(Stdio::piped());
        command.stdout(Stdio::null());
        let mut child = command.spawn()?;
        let child_stdin = child.stdin.take().unwrap();

        // Construct a SharedChild from the Child, which has not yet been waited on. The child is
        // blocked on stdin, so we know it hasn't yet exited.
        let mut shared_child = SharedChild::new(child)?;
        assert!(matches!(
            *shared_child.state_lock.lock().unwrap(),
            NotWaiting,
        ));

        // Now close the child's stdin. This will cause the child to exit.
        drop(child_stdin);

        // Construct more SharedChild objects from the same child, in a loop. Eventually one of
        // them will notice that the child has exited.
        loop {
            shared_child = SharedChild::new(shared_child.into_inner())?;
            if let Exited(status) = &*shared_child.state_lock.lock().unwrap() {
                assert!(status.success());
                return Ok(());
            }
        }
    }

    #[test]
    fn test_takes() -> Result<(), Box<dyn Error>> {
        let mut command = true_cmd();
        command.stdin(Stdio::piped());
        command.stdout(Stdio::piped());
        command.stderr(Stdio::piped());
        let shared_child = SharedChild::spawn(&mut command)?;

        // The first take of each handle succeeds...
        assert!(shared_child.take_stdin().is_some());
        assert!(shared_child.take_stdout().is_some());
        assert!(shared_child.take_stderr().is_some());

        // ...and every later take comes back empty.
        assert!(shared_child.take_stdin().is_none());
        assert!(shared_child.take_stdout().is_none());
        assert!(shared_child.take_stderr().is_none());

        shared_child.wait()?;
        Ok(())
    }
}