Skip to content

Commit

Permalink
WIP: optimize process spawning on Linux
Browse files Browse the repository at this point in the history
By avoiding allocations and sorting when copying environment variables
  • Loading branch information
Kobzol committed Dec 8, 2023
1 parent 5ea6256 commit 61d2185
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 7 deletions.
11 changes: 7 additions & 4 deletions library/std/src/sys/unix/process/process_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,13 @@ pub struct Command {
/// `args`, followed by a `null`. Be careful when modifying `program` or
/// `args` to properly update this as well.
argv: Argv,
env: CommandEnv,
pub env: CommandEnv,

program_kind: ProgramKind,
cwd: Option<CString>,
uid: Option<uid_t>,
gid: Option<gid_t>,
saw_nul: bool,
pub saw_nul: bool,
closures: Vec<Box<dyn FnMut() -> io::Result<()> + Send + Sync>>,
groups: Option<Box<[gid_t]>>,
stdin: Option<Stdio>,
Expand Down Expand Up @@ -402,7 +402,7 @@ fn os2c(s: &OsStr, saw_nul: &mut bool) -> CString {

// Helper type to manage ownership of the strings within a C-style array.
pub struct CStringArray {
items: Vec<CString>,
pub items: Vec<CString>,
ptrs: Vec<*const c_char>,
}

Expand All @@ -426,7 +426,10 @@ impl CStringArray {
}
}

fn construct_envp(env: BTreeMap<OsString, OsString>, saw_nul: &mut bool) -> CStringArray {
pub(crate) fn construct_envp(
env: BTreeMap<OsString, OsString>,
saw_nul: &mut bool,
) -> CStringArray {
let mut result = CStringArray::with_capacity(env.len());
for (mut k, v) in env {
// Reserve additional space for '=' and null terminator
Expand Down
91 changes: 90 additions & 1 deletion library/std/src/sys/unix/process/process_unix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@ use core::ffi::NonZero_c_int;

#[cfg(target_os = "linux")]
use crate::os::linux::process::PidFd;
use crate::os::unix::ffi::OsStringExt;
#[cfg(target_os = "linux")]
use crate::os::unix::io::AsRawFd;

use crate::sys;
use crate::sys::os::{env_read_lock, environ};
use crate::sys::process::process_common::*;
#[cfg(any(
target_os = "macos",
target_os = "watchos",
Expand All @@ -29,6 +32,7 @@ use libc::RTP_ID as pid_t;
#[cfg(not(target_os = "vxworks"))]
use libc::{c_int, pid_t};

use crate::collections::HashSet;
#[cfg(not(any(
target_os = "vxworks",
target_os = "l4re",
Expand Down Expand Up @@ -68,6 +72,88 @@ cfg_if::cfg_if! {
// Command
////////////////////////////////////////////////////////////////////////////////

fn count_env_vars() -> usize {
let mut count = 0;
unsafe {
let _guard = env_read_lock();
let mut environ = *environ();
while !(*environ).is_null() {
environ = environ.add(1);
count += 1;
}
}
count
}

/// Super-duper optimized version of capturing environment variables, that tries to avoid
/// unnecessary allocations and sorting.
fn capture_envp(cmd: &mut Command) -> CStringArray {
use crate::os::unix::ffi::OsStrExt;
use crate::os::unix::ffi::OsStringExt;

// Count the upper bound of environment variables (vars from the environ + vars coming from the
// command).
let env_count_upper_bound = count_env_vars() + cmd.env.vars.len();

let mut env_array = CStringArray::with_capacity(env_count_upper_bound);

// Remember which vars were already set by the user.
// If the user value is Some, we will add the variable to `env_array` and modify `visited`.
// If the user value is None, we will only modify `visited`.
// In either case, a variable with the same name from `environ` will not be added to `env_array`.
let mut visited: HashSet<&[u8]> = HashSet::with_capacity(cmd.env.vars.len());

// First, add user defined variables to `env_array`, and mark the visited ones.
for (key, maybe_value) in cmd.env.vars.iter() {
if let Some(value) = maybe_value {
// One extra byte for '=', and one extra byte for the NULL terminator.
let mut env_var: Vec<u8> =
Vec::with_capacity(key.as_bytes().len() + value.as_bytes().len() + 2);
env_var.extend_from_slice(key.as_bytes());
env_var.push(b'=');
env_var.extend_from_slice(value.as_bytes());

if let Ok(item) = CString::new(env_var) {
env_array.push(item);
} else {
cmd.saw_nul = true;
return env_array;
}
}
visited.insert(key.as_bytes());
}

// Then, if we're not clearing the original environment, go through it, and add each variable
// to env_array if we haven't seen it yet.
if !cmd.env.clear {
unsafe {
let _guard = env_read_lock();
let mut environ = *environ();
if !environ.is_null() {
while !(*environ).is_null() {
let c_str = CStr::from_ptr(*environ);
let key_value = c_str.to_bytes();
if !key_value.is_empty() {
if let Some(pos) = memchr::memchr(b'=', &key_value[1..]).map(|p| p + 1) {
let key = &key_value[..pos];
if !visited.contains(&key) {
env_array.push(CString::from(c_str));
}
}
}
environ = environ.add(1);
}
}
}
}

env_array
}

pub fn capture_env_linux(cmd: &mut Command) -> Option<CStringArray> {
if cmd.env.is_unchanged() { None } else { Some(capture_envp(cmd)) }
}

impl Command {
pub fn spawn(
&mut self,
Expand All @@ -76,6 +162,9 @@ impl Command {
) -> io::Result<(Process, StdioPipes)> {
const CLOEXEC_MSG_FOOTER: [u8; 4] = *b"NOEX";

#[cfg(target_os = "linux")]
let envp = capture_env_linux(self);
#[cfg(not(target_os = "linux"))]
let envp = self.capture_env();

if self.saw_nul() {
Expand Down
4 changes: 2 additions & 2 deletions library/std/src/sys_common/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ use crate::sys::process::{EnvKey, ExitStatus, Process, StdioPipes};
// Stores a set of changes to an environment
#[derive(Clone)]
pub struct CommandEnv {
clear: bool,
pub clear: bool,
saw_path: bool,
vars: BTreeMap<EnvKey, Option<OsString>>,
pub vars: BTreeMap<EnvKey, Option<OsString>>,
}

impl Default for CommandEnv {
Expand Down

0 comments on commit 61d2185

Please sign in to comment.