Skip to content

Commit

Permalink
Refactor fork/clone child stack management.
Browse files Browse the repository at this point in the history
1. Allocate the child stack using mmap, instead of on the stack. The
LLVM may optimize the memory in the wrong way to create a race condition
where to memory is still in use but free'ed.

2. Instead of having fix size 4K, we use the ulimit -s set on the
system. This is consistant with what fork and pthread_create uses.
  • Loading branch information
yihuaf committed Jul 29, 2021
1 parent 29c48c2 commit 48811a6
Showing 1 changed file with 77 additions and 9 deletions.
86 changes: 77 additions & 9 deletions src/process/fork.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,56 @@
use anyhow::Result;
use libc::c_int;
use libc::c_void;
use nix::errno::Errno;
use nix::sched;
use nix::unistd::Pid;
use std::mem;

pub fn clone(cb: sched::CloneCb, clone_flags: sched::CloneFlags) -> Result<Pid> {
// unlike fork, clone requires the caller to allocate the stack. here, we use the default
// 4KB for stack size, consistant with the runc implementation.
const STACK_SIZE: usize = 4096;
let ref mut stack: [u8; STACK_SIZE] = [0; STACK_SIZE];
// pass in the SIGCHID flag to mimic the effect of forking a process
let signal = nix::sys::signal::Signal::SIGCHLD;
let pid = sched::clone(cb, stack, clone_flags, Some(signal as i32))?;
pub fn clone(mut cb: sched::CloneCb, clone_flags: sched::CloneFlags) -> Result<Pid> {
extern "C" fn callback(data: *mut sched::CloneCb) -> c_int {
let cb: &mut sched::CloneCb = unsafe { &mut *data };
(*cb)() as c_int
}

let child_stack_top = unsafe {
let page_size: usize = match libc::sysconf(libc::_SC_PAGE_SIZE) {
-1 => 4 * 1024, // default to 4K page size
x => x as usize,
};

let mut rlimit = libc::rlimit {
rlim_cur: 0,
rlim_max: 0,
};

Errno::result(libc::getrlimit(libc::RLIMIT_STACK, &mut rlimit))?;
let default_stack_size = rlimit.rlim_cur as usize;

let child_stack = libc::mmap(
libc::PT_NULL as *mut c_void,
default_stack_size,
libc::PROT_READ | libc::PROT_WRITE,
libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_STACK,
-1,
0,
);
Errno::result(libc::mprotect(child_stack, page_size, libc::PROT_NONE))?;
let child_stack_top = child_stack.add(default_stack_size);

child_stack_top
};

let res = unsafe {
let signal = nix::sys::signal::Signal::SIGCHLD;
let combined = clone_flags.bits() | signal as c_int;
libc::clone(
mem::transmute(callback as extern "C" fn(*mut Box<dyn FnMut() -> isize>) -> i32),
child_stack_top,
combined,
&mut cb as *mut _ as *mut c_void,
)
};
let pid = Errno::result(res).map(Pid::from_raw)?;

Ok(pid)
}
Expand All @@ -23,7 +64,7 @@ mod tests {
#[test]
fn test_fork_clone() -> Result<()> {
let cb = || -> Result<()> {
// in a new pid namespace, pid of this process should be 1
// In a new pid namespace, pid of this process should be 1
let pid = unistd::getpid();
assert_eq!(unistd::Pid::from_raw(1), pid, "PID should set to 1");

Expand Down Expand Up @@ -57,4 +98,31 @@ mod tests {

bail!("Process didn't exit correctly")
}

#[test]
fn test_clone_stack_allocation() -> Result<()> {
let flags = sched::CloneFlags::empty();
let pid = super::clone(
Box::new(|| {
let mut array_on_stack = [0u8; 4096];
array_on_stack.iter_mut().for_each(|x| *x = 0);

0
}),
flags,
)?;

let status = nix::sys::wait::waitpid(pid, None)?;
if let nix::sys::wait::WaitStatus::Exited(_, exit_code) = status {
assert_eq!(
0, exit_code,
"Process didn't exit correctly {:?}",
exit_code
);

return Ok(());
}

bail!("Process didn't exit correctly")
}
}

0 comments on commit 48811a6

Please sign in to comment.