From 5369ab7d6de9ccab4ccf81e72e25f0096064f0b4 Mon Sep 17 00:00:00 2001 From: Eric Fang Date: Wed, 8 Mar 2023 06:30:38 +0100 Subject: [PATCH] Fixed container init process not re-parent to youki main process Signed-off-by: Eric Fang --- .../src/container/builder_impl.rs | 5 +- .../src/container/init_builder.rs | 9 +++- .../process/container_intermediate_process.rs | 20 +++++--- .../src/process/container_main_process.rs | 48 ++++++++----------- crates/youki/src/commands/run.rs | 2 + 5 files changed, 45 insertions(+), 39 deletions(-) diff --git a/crates/libcontainer/src/container/builder_impl.rs b/crates/libcontainer/src/container/builder_impl.rs index 694ff1ef97..0aa15489ca 100644 --- a/crates/libcontainer/src/container/builder_impl.rs +++ b/crates/libcontainer/src/container/builder_impl.rs @@ -128,8 +128,7 @@ impl<'a> ContainerBuilderImpl<'a> { detached: self.detached, }; - let (intermediate, init_pid) = - process::container_main_process::container_main_process(&container_args)?; + let init_pid = process::container_main_process::container_main_process(&container_args)?; // if file to write the pid to is specified, write pid of the child if let Some(pid_file) = &self.pid_file { @@ -146,7 +145,7 @@ impl<'a> ContainerBuilderImpl<'a> { .context("Failed to save container state")?; } - Ok(intermediate) + Ok(init_pid) } fn cleanup_container(&self) -> Result<()> { diff --git a/crates/libcontainer/src/container/init_builder.rs b/crates/libcontainer/src/container/init_builder.rs index 27e5100434..075e819937 100644 --- a/crates/libcontainer/src/container/init_builder.rs +++ b/crates/libcontainer/src/container/init_builder.rs @@ -90,10 +90,15 @@ impl<'a> InitContainerBuilder<'a> { notify_path, container: Some(container.clone()), preserve_fds: self.base.preserve_fds, - detached: false, // TODO this should be set properly based on how the command is given + // TODO: This should be set properly based on how the command is + // given. For now, set the detached to true because this is what + // `youki create` defaults to. + detached: true, }; - builder_impl.create()?; + // TODO: Fix waiting on this pid (init process) when detached = false. + let _pid = builder_impl.create()?; + container.refresh_state()?; Ok(container) diff --git a/crates/libcontainer/src/process/container_intermediate_process.rs b/crates/libcontainer/src/process/container_intermediate_process.rs index c0df19665f..4bd64783d7 100644 --- a/crates/libcontainer/src/process/container_intermediate_process.rs +++ b/crates/libcontainer/src/process/container_intermediate_process.rs @@ -84,12 +84,18 @@ pub fn container_intermediate_process( .with_context(|| format!("failed to enter pid namespace: {pid_namespace:?}"))?; } - // We have to record the pid of the child (container init process), since - // the child will be inside the pid namespace. We can't rely on child_ready - // to send us the correct pid. - let pid = fork::container_fork(|| { - // We are inside the forked process here. The first thing we have to do is to close - // any unused senders, since fork will make a dup for all the socket. + // We have to record the pid of the init process. The init process will be + // inside the pid namespace, so we can't rely on the init process to send us + // the correct pid. We also want to clone the init process as a sibling + // process to the intermediate process. The intermediate process is only + // used as a jumping board to set the init process to the correct + // configuration. The youki main process can decide what to do with the init + // process and the intermediate process can just exit safely after the job + // is done. + let pid = fork::container_clone_sibling(|| { + // We are inside the forked process here. The first thing we have to do + // is to close any unused senders, since fork will make a dup for all + // the socket. init_sender .close() .context("failed to close receiver in init process")?; @@ -109,7 +115,7 @@ pub fn container_intermediate_process( } })?; - // close the exec_notify_fd in this process + // Close the exec_notify_fd in this process if let ContainerType::TenantContainer { exec_notify_fd } = args.container_type { close(exec_notify_fd)?; } diff --git a/crates/libcontainer/src/process/container_main_process.rs b/crates/libcontainer/src/process/container_main_process.rs index 098fecef47..be1bb06e8d 100644 --- a/crates/libcontainer/src/process/container_main_process.rs +++ b/crates/libcontainer/src/process/container_main_process.rs @@ -1,9 +1,6 @@ use crate::{ container::ContainerProcessState, - process::{ - args::{ContainerArgs, ContainerType}, - channel, container_intermediate_process, fork, - }, + process::{args::ContainerArgs, channel, container_intermediate_process, fork}, rootless::Rootless, seccomp, utils, }; @@ -18,10 +15,10 @@ use nix::{ use oci_spec::runtime; use std::{io::IoSlice, path::Path}; -pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, Pid)> { +pub fn container_main_process(container_args: &ContainerArgs) -> Result { // We use a set of channels to communicate between parent and child process. // Each channel is uni-directional. Because we will pass these channel to - // forked process, we have to be deligent about closing any unused channel. + // cloned process, we have to be deligent about closing any unused channel. // At minimum, we have to close down any unused senders. The corresponding // receivers will be cleaned up once the senders are closed down. let (main_sender, main_receiver) = &mut channel::main_channel()?; @@ -29,27 +26,16 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, Pi let init_chan = &mut channel::init_channel()?; let intermediate_pid = fork::container_fork(|| { - let container_pid = container_intermediate_process::container_intermediate_process( + container_intermediate_process::container_intermediate_process( container_args, inter_chan, init_chan, main_sender, )?; - if matches!( - container_args.container_type, - ContainerType::TenantContainer { exec_notify_fd: _ } - ) && !container_args.detached - { - match waitpid(container_pid, None)? { - WaitStatus::Exited(_, s) => Ok(s), - WaitStatus::Signaled(_, sig, _) => Ok(sig as i32), - _ => Ok(0), - } - } else { - Ok(0) - } + Ok(0) })?; + // Close down unused fds. The corresponding fds are duplicated to the // child process during fork. main_sender @@ -110,13 +96,21 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, Pi log::debug!("init pid is {:?}", init_pid); - // here we send both intermediate and init pid, because : - // init pid is required for writing it to pid_file (if) given by the high-level runtime - // intermediate pid is required in the case when we call exec, as we nned to wait for the - // intermediate process to exit, which itself waits for child process (the exec process) to exit - // in order to get the proper exit code. We cannot simply wait for the init_pid , that is the actual container - // process, as it is not (direect) child of our process - Ok((intermediate_pid, init_pid)) + // Before the main process returns, we want to make sure the intermediate + // process is exit and reaped. By this point, the intermediate process + // should already exited successfully. If intermediate process errors out, + // the `init_ready` will not be sent. + match waitpid(intermediate_pid, None)? { + WaitStatus::Exited(_, s) => { + log::warn!("intermediate process failed with exit status: {s}"); + } + WaitStatus::Signaled(_, sig, _) => { + log::warn!("intermediate process killed with signal: {sig}") + } + _ => (), + }; + + Ok(init_pid) } fn sync_seccomp( diff --git a/crates/youki/src/commands/run.rs b/crates/youki/src/commands/run.rs index fd1bdca1f6..c930b372c6 100644 --- a/crates/youki/src/commands/run.rs +++ b/crates/youki/src/commands/run.rs @@ -6,6 +6,8 @@ use liboci_cli::Run; pub fn run(args: Run, root_path: PathBuf, systemd_cgroup: bool) -> Result<()> { let syscall = create_syscall(); + // TODO: `youki run` should support passing in `detached` flags. Defaults to + // detached = true right now. let mut container = ContainerBuilder::new(args.container_id.clone(), syscall.as_ref()) .with_pid_file(args.pid_file.as_ref())? .with_console_socket(args.console_socket.as_ref())