From 5411d423d741f70aabd5f6c6aa4366143609bfd0 Mon Sep 17 00:00:00 2001 From: Felix Obenhuber Date: Wed, 26 Jan 2022 11:26:29 +0100 Subject: [PATCH] Replace manifest IO Pipe with Log The `pipe` option for the container output is removed from the manifest. The option `log` is renamed to `pipe`. A new option `discard` is added for the output. --- examples/console/manifest.yaml | 6 +- examples/cpueater/manifest.yaml | 6 +- examples/crashing/manifest.yaml | 6 +- examples/hello-ferris/manifest.yaml | 6 +- examples/hello-resource/manifest.yaml | 6 +- examples/hello-world/manifest.yaml | 6 +- examples/inspect/manifest.yaml | 12 +-- examples/memeater/manifest.yaml | 6 +- examples/persistence/manifest.yaml | 6 +- examples/seccomp/manifest.yaml | 8 +- northstar-tests/test-container/manifest.yaml | 11 +-- northstar-tests/tests/tests.rs | 2 +- northstar/src/npk/manifest.rs | 45 ++++++----- northstar/src/runtime/fork/forker/impl.rs | 76 ++++++++++++++----- northstar/src/runtime/fork/forker/messages.rs | 4 + northstar/src/runtime/fork/forker/mod.rs | 4 + northstar/src/runtime/fork/init/mod.rs | 55 ++++++++------ northstar/src/runtime/ipc/share_fds.rs | 29 ++++++- northstar/src/runtime/state.rs | 20 ++++- 19 files changed, 191 insertions(+), 123 deletions(-) diff --git a/examples/console/manifest.yaml b/examples/console/manifest.yaml index 0592ddadc6..e80a4dad81 100644 --- a/examples/console/manifest.yaml +++ b/examples/console/manifest.yaml @@ -5,10 +5,8 @@ console: true uid: 1000 gid: 1000 io: - stdout: - log: - level: DEBUG - tag: console + stdout: pipe + stderr: pipe mounts: /dev: type: dev diff --git a/examples/cpueater/manifest.yaml b/examples/cpueater/manifest.yaml index 07d7e716e7..eceb048f61 100644 --- a/examples/cpueater/manifest.yaml +++ b/examples/cpueater/manifest.yaml @@ -24,7 +24,5 @@ mounts: type: bind host: /system io: - stdout: - log: - level: DEBUG - tag: cpueater + stdout: pipe + stderr: pipe diff --git a/examples/crashing/manifest.yaml b/examples/crashing/manifest.yaml index f834304067..16e4852f78 100644 --- a/examples/crashing/manifest.yaml +++ b/examples/crashing/manifest.yaml @@ -20,7 +20,5 @@ mounts: type: bind host: /system io: - stdout: - log: - level: DEBUG - tag: crashing + stdout: pipe + stderr: pipe diff --git a/examples/hello-ferris/manifest.yaml b/examples/hello-ferris/manifest.yaml index 6a249e20bc..b8273d7a07 100644 --- a/examples/hello-ferris/manifest.yaml +++ b/examples/hello-ferris/manifest.yaml @@ -37,7 +37,5 @@ mounts: dir: / options: noexec,nodev,nosuid io: - stdout: - log: - level: DEBUG - tag: ferris + stdout: pipe + stderr: pipe diff --git a/examples/hello-resource/manifest.yaml b/examples/hello-resource/manifest.yaml index 0451680377..e4533d3c78 100644 --- a/examples/hello-resource/manifest.yaml +++ b/examples/hello-resource/manifest.yaml @@ -23,7 +23,5 @@ mounts: type: bind host: /system io: - stdout: - log: - level: DEBUG - tag: hello + stdout: pipe + stderr: pipe diff --git a/examples/hello-world/manifest.yaml b/examples/hello-world/manifest.yaml index e8cfc98dec..49ce0cca6c 100644 --- a/examples/hello-world/manifest.yaml +++ b/examples/hello-world/manifest.yaml @@ -6,10 +6,8 @@ gid: 1000 env: HELLO: northstar io: - stdout: - log: - level: DEBUG - tag: hello + stdout: pipe + stderr: pipe mounts: /dev: type: dev diff --git a/examples/inspect/manifest.yaml b/examples/inspect/manifest.yaml index 726a2c8b9b..5c5cf573f5 100644 --- a/examples/inspect/manifest.yaml +++ b/examples/inspect/manifest.yaml @@ -1,17 +1,11 @@ -name: inspect +name: inspect version: 0.0.1 init: /inspect uid: 1000 gid: 1000 io: - stdout: - log: - level: DEBUG - tag: inspect - stderr: - log: - level: WARN - tag: inspect + stdout: pipe + stderr: pipe mounts: /dev: type: dev diff --git a/examples/memeater/manifest.yaml b/examples/memeater/manifest.yaml index 5e9059ce48..7b2e27c0bb 100644 --- a/examples/memeater/manifest.yaml +++ b/examples/memeater/manifest.yaml @@ -23,7 +23,5 @@ mounts: type: bind host: /system io: - stdout: - log: - level: DEBUG - tag: memeater + stdout: pipe + stderr: pipe diff --git a/examples/persistence/manifest.yaml b/examples/persistence/manifest.yaml index 2e5595de42..19dda8c4e9 100644 --- a/examples/persistence/manifest.yaml +++ b/examples/persistence/manifest.yaml @@ -20,7 +20,5 @@ mounts: type: bind host: /system io: - stdout: - log: - level: DEBUG - tag: persistence + stdout: pipe + stderr: pipe diff --git a/examples/seccomp/manifest.yaml b/examples/seccomp/manifest.yaml index 3fb5147248..4156247708 100644 --- a/examples/seccomp/manifest.yaml +++ b/examples/seccomp/manifest.yaml @@ -18,10 +18,8 @@ mounts: type: bind host: /system io: - stdout: - log: - level: DEBUG - tag: seccomp + stdout: pipe + stderr: pipe seccomp: profile: - default \ No newline at end of file + default diff --git a/northstar-tests/test-container/manifest.yaml b/northstar-tests/test-container/manifest.yaml index c284090315..d0ee9c39fe 100644 --- a/northstar-tests/test-container/manifest.yaml +++ b/northstar-tests/test-container/manifest.yaml @@ -36,14 +36,9 @@ mounts: dir: test options: nosuid,nodev,noexec io: - stdout: - log: - level: DEBUG - tag: test-container - stderr: - log: - level: DEBUG - tag: test-container + stdout: pipe + stderr: pipe + rlimits: nproc: soft: 10000 diff --git a/northstar-tests/tests/tests.rs b/northstar-tests/tests/tests.rs index 1f8542a943..867ebd1c70 100644 --- a/northstar-tests/tests/tests.rs +++ b/northstar-tests/tests/tests.rs @@ -350,7 +350,7 @@ test!(container_shall_only_have_configured_fds, { client() .start_with_args(TEST_CONTAINER, ["inspect"]) .await?; - assume("/proc/self/fd/0: /dev/pts", 5).await?; + assume("/proc/self/fd/0: /dev/null", 5).await?; assume("/proc/self/fd/1: /dev/pts", 5).await?; assume("/proc/self/fd/2: /dev/pts", 5).await?; assume("total: 3", 5).await?; diff --git a/northstar/src/npk/manifest.rs b/northstar/src/npk/manifest.rs index e4ea5e5da6..fb7ec9c56f 100644 --- a/northstar/src/npk/manifest.rs +++ b/northstar/src/npk/manifest.rs @@ -60,7 +60,8 @@ pub struct Manifest { /// Resource limits pub rlimits: Option>, /// IO configuration - pub io: Option, + #[serde(default, skip_serializing_if = "Io::is_default")] + pub io: Io, /// Optional custom data. The runtime doesnt use this. pub custom: Option, } @@ -97,7 +98,6 @@ impl Manifest { || self.seccomp.is_some() || self.capabilities.is_some() || self.suppl_groups.is_some() - || self.io.is_some() { return Err(Error::Invalid( "Resource containers must not define any of the following manifest entries:\ @@ -444,31 +444,37 @@ pub enum Mount { } /// IO configuration for stdin, stdout, stderr -#[derive(Clone, Eq, PartialEq, Debug, Serialize, Deserialize, JsonSchema)] +#[derive(Default, Clone, Eq, PartialEq, Debug, Serialize, Deserialize, JsonSchema)] #[serde(deny_unknown_fields)] pub struct Io { /// stdout configuration - #[serde(skip_serializing_if = "Option::is_none")] - pub stdout: Option, + pub stdout: Output, /// stderr configuration - #[serde(skip_serializing_if = "Option::is_none")] - pub stderr: Option, + pub stderr: Output, +} + +impl Io { + /// Returns true if the io configuration is the default one + pub fn is_default(&self) -> bool { + self.stdout == Output::default() && self.stderr == Output::default() + } } /// Io redirection for stdout/stderr #[derive(Clone, Eq, PartialEq, Debug, Serialize, Deserialize, JsonSchema)] pub enum Output { - /// Inherit the runtimes stdout/stderr + /// Discard output + #[serde(rename = "discard")] + Discard, + /// Forward output to the logging system with level and optional tag #[serde(rename = "pipe")] Pipe, - /// Forward output to the logging system with level and optional tag - #[serde(rename = "log")] - Log { - /// Level - level: Level, - /// Tag - tag: String, - }, +} + +impl Default for Output { + fn default() -> Output { + Output::Discard + } } /// Log level @@ -840,7 +846,7 @@ mounts: options: noexec autostart: critical seccomp: - allow: + allow: fork: any waitpid: any cgroups: @@ -1099,10 +1105,7 @@ seccomp: capabilities: - CAP_NET_ADMIN io: - stdout: - log: - level: DEBUG - tag: test + stdout: pipe stderr: pipe cgroups: memory: diff --git a/northstar/src/runtime/fork/forker/impl.rs b/northstar/src/runtime/fork/forker/impl.rs index 96c06f16ed..608e10cc84 100644 --- a/northstar/src/runtime/fork/forker/impl.rs +++ b/northstar/src/runtime/fork/forker/impl.rs @@ -28,12 +28,21 @@ use nix::{ }; use std::{ collections::HashMap, - os::unix::{net::UnixStream, prelude::RawFd}, + os::unix::{ + net::UnixStream, + prelude::{AsRawFd, RawFd}, + }, thread::ThreadId, }; use tokio::select; -type Inits = HashMap; +type Inits = HashMap; + +struct InitComm { + pid: Pid, + message: AsyncMessage, + share_fds: UnixStream, +} /// Entry point of the forker process pub async fn run( @@ -84,8 +93,20 @@ pub async fn run( create(&mut inits, init, console_fd) } - Some(Message::ExecRequest { container, path, args, env }) => { - let (response, exit) = exec(&mut inits, container, path, args, env).await; + Some(Message::ExecRequest { container, path, args, env, stdout, stderr }) => { + // This file descriptor corresponds to either /dev/null or some pty + let stdout_fd = nix::fcntl::open( + &stdout, + nix::fcntl::OFlag::O_RDWR, + nix::sys::stat::Mode::empty(), + ).expect("Failed to open pty"); + let stderr_fd = nix::fcntl::open( + &stderr, + nix::fcntl::OFlag::O_RDWR, + nix::sys::stat::Mode::empty(), + ).expect("Failed to open pty"); + + let (response, exit) = exec(&mut inits, container, path, args, env, stdout_fd, stderr_fd).await; exits.push(exit); @@ -113,15 +134,17 @@ fn create(inits: &mut Inits, init: Init, console_fd: Option) -> Message { // TODO: Check if container is already there... let container = init.container.clone(); - let mut socket_pair = socket_pair().expect("Failed to create socket pair"); + let mut sockets = socket_pair().expect("Failed to create socket pair"); let mut init_pid_channel = Channel::new(); + let mut fd_share_pair = socket_pair().expect("Failed to create socket pair"); let trampoline_pid = fork(|| { set_log_target("northstar::forker-trampoline".into()); util::set_parent_death_signal(Signal::SIGKILL); let mut init_pid_tx = init_pid_channel.write_end(); - let socket = socket_pair.snd(); + let socket = sockets.snd(); + let fd_share = fd_share_pair.snd(); // Create pid namespace debug!("Creating pid namespace"); @@ -132,7 +155,7 @@ fn create(inits: &mut Inits, init: Init, console_fd: Option) -> Message { debug!("Forking init of {}", init.container); let init_pid = fork(|| { // Dive into init and never return - init.run(socket, console_fd); + init.run(socket, console_fd, fd_share); }) .expect("Failed to fork init"); @@ -167,10 +190,18 @@ fn create(inits: &mut Inits, init: Init, console_fd: Option) -> Message { debug!("Created container {} with pid {}", container, pid); - let socket = socket_pair.fst(); + let socket = sockets.fst(); + let fd_share = fd_share_pair.fst(); let forker_socket: AsyncMessage = socket.try_into().expect("Failed to create AsyncMessage"); - inits.insert(container, (pid, forker_socket)); + inits.insert( + container, + InitComm { + pid, + message: forker_socket, + share_fds: fd_share, + }, + ); Message::CreateResult { init: pid } } @@ -182,8 +213,10 @@ async fn exec( path: NonNullString, args: Vec, env: Vec, + stdout_fd: RawFd, + stderr_fd: RawFd, ) -> (Message, impl Future) { - if let Some((init_pid, mut init_socket)) = inits.remove(&container) { + if let Some(mut init) = inits.remove(&container) { debug!( "Forwarding exec request for container {}: {}", container, @@ -193,9 +226,18 @@ async fn exec( let path = path.into(); let args = args.iter().cloned().map(Into::into).collect(); let env = env.iter().cloned().map(Into::into).collect(); + let message = init::Message::Exec { path, args, env }; - init_socket.send(message).await.expect("Failed to send"); - match init_socket.recv().await.expect("Failed to receive") { + + init.message.send(message).await.expect("Failed to send"); + + // Sends the stdout and stderr FDs to the init process + share_fds::send_fd(init.share_fds.as_raw_fd(), stdout_fd).expect("Failed to send"); + share_fds::send_fd(init.share_fds.as_raw_fd(), stderr_fd).expect("Failed to send"); + unistd::close(stdout_fd).expect("Failed to close stdout_fd"); + unistd::close(stderr_fd).expect("Failed to close stderr_fd"); + + match init.message.recv().await.expect("Failed to receive") { Some(init::Message::Forked { .. }) => (), _ => panic!("Unexpected init message"), } @@ -203,21 +245,21 @@ async fn exec( // Construct a future that waits to the init to signal a exit of it's child // Afterwards reap the init process which should have exitted already let exit = async move { - match init_socket.recv().await { + match init.message.recv().await { Ok(Some(init::Message::Exit { pid: _, exit_status, })) => { // Reap init process - debug!("Reaping init process of {} ({})", container, init_pid); - waitpid(unistd::Pid::from_raw(init_pid as i32), None) + debug!("Reaping init process of {} ({})", container, init.pid); + waitpid(unistd::Pid::from_raw(init.pid as i32), None) .expect("Failed to reap init process"); (container, exit_status) } Ok(None) | Err(_) => { // Reap init process - debug!("Reaping init process of {} ({})", container, init_pid); - waitpid(unistd::Pid::from_raw(init_pid as i32), None) + debug!("Reaping init process of {} ({})", container, init.pid); + waitpid(unistd::Pid::from_raw(init.pid as i32), None) .expect("Failed to reap init process"); (container, ExitStatus::Exit(-1)) } diff --git a/northstar/src/runtime/fork/forker/messages.rs b/northstar/src/runtime/fork/forker/messages.rs index a9a7078098..05937a2fc2 100644 --- a/northstar/src/runtime/fork/forker/messages.rs +++ b/northstar/src/runtime/fork/forker/messages.rs @@ -1,3 +1,5 @@ +use std::path::PathBuf; + use super::init::Init; use crate::{ api::model::NonNullString, @@ -21,6 +23,8 @@ pub enum Message { path: NonNullString, args: Vec, env: Vec, + stdout: PathBuf, + stderr: PathBuf, }, ExecResult, // TODO } diff --git a/northstar/src/runtime/fork/forker/mod.rs b/northstar/src/runtime/fork/forker/mod.rs index 539d0d7758..def66b47d0 100644 --- a/northstar/src/runtime/fork/forker/mod.rs +++ b/northstar/src/runtime/fork/forker/mod.rs @@ -135,6 +135,8 @@ impl Forker { path: &Path, args: Vec, env: Vec, + stdout: PathBuf, + stderr: PathBuf, ) -> Result<(), Error> { let path = NonNullString::try_from(path.display().to_string()).unwrap(); self.request( @@ -143,6 +145,8 @@ impl Forker { path, args, env, + stdout, + stderr, }, None, ) diff --git a/northstar/src/runtime/fork/init/mod.rs b/northstar/src/runtime/fork/init/mod.rs index ca1842b5aa..a7c3e370e5 100644 --- a/northstar/src/runtime/fork/init/mod.rs +++ b/northstar/src/runtime/fork/init/mod.rs @@ -4,7 +4,10 @@ use crate::{ npk::manifest::{Capability, RLimitResource, RLimitValue}, runtime::{ fork::util::{self, fork, set_child_subreaper, set_log_target, set_process_name}, - ipc::message::{ReceiveMessage, SendMessage}, + ipc::{ + message::{ReceiveMessage, SendMessage}, + share_fds, + }, ExitStatus, Pid, }, seccomp::AllowList, @@ -27,7 +30,6 @@ use std::{ collections::{HashMap, HashSet}, env, ffi::CString, - fs::File, os::unix::{ net::UnixStream, prelude::{AsRawFd, RawFd}, @@ -69,16 +71,17 @@ pub struct Init { } impl Init { - pub fn run(self, mut forker: UnixStream, console_fd: Option) -> ! { + pub fn run( + self, + mut forker: UnixStream, + console_fd: Option, + shared_fds: UnixStream, + ) -> ! { set_log_target(format!("northstar::init::{}", self.container)); // Become a subreaper set_child_subreaper(true); - // Setup pty - // Open the pty before entering the mount namespace and processing of mounts - let pty = self.open_pty(); - // Set the process name to init. This process inherited the process name // from the runtime set_process_name(&format!("init-{}", self.container)); @@ -135,18 +138,26 @@ impl Init { env.push(CString::new(format!("NORTHSTAR_CONSOLE={}", fd)).unwrap()); } + let stdout_fd = share_fds::recv_fd(shared_fds.as_raw_fd()) + .expect("Failed to receive stdout fd"); + let stderr_fd = share_fds::recv_fd(shared_fds.as_raw_fd()) + .expect("Failed to receive stderr fd"); + // Start new process inside the container let pid = fork(|| { util::set_parent_death_signal(Signal::SIGKILL); - // TODO: use /dev/null as stdin - unistd::dup2(pty.as_raw_fd(), nix::libc::STDIN_FILENO) - .expect("Failed to dup2"); - unistd::dup2(pty.as_raw_fd(), nix::libc::STDOUT_FILENO) - .expect("Failed to dup2"); - unistd::dup2(pty.as_raw_fd(), nix::libc::STDERR_FILENO) - .expect("Failed to dup2"); - drop(pty); + let dev_null = nix::fcntl::open( + "/dev/null", + nix::fcntl::OFlag::O_RDONLY | nix::fcntl::OFlag::O_CLOEXEC, + nix::sys::stat::Mode::empty(), + ) + .unwrap(); + unistd::dup2(dev_null, nix::libc::STDIN_FILENO).expect("Failed to dup2"); + unistd::dup2(stdout_fd, nix::libc::STDOUT_FILENO).expect("Failed to dup2"); + unistd::dup2(stderr_fd, nix::libc::STDERR_FILENO).expect("Failed to dup2"); + unistd::close(stdout_fd).expect("Failed to close"); + unistd::close(stderr_fd).expect("Failed to close"); // Set seccomp filter if let Some(ref filter) = self.seccomp { @@ -162,6 +173,10 @@ impl Init { }) .expect("Failed to spawn child process"); + // close fds + unistd::close(stdout_fd).expect("Failed to close"); + unistd::close(stderr_fd).expect("Failed to close"); + let message = Message::Forked { pid }; forker.send(&message).expect("Failed to send fork result"); @@ -315,16 +330,6 @@ impl Init { caps::set(None, caps::CapSet::Effective, &all).expect("Failed to reset effective caps"); } - /// Wait for pty and open it - fn open_pty(&self) -> File { - debug!("Opening pty {}", self.pty.display()); - std::fs::OpenOptions::new() - .read(true) - .write(true) - .open(&self.pty) - .unwrap_or_else(|e| panic!("Failed to open {}: {}", self.pty.display(), e)) - } - /// Execute list of mount calls fn mount(&self) { for mount in &self.mounts { diff --git a/northstar/src/runtime/ipc/share_fds.rs b/northstar/src/runtime/ipc/share_fds.rs index 034ad1a34f..8853467bce 100644 --- a/northstar/src/runtime/ipc/share_fds.rs +++ b/northstar/src/runtime/ipc/share_fds.rs @@ -7,17 +7,38 @@ fn os_err(err: nix::Error) -> std::io::Error { std::io::Error::from_raw_os_error(err as i32) } -pub async fn send(socket: &mut UnixStream, fd: RawFd) -> std::io::Result<()> { - let sock = socket.as_raw_fd(); - let iov = &[uio::IoVec::from_slice(&[42])]; +/// Blocking call for sending a file descriptor through a socket +pub fn send_fd(socket: RawFd, fd: RawFd) -> std::io::Result<()> { + let iov = &[uio::IoVec::from_slice(&[b'\0'])]; let fds = [fd]; let cmsg = [socket::ControlMessage::ScmRights(&fds)]; let flags = socket::MsgFlags::empty(); - socket::sendmsg(sock, iov, &cmsg, flags, None).map_err(os_err)?; + socket::sendmsg(socket, iov, &cmsg, flags, None).map_err(os_err)?; Ok(()) } +/// Blocking call for receiving a file descriptor through a socket +pub fn recv_fd(socket: RawFd) -> std::io::Result { + let sock = socket.as_raw_fd(); + let mut buf = [0u8; 1]; + let iov = &[uio::IoVec::from_mut_slice(&mut buf)]; + let mut cmsg_buffer = nix::cmsg_space!(RawFd); + let flags = socket::MsgFlags::empty(); + + let message = socket::recvmsg(sock, iov, Some(&mut cmsg_buffer), flags).map_err(os_err)?; + + match message.cmsgs().next().expect("failed to receive fds") { + socket::ControlMessageOwned::ScmRights(fds) => Ok(fds[0]), + _ => panic!("Failed to receive fds"), + } +} + +pub async fn send(socket: &mut UnixStream, fd: RawFd) -> std::io::Result<()> { + let sock = socket.as_raw_fd(); + send_fd(sock, fd) +} + pub async fn recv(socket: &mut UnixStream) -> std::io::Result { let sock = socket.as_raw_fd(); let mut buf = [0u8; 1]; diff --git a/northstar/src/runtime/state.rs b/northstar/src/runtime/state.rs index 99bc2551ac..cdbec4ac5d 100644 --- a/northstar/src/runtime/state.rs +++ b/northstar/src/runtime/state.rs @@ -533,7 +533,25 @@ impl State { env.iter().map(|s| s.as_str()).join(", ") ); - if let Err(e) = self.launcher.exec(container, &path, args, env).await { + let output_file = |out: &crate::npk::manifest::Output| -> PathBuf { + match out { + crate::npk::manifest::Output::Discard => PathBuf::from("/dev/null"), + crate::npk::manifest::Output::Pipe => pty_slave_name.clone(), + } + }; + + if let Err(e) = self + .launcher + .exec( + container, + &path, + args, + env, + output_file(&manifest.io.stdout), + output_file(&manifest.io.stderr), + ) + .await + { warn!("Failed to exec {} ({}): {}", container, pid, e); debug.destroy().await.expect("Failed to destroy debug"); cgroups.destroy().await;