diff --git a/crates/libcontainer/src/rootfs/mount.rs b/crates/libcontainer/src/rootfs/mount.rs index ee65e06c8..1dc0cc533 100644 --- a/crates/libcontainer/src/rootfs/mount.rs +++ b/crates/libcontainer/src/rootfs/mount.rs @@ -1,8 +1,8 @@ #[cfg(feature = "v1")] use super::symlink::Symlink; -use super::utils::{find_parent_mount, parse_mount}; +use super::utils::{find_parent_mount, parse_mount, MountOptionConfig}; use crate::{ - syscall::{syscall::create_syscall, Syscall}, + syscall::{linux, syscall::create_syscall, Syscall}, utils, utils::PathBufExt, }; @@ -12,11 +12,14 @@ use anyhow::{bail, Context, Result}; use libcgroups::common::CgroupSetup::{Hybrid, Legacy, Unified}; #[cfg(feature = "v1")] use libcgroups::common::DEFAULT_CGROUP_ROOT; -use nix::{errno::Errno, mount::MsFlags}; +use nix::{dir::Dir, errno::Errno, fcntl::OFlag, mount::MsFlags, sys::stat::Mode}; use oci_spec::runtime::{Mount as SpecMount, MountBuilder as SpecMountBuilder}; use procfs::process::{MountInfo, MountOptFields, Process}; use std::fs::{canonicalize, create_dir_all, OpenOptions}; +use std::mem; +use std::os::unix::io::AsRawFd; use std::path::{Path, PathBuf}; + #[cfg(feature = "v1")] use std::{borrow::Cow, collections::HashMap}; @@ -46,7 +49,7 @@ impl Mount { pub fn setup_mount(&self, mount: &SpecMount, options: &MountOptions) -> Result<()> { log::debug!("Mounting {:?}", mount); - let (flags, data) = parse_mount(mount); + let mut mount_option_config = parse_mount(mount); match mount.typ().as_deref() { Some("cgroup") => { @@ -64,24 +67,29 @@ impl Mount { #[cfg(not(feature = "v2"))] panic!("libcontainer can't run in a Unified cgroup setup without the v2 feature"); #[cfg(feature = "v2")] - self.mount_cgroup_v2(mount, options, flags, &data) + self.mount_cgroup_v2(mount, options, &mount_option_config) .context("failed to mount cgroup v2")? 
} } } _ => { if *mount.destination() == PathBuf::from("/dev") { + mount_option_config.flags &= !MsFlags::MS_RDONLY; self.mount_into_container( mount, options.root, - flags & !MsFlags::MS_RDONLY, - &data, + &mount_option_config, options.label, ) .with_context(|| format!("failed to mount /dev: {:?}", mount))?; } else { - self.mount_into_container(mount, options.root, flags, &data, options.label) - .with_context(|| format!("failed to mount: {:?}", mount))?; + self.mount_into_container( + mount, + options.root, + &mount_option_config, + options.label, + ) + .with_context(|| format!("failed to mount: {:?}", mount))?; } } } @@ -197,11 +205,16 @@ impl Mount { subsystem_name.into() }; + let mount_options_config = MountOptionConfig { + flags: MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID | MsFlags::MS_NODEV, + data: data.to_string(), + rec_attr: None, + }; + self.mount_into_container( &subsystem_mount, options.root, - MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID | MsFlags::MS_NODEV, - &data, + &mount_options_config, options.label, ) .with_context(|| format!("failed to mount {:?}", subsystem_mount)) @@ -271,8 +284,7 @@ impl Mount { &self, cgroup_mount: &SpecMount, options: &MountOptions, - flags: MsFlags, - data: &str, + mount_option_config: &MountOptionConfig, ) -> Result<()> { log::debug!("Mounting cgroup v2 filesystem"); @@ -285,7 +297,12 @@ impl Mount { log::debug!("{:?}", cgroup_mount); if self - .mount_into_container(&cgroup_mount, options.root, flags, data, options.label) + .mount_into_container( + &cgroup_mount, + options.root, + mount_option_config, + options.label, + ) .context("failed to mount into container") .is_err() { @@ -309,11 +326,12 @@ impl Mount { .context("failed to build cgroup bind mount")?; log::debug!("{:?}", bind_mount); + let mut mount_option_config = (*mount_option_config).clone(); + mount_option_config.flags |= MsFlags::MS_BIND; self.mount_into_container( &bind_mount, options.root, - flags | MsFlags::MS_BIND, - data, + &mount_option_config, options.label, ) 
.context("failed to bind mount cgroup hierarchy")?; @@ -351,18 +369,17 @@ impl Mount { &self, m: &SpecMount, rootfs: &Path, - flags: MsFlags, - data: &str, + mount_option_config: &MountOptionConfig, label: Option<&str>, ) -> Result<()> { let typ = m.typ().as_deref(); - let mut d = data.to_string(); + let mut d = mount_option_config.data.to_string(); if let Some(l) = label { if typ != Some("proc") && typ != Some("sysfs") { - match data.is_empty() { + match mount_option_config.data.is_empty() { true => d = format!("context=\"{}\"", l), - false => d = format!("{},context=\"{}\"", data, l), + false => d = format!("{},context=\"{}\"", mount_option_config.data, l), } } } @@ -402,7 +419,10 @@ impl Mount { PathBuf::from(source) }; - if let Err(err) = self.syscall.mount(Some(&*src), dest, typ, flags, Some(&*d)) { + if let Err(err) = + self.syscall + .mount(Some(&*src), dest, typ, mount_option_config.flags, Some(&*d)) + { if let Some(errno) = err.downcast_ref() { if !matches!(errno, Errno::EINVAL) { bail!("mount of {:?} failed. 
{}", m.destination(), errno); @@ -410,12 +430,18 @@ impl Mount { } self.syscall - .mount(Some(&*src), dest, typ, flags, Some(data)) + .mount( + Some(&*src), + dest, + typ, + mount_option_config.flags, + Some(&mount_option_config.data), + ) .with_context(|| format!("failed to mount {:?} to {:?}", src, dest))?; } if typ == Some("bind") - && flags.intersects( + && mount_option_config.flags.intersects( !(MsFlags::MS_REC | MsFlags::MS_REMOUNT | MsFlags::MS_BIND @@ -425,10 +451,28 @@ impl Mount { ) { self.syscall - .mount(Some(dest), dest, None, flags | MsFlags::MS_REMOUNT, None) + .mount( + Some(dest), + dest, + None, + mount_option_config.flags | MsFlags::MS_REMOUNT, + None, + ) .with_context(|| format!("Failed to remount: {:?}", dest))?; } + if let Some(mount_attr) = &mount_option_config.rec_attr { + let open_dir = Dir::open(dest, OFlag::O_DIRECTORY, Mode::empty())?; + let dir_fd_pathbuf = PathBuf::from(format!("/proc/self/fd/{}", open_dir.as_raw_fd())); + self.syscall.mount_setattr( + -1, + &dir_fd_pathbuf, + linux::AT_RECURSIVE, + mount_attr, + mem::size_of::(), + )?; + } + Ok(()) } } @@ -462,10 +506,15 @@ mod tests { ]) .build() .unwrap(); - let (flags, data) = parse_mount(mount); + let mount_option_config = parse_mount(mount); assert!(m - .mount_into_container(mount, tmp_dir.path(), flags, &data, Some("defaults")) + .mount_into_container( + mount, + tmp_dir.path(), + &mount_option_config, + Some("defaults") + ) .is_ok()); let want = vec![MountArgs { @@ -495,7 +544,7 @@ mod tests { .options(vec!["ro".to_string()]) .build() .unwrap(); - let (flags, data) = parse_mount(mount); + let mount_option_config = parse_mount(mount); OpenOptions::new() .create(true) .write(true) @@ -503,7 +552,7 @@ mod tests { .unwrap(); assert!(m - .mount_into_container(mount, tmp_dir.path(), flags, &data, None) + .mount_into_container(mount, tmp_dir.path(), &mount_option_config, None) .is_ok()); let want = vec![ @@ -768,8 +817,13 @@ mod tests { let flags = MsFlags::MS_NOEXEC | 
MsFlags::MS_NOSUID | MsFlags::MS_NODEV; // act + let mount_option_config = MountOptionConfig { + flags, + data: String::new(), + rec_attr: None, + }; mounter - .mount_cgroup_v2(&spec_cgroup_mount, &mount_opts, flags, "") + .mount_cgroup_v2(&spec_cgroup_mount, &mount_opts, &mount_option_config) .context("failed to mount cgroup v2")?; // assert diff --git a/crates/libcontainer/src/rootfs/utils.rs b/crates/libcontainer/src/rootfs/utils.rs index cfc9c5696..6adff2c4f 100644 --- a/crates/libcontainer/src/rootfs/utils.rs +++ b/crates/libcontainer/src/rootfs/utils.rs @@ -2,7 +2,24 @@ use anyhow::{anyhow, Result}; use nix::{mount::MsFlags, sys::stat::SFlag, NixPath}; use oci_spec::runtime::{LinuxDevice, LinuxDeviceBuilder, LinuxDeviceType, Mount}; use procfs::process::MountInfo; -use std::path::{Path, PathBuf}; +use std::{ + path::{Path, PathBuf}, + str::FromStr, +}; + +use crate::syscall::linux::{self, MountAttrOption}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct MountOptionConfig { + /// Mount Flags. + pub flags: MsFlags, + + /// Mount data applied to the mount. + pub data: String, + + /// RecAttr represents mount properties to be applied recursively. 
+ pub rec_attr: Option, +} pub fn default_devices() -> Vec { vec![ @@ -66,9 +83,11 @@ pub fn to_sflag(dev_type: LinuxDeviceType) -> SFlag { } } -pub fn parse_mount(m: &Mount) -> (MsFlags, String) { +pub fn parse_mount(m: &Mount) -> MountOptionConfig { let mut flags = MsFlags::empty(); let mut data = Vec::new(); + let mut mount_attr: Option = None; + if let Some(options) = &m.options() { for s in options { if let Some((is_clear, flag)) = match s.as_str() { @@ -112,12 +131,56 @@ pub fn parse_mount(m: &Mount) -> (MsFlags, String) { } else { flags |= flag; } - } else { - data.push(s.as_str()); - }; + continue; + } + + if let Ok(mount_attr_option) = linux::MountAttrOption::from_str(s.as_str()) { + let (is_clear, flag) = match mount_attr_option { + MountAttrOption::MountArrtRdonly(is_clear, flag) => (is_clear, flag), + MountAttrOption::MountAttrNosuid(is_clear, flag) => (is_clear, flag), + MountAttrOption::MountAttrNodev(is_clear, flag) => (is_clear, flag), + MountAttrOption::MountAttrNoexec(is_clear, flag) => (is_clear, flag), + MountAttrOption::MountAttrAtime(is_clear, flag) => (is_clear, flag), + MountAttrOption::MountAttrRelatime(is_clear, flag) => (is_clear, flag), + MountAttrOption::MountAttrNoatime(is_clear, flag) => (is_clear, flag), + MountAttrOption::MountAttrStrictAtime(is_clear, flag) => (is_clear, flag), + MountAttrOption::MountAttrNoDiratime(is_clear, flag) => (is_clear, flag), + MountAttrOption::MountAttrNosymfollow(is_clear, flag) => (is_clear, flag), + }; + + if mount_attr.is_none() { + mount_attr = Some(linux::MountAttr { + attr_set: 0, + attr_clr: 0, + propagation: 0, + userns_fd: 0, + }); + } + + if let Some(mount_attr) = &mut mount_attr { + if is_clear { + mount_attr.attr_clr |= flag; + } else { + mount_attr.attr_set |= flag; + if flag & linux::MOUNT_ATTR__ATIME == flag { + // https://man7.org/linux/man-pages/man2/mount_setattr.2.html + // cannot simply specify the access-time setting in attr_set, but must + // also include MOUNT_ATTR__ATIME in the 
attr_clr field. + mount_attr.attr_clr |= linux::MOUNT_ATTR__ATIME; + } + } + } + continue; + } + + data.push(s.as_str()); } } - (flags, data.join(",")) + MountOptionConfig { + flags, + data: data.join(","), + rec_attr: mount_attr, + } } /// Find parent mount of rootfs in given mount infos @@ -133,6 +196,8 @@ pub fn find_parent_mount(rootfs: &Path, mount_infos: Vec) -> Result Result { + match option { + "rro" => Ok(MountAttrOption::MountArrtRdonly(false, MOUNT_ATTR_RDONLY)), + "rrw" => Ok(MountAttrOption::MountArrtRdonly(true, MOUNT_ATTR_RDONLY)), + "rnosuid" => Ok(MountAttrOption::MountAttrNosuid(false, MOUNT_ATTR_NOSUID)), + "rsuid" => Ok(MountAttrOption::MountAttrNosuid(true, MOUNT_ATTR_NOSUID)), + "rnodev" => Ok(MountAttrOption::MountAttrNodev(false, MOUNT_ATTR_NODEV)), + "rdev" => Ok(MountAttrOption::MountAttrNodev(true, MOUNT_ATTR_NODEV)), + "rnoexec" => Ok(MountAttrOption::MountAttrNoexec(false, MOUNT_ATTR_NOEXEC)), + "rexec" => Ok(MountAttrOption::MountAttrNoexec(true, MOUNT_ATTR_NOEXEC)), + "rnodiratime" => Ok(MountAttrOption::MountAttrNoDiratime( + false, + MOUNT_ATTR_NODIRATIME, + )), + "rdiratime" => Ok(MountAttrOption::MountAttrNoDiratime( + true, + MOUNT_ATTR_NODIRATIME, + )), + "rrelatime" => Ok(MountAttrOption::MountAttrRelatime( + false, + MOUNT_ATTR_RELATIME, + )), + "rnorelatime" => Ok(MountAttrOption::MountAttrRelatime( + true, + MOUNT_ATTR_RELATIME, + )), + "rnoatime" => Ok(MountAttrOption::MountAttrNoatime(false, MOUNT_ATTR_NOATIME)), + "ratime" => Ok(MountAttrOption::MountAttrNoatime(true, MOUNT_ATTR_NOATIME)), + "rstrictatime" => Ok(MountAttrOption::MountAttrStrictAtime( + false, + MOUNT_ATTR_STRICTATIME, + )), + "rnostrictatime" => Ok(MountAttrOption::MountAttrStrictAtime( + true, + MOUNT_ATTR_STRICTATIME, + )), + "rnosymfollow" => Ok(MountAttrOption::MountAttrNosymfollow( + false, + MOUNT_ATTR_NOSYMFOLLOW, + )), + "rsymfollow" => Ok(MountAttrOption::MountAttrNosymfollow( + true, + MOUNT_ATTR_NOSYMFOLLOW, + )), + // No support for 
MOUNT_ATTR_IDMAP yet (needs UserNS FD) + _ => Err(anyhow!("Unexpected option.")), + } + } +} + +#[repr(C)] +#[derive(Debug, Clone, PartialEq, Eq)] +/// A structure used as the fourth argument of mount_setattr(2). +pub struct MountAttr { + /// Mount properties to set. + pub attr_set: u64, + + /// Mount properties to clear. + pub attr_clr: u64, + + /// Mount propagation type. + pub propagation: u64, + + /// User namespace file descriptor. + pub userns_fd: u64, +} + +impl MountAttr { + /// Return MountAttr with all flags raised. + /// This function is used in test code. + pub fn all() -> Self { + MountAttr { + attr_set: MOUNT_ATTR_RDONLY + | MOUNT_ATTR_NOSUID + | MOUNT_ATTR_NODEV + | MOUNT_ATTR_NOEXEC + | MOUNT_ATTR_NODIRATIME + | MOUNT_ATTR_RELATIME + | MOUNT_ATTR_NOATIME + | MOUNT_ATTR_STRICTATIME + | MOUNT_ATTR_NOSYMFOLLOW, + attr_clr: MOUNT_ATTR_RDONLY + | MOUNT_ATTR_NOSUID + | MOUNT_ATTR_NODEV + | MOUNT_ATTR_NOEXEC + | MOUNT_ATTR_NODIRATIME + | MOUNT_ATTR_RELATIME + | MOUNT_ATTR_NOATIME + | MOUNT_ATTR_STRICTATIME + | MOUNT_ATTR_NOSYMFOLLOW + | MOUNT_ATTR__ATIME, + propagation: 0, + userns_fd: 0, + } + } +} + /// Empty structure to implement Command trait for #[derive(Clone)] pub struct LinuxSyscall; @@ -334,6 +483,38 @@ impl Syscall for LinuxSyscall { Err(e) => bail!(e), } } + + fn mount_setattr( + &self, + dirfd: RawFd, + pathname: &Path, + flags: u32, + mount_attr: &MountAttr, + size: libc::size_t, + ) -> Result<()> { + let path_pathbuf = pathname.to_path_buf(); + let path_str = path_pathbuf.to_str(); + let path_c_string = match path_str { + Some(path_str) => CString::new(path_str)?, + None => bail!("Invalid filename"), + }; + let result = unsafe { + // TODO: nix/libc crate hasn't supported mount_setattr system call yet. 
+ syscall!( + Sysno::mount_setattr, + dirfd, + path_c_string.as_ptr(), + flags, + mount_attr as *const MountAttr, + size + ) + }; + + match result { + Ok(_) => Ok(()), + Err(e) => bail!(e), + } + } } #[cfg(test)] diff --git a/crates/libcontainer/src/syscall/syscall.rs b/crates/libcontainer/src/syscall/syscall.rs index d10648244..924339acb 100644 --- a/crates/libcontainer/src/syscall/syscall.rs +++ b/crates/libcontainer/src/syscall/syscall.rs @@ -6,6 +6,7 @@ use std::{any::Any, ffi::OsStr, path::Path, sync::Arc}; use anyhow::Result; use bitflags::bitflags; use caps::{CapSet, CapsHashSet}; +use libc; use nix::{ mount::MsFlags, sched::CloneFlags, @@ -15,7 +16,10 @@ use nix::{ use oci_spec::runtime::LinuxRlimit; -use crate::syscall::{linux::LinuxSyscall, test::TestHelperSyscall}; +use crate::syscall::{ + linux::{LinuxSyscall, MountAttr}, + test::TestHelperSyscall, +}; /// This specifies various kernel/other functionalities required for /// container management @@ -44,6 +48,14 @@ pub trait Syscall { fn chown(&self, path: &Path, owner: Option, group: Option) -> Result<()>; fn set_groups(&self, groups: &[Gid]) -> Result<()>; fn close_range(&self, preserve_fds: i32) -> Result<()>; + fn mount_setattr( + &self, + dirfd: i32, + pathname: &Path, + flags: u32, + mount_attr: &MountAttr, + size: libc::size_t, + ) -> Result<()>; } pub fn create_syscall() -> Box { diff --git a/crates/libcontainer/src/syscall/test.rs b/crates/libcontainer/src/syscall/test.rs index c2d35fafb..2bf7f034f 100644 --- a/crates/libcontainer/src/syscall/test.rs +++ b/crates/libcontainer/src/syscall/test.rs @@ -17,7 +17,7 @@ use nix::{ use oci_spec::runtime::LinuxRlimit; -use super::Syscall; +use super::{linux, Syscall}; #[derive(Clone, PartialEq, Eq, Debug)] pub struct MountArgs { @@ -238,6 +238,17 @@ impl Syscall for TestHelperSyscall { fn close_range(&self, _: i32) -> anyhow::Result<()> { todo!() } + + fn mount_setattr( + &self, + _: i32, + _: &Path, + _: u32, + _: &linux::MountAttr, + _: libc::size_t, + 
) -> anyhow::Result<()> { + todo!() + } } impl TestHelperSyscall { diff --git a/tests/rust-integration-tests/integration_test/src/main.rs b/tests/rust-integration-tests/integration_test/src/main.rs index 6bead642a..eaeadec8e 100644 --- a/tests/rust-integration-tests/integration_test/src/main.rs +++ b/tests/rust-integration-tests/integration_test/src/main.rs @@ -2,13 +2,13 @@ mod tests; mod utils; use crate::tests::hooks::get_hooks_tests; +use crate::tests::hostname::get_hostname_test; use crate::tests::lifecycle::{ContainerCreate, ContainerLifecycle}; use crate::tests::linux_ns_itype::get_ns_itype_tests; use crate::tests::pidfile::get_pidfile_test; use crate::tests::readonly_paths::get_ro_paths_test; use crate::tests::seccomp_notify::get_seccomp_notify_test; use crate::tests::tlb::get_tlb_test; -use crate::tests::hostname::get_hostname_test; use crate::utils::support::{set_runtime_path, set_runtimetest_path}; use anyhow::{Context, Result}; use clap::Parser; diff --git a/tests/rust-integration-tests/integration_test/src/tests/mod.rs b/tests/rust-integration-tests/integration_test/src/tests/mod.rs index ae2f3dbbf..64b1939d8 100644 --- a/tests/rust-integration-tests/integration_test/src/tests/mod.rs +++ b/tests/rust-integration-tests/integration_test/src/tests/mod.rs @@ -1,9 +1,9 @@ pub mod cgroups; pub mod hooks; +pub mod hostname; pub mod lifecycle; pub mod linux_ns_itype; pub mod pidfile; pub mod readonly_paths; pub mod seccomp_notify; pub mod tlb; -pub mod hostname;