From 83e4c8645b0a3359c4c08421d03d96e5f526712d Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Thu, 11 Nov 2021 21:03:53 +0900 Subject: [PATCH] Support recursive mount attrs ("rro", "rnosuid", "rnodev", ...) The new mount option "rro" makes the mount point recursively read-only, by calling `mount_setattr(2)` with `MOUNT_ATTR_RDONLY` and `AT_RECURSIVE`. https://man7.org/linux/man-pages/man2/mount_setattr.2.html Requires kernel >= 5.12. The "rro" option string conforms to the proposal in util-linux/util-linux Issue 1501. Fix issue 2823 Similarly, this commit also adds the following mount options: - rrw - r[no]{suid,dev,exec,relatime,atime,strictatime,diratime,symfollow} - [no]symfollow Signed-off-by: Akihiro Suda --- libcontainer/configs/mount.go | 6 ++ libcontainer/rootfs_linux.go | 18 ++++- libcontainer/specconv/spec_linux.go | 61 +++++++++++++-- libcontainer/utils/syscallutil/syscallutil.go | 57 ++++++++++++++ tests/integration/mounts_recursive.bats | 78 +++++++++++++++++++ 5 files changed, 211 insertions(+), 9 deletions(-) create mode 100644 libcontainer/utils/syscallutil/syscallutil.go create mode 100644 tests/integration/mounts_recursive.bats diff --git a/libcontainer/configs/mount.go b/libcontainer/configs/mount.go index 3f315f7186c..8107d7277cf 100644 --- a/libcontainer/configs/mount.go +++ b/libcontainer/configs/mount.go @@ -30,6 +30,12 @@ type Mount struct { // Relabel source if set, "z" indicates shared, "Z" indicates unshared. Relabel string `json:"relabel"` + // Mount properties to set recursively (AT_RECURSIVE), see mount_setattr(2) + RecAttrSet uint64 `json:"ret_attr_set"` + + // Mount properties to clear recursively (AT_RECURSIVE), see mount_setattr(2) + RecAttrClr uint64 `json:"rec_attr_clr"` + // Extensions are additional flags that are specific to runc. 
Extensions int `json:"extensions"` diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index 9578e442867..ec0235fdf3d 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -21,6 +21,7 @@ import ( "github.com/opencontainers/runc/libcontainer/devices" "github.com/opencontainers/runc/libcontainer/userns" "github.com/opencontainers/runc/libcontainer/utils" + "github.com/opencontainers/runc/libcontainer/utils/syscallutil" "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/selinux/go-selinux/label" "github.com/sirupsen/logrus" @@ -474,7 +475,6 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error { return err } } - if m.Relabel != "" { if err := label.Validate(m.Relabel); err != nil { return err @@ -498,6 +498,9 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error { } return mountPropagate(m, rootfs, mountLabel, mountFd) } + if err := setRecAttr(m, rootfs); err != nil { + return err + } return nil } @@ -1113,3 +1116,16 @@ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string, mountFd } return nil } + +func setRecAttr(m *configs.Mount, rootfs string) error { + if m.RecAttrSet == 0 && m.RecAttrClr == 0 { + return nil + } + return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error { + attr := syscallutil.MountAttr{ + AttrSet: m.RecAttrSet, + AttrClr: m.RecAttrClr, + } + return syscallutil.MountSetattr(-1, procfd, syscallutil.AT_RECURSIVE, &attr) + }) +} diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 44b035a86e6..ec41f16d073 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -18,6 +18,7 @@ import ( "github.com/opencontainers/runc/libcontainer/devices" "github.com/opencontainers/runc/libcontainer/seccomp" libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" + "github.com/opencontainers/runc/libcontainer/utils/syscallutil" 
"github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" @@ -332,7 +333,7 @@ func createLibcontainerMount(cwd string, m specs.Mount) (*configs.Mount, error) // return nil, fmt.Errorf("mount destination %s is not absolute", m.Destination) logrus.Warnf("mount destination %s is not absolute. Support for non-absolute mount destinations will be removed in a future release.", m.Destination) } - flags, pgflags, data, ext := parseMountOptions(m.Options) + flags, pgflags, data, recAttrSet, recAttrClr, ext := parseMountOptions(m.Options) source := m.Source device := m.Type if flags&unix.MS_BIND != 0 { @@ -351,6 +352,8 @@ func createLibcontainerMount(cwd string, m specs.Mount) (*configs.Mount, error) Data: data, Flags: flags, PropagationFlags: pgflags, + RecAttrSet: recAttrSet, + RecAttrClr: recAttrClr, Extensions: ext, }, nil } @@ -760,13 +763,15 @@ func setupUserNamespace(spec *specs.Spec, config *configs.Config) error { } // parseMountOptions parses the string and returns the flags, propagation -// flags and any mount data that it contains. -func parseMountOptions(options []string) (int, []int, string, int) { +// flags, any mount data that it contains, and {recAttrSet, recAttrClr, extFlags}. 
+func parseMountOptions(options []string) (int, []int, string, uint64, uint64, int) { var ( - flag int - pgflag []int - data []string - extFlags int + flag int + pgflag []int + data []string + recAttrSet uint64 + recAttrClr uint64 + extFlags int ) flags := map[string]struct { clear bool @@ -796,6 +801,7 @@ func parseMountOptions(options []string) (int, []int, string, int) { "norelatime": {true, unix.MS_RELATIME}, "nostrictatime": {true, unix.MS_STRICTATIME}, "nosuid": {false, unix.MS_NOSUID}, + "nosymfollow": {false, unix.MS_NOSYMFOLLOW}, // since kernel 5.10 "rbind": {false, unix.MS_BIND | unix.MS_REC}, "relatime": {false, unix.MS_RELATIME}, "remount": {false, unix.MS_REMOUNT}, @@ -805,6 +811,7 @@ func parseMountOptions(options []string) (int, []int, string, int) { "strictatime": {false, unix.MS_STRICTATIME}, "suid": {true, unix.MS_NOSUID}, "sync": {false, unix.MS_SYNCHRONOUS}, + "symfollow": {true, unix.MS_NOSYMFOLLOW}, // since kernel 5.10 } propagationFlags := map[string]int{ "private": unix.MS_PRIVATE, @@ -816,6 +823,33 @@ func parseMountOptions(options []string) (int, []int, string, int) { "rslave": unix.MS_SLAVE | unix.MS_REC, "runbindable": unix.MS_UNBINDABLE | unix.MS_REC, } + + // See https://man7.org/linux/man-pages/man2/mount_setattr.2.html + recAttrFlags := map[string]struct { + clear bool + flag uint64 + }{ + "rro": {false, syscallutil.MOUNT_ATTR_RDONLY}, + "rrw": {true, syscallutil.MOUNT_ATTR_RDONLY}, + "rnosuid": {false, syscallutil.MOUNT_ATTR_NOSUID}, + "rsuid": {true, syscallutil.MOUNT_ATTR_NOSUID}, + "rnodev": {false, syscallutil.MOUNT_ATTR_NODEV}, + "rdev": {true, syscallutil.MOUNT_ATTR_NODEV}, + "rnoexec": {false, syscallutil.MOUNT_ATTR_NOEXEC}, + "rexec": {true, syscallutil.MOUNT_ATTR_NOEXEC}, + "rnodiratime": {false, syscallutil.MOUNT_ATTR_NODIRATIME}, + "rdiratime": {true, syscallutil.MOUNT_ATTR_NODIRATIME}, + "rrelatime": {false, syscallutil.MOUNT_ATTR_RELATIME}, + "rnorelatime": {true, syscallutil.MOUNT_ATTR_RELATIME}, + "rnoatime": 
{false, syscallutil.MOUNT_ATTR_NOATIME}, + "ratime": {true, syscallutil.MOUNT_ATTR_NOATIME}, + "rstrictatime": {false, syscallutil.MOUNT_ATTR_STRICTATIME}, + "rnostrictatime": {true, syscallutil.MOUNT_ATTR_STRICTATIME}, + "rnosymfollow": {false, syscallutil.MOUNT_ATTR_NOSYMFOLLOW}, // since kernel 5.14 + "rsymfollow": {true, syscallutil.MOUNT_ATTR_NOSYMFOLLOW}, // since kernel 5.14 + // No support for MOUNT_ATTR_IDMAP yet (needs UserNS FD) + } + extensionFlags := map[string]struct { clear bool flag int @@ -834,6 +868,17 @@ func parseMountOptions(options []string) (int, []int, string, int) { } } else if f, exists := propagationFlags[o]; exists && f != 0 { pgflag = append(pgflag, f) + } else if f, exists := recAttrFlags[o]; exists { + if f.clear { + recAttrClr |= f.flag + } else { + recAttrSet |= f.flag + if f.flag&syscallutil.MOUNT_ATTR__ATIME == syscallutil.MOUNT_ATTR__ATIME { + // https://man7.org/linux/man-pages/man2/mount_setattr.2.html + // "cannot simply specify the access-time setting in attr_set, but must also include MOUNT_ATTR__ATIME in the attr_clr field." 
+ recAttrClr |= syscallutil.MOUNT_ATTR__ATIME + } + } } else if f, exists := extensionFlags[o]; exists && f.flag != 0 { if f.clear { extFlags &= ^f.flag @@ -844,7 +889,7 @@ func parseMountOptions(options []string) (int, []int, string, int) { data = append(data, o) } } - return flag, pgflag, strings.Join(data, ","), extFlags + return flag, pgflag, strings.Join(data, ","), recAttrSet, recAttrClr, extFlags } func SetupSeccomp(config *specs.LinuxSeccomp) (*configs.Seccomp, error) { diff --git a/libcontainer/utils/syscallutil/syscallutil.go b/libcontainer/utils/syscallutil/syscallutil.go new file mode 100644 index 00000000000..ec9aca66fb6 --- /dev/null +++ b/libcontainer/utils/syscallutil/syscallutil.go @@ -0,0 +1,57 @@ +// Package syscallutil provides addenda to golang.org/x/sys/unix +package syscallutil + +import ( + "unsafe" + + "golang.org/x/sys/unix" +) + +// nolint +const ( + AT_EMPTY_PATH = unix.AT_EMPTY_PATH + AT_RECURSIVE = 0x8000 // https://github.com/torvalds/linux/blob/v5.12/include/uapi/linux/fcntl.h#L112 + AT_SYMLINK_NOFOLLOW = unix.AT_SYMLINK_NOFOLLOW + AT_NO_AUTOMOUNT = unix.AT_NO_AUTOMOUNT + MOUNT_ATTR_RDONLY = 0x00000001 // since kernel 5.12, https://github.com/torvalds/linux/blob/v5.12/include/uapi/linux/mount.h#L113 + MOUNT_ATTR_NOSUID = 0x00000002 // since kernel 5.12 + MOUNT_ATTR_NODEV = 0x00000004 // since kernel 5.12 + MOUNT_ATTR_NOEXEC = 0x00000008 // since kernel 5.12 + MOUNT_ATTR__ATIME = 0x00000070 // since kernel 5.12 + MOUNT_ATTR_RELATIME = 0x00000000 // since kernel 5.12 + MOUNT_ATTR_NOATIME = 0x00000010 // since kernel 5.12 + MOUNT_ATTR_STRICTATIME = 0x00000020 // since kernel 5.12 + MOUNT_ATTR_NODIRATIME = 0x00000080 // since kernel 5.12 + MOUNT_ATTR_IDMAP = 0x00100000 // since kernel 5.12 + MOUNT_ATTR_NOSYMFOLLOW = 0x00200000 // since kernel 5.14, https://github.com/torvalds/linux/blob/v5.14/include/uapi/linux/mount.h#L123 + MOUNT_ATTR_SIZE_VER0 = 32 // https://github.com/torvalds/linux/blob/v5.12/include/uapi/linux/mount.h#L135 +) + +// 
MountAttr corresponds to struct mount_attr, version 0, appeared in kernel 5.12. +// https://github.com/torvalds/linux/blob/v5.12/include/uapi/linux/mount.h#L124-L132 +type MountAttr struct { + AttrSet uint64 // __u64 attr_set + AttrClr uint64 // __u64 attr_clr + Propagation uint64 // __u64 propagation + UsernsFd uint64 // __u64 userns_fd +} + +// MountSetattr is a wrapper for mount_setattr(2). +// +// int syscall(SYS_mount_setattr, int dirfd, const char *pathname, unsigned int flags, struct mount_attr *attr, size_t size); +// +// Requires kernel >= 5.12. +// https://man7.org/linux/man-pages/man2/mount_setattr.2.html +func MountSetattr(dirfd int, pathname string, flags uint, attr *MountAttr) error { + pathnamePtr, err := unix.BytePtrFromString(pathname) + if err != nil { + return err + } + _, _, errno := unix.Syscall6(unix.SYS_MOUNT_SETATTR, + uintptr(dirfd), uintptr(unsafe.Pointer(pathnamePtr)), uintptr(flags), + uintptr(unsafe.Pointer(attr)), unsafe.Sizeof(*attr), 0) + if errno != 0 { + return errno + } + return nil +} diff --git a/tests/integration/mounts_recursive.bats b/tests/integration/mounts_recursive.bats new file mode 100644 index 00000000000..b3ce579fc02 --- /dev/null +++ b/tests/integration/mounts_recursive.bats @@ -0,0 +1,78 @@ +#!/usr/bin/env bats + +load helpers + +TESTVOLUME="${BATS_RUN_TMPDIR}/mounts_recursive" + +function setup_volume() { + # requires root (in the current user namespace) to mount tmpfs outside runc + requires root + + mkdir -p "${TESTVOLUME}" + mount -t tmpfs none "${TESTVOLUME}" + echo "foo" >"${TESTVOLUME}/foo" + + mkdir "${TESTVOLUME}/subvol" + mount -t tmpfs none "${TESTVOLUME}/subvol" + echo "bar" >"${TESTVOLUME}/subvol/bar" +} + +function teardown_volume() { + umount -R "${TESTVOLUME}" +} + +function setup() { + setup_volume + setup_busybox +} + +function teardown() { + teardown_volume + teardown_bundle +} + +@test "runc run [rbind,ro mount is read-only but not recursively]" { + update_config ".mounts += [{source: 
\"${TESTVOLUME}\" , destination: \"/mnt\", options: [\"rbind\",\"ro\"]}]" + + runc run -d --console-socket "$CONSOLE_SOCKET" test_rbind_ro + [ "$status" -eq 0 ] + + runc exec test_rbind_ro touch /mnt/foo + [ "$status" -eq 1 ] + [[ "${output}" == *"Read-only file system"* ]] + + runc exec test_rbind_ro touch /mnt/subvol/bar + [ "$status" -eq 0 ] +} + +@test "runc run [rbind,rro mount is recursively read-only]" { + requires_kernel 5.12 + update_config ".mounts += [{source: \"${TESTVOLUME}\" , destination: \"/mnt\", options: [\"rbind\",\"rro\"]}]" + + runc run -d --console-socket "$CONSOLE_SOCKET" test_rbind_rro + [ "$status" -eq 0 ] + + runc exec test_rbind_rro touch /mnt/foo + [ "$status" -eq 1 ] + [[ "${output}" == *"Read-only file system"* ]] + + runc exec test_rbind_rro touch /mnt/subvol/bar + [ "$status" -eq 1 ] + [[ "${output}" == *"Read-only file system"* ]] +} + +@test "runc run [rbind,ro,rro mount is recursively read-only too]" { + requires_kernel 5.12 + update_config ".mounts += [{source: \"${TESTVOLUME}\" , destination: \"/mnt\", options: [\"rbind\",\"ro\",\"rro\"]}]" + + runc run -d --console-socket "$CONSOLE_SOCKET" test_rbind_ro_rro + [ "$status" -eq 0 ] + + runc exec test_rbind_ro_rro touch /mnt/foo + [ "$status" -eq 1 ] + [[ "${output}" == *"Read-only file system"* ]] + + runc exec test_rbind_ro_rro touch /mnt/subvol/bar + [ "$status" -eq 1 ] + [[ "${output}" == *"Read-only file system"* ]] +}