From fa3354dc03eca6f65d54cce9baf86e7eb7bc0fb8 Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin
Date: Tue, 14 Jun 2022 17:19:10 -0700
Subject: [PATCH] libct: fix mounting via wrong proc fd

Due to a bug in commit 9c444070ec7, when the user and mount namespaces
are used, and the bind mount is followed by the cgroup mount in the
spec, the cgroup is mounted using the bind mount's mount fd.

This can be reproduced with podman 4.1 (when configured to use runc):

$ podman run --uidmap 0:100:10000 quay.io/libpod/testimage:20210610 mount
Error: /home/kir/git/runc/runc: runc create failed: unable to start container process: error during container init: error mounting "cgroup" to rootfs at "/sys/fs/cgroup": mount /proc/self/fd/11:/sys/fs/cgroup/systemd (via /proc/self/fd/12), flags: 0x20502f: operation not permitted: OCI permission denied

or manually with the spec mounts containing something like this:

    {
      "destination": "/etc/resolv.conf",
      "type": "bind",
      "source": "/userdata/resolv.conf",
      "options": [
        "bind"
      ]
    },
    {
      "destination": "/sys/fs/cgroup",
      "type": "cgroup",
      "source": "cgroup",
      "options": [
        "rprivate",
        "nosuid",
        "noexec",
        "nodev",
        "relatime",
        "ro"
      ]
    }

The issue was not found earlier since it requires using userns, and even
then mount fd is ignored by mountToRootfs, except for bind mounts, and
all the bind mounts have mountfd set, except for the case of cgroup v1's
/sys/fs/cgroup which is internally transformed into a bunch of bind
mounts.

This is a minimal fix for the issue, suitable for backporting.

A test case is added which reproduces the issue without the fix applied.
Fixes: 9c444070ec7 ("Open bind mount sources from the host userns")
Signed-off-by: Kir Kolyshkin
(cherry picked from commit d370e3c04660201e72ba6968342ce964c31a2d7f)
Signed-off-by: Kir Kolyshkin
---
 libcontainer/rootfs_linux.go  |  2 ++
 tests/integration/userns.bats | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
index 3cfd2bf1e43..ec7638e4d51 100644
--- a/libcontainer/rootfs_linux.go
+++ b/libcontainer/rootfs_linux.go
@@ -80,6 +80,8 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig, mountFds []int) (err
 		// Therefore, we can access mountFds[i] without any concerns.
 		if mountFds != nil && mountFds[i] != -1 {
 			mountConfig.fd = &mountFds[i]
+		} else {
+			mountConfig.fd = nil
 		}
 
 		if err := mountToRootfs(m, mountConfig); err != nil {
diff --git a/tests/integration/userns.bats b/tests/integration/userns.bats
index b1188592f14..eca1e0ce58a 100644
--- a/tests/integration/userns.bats
+++ b/tests/integration/userns.bats
@@ -64,3 +64,22 @@ function teardown() {
 	runc exec test_busybox stat /tmp/mount-1/foo.txt /tmp/mount-2/foo.txt
 	[ "$status" -eq 0 ]
 }
+
+# Issue fixed by https://github.com/opencontainers/runc/pull/3510.
+@test "userns with bind mount before a cgroupfs mount" {
+	# This can only be reproduced on cgroup v1 (and no cgroupns) due to the
+	# way it is mounted in such case (a bunch of bind mounts).
+	requires cgroups_v1
+
+	# Add a bind mount right before the /sys/fs/cgroup mount,
+	# and make sure cgroupns is not enabled.
+	update_config '	  .mounts |= map(if .destination == "/sys/fs/cgroup" then ({"source": "source-accessible/dir", "destination": "/tmp/mount-1", "options": ["bind"]}, .) else . end)
+			| .linux.namespaces -= [{"type": "cgroup"}]'
+
+	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
+	[ "$status" -eq 0 ]
+
+	# Make sure this is real cgroupfs.
+	runc exec test_busybox cat /sys/fs/cgroup/{pids,memory}/tasks
+	[ "$status" -eq 0 ]
+}