Skip to content

Commit

Permalink
rootless: make /sys/fs/cgroup/* read-only
Browse files Browse the repository at this point in the history
The default rootless spec bind-mounts /sys with "rbind,ro" (because mounting sysfs
requires netns to be unshared), however, it does not make subfilesystems mounted
under /sys read-only.

So, files under /sys/fs/cgroup were unexpectedly writable when they are chmod/chowned
via privileged helpers such as pam_cgfs.

This patch fixes the issue by mounting /sys/fs/cgroup/* as read-only explicitly.

Signed-off-by: Akihiro Suda <[email protected]>
  • Loading branch information
AkihiroSuda committed Aug 17, 2018
1 parent 20aff4f commit 28dc2e6
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 17 deletions.
12 changes: 12 additions & 0 deletions libcontainer/rootfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
return err
}
// Add a dummy directory for cgroup2.
// Without this hack, `df` fails with `df: /sys/fs/cgroup/unified: No such file or directory`
// when /sys is bind-mounted from the host to the container (typically when netns is not unshared),
if err := os.MkdirAll(filepath.Join(rootfs, m.Destination, "unified"), 0755); err != nil {
return err
}
for _, b := range binds {
if err := mountToRootfs(b, rootfs, mountLabel); err != nil {
return err
Expand Down Expand Up @@ -360,6 +366,12 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
var binds []*configs.Mount

for _, mm := range mounts {
// when /sys is bind-mounted from the host to the container (typically when netns is not unshared),
// mm.Mountpoint can be like "/containers/foo/rootfs/sys/fs/cgroup/systemd/user.slice/user-1001.slice/session-1.scope/foo".
//
if !strings.HasPrefix(mm.Mountpoint, "/sys/fs/cgroup") {
continue
}
dir, err := mm.GetOwnCgroup(cgroupPaths)
if err != nil {
return nil, err
Expand Down
100 changes: 83 additions & 17 deletions libcontainer/specconv/example.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ package specconv

import (
"os"
"path/filepath"
"sort"
"strings"

"github.com/opencontainers/runc/libcontainer/mount"
"github.com/opencontainers/runtime-spec/specs-go"
)

Expand Down Expand Up @@ -189,33 +192,96 @@ func ToRootless(spec *specs.Spec) {
}}

// Fix up mounts.
var mounts []specs.Mount
for _, mount := range spec.Mounts {
// Ignore all mounts that are under /sys.
if strings.HasPrefix(mount.Destination, "/sys") {
continue
}

mountInfo, _ := mount.GetMounts()
fixupRootlessBindSys(spec, mountInfo)
for i := range spec.Mounts {
mount := &spec.Mounts[i]
// Remove all gid= and uid= mappings.
var options []string
for _, option := range mount.Options {
if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") {
options = append(options, option)
}
}

mount.Options = options
mounts = append(mounts, mount)
}
// Add the sysfs mount as an rbind.
mounts = append(mounts, specs.Mount{
Source: "/sys",
Destination: "/sys",
Type: "none",
Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
})
spec.Mounts = mounts

// Remove cgroup settings.
spec.Linux.Resources = nil
}

// fixupRootlessBindSys bind-mounts /sys.
// This is required only when netns is not unshared.
func fixupRootlessBindSys(spec *specs.Spec, mountInfo []*mount.Info) {
// Fix up mounts.
var (
mounts []specs.Mount
mountsUnderSys []specs.Mount
)
for _, m := range spec.Mounts {
// ignore original /sys (cannot be mounted when netns is not unshared)
if filepath.Clean(m.Destination) == "/sys" {
continue
} else if strings.HasPrefix(m.Destination, "/sys") {
// Append all mounts that are under /sys (e.g. /sys/fs/cgroup )later
mountsUnderSys = append(mountsUnderSys, m)
} else {
mounts = append(mounts, m)
}
}
mounts = append(mounts, []specs.Mount{
// Add the sysfs mount as an rbind.
// Note:
// * "ro" does not make submounts read-only recursively.
// * rbind work for sysfs but bind does not.
{
Source: "/sys",
Destination: "/sys",
Type: "none",
Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
},
}...)
spec.Mounts = append(mounts, mountsUnderSys...)
var (
maskedPaths []string
maskedPathsWritable = make(map[string]struct{})
)
for _, masked := range spec.Linux.MaskedPaths {
// e.g. when /sys/firmware is masked and /sys/firmware/efi/efivars is in /proc/self/mountinfo,
// we need to mask /sys/firmware/efi/efivars as well.
// (otherwise `df` fails with "df: /sys/firmware/efi/efivars: No such file or directory")
// also, to mask /sys/firmware/efi/efivars, we need to mask /sys/firmware as a writable tmpfs
if strings.HasPrefix(masked, "/sys") {
for _, mi := range mountInfo {
// e.g. mi.Mountpoint = /sys/firmware/efi/efivars, masked = /sys/firmware
if strings.HasPrefix(mi.Mountpoint, masked) {
maskedPathsWritable[masked] = struct{}{}
// mi.Mountpoint is added to maskedPathsWritable for ease of supporting nested case
maskedPathsWritable[mi.Mountpoint] = struct{}{}
}
}
}
if _, ok := maskedPathsWritable[masked]; !ok {
maskedPaths = append(maskedPaths, masked)
}
}
spec.Linux.MaskedPaths = maskedPaths
for _, s := range sortMapKeyStrings(maskedPathsWritable) {
spec.Mounts = append(spec.Mounts,
specs.Mount{
Source: "none",
Destination: s,
Type: "tmpfs",
Options: []string{"nosuid", "noexec", "nodev", "mode=0755"},
})
}
}

func sortMapKeyStrings(m map[string]struct{}) []string {
var ss []string
for s := range m {
ss = append(ss, s)
}
sort.Strings(ss)
return ss
}
16 changes: 16 additions & 0 deletions tests/integration/cgroups.bats
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,19 @@ EOF
[ "$status" -eq 0 ]
[[ ${lines[0]} == *"cgroups_exec"* ]]
}

@test "runc exec write cgroup (limits + cgrouppath + permission on the cgroup dir) fails" {
[[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup

set_cgroups_path "$BUSYBOX_BUNDLE"
set_resources_limit "$BUSYBOX_BUNDLE"

runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions
[ "$status" -eq 0 ]

# In previous version of rootless runc, the whole /sys was just recursively bind-mounted
# but not recursively read-only
runc exec test_cgroups_permissions mkdir -p /sys/fs/cgroup/cpuset/runc-cgroups-integration-test/dummy
[ "$status" -eq 1 ]
[[ ${lines[0]} == *"Read-only file system"* ]]
}
9 changes: 9 additions & 0 deletions tests/integration/mounts.bats
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,12 @@ function teardown() {
[ "$status" -eq 0 ]
[[ "${lines[0]}" =~ '/tmp/bind/config.json' ]]
}

# make sure there is no "df: /foo/bar: No such file or directory" issue in the default configuration
@test "runc run [df]" {
CONFIG=$(jq '.process.args = ["df"]' config.json)
echo "${CONFIG}" >config.json

runc run test_df
[ "$status" -eq 0 ]
}

0 comments on commit 28dc2e6

Please sign in to comment.