diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 69632daba2d..5e89285df0a 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -610,6 +610,9 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig { AppArmorProfile: c.config.AppArmorProfile, ProcessLabel: c.config.ProcessLabel, Rlimits: c.config.Rlimits, + CreateConsole: process.ConsoleSocket != nil, + ConsoleWidth: process.ConsoleWidth, + ConsoleHeight: process.ConsoleHeight, } if process.NoNewPrivileges != nil { cfg.NoNewPrivileges = *process.NoNewPrivileges @@ -623,9 +626,10 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig { if len(process.Rlimits) > 0 { cfg.Rlimits = process.Rlimits } - cfg.CreateConsole = process.ConsoleSocket != nil - cfg.ConsoleWidth = process.ConsoleWidth - cfg.ConsoleHeight = process.ConsoleHeight + if cgroups.IsCgroup2UnifiedMode() { + cfg.Cgroup2Path = c.cgroupManager.Path("") + } + return cfg } diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index d6b50ff3b60..798e7a84d35 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -70,6 +70,7 @@ type initConfig struct { RootlessEUID bool `json:"rootless_euid,omitempty"` RootlessCgroups bool `json:"rootless_cgroups,omitempty"` SpecState *specs.State `json:"spec_state,omitempty"` + Cgroup2Path string `json:"cgroup2_path,omitempty"` } type initer interface { diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index 3d672879263..1d8a5a03601 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -17,6 +17,7 @@ import ( "github.com/moby/sys/mountinfo" "github.com/mrunalp/fileutils" "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs2" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/devices" "github.com/opencontainers/runc/libcontainer/userns" @@ -29,6 +30,14 @@ import ( const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV +type mountConfig struct { + root string + label string + cgroup2Path string + rootlessCgroups bool + cgroupns bool +} + // needsSetupDev returns true if /dev needs to be set up. func needsSetupDev(config *configs.Config) bool { for _, m := range config.Mounts { @@ -48,7 +57,13 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) { return newSystemErrorWithCause(err, "preparing rootfs") } - hasCgroupns := config.Namespaces.Contains(configs.NEWCGROUP) + mountConfig := &mountConfig{ + root: config.Rootfs, + label: config.MountLabel, + cgroup2Path: iConfig.Cgroup2Path, + rootlessCgroups: iConfig.RootlessCgroups, + cgroupns: config.Namespaces.Contains(configs.NEWCGROUP), + } setupDev := needsSetupDev(config) for _, m := range config.Mounts { for _, precmd := range m.PremountCmds { @@ -56,7 +71,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) { return newSystemErrorWithCause(err, "running premount command") } } - if err := mountToRootfs(m, config.Rootfs, config.MountLabel, hasCgroupns); err != nil { + if err := mountToRootfs(m, mountConfig); err != nil { return newSystemErrorWithCausef(err, "mounting %q to rootfs at %q", m.Source, m.Destination) } @@ -222,7 +237,7 @@ func prepareBindMount(m *configs.Mount, rootfs string) error { return nil } -func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error { +func mountCgroupV1(m *configs.Mount, c *mountConfig) error { binds, err := getCgroupMounts(m) if err != nil { return err @@ -242,12 +257,12 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b Data: "mode=755", PropagationFlags: m.PropagationFlags, } - if err := mountToRootfs(tmpfs, rootfs, mountLabel, enableCgroupns); err != nil { + if err := mountToRootfs(tmpfs, c); err != nil { return err } for _, b := range binds { - if enableCgroupns { - subsystemPath := filepath.Join(rootfs, b.Destination) + if c.cgroupns { + subsystemPath := filepath.Join(c.root, b.Destination) if err := os.MkdirAll(subsystemPath, 0755); err != nil { return err } @@ -266,7 +281,7 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b return err } } else { - if err := mountToRootfs(b, rootfs, mountLabel, enableCgroupns); err != nil { + if err := mountToRootfs(b, c); err != nil { return err } } @@ -276,7 +291,7 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b // symlink(2) is very dumb, it will just shove the path into // the link and doesn't do any checks or relative path // conversion. Also, don't error out if the cgroup already exists. - if err := os.Symlink(mc, filepath.Join(rootfs, m.Destination, ss)); err != nil && !os.IsExist(err) { + if err := os.Symlink(mc, filepath.Join(c.root, m.Destination, ss)); err != nil && !os.IsExist(err) { return err } } @@ -284,28 +299,39 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b return nil } -func mountCgroupV2(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error { - cgroupPath, err := securejoin.SecureJoin(rootfs, m.Destination) +func mountCgroupV2(m *configs.Mount, c *mountConfig) error { + dest, err := securejoin.SecureJoin(c.root, m.Destination) if err != nil { return err } - if err := os.MkdirAll(cgroupPath, 0755); err != nil { + if err := os.MkdirAll(dest, 0755); err != nil { return err } - if err := unix.Mount(m.Source, cgroupPath, "cgroup2", uintptr(m.Flags), m.Data); err != nil { + if err := unix.Mount(m.Source, dest, "cgroup2", uintptr(m.Flags), m.Data); err != nil { // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158) if err == unix.EPERM || err == unix.EBUSY { - return unix.Mount("/sys/fs/cgroup", cgroupPath, "", uintptr(m.Flags)|unix.MS_BIND, "") + src := fs2.UnifiedMountpoint + if c.cgroupns && c.cgroup2Path != "" { + // Emulate cgroupns by bind-mounting + // the container cgroup path rather than + // the whole /sys/fs/cgroup. + src = c.cgroup2Path + } + err = unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "") + if err == unix.ENOENT && c.rootlessCgroups { + err = nil + } + return err } return err } return nil } -func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error { - var ( - dest = m.Destination - ) +func mountToRootfs(m *configs.Mount, c *mountConfig) error { + rootfs := c.root + mountLabel := c.label + dest := m.Destination if !strings.HasPrefix(dest, rootfs) { dest = filepath.Join(rootfs, dest) } @@ -424,9 +450,9 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b } case "cgroup": if cgroups.IsCgroup2UnifiedMode() { - return mountCgroupV2(m, rootfs, mountLabel, enableCgroupns) + return mountCgroupV2(m, c) } - return mountCgroupV1(m, rootfs, mountLabel, enableCgroupns) + return mountCgroupV1(m, c) default: // ensure that the destination of the mount is resolved of symlinks at mount time because // any previous mounts can invalidate the next mount's destination. diff --git a/libcontainer/specconv/example.go b/libcontainer/specconv/example.go index 8a201bc78dd..56bab3bfbfa 100644 --- a/libcontainer/specconv/example.go +++ b/libcontainer/specconv/example.go @@ -2,6 +2,7 @@ package specconv import ( "os" + "path/filepath" "strings" "github.com/opencontainers/runc/libcontainer/cgroups" @@ -200,8 +201,14 @@ func ToRootless(spec *specs.Spec) { // Fix up mounts. var mounts []specs.Mount for _, mount := range spec.Mounts { - // Ignore all mounts that are under /sys. - if strings.HasPrefix(mount.Destination, "/sys") { + // Replace the /sys mount with an rbind. + if filepath.Clean(mount.Destination) == "/sys" { + mounts = append(mounts, specs.Mount{ + Source: "/sys", + Destination: "/sys", + Type: "none", + Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, + }) continue } @@ -216,13 +223,6 @@ func ToRootless(spec *specs.Spec) { mount.Options = options mounts = append(mounts, mount) } - // Add the sysfs mount as an rbind. - mounts = append(mounts, specs.Mount{ - Source: "/sys", - Destination: "/sys", - Type: "none", - Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, - }) spec.Mounts = mounts // Remove cgroup settings. diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats index c5a927bdfc3..50c4f9bb8ee 100644 --- a/tests/integration/cgroups.bats +++ b/tests/integration/cgroups.bats @@ -138,7 +138,8 @@ function setup() { } @test "runc run (blkio weight)" { - requires root cgroups_v2 + requires cgroups_v2 + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup set_cgroups_path update_config '.linux.resources.blockIO |= {"weight": 750}' @@ -235,3 +236,18 @@ function setup() { check_cpu_weight 42 } + +@test "runc run (cgroupv2 mount inside container)" { + requires cgroups_v2 + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup + + set_cgroups_path + + runc run -d --console-socket "$CONSOLE_SOCKET" test_cgroups_unified + [ "$status" -eq 0 ] + + # Make sure we don't have any extra cgroups inside + runc exec test_cgroups_unified find /sys/fs/cgroup/ -type d + [ "$status" -eq 0 ] + [ "$(wc -l <<<"$output")" -eq 1 ] +}