Skip to content

Commit

Permalink
cgroupns: private cgroupns on cgroupv1 breaks --systemd
Browse files Browse the repository at this point in the history
On cgroup v1 we need to mount only the systemd named hierarchy as
writeable, so we configure the OCI runtime to mount /sys/fs/cgroup as
read-only and on top of that bind mount /sys/fs/cgroup/systemd.

But when we use a private cgroupns, we cannot do that since we don't
know the final cgroup path.

Also, do not override the mount if there is already one for
/sys/fs/cgroup/systemd.

Closes: #17727

Signed-off-by: Giuseppe Scrivano <[email protected]>
  • Loading branch information
giuseppe committed Mar 14, 2023
1 parent 01fd5bc commit 2d1f4a8
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 39 deletions.
84 changes: 45 additions & 39 deletions libpod/container_internal_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,17 +242,17 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
return err
}

hasCgroupNs := false
for _, ns := range c.config.Spec.Linux.Namespaces {
if ns.Type == spec.CgroupNamespace {
hasCgroupNs = true
break
}
}

if unified {
g.RemoveMount("/sys/fs/cgroup")

hasCgroupNs := false
for _, ns := range c.config.Spec.Linux.Namespaces {
if ns.Type == spec.CgroupNamespace {
hasCgroupNs = true
break
}
}

var systemdMnt spec.Mount
if hasCgroupNs {
systemdMnt = spec.Mount{
Expand All @@ -271,40 +271,46 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
}
g.AddMount(systemdMnt)
} else {
hasSystemdMount := MountExists(mounts, "/sys/fs/cgroup/systemd")
if hasCgroupNs && !hasSystemdMount {
return errors.New("cgroup namespace is not supported with cgroup v1 and systemd mode")
}
mountOptions := []string{"bind", "rprivate"}
skipMount := false

var statfs unix.Statfs_t
if err := unix.Statfs("/sys/fs/cgroup/systemd", &statfs); err != nil {
if errors.Is(err, os.ErrNotExist) {
// If the mount is missing on the host, we cannot bind mount it so
// just skip it.
skipMount = true
}
mountOptions = append(mountOptions, "nodev", "noexec", "nosuid")
} else {
if statfs.Flags&unix.MS_NODEV == unix.MS_NODEV {
mountOptions = append(mountOptions, "nodev")
}
if statfs.Flags&unix.MS_NOEXEC == unix.MS_NOEXEC {
mountOptions = append(mountOptions, "noexec")
}
if statfs.Flags&unix.MS_NOSUID == unix.MS_NOSUID {
mountOptions = append(mountOptions, "nosuid")
}
if statfs.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
mountOptions = append(mountOptions, "ro")

if !hasSystemdMount {
skipMount := hasSystemdMount
var statfs unix.Statfs_t
if err := unix.Statfs("/sys/fs/cgroup/systemd", &statfs); err != nil {
if errors.Is(err, os.ErrNotExist) {
// If the mount is missing on the host, we cannot bind mount it so
// just skip it.
skipMount = true
}
mountOptions = append(mountOptions, "nodev", "noexec", "nosuid")
} else {
if statfs.Flags&unix.MS_NODEV == unix.MS_NODEV {
mountOptions = append(mountOptions, "nodev")
}
if statfs.Flags&unix.MS_NOEXEC == unix.MS_NOEXEC {
mountOptions = append(mountOptions, "noexec")
}
if statfs.Flags&unix.MS_NOSUID == unix.MS_NOSUID {
mountOptions = append(mountOptions, "nosuid")
}
if statfs.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
mountOptions = append(mountOptions, "ro")
}
}
}
if !skipMount {
systemdMnt := spec.Mount{
Destination: "/sys/fs/cgroup/systemd",
Type: "bind",
Source: "/sys/fs/cgroup/systemd",
Options: mountOptions,
if !skipMount {
systemdMnt := spec.Mount{
Destination: "/sys/fs/cgroup/systemd",
Type: "bind",
Source: "/sys/fs/cgroup/systemd",
Options: mountOptions,
}
g.AddMount(systemdMnt)
g.AddLinuxMaskedPaths("/sys/fs/cgroup/systemd/release_agent")
}
g.AddMount(systemdMnt)
g.AddLinuxMaskedPaths("/sys/fs/cgroup/systemd/release_agent")
}
}

Expand Down
7 changes: 7 additions & 0 deletions test/system/250-systemd.bats
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,13 @@ LISTEN_FDNAMES=listen_fdnames" | sort)
is "${container_uuid}" "${output:0:32}" "UUID should be first 32 chars of Container id"
}

@test "podman --systemd fails on cgroup v1 with a private cgroupns" {
skip_if_cgroupsv2

run_podman 126 run --systemd=always --cgroupns=private $IMAGE true
assert "$output" =~ ".*cgroup namespace is not supported with cgroup v1 and systemd mode"
}

# https://github.com/containers/podman/issues/13153
@test "podman rootless-netns slirp4netns process should be in different cgroup" {
is_rootless || skip "only meaningful for rootless"
Expand Down
9 changes: 9 additions & 0 deletions test/system/helpers.bash
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,15 @@ function skip_if_cgroupsv1() {
fi
}

#######################
# skip_if_cgroupsv2 # ...with an optional message
#######################
function skip_if_cgroupsv2() {
if is_cgroupsv2; then
skip "${1:-test requires cgroupsv1}"
fi
}

######################
# skip_if_rootless_cgroupsv1 # ...with an optional message
######################
Expand Down

0 comments on commit 2d1f4a8

Please sign in to comment.