Skip to content

Commit

Permalink
libct/cg/sd/v2: rely on systemd to set device rules
Browse files Browse the repository at this point in the history
It seems that the code added by commit b810da1 had cgroup v1
in mind, where runc overwrites the rules set by systemd. It is all
different in v2, because both ebpf programs (systemd's and runc's) have
to say "allow" for the device to get access.

So, when using cgroup v2 with systemd cgroup driver, access to devices
rules for that can't be translated to systemd properties is not possible
at all, and it makes sense to error out (rather than warn) in such case,
as the container won't work as intended.

With this change in mind, provided that runc correctly translates all
the device access rule, and systemd correctly applies those, we no
longer have to create and apply a second eBPF program with own rules.
Let's stop doing that, instead relying on systemd only.

Having two sets of rules (two ebpf programs) for cgroupv2/ebpf is
problematic for two reasons:

1. Both sets should say "ok" for access to be granted (as explained by
   the previous commit).

2. After systemd daemon-reload (which happens during routine systemd
   upgrade) the program runc adds is removed, so it's a time-bomb.

Signed-off-by: Kir Kolyshkin <[email protected]>
  • Loading branch information
kolyshkin committed May 3, 2023
1 parent caf9ea0 commit 021cc72
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 19 deletions.
35 changes: 27 additions & 8 deletions libcontainer/cgroups/devices/systemd.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import (

// systemdProperties takes the configured device rules and generates a
// corresponding set of systemd properties to configure the devices correctly.
func systemdProperties(r *configs.Resources, sdVer int) ([]systemdDbus.Property, error) {
func systemdProperties(r *configs.Resources, sdVer int, cgroupVer int) ([]systemdDbus.Property, error) {
if r.SkipDevices {
return nil, nil
}
Expand Down Expand Up @@ -95,12 +95,26 @@ func systemdProperties(r *configs.Resources, sdVer int) ([]systemdDbus.Property,
// The only type of rule we can't handle is wildcard-major rules, and
// so we'll give a warning in that case (note that the fallback code
// will insert any rules systemd couldn't handle). What amazing fun.
//
// The properties generated here are applied by systemd. In case of
// cgroup v1 ("device.{allow,deny}" files), runc when overwrites the
// systemd rules, so the correct rules are in effect (except for the
// case when systemd is restarted and our rules gets overwritten, but
// there is nothing we can do here).
//
// For cgroup v2, we only use systemd rules, so we error out if
// we can't translate some rules.

if rule.Major == devices.Wildcard {
// "_ *:n _" rules aren't supported by systemd.
if rule.Minor != devices.Wildcard {
logrus.Warnf("systemd doesn't support '*:n' device rules -- temporarily ignoring rule: %v", *rule)
w := fmt.Errorf("systemd does not support wildcard-major device rule: %+v", *rule)
if cgroupVer == 2 {
return nil, w
}
logrus.Warnf("temporarily ignoring rule: %v", w)
continue

}

// "_ *:* _" rules just wildcard everything.
Expand All @@ -117,7 +131,11 @@ func systemdProperties(r *configs.Resources, sdVer int) ([]systemdDbus.Property,
}
if group == "" {
// Couldn't find a group.
logrus.Warnf("could not find device group for '%v/%d' in /proc/devices -- temporarily ignoring rule: %v", rule.Type, rule.Major, *rule)
w := fmt.Errorf("systemd older than v240 does not support wildcard-minor rules for devices not listed in /proc/devices: +%v", *rule)
if cgroupVer == 2 {
return nil, err
}
logrus.Warnf("temporarily ignoring rule: %v", w)
continue
}
entry.Path = group
Expand All @@ -131,12 +149,13 @@ func systemdProperties(r *configs.Resources, sdVer int) ([]systemdDbus.Property,
}
if sdVer < 240 {
// Old systemd versions use stat(2) on path to find out device major:minor
// numbers and type. If the path doesn't exist, it will not add the rule,
// emitting a warning instead.
// Since all of this logic is best-effort anyway (we manually set these
// rules separately to systemd) we can safely skip entries that don't
// have a corresponding path.
// numbers and type. If the path doesn't exist, it will not add the rule.
if _, err := os.Stat(entry.Path); err != nil {
w := fmt.Errorf("systemd older than v240 does not support device rules for non-existing device: %w", err)
if cgroupVer == 2 {
return nil, err
}
logrus.Warnf("temporarily ignoring rule: %v", w)
continue
}
}
Expand Down
6 changes: 3 additions & 3 deletions libcontainer/cgroups/systemd/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ var (
isRunningSystemdOnce sync.Once
isRunningSystemd bool

GenerateDeviceProps func(r *configs.Resources, sdVer int) ([]systemdDbus.Property, error)
GenerateDeviceProps func(r *configs.Resources, sdVer, cgroupVer int) ([]systemdDbus.Property, error)
)

// NOTE: This function comes from package github.com/coreos/go-systemd/util
Expand Down Expand Up @@ -342,13 +342,13 @@ func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems st

// generateDeviceProperties takes the configured device rules and generates a
// corresponding set of systemd properties to configure the devices correctly.
func generateDeviceProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
func generateDeviceProperties(r *configs.Resources, cm *dbusConnManager, cgroupVersion int) ([]systemdDbus.Property, error) {
if GenerateDeviceProps == nil {
if len(r.Devices) > 0 {
return nil, cgroups.ErrDevicesUnsupported
}
return nil, nil
}

return GenerateDeviceProps(r, systemdVersion(cm))
return GenerateDeviceProps(r, systemdVersion(cm), cgroupVersion)
}
2 changes: 1 addition & 1 deletion libcontainer/cgroups/systemd/v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ var legacySubsystems = []subsystem{
func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
var properties []systemdDbus.Property

deviceProperties, err := generateDeviceProperties(r, cm)
deviceProperties, err := generateDeviceProperties(r, cm, 1)
if err != nil {
return nil, err
}
Expand Down
15 changes: 8 additions & 7 deletions libcontainer/cgroups/systemd/v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,12 +209,7 @@ func genV2ResourcesProperties(dirPath string, r *configs.Resources, cm *dbusConn

var properties []systemdDbus.Property

// NOTE: This is of questionable correctness because we insert our own
// devices eBPF program later. Two programs with identical rules
// aren't the end of the world, but it is a bit concerning. However
// it's unclear if systemd removes all eBPF programs attached when
// doing SetUnitProperties...
deviceProperties, err := generateDeviceProperties(r, cm)
deviceProperties, err := generateDeviceProperties(r, cm, 2)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -490,7 +485,13 @@ func (m *UnifiedManager) Set(r *configs.Resources) error {
return fmt.Errorf("unable to set unit properties: %w", err)
}

return m.fsMgr.Set(r)
// We back up systemd settings with our own (as we support more
// controllers than systemd), except for devices. To that effect,
// use a shallow copy of struct Resources with SkipDevices set.
rCopy := *r
rCopy.SkipDevices = true
rCopy.Devices = nil
return m.fsMgr.Set(&rCopy)
}

func (m *UnifiedManager) GetPaths() map[string]string {
Expand Down

0 comments on commit 021cc72

Please sign in to comment.