Skip to content

Commit

Permalink
cgroup v2: support rootless systemd
Browse files Browse the repository at this point in the history
Tested with both Podman (master) and Moby (master), on Ubuntu 19.10 .

$ podman --cgroup-manager=systemd run -it --rm --runtime=runc \
  --cgroupns=host --memory 42m --cpus 0.42 --pids-limit 42 alpine
/ # cat /proc/self/cgroup
0::/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope
/ # cat /sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope/memory.max
44040192
/ # cat /sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope/cpu.max
42000 100000
/ # cat /sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope/pids.max
42

Signed-off-by: Akihiro Suda <[email protected]>
  • Loading branch information
AkihiroSuda committed Apr 28, 2020
1 parent 23ebc61 commit cfbcc3c
Show file tree
Hide file tree
Showing 7 changed files with 199 additions and 35 deletions.
22 changes: 8 additions & 14 deletions libcontainer/cgroups/systemd/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,13 @@ func ExpandSlice(slice string) (string, error) {

// getDbusConnection lazy initializes systemd dbus connection
// and returns it
func getDbusConnection() (*systemdDbus.Conn, error) {
func getDbusConnection(rootless bool) (*systemdDbus.Conn, error) {
connOnce.Do(func() {
connDbus, connErr = systemdDbus.New()
if rootless {
connDbus, connErr = NewUserSystemdDbus()
} else {
connDbus, connErr = systemdDbus.New()
}
})
return connDbus, connErr
}
Expand Down Expand Up @@ -103,12 +107,7 @@ func isUnitExists(err error) bool {
return false
}

func startUnit(unitName string, properties []systemdDbus.Property) error {
dbusConnection, err := getDbusConnection()
if err != nil {
return err
}

func startUnit(dbusConnection *systemdDbus.Conn, unitName string, properties []systemdDbus.Property) error {
statusChan := make(chan string, 1)
if _, err := dbusConnection.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
select {
Expand All @@ -129,12 +128,7 @@ func startUnit(unitName string, properties []systemdDbus.Property) error {
return nil
}

func stopUnit(unitName string) error {
dbusConnection, err := getDbusConnection()
if err != nil {
return err
}

func stopUnit(dbusConnection *systemdDbus.Conn, unitName string) error {
statusChan := make(chan string, 1)
if _, err := dbusConnection.StopUnit(unitName, "replace", statusChan); err == nil {
select {
Expand Down
103 changes: 103 additions & 0 deletions libcontainer/cgroups/systemd/user.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// +build linux

package systemd

import (
"bufio"
"bytes"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"

systemdDbus "github.com/coreos/go-systemd/v22/dbus"
dbus "github.com/godbus/dbus/v5"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/pkg/errors"
)

// NewUserSystemdDbus creates a connection for systemd user-instance.
//
// The bus address is obtained from DetectUserDbusSessionBusAddress and the UID
// presented to the bus from DetectUID; a failure of either is returned as-is.
// The connection is dialed, authenticated with dbus.AuthExternal and greeted
// with Hello inside the dialer passed to systemdDbus.NewConnection.
func NewUserSystemdDbus() (*systemdDbus.Conn, error) {
	addr, err := DetectUserDbusSessionBusAddress()
	if err != nil {
		return nil, err
	}
	uid, err := DetectUID()
	if err != nil {
		return nil, err
	}

	return systemdDbus.NewConnection(func() (*dbus.Conn, error) {
		conn, err := dbus.Dial(addr)
		if err != nil {
			return nil, errors.Wrapf(err, "error while dialing %q", addr)
		}
		methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(uid))}
		if err := conn.Auth(methods); err != nil {
			// Do not leak the half-initialized connection.
			conn.Close()
			// uid is an int, so it must be formatted with %d (not %s).
			return nil, errors.Wrapf(err, "error while authenticating connection, address=%q, UID=%d", addr, uid)
		}
		if err := conn.Hello(); err != nil {
			conn.Close()
			return nil, errors.Wrapf(err, "error while sending Hello message, address=%q, UID=%d", addr, uid)
		}
		return conn, nil
	})
}

// DetectUID detects UID from the OwnerUID field of `busctl --user status`
// if running in userNS. The value corresponds to sd_bus_creds_get_owner_uid(3) .
//
// Otherwise returns os.Getuid() .
//
// On failure it returns -1 and a non-nil error: when busctl cannot be
// executed, when the OwnerUID value is not an integer, when scanning the
// output fails, or when the output contains no OwnerUID field at all.
func DetectUID() (int, error) {
	if !system.RunningInUserNS() {
		return os.Getuid(), nil
	}
	b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput()
	if err != nil {
		return -1, errors.Wrap(err, "could not execute `busctl --user --no-pager status`")
	}
	scanner := bufio.NewScanner(bytes.NewReader(b))
	for scanner.Scan() {
		s := strings.TrimSpace(scanner.Text())
		if strings.HasPrefix(s, "OwnerUID=") {
			uidStr := strings.TrimPrefix(s, "OwnerUID=")
			i, err := strconv.Atoi(uidStr)
			if err != nil {
				return -1, errors.Wrapf(err, "could not detect the OwnerUID: %s", s)
			}
			return i, nil
		}
	}
	if err := scanner.Err(); err != nil {
		return -1, errors.Wrap(err, "could not read the output of `busctl --user --no-pager status`")
	}
	// Never fall back to 0 here: that would silently claim to be root.
	return -1, errors.New("could not detect the OwnerUID from `busctl --user --no-pager status`")
}

// DetectUserDbusSessionBusAddress returns $DBUS_SESSION_BUS_ADDRESS if set.
// Otherwise returns "unix:path=$XDG_RUNTIME_DIR/bus" if $XDG_RUNTIME_DIR/bus exists.
// Otherwise parses the value from `systemctl --user show-environment` .
//
// An error is returned when systemctl cannot be executed, when its output
// cannot be scanned, or when the output has no DBUS_SESSION_BUS_ADDRESS entry.
func DetectUserDbusSessionBusAddress() (string, error) {
	if env := os.Getenv("DBUS_SESSION_BUS_ADDRESS"); env != "" {
		return env, nil
	}
	if xdr := os.Getenv("XDG_RUNTIME_DIR"); xdr != "" {
		busPath := filepath.Join(xdr, "bus")
		if _, err := os.Stat(busPath); err == nil {
			return "unix:path=" + busPath, nil
		}
	}
	b, err := exec.Command("systemctl", "--user", "--no-pager", "show-environment").CombinedOutput()
	if err != nil {
		return "", errors.Wrap(err, "could not execute `systemctl --user --no-pager show-environment`")
	}
	const prefix = "DBUS_SESSION_BUS_ADDRESS="
	scanner := bufio.NewScanner(bytes.NewReader(b))
	for scanner.Scan() {
		s := strings.TrimSpace(scanner.Text())
		if strings.HasPrefix(s, prefix) {
			return strings.TrimPrefix(s, prefix), nil
		}
	}
	// Distinguish "scanning failed" from "variable not present".
	if err := scanner.Err(); err != nil {
		return "", errors.Wrap(err, "could not read the output of `systemctl --user --no-pager show-environment`")
	}
	return "", errors.New("could not detect DBUS_SESSION_BUS_ADDRESS from `systemctl --user --no-pager show-environment`")
}
14 changes: 11 additions & 3 deletions libcontainer/cgroups/systemd/v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,11 @@ func (m *LegacyManager) Apply(pid int) error {
properties = append(properties, resourcesProperties...)
properties = append(properties, c.SystemdProps...)

if err := startUnit(unitName, properties); err != nil {
dbusConnection, err := getDbusConnection(false)
if err != nil {
return err
}
if err := startUnit(dbusConnection, unitName, properties); err != nil {
return err
}

Expand Down Expand Up @@ -213,8 +217,12 @@ func (m *LegacyManager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()

dbusConnection, err := getDbusConnection(false)
if err != nil {
return err
}
unitName := getUnitName(m.Cgroups)
if err := stopUnit(unitName); err != nil {
if err := stopUnit(dbusConnection, unitName); err != nil {
return nil
}
m.Paths = make(map[string]string)
Expand Down Expand Up @@ -371,7 +379,7 @@ func (m *LegacyManager) Set(container *configs.Config) error {
if err != nil {
return err
}
dbusConnection, err := getDbusConnection()
dbusConnection, err := getDbusConnection(false)
if err != nil {
return err
}
Expand Down
76 changes: 62 additions & 14 deletions libcontainer/cgroups/systemd/v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"math"
"os"
"path/filepath"
"strconv"
"strings"
"sync"

Expand All @@ -14,6 +15,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/pkg/errors"
)

type unifiedManager struct {
Expand Down Expand Up @@ -97,6 +99,9 @@ func (m *unifiedManager) Apply(pid int) error {
return cgroups.WriteCgroupProc(m.path, pid)
}

if m.rootless {
slice = "user.slice"
}
if c.Parent != "" {
slice = c.Parent
}
Expand Down Expand Up @@ -140,16 +145,20 @@ func (m *unifiedManager) Apply(pid int) error {
properties = append(properties, resourcesProperties...)
properties = append(properties, c.SystemdProps...)

if err := startUnit(unitName, properties); err != nil {
dbusConnection, err := getDbusConnection(m.rootless)
if err != nil {
return err
}
if err := startUnit(dbusConnection, unitName, properties); err != nil {
return errors.Wrapf(err, "error while starting unit %q with properties %+v", unitName, properties)
}

_, err = m.GetUnifiedPath()
if err != nil {
return err
}
if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
return err
return errors.Wrapf(err, "error while creating directory %q", m.path)
}
return nil
}
Expand All @@ -161,13 +170,17 @@ func (m *unifiedManager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()

dbusConnection, err := getDbusConnection(m.rootless)
if err != nil {
return err
}
unitName := getUnitName(m.cgroups)
if err := stopUnit(unitName); err != nil {
if err := stopUnit(dbusConnection, unitName); err != nil {
return nil
}

// XXX this is probably not needed, systemd should handle it
err := os.Remove(m.path)
err = os.Remove(m.path)
if err != nil && !os.IsNotExist(err) {
return err
}
Expand All @@ -190,31 +203,66 @@ func (m *unifiedManager) GetPaths() map[string]string {
return paths
}

// getSliceFull returns the slice portion of the cgroup path that
// GetUnifiedPath joins with the unit name.
// The value is incompatible with systemdDbus.PropSlice.
//
// The base slice is the expanded c.Parent when one is configured; otherwise
// it is "user.slice" in rootless mode and "system.slice" in rootful mode.
// In rootless mode the base slice is additionally placed under the control
// group of the systemd manager the D-Bus connection talks to.
func (m *unifiedManager) getSliceFull() (string, error) {
	var base string
	switch parent := m.cgroups.Parent; {
	case parent != "":
		expanded, err := ExpandSlice(parent)
		if err != nil {
			return "", err
		}
		base = expanded
	case m.rootless:
		base = "user.slice"
	default:
		base = "system.slice"
	}

	if !m.rootless {
		return base, nil
	}

	conn, err := getDbusConnection(m.rootless)
	if err != nil {
		return "", err
	}
	// quotedCG is typically "/user.slice/user-${uid}.slice/user@${uid}.service" including the quote symbols
	quotedCG, err := conn.GetManagerProperty("ControlGroup")
	if err != nil {
		return "", err
	}
	managerCG, err := strconv.Unquote(quotedCG)
	if err != nil {
		return "", err
	}

	// an example of the final slice in rootless: "/user.slice/user-1001.slice/user@1001.service/user.slice"
	// NOTE: systemdDbus.PropSlice requires the "/user.slice/user-1001.slice/user@1001.service/" prefix NOT to be specified.
	return filepath.Join(managerCG, base), nil
}

func (m *unifiedManager) GetUnifiedPath() (string, error) {
m.mu.Lock()
defer m.mu.Unlock()
if m.path != "" {
return m.path, nil
}

c := m.cgroups
slice := "system.slice"
if c.Parent != "" {
slice = c.Parent
}

slice, err := ExpandSlice(slice)
sliceFull, err := m.getSliceFull()
if err != nil {
return "", err
}

path := filepath.Join(slice, getUnitName(c))
c := m.cgroups
path := filepath.Join(sliceFull, getUnitName(c))
path, err = securejoin.SecureJoin(fs2.UnifiedMountpoint, path)
if err != nil {
return "", err
}
m.path = path

// an example of the final path in rootless:
// "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope"
return m.path, nil
}

Expand Down Expand Up @@ -263,12 +311,12 @@ func (m *unifiedManager) Set(container *configs.Config) error {
if err != nil {
return err
}
dbusConnection, err := getDbusConnection()
dbusConnection, err := getDbusConnection(m.rootless)
if err != nil {
return err
}
if err := dbusConnection.SetUnitProperties(getUnitName(m.cgroups), true, properties...); err != nil {
return err
return errors.Wrap(err, "error while setting unit properties")
}

fsMgr, err := m.fsManager()
Expand Down
12 changes: 12 additions & 0 deletions libcontainer/factory_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,18 @@ func SystemdCgroups(l *LinuxFactory) error {
return nil
}

// RootlessSystemdCgroups is rootless version of SystemdCgroups.
//
// It returns an error when systemd is not running on the host or when
// cgroup v2 unified mode is not enabled; otherwise it delegates to
// systemdCgroupV2 with rootless set to true.
func RootlessSystemdCgroups(l *LinuxFactory) error {
	// The checks are evaluated in order; the first failing one is reported.
	switch {
	case !systemd.IsRunningSystemd():
		return fmt.Errorf("systemd not running on this host, can't use systemd as cgroups manager")
	case !cgroups.IsCgroup2UnifiedMode():
		return fmt.Errorf("cgroup v2 not enabled on this host, can't use systemd (rootless) as cgroups manager")
	}
	return systemdCgroupV2(l, true)
}

func cgroupfs2(l *LinuxFactory, rootless bool) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
m, err := fs2.NewManager(config, getUnifiedPath(paths), rootless)
Expand Down
4 changes: 0 additions & 4 deletions rootless_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@ func shouldUseRootlessCgroupManager(context *cli.Context) (bool, error) {
if b != nil {
return *b, nil
}

if context.GlobalBool("systemd-cgroup") {
return false, nil
}
}
if os.Geteuid() != 0 {
return true, nil
Expand Down
3 changes: 3 additions & 0 deletions utils_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
if context.GlobalBool("systemd-cgroup") {
if systemd.IsRunningSystemd() {
cgroupManager = libcontainer.SystemdCgroups
if rootlessCg {
cgroupManager = libcontainer.RootlessSystemdCgroups
}
} else {
return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available")
}
Expand Down

0 comments on commit cfbcc3c

Please sign in to comment.