From b94a5e241095a55a6838970148d296e109b2afd1 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 6 Sep 2019 15:29:03 +0200 Subject: [PATCH 1/3] utils: use the user session for systemd when running as rootless, use the user session bus. It is already implemented in the pkg/cgroups so just re-use it. Signed-off-by: Giuseppe Scrivano --- utils/utils_supported.go | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/utils/utils_supported.go b/utils/utils_supported.go index 8b0ba44384..8bc232179d 100644 --- a/utils/utils_supported.go +++ b/utils/utils_supported.go @@ -3,6 +3,8 @@ package utils import ( + "github.com/containers/libpod/pkg/cgroups" + "github.com/containers/libpod/pkg/rootless" systemdDbus "github.com/coreos/go-systemd/dbus" "github.com/godbus/dbus" ) @@ -10,9 +12,19 @@ import ( // RunUnderSystemdScope adds the specified pid to a systemd scope func RunUnderSystemdScope(pid int, slice string, unitName string) error { var properties []systemdDbus.Property - conn, err := systemdDbus.New() - if err != nil { - return err + var conn *systemdDbus.Conn + var err error + + if rootless.IsRootless() { + conn, err = cgroups.GetUserConnection(rootless.GetRootlessUID()) + if err != nil { + return err + } + } else { + conn, err = systemdDbus.New() + if err != nil { + return err + } } properties = append(properties, systemdDbus.PropSlice(slice)) properties = append(properties, newProp("PIDs", []uint32{uint32(pid)})) From afd0818326aa37f03a3bc74f0269a06a403db16d Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 6 Sep 2019 15:30:30 +0200 Subject: [PATCH 2/3] rootless: automatically create a systemd scope when running in rootless mode and using systemd as cgroup manager create automatically a systemd scope when the user doesn't own the current cgroup. 
This solves a couple of issues: on cgroup v2, a process must be in a directory owned by the unprivileged user before it can be moved to a different cgroup tree. This is not always true, e.g. when creating a session with su -l. Closes: https://github.com/containers/libpod/issues/3937 Also, for running systemd in a container it was previously necessary to specify "systemd-run --scope --user podman ..."; now this is done automatically as part of this PR. Signed-off-by: Giuseppe Scrivano --- cmd/podman/main_local.go | 49 ++++++++++++++++++----- libpod/oci_internal_linux.go | 54 ++++++++++++++++---------- pkg/cgroups/cgroups_supported.go | 62 ++++++++++++++++++++++++++++++ pkg/cgroups/cgroups_unsupported.go | 6 +++ 4 files changed, 140 insertions(+), 31 deletions(-) diff --git a/cmd/podman/main_local.go b/cmd/podman/main_local.go index 0feba609bd..56874baad5 100644 --- a/cmd/podman/main_local.go +++ b/cmd/podman/main_local.go @@ -5,6 +5,7 @@ package main import ( "context" + "fmt" "log/syslog" "os" "runtime/pprof" @@ -18,6 +19,7 @@ import ( "github.com/containers/libpod/pkg/rootless" "github.com/containers/libpod/pkg/tracing" "github.com/containers/libpod/pkg/util" + "github.com/containers/libpod/utils" "github.com/opentracing/opentracing-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -120,6 +122,10 @@ func profileOff(cmd *cobra.Command) error { } func setupRootless(cmd *cobra.Command, args []string) error { + if !rootless.IsRootless() { + return nil + } + matches, err := rootless.ConfigurationMatches() if err != nil { return err @@ -128,9 +134,6 @@ func setupRootless(cmd *cobra.Command, args []string) error { logrus.Warningf("the current user namespace doesn't match the configuration in /etc/subuid or /etc/subgid") logrus.Warningf("you can use `%s system migrate` to recreate the user namespace and restart the containers", os.Args[0]) } - if os.Geteuid() == 0 || cmd == _searchCommand || cmd == _versionCommand || cmd == _mountCommand || cmd == 
_migrateCommand || strings.HasPrefix(cmd.Use, "help") { - return nil - } podmanCmd := cliconfig.PodmanCommand{ Command: cmd, @@ -139,6 +142,39 @@ func setupRootless(cmd *cobra.Command, args []string) error { Remote: remoteclient, } + runtime, err := libpodruntime.GetRuntime(getContext(), &podmanCmd) + if err != nil { + return errors.Wrapf(err, "could not get runtime") + } + defer runtime.DeferredShutdown(false) + + // do it only after podman has already re-execed and running with uid==0. + if os.Geteuid() == 0 { + ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup() + if err != nil { + return err + } + + if !ownsCgroup { + unitName := fmt.Sprintf("podman-%d.scope", os.Getpid()) + if err := utils.RunUnderSystemdScope(os.Getpid(), "user.slice", unitName); err != nil { + conf, err := runtime.GetConfig() + if err != nil { + return err + } + if conf.CgroupManager == libpod.SystemdCgroupsManager { + logrus.Warnf("Failed to add podman to systemd sandbox cgroup: %v", err) + } else { + logrus.Debugf("Failed to add podman to systemd sandbox cgroup: %v", err) + } + } + } + } + + if os.Geteuid() == 0 || cmd == _searchCommand || cmd == _versionCommand || cmd == _mountCommand || cmd == _migrateCommand || strings.HasPrefix(cmd.Use, "help") { + return nil + } + pausePidPath, err := util.GetRootlessPauseProcessPidPath() if err != nil { return errors.Wrapf(err, "could not get pause process pid file path") @@ -158,13 +194,6 @@ func setupRootless(cmd *cobra.Command, args []string) error { } // if there is no pid file, try to join existing containers, and create a pause process. 
- - runtime, err := libpodruntime.GetRuntime(getContext(), &podmanCmd) - if err != nil { - return errors.Wrapf(err, "could not get runtime") - } - defer runtime.DeferredShutdown(false) - ctrs, err := runtime.GetRunningContainers() if err != nil { logrus.Errorf(err.Error()) diff --git a/libpod/oci_internal_linux.go b/libpod/oci_internal_linux.go index f9e935d868..4df1e4010f 100644 --- a/libpod/oci_internal_linux.go +++ b/libpod/oci_internal_linux.go @@ -21,6 +21,7 @@ import ( "github.com/containers/libpod/pkg/cgroups" "github.com/containers/libpod/pkg/errorhandling" "github.com/containers/libpod/pkg/lookup" + "github.com/containers/libpod/pkg/rootless" "github.com/containers/libpod/pkg/util" "github.com/containers/libpod/utils" "github.com/coreos/go-systemd/activation" @@ -359,35 +360,46 @@ func startCommandGivenSelinux(cmd *exec.Cmd) error { // moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup // it then signals for conmon to start by sending nonse data down the start fd func (r *OCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File, uuid string) error { + mustCreateCgroup := true // If cgroup creation is disabled - just signal. 
if ctr.config.NoCgroups { - return writeConmonPipeData(startFd) + mustCreateCgroup = false } - cgroupParent := ctr.CgroupParent() - if r.cgroupManager == SystemdCgroupsManager { - unitName := createUnitName("libpod-conmon", ctr.ID()) - - realCgroupParent := cgroupParent - splitParent := strings.Split(cgroupParent, "/") - if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { - realCgroupParent = splitParent[len(splitParent)-1] + if rootless.IsRootless() { + ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup() + if err != nil { + return err } + mustCreateCgroup = !ownsCgroup + } - logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName) - if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil { - logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err) - } - } else { - cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") - control, err := cgroups.New(cgroupPath, &spec.LinuxResources{}) - if err != nil { - logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + if mustCreateCgroup { + cgroupParent := ctr.CgroupParent() + if r.cgroupManager == SystemdCgroupsManager { + unitName := createUnitName("libpod-conmon", ctr.ID()) + + realCgroupParent := cgroupParent + splitParent := strings.Split(cgroupParent, "/") + if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { + realCgroupParent = splitParent[len(splitParent)-1] + } + + logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName) + if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil { + logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err) + } } else { - // we need to remove this defer and delete the cgroup once conmon exits - // maybe need a conmon monitor? 
- if err := control.AddPid(cmd.Process.Pid); err != nil { + cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") + control, err := cgroups.New(cgroupPath, &spec.LinuxResources{}) + if err != nil { logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } else { + // we need to remove this defer and delete the cgroup once conmon exits + // maybe need a conmon monitor? + if err := control.AddPid(cmd.Process.Pid); err != nil { + logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } } } } diff --git a/pkg/cgroups/cgroups_supported.go b/pkg/cgroups/cgroups_supported.go index fcd44dfc80..2a36777d43 100644 --- a/pkg/cgroups/cgroups_supported.go +++ b/pkg/cgroups/cgroups_supported.go @@ -3,8 +3,15 @@ package cgroups import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strings" "sync" "syscall" + + "github.com/pkg/errors" ) var ( @@ -25,3 +32,58 @@ func IsCgroup2UnifiedMode() (bool, error) { }) return isUnified, isUnifiedErr } + +// UserOwnsCurrentSystemdCgroup checks whether the current EUID owns the +// current cgroup. 
+func UserOwnsCurrentSystemdCgroup() (bool, error) { + uid := os.Geteuid() + + cgroup2, err := IsCgroup2UnifiedMode() + if err != nil { + return false, err + } + + f, err := os.Open("/proc/self/cgroup") + if err != nil { + return false, errors.Wrapf(err, "open file /proc/self/cgroup") + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Text() + parts := strings.SplitN(line, ":", 3) + + if len(parts) < 3 { + continue + } + + var cgroupPath string + + if cgroup2 { + cgroupPath = filepath.Join(cgroupRoot, parts[2]) + } else { + if parts[1] != "name=systemd" { + continue + } + cgroupPath = filepath.Join(cgroupRoot, "systemd", parts[2]) + } + + st, err := os.Stat(cgroupPath) + if err != nil { + return false, err + } + s := st.Sys() + if s == nil { + return false, fmt.Errorf("error stat cgroup path %s", cgroupPath) + } + + if int(s.(*syscall.Stat_t).Uid) != uid { + return false, nil + } + } + if err := scanner.Err(); err != nil { + return false, errors.Wrapf(err, "parsing file /proc/self/cgroup") + } + return true, nil +} diff --git a/pkg/cgroups/cgroups_unsupported.go b/pkg/cgroups/cgroups_unsupported.go index 9dc196e42a..cd140fbf3c 100644 --- a/pkg/cgroups/cgroups_unsupported.go +++ b/pkg/cgroups/cgroups_unsupported.go @@ -6,3 +6,9 @@ package cgroups func IsCgroup2UnifiedMode() (bool, error) { return false, nil } + +// UserOwnsCurrentSystemdCgroup checks whether the current EUID owns the +// current cgroup. 
+func UserOwnsCurrentSystemdCgroup() (bool, error) { + return false, nil +} From 7e88bf7fd0207783e8feecb7ec7206df96897f4e Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Mon, 9 Sep 2019 11:12:45 +0200 Subject: [PATCH 3/3] rootless: run pause process in its own scope Signed-off-by: Giuseppe Scrivano --- cmd/podman/main_local.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/cmd/podman/main_local.go b/cmd/podman/main_local.go index 56874baad5..cad2566158 100644 --- a/cmd/podman/main_local.go +++ b/cmd/podman/main_local.go @@ -6,9 +6,11 @@ package main import ( "context" "fmt" + "io/ioutil" "log/syslog" "os" "runtime/pprof" + "strconv" "strings" "syscall" @@ -121,6 +123,24 @@ func profileOff(cmd *cobra.Command) error { return nil } +func movePauseProcessToScope() error { + pausePidPath, err := util.GetRootlessPauseProcessPidPath() + if err != nil { + return errors.Wrapf(err, "could not get pause process pid file path") + } + + data, err := ioutil.ReadFile(pausePidPath) + if err != nil { + return errors.Wrapf(err, "cannot read pause pid file") + } + pid, err := strconv.ParseUint(string(data), 10, 0) + if err != nil { + return errors.Wrapf(err, "cannot parse pid file %s", pausePidPath) + } + + return utils.RunUnderSystemdScope(int(pid), "user.slice", "podman-pause.scope") +} + func setupRootless(cmd *cobra.Command, args []string) error { if !rootless.IsRootless() { return nil @@ -206,6 +226,17 @@ func setupRootless(cmd *cobra.Command, args []string) error { } became, ret, err := rootless.TryJoinFromFilePaths(pausePidPath, true, paths) + if err := movePauseProcessToScope(); err != nil { + conf, err := runtime.GetConfig() + if err != nil { + return err + } + if conf.CgroupManager == libpod.SystemdCgroupsManager { + logrus.Warnf("Failed to add pause process to systemd sandbox cgroup: %v", err) + } else { + logrus.Debugf("Failed to add pause process to systemd sandbox cgroup: %v", err) + } + } if err != nil { 
logrus.Errorf(err.Error()) os.Exit(1)