From 011b357f918f29b9e493eae971b94e379f7e6fe9 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Thu, 8 Oct 2020 15:25:06 -0400 Subject: [PATCH] Store cgroup manager on a per-container basis When we create a container, we assign a cgroup parent based on the current cgroup manager in use. This parent is only usable with the cgroup manager the container is created with, so if the default cgroup manager is later changed or overridden, the container will not be able to start. To solve this, store the cgroup manager that created the container in container configuration, so we can guarantee a container with a systemd cgroup parent will always be started with systemd cgroups. Unfortunately, this is very difficult to test in CI, due to the fact that we hard-code cgroup manager on all invocations of Podman in CI. Fixes #7830 Signed-off-by: Matthew Heon --- libpod/container.go | 19 ++++++++++++++++--- libpod/container_config.go | 5 ++++- libpod/container_inspect.go | 3 ++- libpod/container_internal_linux.go | 7 ++++--- libpod/define/container_inspect.go | 3 +++ libpod/oci_conmon_linux.go | 12 ++++-------- libpod/runtime_ctr.go | 1 + 7 files changed, 34 insertions(+), 16 deletions(-) diff --git a/libpod/container.go b/libpod/container.go index 9b4ccbd5f8..01419500ef 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -888,9 +888,22 @@ func (c *Container) NamespacePath(linuxNS LinuxNS) (string, error) { //nolint:in return fmt.Sprintf("/proc/%d/ns/%s", c.state.PID, linuxNS.String()), nil } +// CgroupManager returns the cgroup manager used by the given container. +func (c *Container) CgroupManager() string { + cgroupManager := c.config.CgroupManager + if cgroupManager == "" { + cgroupManager = c.runtime.config.Engine.CgroupManager + } + return cgroupManager +} + // CGroupPath returns a cgroups "path" for a given container. func (c *Container) CGroupPath() (string, error) { + cgroupManager := c.CgroupManager() + switch { + case c.config.NoCgroups || c.config.CgroupsMode == "disabled": + return "", errors.Wrapf(define.ErrNoCgroups, "this container is not creating cgroups") case c.config.CgroupsMode == cgroupSplit: if c.config.CgroupParent != "" { return "", errors.Errorf("cannot specify cgroup-parent with cgroup-mode %q", cgroupSplit) @@ -906,9 +919,9 @@ func (c *Container) CGroupPath() (string, error) { return "", errors.Errorf("invalid cgroup for conmon %q", cg) } return strings.TrimSuffix(cg, "/supervisor") + "/container", nil - case c.runtime.config.Engine.CgroupManager == config.CgroupfsCgroupsManager: + case cgroupManager == config.CgroupfsCgroupsManager: return filepath.Join(c.config.CgroupParent, fmt.Sprintf("libpod-%s", c.ID())), nil - case c.runtime.config.Engine.CgroupManager == config.SystemdCgroupsManager: + case cgroupManager == config.SystemdCgroupsManager: if rootless.IsRootless() { uid := rootless.GetRootlessUID() parts := strings.SplitN(c.config.CgroupParent, "/", 2) @@ -922,7 +935,7 @@ func (c *Container) CGroupPath() (string, error) { } return filepath.Join(c.config.CgroupParent, createUnitName("libpod", c.ID())), nil default: - return "", errors.Wrapf(define.ErrInvalidArg, "unsupported CGroup manager %s in use", c.runtime.config.Engine.CgroupManager) + return "", errors.Wrapf(define.ErrInvalidArg, "unsupported CGroup manager %s in use", cgroupManager) } } diff --git a/libpod/container_config.go b/libpod/container_config.go index 3fc058d523..03eee3f019 100644 --- a/libpod/container_config.go +++ b/libpod/container_config.go @@ -275,13 +275,16 @@ type ContainerMiscConfig struct { StopTimeout uint `json:"stopTimeout,omitempty"` // Time container was created CreatedTime time.Time `json:"createdTime"` + // CgroupManager is the cgroup manager used to create this container. + // If empty, the runtime default will be used. + CgroupManager string `json:"cgroupManager,omitempty"` // NoCgroups indicates that the container will not create CGroups. It is // incompatible with CgroupParent. Deprecated in favor of CgroupsMode. NoCgroups bool `json:"noCgroups,omitempty"` // CgroupsMode indicates how the container will create cgroups // (disabled, no-conmon, enabled). It supersedes NoCgroups. CgroupsMode string `json:"cgroupsMode,omitempty"` - // Cgroup parent of the container + // Cgroup parent of the container. CgroupParent string `json:"cgroupParent"` // LogPath log location LogPath string `json:"logPath"` diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go index 835dccd713..b8bce12721 100644 --- a/libpod/container_inspect.go +++ b/libpod/container_inspect.go @@ -729,7 +729,7 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named // CGroup parent // Need to check if it's the default, and not print if so. defaultCgroupParent := "" - switch c.runtime.config.Engine.CgroupManager { + switch c.CgroupManager() { case config.CgroupfsCgroupsManager: defaultCgroupParent = CgroupfsDefaultCgroupParent case config.SystemdCgroupsManager: @@ -738,6 +738,7 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named if c.config.CgroupParent != defaultCgroupParent { hostConfig.CgroupParent = c.config.CgroupParent } + hostConfig.CgroupManager = c.CgroupManager() // PID namespace mode pidMode := "" diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 894982973a..96d2dcdd0f 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -1968,6 +1968,7 @@ func (c *Container) getOCICgroupPath() (string, error) { if err != nil { return "", err } + cgroupManager := c.CgroupManager() switch { case (rootless.IsRootless() && !unified) || c.config.NoCgroups: return "", nil @@ -1980,14 +1981,14 @@ func (c *Container) getOCICgroupPath() (string, error) { return "", err } return filepath.Join(selfCgroup, "container"), nil - case c.runtime.config.Engine.CgroupManager == config.SystemdCgroupsManager: + case cgroupManager == config.SystemdCgroupsManager: // When the OCI runtime is set to use Systemd as a cgroup manager, it // expects cgroups to be passed as follows: // slice:prefix:name systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID()) logrus.Debugf("Setting CGroups for container %s to %s", c.ID(), systemdCgroups) return systemdCgroups, nil - case c.runtime.config.Engine.CgroupManager == config.CgroupfsCgroupsManager: + case cgroupManager == config.CgroupfsCgroupsManager: cgroupPath, err := c.CGroupPath() if err != nil { return "", err @@ -1995,7 +1996,7 @@ func (c *Container) getOCICgroupPath() (string, error) { logrus.Debugf("Setting CGroup path for container %s to %s", c.ID(), cgroupPath) return cgroupPath, nil default: - return "", errors.Wrapf(define.ErrInvalidArg, "invalid cgroup manager %s requested", c.runtime.config.Engine.CgroupManager) + return "", errors.Wrapf(define.ErrInvalidArg, "invalid cgroup manager %s requested", cgroupManager) } } diff --git a/libpod/define/container_inspect.go b/libpod/define/container_inspect.go index 44c3d515b2..38b3a6686d 100644 --- a/libpod/define/container_inspect.go +++ b/libpod/define/container_inspect.go @@ -236,6 +236,9 @@ type InspectContainerHostConfig struct { // include a Mounts field in inspect. // Format: :[:] Binds []string `json:"Binds"` + // CgroupManager is the cgroup manager used by the container. + // At present, allowed values are either "cgroupfs" or "systemd". + CgroupManager string `json:"CgroupManager,omitempty"` // CgroupMode is the configuration of the container's cgroup namespace. // Populated as follows: // private - a cgroup namespace has been created diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index b13d25e229..cfd4b1251f 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -57,7 +57,6 @@ type ConmonOCIRuntime struct { path string conmonPath string conmonEnv []string - cgroupManager string tmpDir string exitsDir string socketsDir string @@ -102,7 +101,6 @@ func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtime runtime.runtimeFlags = runtimeFlags runtime.conmonEnv = runtimeCfg.Engine.ConmonEnvVars - runtime.cgroupManager = runtimeCfg.Engine.CgroupManager runtime.tmpDir = runtimeCfg.Engine.TmpDir runtime.logSizeMax = runtimeCfg.Containers.LogSizeMax runtime.noPivot = runtimeCfg.Engine.NoPivotRoot @@ -149,10 +147,6 @@ func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtime runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits") runtime.socketsDir = filepath.Join(runtime.tmpDir, "socket") - if runtime.cgroupManager != config.CgroupfsCgroupsManager && runtime.cgroupManager != config.SystemdCgroupsManager { - return nil, errors.Wrapf(define.ErrInvalidArg, "invalid cgroup manager specified: %s", runtime.cgroupManager) - } - // Create the exit files and attach sockets directories if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil { // The directory is allowed to exist @@ -1325,7 +1319,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p args = append(args, rFlags...) } - if r.cgroupManager == config.SystemdCgroupsManager && !ctr.config.NoCgroups && ctr.config.CgroupsMode != cgroupSplit { + if ctr.CgroupManager() == config.SystemdCgroupsManager && !ctr.config.NoCgroups && ctr.config.CgroupsMode != cgroupSplit { args = append(args, "-s") } @@ -1437,8 +1431,10 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec } if mustCreateCgroup { + // TODO: This should be a switch - we are not guaranteed that + // there are only 2 valid cgroup managers cgroupParent := ctr.CgroupParent() - if r.cgroupManager == config.SystemdCgroupsManager { + if ctr.CgroupManager() == config.SystemdCgroupsManager { unitName := createUnitName("libpod-conmon", ctr.ID()) realCgroupParent := cgroupParent diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index abb97293f2..51b4c5f039 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -208,6 +208,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai // Check CGroup parent sanity, and set it if it was not set. // Only if we're actually configuring CGroups. if !ctr.config.NoCgroups { + ctr.config.CgroupManager = r.config.Engine.CgroupManager switch r.config.Engine.CgroupManager { case config.CgroupfsCgroupsManager: if ctr.config.CgroupParent == "" {