Skip to content

Commit

Permalink
Merge pull request #6666 from giuseppe/conmon-delegate
Browse files Browse the repository at this point in the history
podman: add new cgroup mode split
  • Loading branch information
openshift-merge-robot authored Jun 29, 2020
2 parents 771c887 + 6ee5f74 commit 6ac009d
Show file tree
Hide file tree
Showing 13 changed files with 201 additions and 15 deletions.
2 changes: 1 addition & 1 deletion cmd/podman/common/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func GetCreateFlags(cf *ContainerCLIOpts) *pflag.FlagSet {
createFlags.StringVar(
&cf.CGroupsMode,
"cgroups", containerConfig.Cgroups(),
`control container cgroup configuration ("enabled"|"disabled"|"no-conmon")`,
`control container cgroup configuration ("enabled"|"disabled"|"no-conmon"|"split")`,
)
createFlags.StringVar(
&cf.CGroupParent,
Expand Down
4 changes: 4 additions & 0 deletions cmd/podman/containers/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ func createInit(c *cobra.Command) error {
cliVals.Env = env
}

if c.Flag("cgroups").Changed && cliVals.CGroupsMode == "split" && registry.IsRemote() {
return errors.Errorf("the option --cgroups=%q is not supported in remote mode", cliVals.CGroupsMode)
}

// Docker-compatibility: the "-h" flag for run/create is reserved for
// the hostname (see https://github.com/containers/libpod/issues/1367).

Expand Down
5 changes: 4 additions & 1 deletion docs/source/markdown/podman-create.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,12 @@ If the host uses cgroups v1, the default is set to **host**. On cgroups v2 the
**--cgroups**=*mode*

Determines whether the container will create CGroups.
Valid values are *enabled*, *disabled*, *no-conmon*, which the default being *enabled*.
Valid values are *enabled*, *disabled*, *no-conmon*, *split*, which the default being *enabled*.

The *enabled* option will create a new cgroup under the cgroup-parent.
The *disabled* option will force the container to not create CGroups, and thus conflicts with CGroup options (**--cgroupns** and **--cgroup-parent**).
The *no-conmon* option disables a new CGroup only for the conmon process.
The *split* option splits the current cgroup in two sub-cgroups: one for conmon and one for the container payload. It is not possible to set *--cgroup-parent* with *split*.

**--cgroup-parent**=*path*

Expand Down
10 changes: 6 additions & 4 deletions docs/source/markdown/podman-run.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,16 @@ Set the cgroup namespace mode for the container.

If the host uses cgroups v1, the default is set to **host**. On cgroups v2, the default is **private**.

**--cgroups**=**enabled**|**disabled**|**no-conmon**
**--cgroups**=**enabled**|**disabled**|**no-conmon**|**split**

Determines whether the container will create CGroups.

Default is **enabled**. The **disabled** option will force the container
to not create CGroups, and thus conflicts with CGroup options
(**--cgroupns** and **--cgroup-parent**).
Default is **enabled**.

The **enabled** option will create a new cgroup under the cgroup-parent.
The **disabled** option will force the container to not create CGroups, and thus conflicts with CGroup options (**--cgroupns** and **--cgroup-parent**).
The **no-conmon** option disables a new CGroup only for the **conmon** process.
The **split** option splits the current cgroup in two sub-cgroups: one for conmon and one for the container payload. It is not possible to set **--cgroup-parent** with **split**.

**--cgroup-parent**=*path*

Expand Down
22 changes: 19 additions & 3 deletions libpod/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/containers/libpod/libpod/lock"
"github.com/containers/libpod/pkg/namespaces"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/libpod/utils"
"github.com/containers/storage"
"github.com/cri-o/ocicni/pkg/ocicni"
spec "github.com/opencontainers/runtime-spec/specs-go"
Expand Down Expand Up @@ -1089,10 +1090,25 @@ func (c *Container) NamespacePath(linuxNS LinuxNS) (string, error) { //nolint:in

// CGroupPath returns a cgroups "path" for a given container.
func (c *Container) CGroupPath() (string, error) {
switch c.runtime.config.Engine.CgroupManager {
case config.CgroupfsCgroupsManager:
switch {
case c.config.CgroupsMode == cgroupSplit:
if c.config.CgroupParent != "" {
return "", errors.Errorf("cannot specify cgroup-parent with cgroup-mode %q", cgroupSplit)
}
cg, err := utils.GetCgroupProcess(c.state.ConmonPID)
if err != nil {
return "", err
}
// Use the conmon cgroup for two reasons: we validate the container
// delegation was correct, and the conmon cgroup doesn't change at runtime
// while we are not sure about the container that can create sub cgroups.
if !strings.HasSuffix(cg, "supervisor") {
return "", errors.Errorf("invalid cgroup for conmon %q", cg)
}
return strings.TrimSuffix(cg, "/supervisor") + "/container", nil
case c.runtime.config.Engine.CgroupManager == config.CgroupfsCgroupsManager:
return filepath.Join(c.config.CgroupParent, fmt.Sprintf("libpod-%s", c.ID())), nil
case config.SystemdCgroupsManager:
case c.runtime.config.Engine.CgroupManager == config.SystemdCgroupsManager:
if rootless.IsRootless() {
uid := rootless.GetRootlessUID()
parts := strings.SplitN(c.config.CgroupParent, "/", 2)
Expand Down
12 changes: 11 additions & 1 deletion libpod/container_internal_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/containers/libpod/pkg/resolvconf"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/libpod/pkg/util"
"github.com/containers/libpod/utils"
"github.com/containers/storage/pkg/archive"
securejoin "github.com/cyphar/filepath-securejoin"
"github.com/opencontainers/runc/libcontainer/user"
Expand Down Expand Up @@ -1505,8 +1506,17 @@ func (c *Container) getOCICgroupPath() (string, error) {
switch {
case (rootless.IsRootless() && !unified) || c.config.NoCgroups:
return "", nil
case c.config.CgroupsMode == cgroupSplit:
if c.config.CgroupParent != "" {
return c.config.CgroupParent, nil
}
selfCgroup, err := utils.GetOwnCgroup()
if err != nil {
return "", err
}
return filepath.Join(selfCgroup, "container"), nil
case c.runtime.config.Engine.CgroupManager == config.SystemdCgroupsManager:
// When runc is set to use Systemd as a cgroup manager, it
// When the OCI runtime is set to use Systemd as a cgroup manager, it
// expects cgroups to be passed as follows:
// slice:prefix:name
systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID())
Expand Down
4 changes: 4 additions & 0 deletions libpod/container_validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ func (c *Container) validate() error {
return errors.Wrapf(define.ErrInvalidArg, "cannot both create a network namespace and join another container's network namespace")
}

if c.config.CgroupsMode == cgroupSplit && c.config.CgroupParent != "" {
return errors.Wrapf(define.ErrInvalidArg, "cannot specify --cgroup-mode=split with a cgroup-parent")
}

// Not creating cgroups has a number of requirements, mostly related to
// the PID namespace.
if c.config.NoCgroups || c.config.CgroupsMode == "disabled" {
Expand Down
7 changes: 7 additions & 0 deletions libpod/oci_conmon.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package libpod

const (
// cgroupSplit is the cgroup mode for reusing the current cgroup both
// for conmon and for the container payload.
cgroupSplit = "split"
)
10 changes: 8 additions & 2 deletions libpod/oci_conmon_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -881,6 +881,12 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
return err
}

if ctr.config.CgroupsMode == cgroupSplit {
if err := utils.MoveUnderCgroupSubtree("supervisor"); err != nil {
return err
}
}

args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), filepath.Join(ctr.state.RunDir, "pidfile"), ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)

if ctr.config.Spec.Process.Terminal {
Expand Down Expand Up @@ -1173,7 +1179,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
"--socket-dir-path", r.socketsDir,
}

if r.cgroupManager == config.SystemdCgroupsManager && !ctr.config.NoCgroups {
if r.cgroupManager == config.SystemdCgroupsManager && !ctr.config.NoCgroups && ctr.config.CgroupsMode != cgroupSplit {
args = append(args, "-s")
}

Expand Down Expand Up @@ -1275,7 +1281,7 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec

// If cgroup creation is disabled - just signal.
switch ctr.config.CgroupsMode {
case "disabled", "no-conmon":
case "disabled", "no-conmon", cgroupSplit:
mustCreateCgroup = false
}

Expand Down
2 changes: 1 addition & 1 deletion libpod/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -1049,7 +1049,7 @@ func WithCgroupsMode(mode string) CtrCreateOption {
case "disabled":
ctr.config.NoCgroups = true
ctr.config.CgroupsMode = mode
case "enabled", "no-conmon":
case "enabled", "no-conmon", cgroupSplit:
ctr.config.CgroupsMode = mode
default:
return errors.Wrapf(define.ErrInvalidArg, "Invalid cgroup mode %q", mode)
Expand Down
4 changes: 2 additions & 2 deletions libpod/runtime_ctr.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,9 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
}
ctr.config.CgroupParent = podCgroup
case rootless.IsRootless():
case rootless.IsRootless() && ctr.config.CgroupsMode != cgroupSplit:
ctr.config.CgroupParent = SystemdDefaultRootlessCgroupParent
default:
case ctr.config.CgroupsMode != cgroupSplit:
ctr.config.CgroupParent = SystemdDefaultCgroupParent
}
} else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
Expand Down
122 changes: 122 additions & 0 deletions utils/utils_supported.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,20 @@
package utils

import (
"bufio"
"bytes"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"

"github.com/containers/libpod/pkg/cgroups"
"github.com/containers/libpod/pkg/rootless"
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
"github.com/godbus/dbus/v5"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)

// RunUnderSystemdScope adds the specified pid to a systemd scope
Expand Down Expand Up @@ -43,6 +53,118 @@ func RunUnderSystemdScope(pid int, slice string, unitName string) error {
return nil
}

func getCgroupProcess(procFile string) (string, error) {
f, err := os.Open(procFile)
if err != nil {
return "", errors.Wrapf(err, "open file %q", procFile)
}
defer f.Close()

scanner := bufio.NewScanner(f)
cgroup := "/"
for scanner.Scan() {
line := scanner.Text()
parts := strings.Split(line, ":")
if len(parts) != 3 {
return "", errors.Errorf("cannot parse cgroup line %q", line)
}
if strings.HasPrefix(line, "0::") {
cgroup = line[3:]
break
}
// root cgroup, skip it
if parts[2] == "/" {
continue
}
// The process must have the same cgroup path for all controllers
// The OCI runtime spec file allow us to specify only one path.
if cgroup != "/" && cgroup != parts[2] {
return "", errors.Errorf("cgroup configuration not supported, the process is in two different cgroups")
}
cgroup = parts[2]
}
if cgroup == "/" {
return "", errors.Errorf("could not find cgroup mount in %q", procFile)
}
return cgroup, nil
}

// GetOwnCgroup returns the cgroup for the current process.
func GetOwnCgroup() (string, error) {
return getCgroupProcess("/proc/self/cgroup")
}

// GetCgroupProcess returns the cgroup for the specified process process.
func GetCgroupProcess(pid int) (string, error) {
return getCgroupProcess(fmt.Sprintf("/proc/%d/cgroup", pid))
}

// MoveUnderCgroupSubtree moves the PID under a cgroup subtree.
func MoveUnderCgroupSubtree(subtree string) error {
procFile := "/proc/self/cgroup"
f, err := os.Open(procFile)
if err != nil {
return errors.Wrapf(err, "open file %q", procFile)
}
defer f.Close()

unifiedMode, err := cgroups.IsCgroup2UnifiedMode()
if err != nil {
return err
}

scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := scanner.Text()
parts := strings.Split(line, ":")
if len(parts) != 3 {
return errors.Errorf("cannot parse cgroup line %q", line)
}

// root cgroup, skip it
if parts[2] == "/" {
continue
}

cgroupRoot := "/sys/fs/cgroup"
// Special case the unified mount on hybrid cgroup and named hierarchies.
// This works on Fedora 31, but we should really parse the mounts to see
// where the cgroup hierarchy is mounted.
if parts[1] == "" && !unifiedMode {
// If it is not using unified mode, the cgroup v2 hierarchy is
// usually mounted under /sys/fs/cgroup/unified
cgroupRoot = filepath.Join(cgroupRoot, "unified")
} else if parts[1] != "" {
// Assume the controller is mounted at /sys/fs/cgroup/$CONTROLLER.
controller := strings.TrimPrefix(parts[1], "name=")
cgroupRoot = filepath.Join(cgroupRoot, controller)
}

processes, err := ioutil.ReadFile(filepath.Join(cgroupRoot, parts[2], "cgroup.procs"))
if err != nil {
return err
}

newCgroup := filepath.Join(cgroupRoot, parts[2], subtree)
if err := os.Mkdir(newCgroup, 0755); err != nil {
return err
}

f, err := os.OpenFile(filepath.Join(newCgroup, "cgroup.procs"), os.O_RDWR, 0755)
if err != nil {
return err
}
defer f.Close()

for _, pid := range bytes.Split(processes, []byte("\n")) {
if _, err := f.Write(pid); err != nil {
logrus.Warnf("Cannot move process %s to cgroup %q", pid, newCgroup)
}
}
}
return nil
}

func newProp(name string, units interface{}) systemdDbus.Property {
return systemdDbus.Property{
Name: name,
Expand Down
12 changes: 12 additions & 0 deletions utils/utils_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,15 @@ import "github.com/pkg/errors"
func RunUnderSystemdScope(pid int, slice string, unitName string) error {
return errors.New("not implemented for windows")
}

func MoveUnderCgroupSubtree(subtree string) error {
return errors.New("not implemented for windows")
}

func GetOwnCgroup() (string, error) {
return "", errors.New("not implemented for windows")
}

func GetCgroupProcess(pid int) (string, error) {
return "", errors.New("not implemented for windows")
}

0 comments on commit 6ac009d

Please sign in to comment.