Skip to content

Commit

Permalink
podman: add new cgroup mode conmon-delegated
Browse files Browse the repository at this point in the history
Add a new cgroup mode, conmon-delegated.

When running under systemd there is no need to create yet another
cgroup for the container.

With conmon-delegated the current cgroup will be split in two sub
cgroups:

- supervisor
- container

The supervisor cgroup will hold conmon and the podman process, while
the container cgroup is used by the OCI runtime (using the cgroupfs
backend).

Closes: containers#6400

Depends on: containers/crun#409

Signed-off-by: Giuseppe Scrivano <[email protected]>
  • Loading branch information
giuseppe committed Jun 18, 2020
1 parent b5f7afd commit 173e414
Show file tree
Hide file tree
Showing 10 changed files with 124 additions and 9 deletions.
2 changes: 1 addition & 1 deletion cmd/podman/common/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func GetCreateFlags(cf *ContainerCLIOpts) *pflag.FlagSet {
createFlags.StringVar(
&cf.CGroupsMode,
"cgroups", containerConfig.Cgroups(),
`control container cgroup configuration ("enabled"|"disabled"|"no-conmon")`,
`control container cgroup configuration ("enabled"|"disabled"|"no-conmon"|"conmon-delegated")`,
)
createFlags.StringVar(
&cf.CGroupParent,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/markdown/podman-create.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ If the host uses cgroups v1, the default is set to **host**. On cgroups v2 the
Determines whether the container will create CGroups.
Valid values are *enabled*, *disabled*, *no-conmon*, and *conmon-delegated*, with the default being *enabled*.
The *disabled* option will force the container to not create CGroups, and thus conflicts with CGroup options (**--cgroupns** and **--cgroup-parent**).
The *no-conmon* option disables a new CGroup only for the conmon process.
The *no-conmon* option disables a new CGroup only for the conmon process. The *conmon-delegated* option reuses the current cgroup for both conmon and the container payload; it works only on cgroup v2.

**--cgroup-parent**=*path*

Expand Down
3 changes: 2 additions & 1 deletion docs/source/markdown/podman-run.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,15 @@ Set the cgroup namespace mode for the container.

If the host uses cgroups v1, the default is set to **host**. On cgroups v2, the default is **private**.

**--cgroups**=**enabled**|**disabled**|**no-conmon**
**--cgroups**=**enabled**|**disabled**|**no-conmon**|**conmon-delegated**

Determines whether the container will create CGroups.

Default is **enabled**. The **disabled** option will force the container
to not create CGroups, and thus conflicts with CGroup options
(**--cgroupns** and **--cgroup-parent**).
The **no-conmon** option disables a new CGroup only for the **conmon** process.
The **conmon-delegated** option reuses the current cgroup for both **conmon** and the container payload; it works only on cgroup v2.

**--cgroup-parent**=*path*

Expand Down
14 changes: 13 additions & 1 deletion libpod/container_internal_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/containers/libpod/pkg/resolvconf"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/libpod/pkg/util"
"github.com/containers/libpod/utils"
"github.com/containers/storage/pkg/archive"
securejoin "github.com/cyphar/filepath-securejoin"
"github.com/opencontainers/runc/libcontainer/user"
Expand Down Expand Up @@ -1505,7 +1506,18 @@ func (c *Container) getOCICgroupPath() (string, error) {
case (rootless.IsRootless() && !unified) || c.config.NoCgroups:
return "", nil
case c.runtime.config.Engine.CgroupManager == config.SystemdCgroupsManager:
// When runc is set to use Systemd as a cgroup manager, it
if c.config.CgroupsMode == conmonDelegated {
if c.config.CgroupParent != "" {
return c.config.CgroupParent, nil
}
selfCgroup, err := utils.GetPidCgroupv2(0)
if err != nil {
return "", err
}
return filepath.Join(selfCgroup, "container"), nil
}

// When the OCI runtime is set to use Systemd as a cgroup manager, it
// expects cgroups to be passed as follows:
// slice:prefix:name
systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID())
Expand Down
7 changes: 7 additions & 0 deletions libpod/oci_conmon.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package libpod

// conmonDelegated is the name of the cgroups mode in which the current
// cgroup is reused for both conmon and the container payload.
const conmonDelegated = "conmon-delegated"
10 changes: 8 additions & 2 deletions libpod/oci_conmon_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -881,6 +881,12 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
return err
}

if ctr.config.CgroupsMode == conmonDelegated {
if err := utils.MoveUnderCgroup2Subtree("supervisor"); err != nil {
return err
}
}

args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), filepath.Join(ctr.state.RunDir, "pidfile"), ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)

if ctr.config.Spec.Process.Terminal {
Expand Down Expand Up @@ -1151,7 +1157,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
"--socket-dir-path", r.socketsDir,
}

if r.cgroupManager == config.SystemdCgroupsManager && !ctr.config.NoCgroups {
if r.cgroupManager == config.SystemdCgroupsManager && !ctr.config.NoCgroups && ctr.config.CgroupsMode != conmonDelegated {
args = append(args, "-s")
}

Expand Down Expand Up @@ -1253,7 +1259,7 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec

// If cgroup creation is disabled - just signal.
switch ctr.config.CgroupsMode {
case "disabled", "no-conmon":
case "disabled", "no-conmon", conmonDelegated:
mustCreateCgroup = false
}

Expand Down
2 changes: 1 addition & 1 deletion libpod/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -1049,7 +1049,7 @@ func WithCgroupsMode(mode string) CtrCreateOption {
case "disabled":
ctr.config.NoCgroups = true
ctr.config.CgroupsMode = mode
case "enabled", "no-conmon":
case "enabled", "no-conmon", conmonDelegated:
ctr.config.CgroupsMode = mode
default:
return errors.Wrapf(define.ErrInvalidArg, "Invalid cgroup mode %q", mode)
Expand Down
4 changes: 2 additions & 2 deletions libpod/runtime_ctr.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,9 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
}
ctr.config.CgroupParent = podCgroup
case rootless.IsRootless():
case rootless.IsRootless() && ctr.config.CgroupsMode != conmonDelegated:
ctr.config.CgroupParent = SystemdDefaultRootlessCgroupParent
default:
case ctr.config.CgroupsMode != conmonDelegated:
ctr.config.CgroupParent = SystemdDefaultCgroupParent
}
} else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
Expand Down
81 changes: 81 additions & 0 deletions utils/utils_supported.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,20 @@
package utils

import (
"bufio"
"bytes"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"

"github.com/containers/libpod/pkg/cgroups"
"github.com/containers/libpod/pkg/rootless"
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
"github.com/godbus/dbus/v5"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)

// RunUnderSystemdScope adds the specified pid to a systemd scope
Expand Down Expand Up @@ -43,6 +53,77 @@ func RunUnderSystemdScope(pid int, slice string, unitName string) error {
return nil
}

// GetPidCgroupv2 returns the unified (cgroup v2) cgroup of the specified pid,
// as found on the "0::" line of /proc/<pid>/cgroup.  A pid of 0 selects the
// calling process.
//
// An error is returned when the host is not in cgroup v2 unified mode, when
// the proc file cannot be opened or read, or when it has no "0::" entry.
func GetPidCgroupv2(pid int) (string, error) {
	if pid == 0 {
		pid = os.Getpid()
	}

	unified, err := cgroups.IsCgroup2UnifiedMode()
	if err != nil {
		return "", err
	}
	if !unified {
		return "", errors.New("move under subtree supported only on cgroup v2")
	}

	procFile := fmt.Sprintf("/proc/%d/cgroup", pid)
	f, err := os.Open(procFile)
	if err != nil {
		return "", errors.Wrapf(err, "open file %q", procFile)
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	cgroup := ""
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "0::") {
			cgroup = line[3:]
			break
		}
	}
	// Scan() returns false both at EOF and on a read error; surface the
	// latter instead of reporting it as a missing cgroup v2 entry.
	if err := scanner.Err(); err != nil {
		return "", errors.Wrapf(err, "read file %q", procFile)
	}
	if cgroup == "" {
		return "", errors.Errorf("could not find cgroup v2 mount in %q", procFile)
	}
	return cgroup, nil
}

// MoveUnderCgroup2Subtree moves every process listed in the calling process's
// current cgroup v2 cgroup into the child cgroup named subtree, creating that
// child under /sys/fs/cgroup if it does not exist yet.
//
// Failing to move an individual process is logged as a warning and does not
// abort the operation; failing to look up, read, create, or open the cgroups
// involved is returned as an error.
func MoveUnderCgroup2Subtree(subtree string) error {
	cgroup, err := GetPidCgroupv2(0)
	if err != nil {
		return err
	}

	cgroupRoot := "/sys/fs/cgroup"

	processes, err := ioutil.ReadFile(filepath.Join(cgroupRoot, cgroup, "cgroup.procs"))
	if err != nil {
		return err
	}

	newCgroup := filepath.Join(cgroupRoot, cgroup, subtree)
	// An already existing subtree is fine, it is simply reused.
	if err := os.Mkdir(newCgroup, 0755); err != nil && !os.IsExist(err) {
		return err
	}

	f, err := os.OpenFile(filepath.Join(newCgroup, "cgroup.procs"), os.O_RDWR, 0755)
	if err != nil {
		return err
	}
	defer f.Close()

	for _, pid := range bytes.Split(processes, []byte("\n")) {
		// The file is newline terminated, so the split yields a trailing
		// empty element that must not be written to cgroup.procs.
		if len(pid) == 0 {
			continue
		}
		if _, err := f.Write(pid); err != nil {
			logrus.Warnf("Cannot move process %s to cgroup %q: %v", pid, newCgroup, err)
		}
	}
	return nil
}

func newProp(name string, units interface{}) systemdDbus.Property {
return systemdDbus.Property{
Name: name,
Expand Down
8 changes: 8 additions & 0 deletions utils/utils_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,11 @@ import "github.com/pkg/errors"
// RunUnderSystemdScope is not available on Windows and always returns an
// error.
func RunUnderSystemdScope(pid int, slice string, unitName string) error {
	errUnsupported := errors.New("not implemented for windows")
	return errUnsupported
}

// MoveUnderCgroup2Subtree is not available on Windows and always returns an
// error.
func MoveUnderCgroup2Subtree(subtree string) error {
	errUnsupported := errors.New("not implemented for windows")
	return errUnsupported
}

// GetPidCgroupv2 is not available on Windows; it always returns an empty
// cgroup path together with an error.
func GetPidCgroupv2(pid int) (string, error) {
	var cgroup string
	return cgroup, errors.New("not implemented for windows")
}

0 comments on commit 173e414

Please sign in to comment.