Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix pod cgroup lifecycle #19888

Merged
merged 10 commits into from
Sep 11, 2023
24 changes: 24 additions & 0 deletions cmd/podman/pods/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,15 @@ func create(cmd *cobra.Command, args []string) error {
return fmt.Errorf("cannot set share(%s) namespaces without an infra container", cmd.Flag("share").Value)
}
createOptions.Share = nil

infraOptions, err = containers.CreateInit(cmd, infraOptions, true)
if err != nil {
return err
}
err = common.ContainerToPodOptions(&infraOptions, &createOptions)
if err != nil {
return err
}
} else {
// reassign certain options for lbpod api, these need to be populated in spec
flags := cmd.Flags()
Expand Down Expand Up @@ -280,6 +289,21 @@ func create(cmd *cobra.Command, args []string) error {
return err
}
podSpec.Name = podName
} else {
ctrSpec := specgen.NewSpecGenerator("", false)
err = specgenutil.FillOutSpecGen(ctrSpec, &infraOptions, []string{})
if err != nil {
return err
}
// Marshall and Unmarshal the spec in order to map similar entities
wrapped, err := json.Marshal(ctrSpec)
if err != nil {
return err
}
err = json.Unmarshal(wrapped, podSpec)
if err != nil {
return err
}
}
PodSpec := entities.PodSpec{PodSpecGen: *podSpec}
response, err := registry.ContainerEngine().PodCreate(context.Background(), PodSpec)
Expand Down
13 changes: 13 additions & 0 deletions libpod/container_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,19 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
shutdown.Inhibit()
defer shutdown.Uninhibit()

// If the container is part of a pod, make sure the pod cgroup is created before the container
// so the limits can be applied.
if c.PodID() != "" {
pod, err := c.runtime.LookupPod(c.PodID())
if err != nil {
return err
}

if _, err := c.runtime.platformMakePod(pod, &pod.config.ResourceLimits); err != nil {
return err
}
}

// With the spec complete, do an OCI create
if _, err = c.ociRuntime.CreateContainer(c, nil); err != nil {
return err
Expand Down
3 changes: 0 additions & 3 deletions libpod/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,9 +361,6 @@ func (p *Pod) CgroupPath() (string, error) {
if err := p.updatePod(); err != nil {
return "", err
}
if p.state.InfraContainerID == "" {
return "", fmt.Errorf("pod has no infra container: %w", define.ErrNoSuchCtr)
}
return p.state.CgroupPath, nil
}

Expand Down
7 changes: 7 additions & 0 deletions libpod/pod_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,13 @@ func (p *Pod) stopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
return nil, err
}

if err := p.updatePod(); err != nil {
return nil, err
}
if err := p.removePodCgroup(); err != nil {
return nil, err
}

return nil, nil
}

Expand Down
135 changes: 72 additions & 63 deletions libpod/runtime_pod_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@ import (
"context"
"errors"
"fmt"
"path"
"path/filepath"

"github.com/containers/common/pkg/cgroups"
"github.com/containers/common/pkg/config"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/events"
"github.com/containers/podman/v4/pkg/specgen"
"github.com/containers/podman/v4/utils"
"github.com/hashicorp/go-multierror"
"github.com/sirupsen/logrus"
)
Expand Down Expand Up @@ -56,9 +58,13 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option

pod.valid = true

if err := r.platformMakePod(pod, p); err != nil {
parentCgroup, err := r.platformMakePod(pod, p.ResourceLimits)
if err != nil {
return nil, err
}
if p.InfraContainerSpec != nil {
p.InfraContainerSpec.CgroupParent = parentCgroup
}

if !pod.HasInfraContainer() && pod.SharesNamespaces() {
return nil, errors.New("Pods must have an infra container to share namespaces")
Expand Down Expand Up @@ -192,6 +198,65 @@ func (r *Runtime) removeMalformedPod(ctx context.Context, p *Pod, ctrs []*Contai
return removedCtrs, nil
}

func (p *Pod) removePodCgroup() error {
// Remove pod cgroup, if present
if p.state.CgroupPath == "" {
return nil
}
logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)

cgroup, err := utils.GetOwnCgroup()
if err != nil {
return err
}

// if we are trying to delete a cgroup that is our ancestor, we need to move the
// current process out of it before the cgroup is destroyed.
if isSubDir(cgroup, string(filepath.Separator)+p.state.CgroupPath) {
parent := path.Dir(p.state.CgroupPath)
if err := utils.MoveUnderCgroup(parent, "cleanup", nil); err != nil {
return err
}
}

switch p.runtime.config.Engine.CgroupManager {
case config.SystemdCgroupsManager:
if err := deleteSystemdCgroup(p.state.CgroupPath, p.ResourceLim()); err != nil {
return fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
}
case config.CgroupfsCgroupsManager:
// Delete the cgroupfs cgroup
// Make sure the conmon cgroup is deleted first
// Since the pod is almost gone, don't bother failing
// hard - instead, just log errors.
conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
conmonCgroup, err := cgroups.Load(conmonCgroupPath)
if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
return fmt.Errorf("retrieving pod %s conmon cgroup: %w", p.ID(), err)
}
if err == nil {
if err = conmonCgroup.Delete(); err != nil {
return fmt.Errorf("removing pod %s conmon cgroup: %w", p.ID(), err)
}
}
cgroup, err := cgroups.Load(p.state.CgroupPath)
if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
return fmt.Errorf("retrieving pod %s cgroup: %w", p.ID(), err)
}
if err == nil {
if err := cgroup.Delete(); err != nil {
return fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
}
}
default:
// This should be caught much earlier, but let's still
// keep going so we make sure to evict the pod before
// ending up with an inconsistent state.
return fmt.Errorf("unrecognized cgroup manager %s when removing pod %s cgroups: %w", p.runtime.config.Engine.CgroupManager, p.ID(), define.ErrInternal)
}
return nil
}

func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) (map[string]error, error) {
removedCtrs := make(map[string]error)

Expand Down Expand Up @@ -269,68 +334,12 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
}
}

// Remove pod cgroup, if present
if p.state.CgroupPath != "" {
logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)

switch p.runtime.config.Engine.CgroupManager {
case config.SystemdCgroupsManager:
if err := deleteSystemdCgroup(p.state.CgroupPath, p.ResourceLim()); err != nil {
if removalErr == nil {
removalErr = fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
}
}
case config.CgroupfsCgroupsManager:
// Delete the cgroupfs cgroup
// Make sure the conmon cgroup is deleted first
// Since the pod is almost gone, don't bother failing
// hard - instead, just log errors.
conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
conmonCgroup, err := cgroups.Load(conmonCgroupPath)
if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
if removalErr == nil {
removalErr = fmt.Errorf("retrieving pod %s conmon cgroup: %w", p.ID(), err)
} else {
logrus.Debugf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
}
}
if err == nil {
if err = conmonCgroup.Delete(); err != nil {
if removalErr == nil {
removalErr = fmt.Errorf("removing pod %s conmon cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Deleting pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
}
}
}
cgroup, err := cgroups.Load(p.state.CgroupPath)
if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
if removalErr == nil {
removalErr = fmt.Errorf("retrieving pod %s cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Retrieving pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
}
}
if err == nil {
if err := cgroup.Delete(); err != nil {
if removalErr == nil {
removalErr = fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
}
}
}
default:
// This should be caught much earlier, but let's still
// keep going so we make sure to evict the pod before
// ending up with an inconsistent state.
if removalErr == nil {
removalErr = fmt.Errorf("unrecognized cgroup manager %s when removing pod %s cgroups: %w", p.runtime.config.Engine.CgroupManager, p.ID(), define.ErrInternal)
} else {
logrus.Errorf("Unknown cgroups manager %s specified - cannot remove pod %s cgroup", p.runtime.config.Engine.CgroupManager, p.ID())
}
// Remove pod cgroup
if err := p.removePodCgroup(); err != nil {
if removalErr == nil {
removalErr = fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
}
}

Expand Down
6 changes: 3 additions & 3 deletions libpod/runtime_pod_freebsd.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package libpod

import (
"github.com/containers/podman/v4/pkg/specgen"
spec "github.com/opencontainers/runtime-spec/specs-go"
)

func (r *Runtime) platformMakePod(pod *Pod, p specgen.PodSpecGenerator) error {
return nil
func (r *Runtime) platformMakePod(pod *Pod, resourceLimits *spec.LinuxResources) (string, error) {
return "", nil
}
59 changes: 28 additions & 31 deletions libpod/runtime_pod_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@ import (
"github.com/containers/common/pkg/config"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/podman/v4/pkg/specgen"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)

func (r *Runtime) platformMakePod(pod *Pod, p specgen.PodSpecGenerator) error {
func (r *Runtime) platformMakePod(pod *Pod, resourceLimits *spec.LinuxResources) (string, error) {
cgroupParent := ""
// Check Cgroup parent sanity, and set it if it was not set
if r.config.Cgroups() != "disabled" {
switch r.config.Engine.CgroupManager {
Expand All @@ -25,32 +26,30 @@ func (r *Runtime) platformMakePod(pod *Pod, p specgen.PodSpecGenerator) error {
if pod.config.CgroupParent == "" {
pod.config.CgroupParent = CgroupfsDefaultCgroupParent
} else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
return fmt.Errorf("systemd slice received as cgroup parent when using cgroupfs: %w", define.ErrInvalidArg)
return "", fmt.Errorf("systemd slice received as cgroup parent when using cgroupfs: %w", define.ErrInvalidArg)
}
// If we are set to use pod cgroups, set the cgroup parent that
// all containers in the pod will share
if pod.config.UsePodCgroup {
pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID())
if p.InfraContainerSpec != nil {
p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath
// cgroupfs + rootless = permission denied when creating the cgroup.
if !rootless.IsRootless() {
res, err := GetLimits(p.ResourceLimits)
if err != nil {
return err
}
// Need to both create and update the cgroup
// rather than create a new path in c/common for pod cgroup creation
// just create as if it is a ctr and then update figures out that we need to
// populate the resource limits on the pod level
cgc, err := cgroups.New(pod.state.CgroupPath, &res)
if err != nil {
return err
}
err = cgc.Update(&res)
if err != nil {
return err
}
cgroupParent = pod.state.CgroupPath
// cgroupfs + rootless = permission denied when creating the cgroup.
if !rootless.IsRootless() {
res, err := GetLimits(resourceLimits)
if err != nil {
return "", err
}
// Need to both create and update the cgroup
// rather than create a new path in c/common for pod cgroup creation
// just create as if it is a ctr and then update figures out that we need to
// populate the resource limits on the pod level
cgc, err := cgroups.New(pod.state.CgroupPath, &res)
if err != nil {
return "", err
}
err = cgc.Update(&res)
if err != nil {
return "", err
}
}
}
Expand All @@ -63,28 +62,26 @@ func (r *Runtime) platformMakePod(pod *Pod, p specgen.PodSpecGenerator) error {
pod.config.CgroupParent = SystemdDefaultCgroupParent
}
} else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
return fmt.Errorf("did not receive systemd slice as cgroup parent when using systemd to manage cgroups: %w", define.ErrInvalidArg)
return "", fmt.Errorf("did not receive systemd slice as cgroup parent when using systemd to manage cgroups: %w", define.ErrInvalidArg)
}
// If we are set to use pod cgroups, set the cgroup parent that
// all containers in the pod will share
if pod.config.UsePodCgroup {
cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()), p.ResourceLimits)
cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()), resourceLimits)
if err != nil {
return fmt.Errorf("unable to create pod cgroup for pod %s: %w", pod.ID(), err)
return "", fmt.Errorf("unable to create pod cgroup for pod %s: %w", pod.ID(), err)
}
pod.state.CgroupPath = cgroupPath
if p.InfraContainerSpec != nil {
p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath
}
cgroupParent = pod.state.CgroupPath
}
default:
return fmt.Errorf("unsupported Cgroup manager: %s - cannot validate cgroup parent: %w", r.config.Engine.CgroupManager, define.ErrInvalidArg)
return "", fmt.Errorf("unsupported Cgroup manager: %s - cannot validate cgroup parent: %w", r.config.Engine.CgroupManager, define.ErrInvalidArg)
}
}

if pod.config.UsePodCgroup {
logrus.Debugf("Got pod cgroup as %s", pod.state.CgroupPath)
}

return nil
return cgroupParent, nil
}
Loading