Skip to content

Commit

Permalink
Merge pull request #19888 from giuseppe/fix-pod-lifecycle
Browse files Browse the repository at this point in the history
fix pod cgroup lifecycle
  • Loading branch information
rhatdan authored Sep 11, 2023
2 parents 8d37db7 + 0c75eac commit 8acd66c
Show file tree
Hide file tree
Showing 11 changed files with 288 additions and 170 deletions.
24 changes: 24 additions & 0 deletions cmd/podman/pods/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,15 @@ func create(cmd *cobra.Command, args []string) error {
return fmt.Errorf("cannot set share(%s) namespaces without an infra container", cmd.Flag("share").Value)
}
createOptions.Share = nil

infraOptions, err = containers.CreateInit(cmd, infraOptions, true)
if err != nil {
return err
}
err = common.ContainerToPodOptions(&infraOptions, &createOptions)
if err != nil {
return err
}
} else {
// reassign certain options for lbpod api, these need to be populated in spec
flags := cmd.Flags()
Expand Down Expand Up @@ -284,6 +293,21 @@ func create(cmd *cobra.Command, args []string) error {
return err
}
podSpec.Name = podName
} else {
ctrSpec := specgen.NewSpecGenerator("", false)
err = specgenutil.FillOutSpecGen(ctrSpec, &infraOptions, []string{})
if err != nil {
return err
}
// Marshall and Unmarshal the spec in order to map similar entities
wrapped, err := json.Marshal(ctrSpec)
if err != nil {
return err
}
err = json.Unmarshal(wrapped, podSpec)
if err != nil {
return err
}
}
PodSpec := entities.PodSpec{PodSpecGen: *podSpec}
response, err := registry.ContainerEngine().PodCreate(context.Background(), PodSpec)
Expand Down
13 changes: 13 additions & 0 deletions libpod/container_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,19 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
shutdown.Inhibit()
defer shutdown.Uninhibit()

// If the container is part of a pod, make sure the pod cgroup is created before the container
// so the limits can be applied.
if c.PodID() != "" {
pod, err := c.runtime.LookupPod(c.PodID())
if err != nil {
return err
}

if _, err := c.runtime.platformMakePod(pod, &pod.config.ResourceLimits); err != nil {
return err
}
}

// With the spec complete, do an OCI create
if _, err = c.ociRuntime.CreateContainer(c, nil); err != nil {
return err
Expand Down
3 changes: 0 additions & 3 deletions libpod/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,9 +361,6 @@ func (p *Pod) CgroupPath() (string, error) {
if err := p.updatePod(); err != nil {
return "", err
}
if p.state.InfraContainerID == "" {
return "", fmt.Errorf("pod has no infra container: %w", define.ErrNoSuchCtr)
}
return p.state.CgroupPath, nil
}

Expand Down
7 changes: 7 additions & 0 deletions libpod/pod_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,13 @@ func (p *Pod) stopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
return nil, err
}

if err := p.updatePod(); err != nil {
return nil, err
}
if err := p.removePodCgroup(); err != nil {
return nil, err
}

return nil, nil
}

Expand Down
135 changes: 72 additions & 63 deletions libpod/runtime_pod_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@ import (
"context"
"errors"
"fmt"
"path"
"path/filepath"

"github.com/containers/common/pkg/cgroups"
"github.com/containers/common/pkg/config"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/events"
"github.com/containers/podman/v4/pkg/specgen"
"github.com/containers/podman/v4/utils"
"github.com/hashicorp/go-multierror"
"github.com/sirupsen/logrus"
)
Expand Down Expand Up @@ -56,9 +58,13 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option

pod.valid = true

if err := r.platformMakePod(pod, p); err != nil {
parentCgroup, err := r.platformMakePod(pod, p.ResourceLimits)
if err != nil {
return nil, err
}
if p.InfraContainerSpec != nil {
p.InfraContainerSpec.CgroupParent = parentCgroup
}

if !pod.HasInfraContainer() && pod.SharesNamespaces() {
return nil, errors.New("Pods must have an infra container to share namespaces")
Expand Down Expand Up @@ -192,6 +198,65 @@ func (r *Runtime) removeMalformedPod(ctx context.Context, p *Pod, ctrs []*Contai
return removedCtrs, nil
}

func (p *Pod) removePodCgroup() error {
// Remove pod cgroup, if present
if p.state.CgroupPath == "" {
return nil
}
logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)

cgroup, err := utils.GetOwnCgroup()
if err != nil {
return err
}

// if we are trying to delete a cgroup that is our ancestor, we need to move the
// current process out of it before the cgroup is destroyed.
if isSubDir(cgroup, string(filepath.Separator)+p.state.CgroupPath) {
parent := path.Dir(p.state.CgroupPath)
if err := utils.MoveUnderCgroup(parent, "cleanup", nil); err != nil {
return err
}
}

switch p.runtime.config.Engine.CgroupManager {
case config.SystemdCgroupsManager:
if err := deleteSystemdCgroup(p.state.CgroupPath, p.ResourceLim()); err != nil {
return fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
}
case config.CgroupfsCgroupsManager:
// Delete the cgroupfs cgroup
// Make sure the conmon cgroup is deleted first
// Since the pod is almost gone, don't bother failing
// hard - instead, just log errors.
conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
conmonCgroup, err := cgroups.Load(conmonCgroupPath)
if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
return fmt.Errorf("retrieving pod %s conmon cgroup: %w", p.ID(), err)
}
if err == nil {
if err = conmonCgroup.Delete(); err != nil {
return fmt.Errorf("removing pod %s conmon cgroup: %w", p.ID(), err)
}
}
cgroup, err := cgroups.Load(p.state.CgroupPath)
if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
return fmt.Errorf("retrieving pod %s cgroup: %w", p.ID(), err)
}
if err == nil {
if err := cgroup.Delete(); err != nil {
return fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
}
}
default:
// This should be caught much earlier, but let's still
// keep going so we make sure to evict the pod before
// ending up with an inconsistent state.
return fmt.Errorf("unrecognized cgroup manager %s when removing pod %s cgroups: %w", p.runtime.config.Engine.CgroupManager, p.ID(), define.ErrInternal)
}
return nil
}

func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) (map[string]error, error) {
removedCtrs := make(map[string]error)

Expand Down Expand Up @@ -269,68 +334,12 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
}
}

// Remove pod cgroup, if present
if p.state.CgroupPath != "" {
logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)

switch p.runtime.config.Engine.CgroupManager {
case config.SystemdCgroupsManager:
if err := deleteSystemdCgroup(p.state.CgroupPath, p.ResourceLim()); err != nil {
if removalErr == nil {
removalErr = fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
}
}
case config.CgroupfsCgroupsManager:
// Delete the cgroupfs cgroup
// Make sure the conmon cgroup is deleted first
// Since the pod is almost gone, don't bother failing
// hard - instead, just log errors.
conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
conmonCgroup, err := cgroups.Load(conmonCgroupPath)
if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
if removalErr == nil {
removalErr = fmt.Errorf("retrieving pod %s conmon cgroup: %w", p.ID(), err)
} else {
logrus.Debugf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
}
}
if err == nil {
if err = conmonCgroup.Delete(); err != nil {
if removalErr == nil {
removalErr = fmt.Errorf("removing pod %s conmon cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Deleting pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
}
}
}
cgroup, err := cgroups.Load(p.state.CgroupPath)
if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
if removalErr == nil {
removalErr = fmt.Errorf("retrieving pod %s cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Retrieving pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
}
}
if err == nil {
if err := cgroup.Delete(); err != nil {
if removalErr == nil {
removalErr = fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
}
}
}
default:
// This should be caught much earlier, but let's still
// keep going so we make sure to evict the pod before
// ending up with an inconsistent state.
if removalErr == nil {
removalErr = fmt.Errorf("unrecognized cgroup manager %s when removing pod %s cgroups: %w", p.runtime.config.Engine.CgroupManager, p.ID(), define.ErrInternal)
} else {
logrus.Errorf("Unknown cgroups manager %s specified - cannot remove pod %s cgroup", p.runtime.config.Engine.CgroupManager, p.ID())
}
// Remove pod cgroup
if err := p.removePodCgroup(); err != nil {
if removalErr == nil {
removalErr = fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
}
}

Expand Down
6 changes: 3 additions & 3 deletions libpod/runtime_pod_freebsd.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package libpod

import (
"github.com/containers/podman/v4/pkg/specgen"
spec "github.com/opencontainers/runtime-spec/specs-go"
)

func (r *Runtime) platformMakePod(pod *Pod, p specgen.PodSpecGenerator) error {
return nil
func (r *Runtime) platformMakePod(pod *Pod, resourceLimits *spec.LinuxResources) (string, error) {
return "", nil
}
59 changes: 28 additions & 31 deletions libpod/runtime_pod_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@ import (
"github.com/containers/common/pkg/config"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/podman/v4/pkg/specgen"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)

func (r *Runtime) platformMakePod(pod *Pod, p specgen.PodSpecGenerator) error {
func (r *Runtime) platformMakePod(pod *Pod, resourceLimits *spec.LinuxResources) (string, error) {
cgroupParent := ""
// Check Cgroup parent sanity, and set it if it was not set
if r.config.Cgroups() != "disabled" {
switch r.config.Engine.CgroupManager {
Expand All @@ -25,32 +26,30 @@ func (r *Runtime) platformMakePod(pod *Pod, p specgen.PodSpecGenerator) error {
if pod.config.CgroupParent == "" {
pod.config.CgroupParent = CgroupfsDefaultCgroupParent
} else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
return fmt.Errorf("systemd slice received as cgroup parent when using cgroupfs: %w", define.ErrInvalidArg)
return "", fmt.Errorf("systemd slice received as cgroup parent when using cgroupfs: %w", define.ErrInvalidArg)
}
// If we are set to use pod cgroups, set the cgroup parent that
// all containers in the pod will share
if pod.config.UsePodCgroup {
pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID())
if p.InfraContainerSpec != nil {
p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath
// cgroupfs + rootless = permission denied when creating the cgroup.
if !rootless.IsRootless() {
res, err := GetLimits(p.ResourceLimits)
if err != nil {
return err
}
// Need to both create and update the cgroup
// rather than create a new path in c/common for pod cgroup creation
// just create as if it is a ctr and then update figures out that we need to
// populate the resource limits on the pod level
cgc, err := cgroups.New(pod.state.CgroupPath, &res)
if err != nil {
return err
}
err = cgc.Update(&res)
if err != nil {
return err
}
cgroupParent = pod.state.CgroupPath
// cgroupfs + rootless = permission denied when creating the cgroup.
if !rootless.IsRootless() {
res, err := GetLimits(resourceLimits)
if err != nil {
return "", err
}
// Need to both create and update the cgroup
// rather than create a new path in c/common for pod cgroup creation
// just create as if it is a ctr and then update figures out that we need to
// populate the resource limits on the pod level
cgc, err := cgroups.New(pod.state.CgroupPath, &res)
if err != nil {
return "", err
}
err = cgc.Update(&res)
if err != nil {
return "", err
}
}
}
Expand All @@ -63,28 +62,26 @@ func (r *Runtime) platformMakePod(pod *Pod, p specgen.PodSpecGenerator) error {
pod.config.CgroupParent = SystemdDefaultCgroupParent
}
} else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
return fmt.Errorf("did not receive systemd slice as cgroup parent when using systemd to manage cgroups: %w", define.ErrInvalidArg)
return "", fmt.Errorf("did not receive systemd slice as cgroup parent when using systemd to manage cgroups: %w", define.ErrInvalidArg)
}
// If we are set to use pod cgroups, set the cgroup parent that
// all containers in the pod will share
if pod.config.UsePodCgroup {
cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()), p.ResourceLimits)
cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()), resourceLimits)
if err != nil {
return fmt.Errorf("unable to create pod cgroup for pod %s: %w", pod.ID(), err)
return "", fmt.Errorf("unable to create pod cgroup for pod %s: %w", pod.ID(), err)
}
pod.state.CgroupPath = cgroupPath
if p.InfraContainerSpec != nil {
p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath
}
cgroupParent = pod.state.CgroupPath
}
default:
return fmt.Errorf("unsupported Cgroup manager: %s - cannot validate cgroup parent: %w", r.config.Engine.CgroupManager, define.ErrInvalidArg)
return "", fmt.Errorf("unsupported Cgroup manager: %s - cannot validate cgroup parent: %w", r.config.Engine.CgroupManager, define.ErrInvalidArg)
}
}

if pod.config.UsePodCgroup {
logrus.Debugf("Got pod cgroup as %s", pod.state.CgroupPath)
}

return nil
return cgroupParent, nil
}
Loading

0 comments on commit 8acd66c

Please sign in to comment.