From 4a7a45f97319abd2cfd4e140b0278b9c06d6a71b Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Thu, 5 Jan 2023 14:01:21 +0100 Subject: [PATCH] remove service container _after_ pods Do not allow for removing the service container unless all associated pods have been removed. Previously, the service container could be removed when all pods have exited which can lead to a number of issues. Now, the service container is treated like an infra container and can only be removed along with the pods. Also make sure that a pod is unlinked from the service container once it's being removed. Fixes: #16964 Signed-off-by: Valentin Rothberg --- libpod/runtime_ctr.go | 12 ++++++----- libpod/service.go | 44 +++++++++++++++++++++++++++++++-------- test/system/700-play.bats | 2 ++ 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index f44a01f0f7..4757471c98 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -687,11 +687,13 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo } if c.IsService() { - canStop, err := c.canStopServiceContainer() - if err != nil { - return err - } - if !canStop { + for _, id := range c.state.Service.Pods { + if _, err := c.runtime.LookupPod(id); err != nil { + if errors.Is(err, define.ErrNoSuchPod) { + continue + } + return err + } return fmt.Errorf("container %s is the service container of pod(s) %s and cannot be removed without removing the pod(s)", c.ID(), strings.Join(c.state.Service.Pods, ",")) } } diff --git a/libpod/service.go b/libpod/service.go index a8928277fd..e234254d59 100644 --- a/libpod/service.go +++ b/libpod/service.go @@ -89,6 +89,9 @@ func (c *Container) canStopServiceContainer() (bool, error) { status, err := pod.GetPodStatus() if err != nil { + if errors.Is(err, define.ErrNoSuchPod) { + continue + } return false, err } @@ -112,6 +115,9 @@ func (p *Pod) maybeStopServiceContainer() error { serviceCtr, err := p.serviceContainer() if err != nil { + if errors.Is(err, define.ErrNoSuchCtr) { + return nil + } return fmt.Errorf("getting pod's service container: %w", err) } // Checking whether the service can be stopped must be done in @@ -163,13 +169,7 @@ func (p *Pod) maybeStartServiceContainer(ctx context.Context) error { // canRemoveServiceContainer returns true if all pods of the service are removed. // Note that the method acquires the container lock. -func (c *Container) canRemoveServiceContainerLocked() (bool, error) { - c.lock.Lock() - defer c.lock.Unlock() - if err := c.syncContainer(); err != nil { - return false, err - } - +func (c *Container) canRemoveServiceContainer() (bool, error) { if !c.IsService() { return false, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID()) } @@ -188,6 +188,7 @@ func (c *Container) canRemoveServiceContainerLocked() (bool, error) { } // Checks whether the service container can be removed and does so. +// It also unlinks the pod from the service container. func (p *Pod) maybeRemoveServiceContainer() error { if !p.hasServiceContainer() { return nil @@ -195,6 +196,9 @@ func (p *Pod) maybeRemoveServiceContainer() error { serviceCtr, err := p.serviceContainer() if err != nil { + if errors.Is(err, define.ErrNoSuchCtr) { + return nil + } return fmt.Errorf("getting pod's service container: %w", err) } // Checking whether the service can be stopped must be done in @@ -202,9 +206,31 @@ func (p *Pod) maybeRemoveServiceContainer() error { // pod->container->servicePods hierarchy. p.runtime.queueWork(func() { logrus.Debugf("Pod %s has a service %s: checking if it can be removed", p.ID(), serviceCtr.ID()) - canRemove, err := serviceCtr.canRemoveServiceContainerLocked() + canRemove, err := func() (bool, error) { // Anonymous func for easy locking + serviceCtr.lock.Lock() + defer serviceCtr.lock.Unlock() + if err := serviceCtr.syncContainer(); err != nil { + return false, err + } + + // Unlink the pod from the service container. + servicePods := make([]string, 0, len(serviceCtr.state.Service.Pods)-1) + for _, id := range serviceCtr.state.Service.Pods { + if id != p.ID() { + servicePods = append(servicePods, id) + } + } + serviceCtr.state.Service.Pods = servicePods + if err := serviceCtr.save(); err != nil { + return false, err + } + + return serviceCtr.canRemoveServiceContainer() + }() if err != nil { - logrus.Errorf("Checking whether service of container %s can be removed: %v", serviceCtr.ID(), err) + if !errors.Is(err, define.ErrNoSuchCtr) { + logrus.Errorf("Checking whether service container %s can be removed: %v", serviceCtr.ID(), err) + } return } if !canRemove { diff --git a/test/system/700-play.bats b/test/system/700-play.bats index 7eb44c6323..f9651d2316 100644 --- a/test/system/700-play.bats +++ b/test/system/700-play.bats @@ -170,6 +170,8 @@ EOF # Check for an error when trying to remove the service container run_podman 125 container rm $service_container is "$output" "Error: container .* is the service container of pod(s) .* and cannot be removed without removing the pod(s)" + run_podman 125 container rm --force $service_container + is "$output" "Error: container .* is the service container of pod(s) .* and cannot be removed without removing the pod(s)" # Kill the pod and make sure the service is not running run_podman pod kill test_pod