Skip to content

Commit

Permalink
remove service container _after_ pods
Browse files Browse the repository at this point in the history
Do not allow for removing the service container unless all associated
pods have been removed.  Previously, the service container could be
removed when all pods have exited which can lead to a number of issues.

Now, the service container is treated like an infra container and can
only be removed along with the pods.

Also make sure that a pod is unlinked from the service container once
it's being removed.

Fixes: containers#16964
Signed-off-by: Valentin Rothberg <[email protected]>
  • Loading branch information
vrothberg committed Jan 9, 2023
1 parent bc6908e commit 4a7a45f
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 14 deletions.
12 changes: 7 additions & 5 deletions libpod/runtime_ctr.go
Original file line number Diff line number Diff line change
Expand Up @@ -687,11 +687,13 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
}

if c.IsService() {
canStop, err := c.canStopServiceContainer()
if err != nil {
return err
}
if !canStop {
for _, id := range c.state.Service.Pods {
if _, err := c.runtime.LookupPod(id); err != nil {
if errors.Is(err, define.ErrNoSuchPod) {
continue
}
return err
}
return fmt.Errorf("container %s is the service container of pod(s) %s and cannot be removed without removing the pod(s)", c.ID(), strings.Join(c.state.Service.Pods, ","))
}
}
Expand Down
44 changes: 35 additions & 9 deletions libpod/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ func (c *Container) canStopServiceContainer() (bool, error) {

status, err := pod.GetPodStatus()
if err != nil {
if errors.Is(err, define.ErrNoSuchPod) {
continue
}
return false, err
}

Expand All @@ -112,6 +115,9 @@ func (p *Pod) maybeStopServiceContainer() error {

serviceCtr, err := p.serviceContainer()
if err != nil {
if errors.Is(err, define.ErrNoSuchCtr) {
return nil
}
return fmt.Errorf("getting pod's service container: %w", err)
}
// Checking whether the service can be stopped must be done in
Expand Down Expand Up @@ -163,13 +169,7 @@ func (p *Pod) maybeStartServiceContainer(ctx context.Context) error {

// canRemoveServiceContainer returns true if all pods of the service are removed.
// Note that the method acquires the container lock.
func (c *Container) canRemoveServiceContainerLocked() (bool, error) {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
return false, err
}

func (c *Container) canRemoveServiceContainer() (bool, error) {
if !c.IsService() {
return false, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID())
}
Expand All @@ -188,23 +188,49 @@ func (c *Container) canRemoveServiceContainerLocked() (bool, error) {
}

// Checks whether the service container can be removed and does so.
// It also unlinks the pod from the service container.
func (p *Pod) maybeRemoveServiceContainer() error {
if !p.hasServiceContainer() {
return nil
}

serviceCtr, err := p.serviceContainer()
if err != nil {
if errors.Is(err, define.ErrNoSuchCtr) {
return nil
}
return fmt.Errorf("getting pod's service container: %w", err)
}
// Checking whether the service can be stopped must be done in
// the runtime's work queue to resolve ABBA dead locks in the
// pod->container->servicePods hierarchy.
p.runtime.queueWork(func() {
logrus.Debugf("Pod %s has a service %s: checking if it can be removed", p.ID(), serviceCtr.ID())
canRemove, err := serviceCtr.canRemoveServiceContainerLocked()
canRemove, err := func() (bool, error) { // Anonymous func for easy locking
serviceCtr.lock.Lock()
defer serviceCtr.lock.Unlock()
if err := serviceCtr.syncContainer(); err != nil {
return false, err
}

// Unlink the pod from the service container.
servicePods := make([]string, 0, len(serviceCtr.state.Service.Pods)-1)
for _, id := range serviceCtr.state.Service.Pods {
if id != p.ID() {
servicePods = append(servicePods, id)
}
}
serviceCtr.state.Service.Pods = servicePods
if err := serviceCtr.save(); err != nil {
return false, err
}

return serviceCtr.canRemoveServiceContainer()
}()
if err != nil {
logrus.Errorf("Checking whether service of container %s can be removed: %v", serviceCtr.ID(), err)
if !errors.Is(err, define.ErrNoSuchCtr) {
logrus.Errorf("Checking whether service container %s can be removed: %v", serviceCtr.ID(), err)
}
return
}
if !canRemove {
Expand Down
2 changes: 2 additions & 0 deletions test/system/700-play.bats
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ EOF
# Check for an error when trying to remove the service container
run_podman 125 container rm $service_container
is "$output" "Error: container .* is the service container of pod(s) .* and cannot be removed without removing the pod(s)"
run_podman 125 container rm --force $service_container
is "$output" "Error: container .* is the service container of pod(s) .* and cannot be removed without removing the pod(s)"

# Kill the pod and make sure the service is not running
run_podman pod kill test_pod
Expand Down

0 comments on commit 4a7a45f

Please sign in to comment.