Skip to content

Commit

Permalink
Merge pull request #17003 from vrothberg/fix-16964
Browse files Browse the repository at this point in the history
remove service container _after_ pods
  • Loading branch information
openshift-merge-robot authored Jan 9, 2023
2 parents 31e22aa + 4a7a45f commit f7c9f93
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 14 deletions.
12 changes: 7 additions & 5 deletions libpod/runtime_ctr.go
Original file line number Diff line number Diff line change
Expand Up @@ -687,11 +687,13 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
}

if c.IsService() {
canStop, err := c.canStopServiceContainer()
if err != nil {
return err
}
if !canStop {
for _, id := range c.state.Service.Pods {
if _, err := c.runtime.LookupPod(id); err != nil {
if errors.Is(err, define.ErrNoSuchPod) {
continue
}
return err
}
return fmt.Errorf("container %s is the service container of pod(s) %s and cannot be removed without removing the pod(s)", c.ID(), strings.Join(c.state.Service.Pods, ","))
}
}
Expand Down
44 changes: 35 additions & 9 deletions libpod/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ func (c *Container) canStopServiceContainer() (bool, error) {

status, err := pod.GetPodStatus()
if err != nil {
if errors.Is(err, define.ErrNoSuchPod) {
continue
}
return false, err
}

Expand All @@ -112,6 +115,9 @@ func (p *Pod) maybeStopServiceContainer() error {

serviceCtr, err := p.serviceContainer()
if err != nil {
if errors.Is(err, define.ErrNoSuchCtr) {
return nil
}
return fmt.Errorf("getting pod's service container: %w", err)
}
// Checking whether the service can be stopped must be done in
Expand Down Expand Up @@ -163,13 +169,7 @@ func (p *Pod) maybeStartServiceContainer(ctx context.Context) error {

// canRemoveServiceContainer returns true if all pods of the service are removed.
// Note that the method acquires the container lock.
func (c *Container) canRemoveServiceContainerLocked() (bool, error) {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
return false, err
}

func (c *Container) canRemoveServiceContainer() (bool, error) {
if !c.IsService() {
return false, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID())
}
Expand All @@ -188,23 +188,49 @@ func (c *Container) canRemoveServiceContainerLocked() (bool, error) {
}

// Checks whether the service container can be removed and does so.
// It also unlinks the pod from the service container.
func (p *Pod) maybeRemoveServiceContainer() error {
if !p.hasServiceContainer() {
return nil
}

serviceCtr, err := p.serviceContainer()
if err != nil {
if errors.Is(err, define.ErrNoSuchCtr) {
return nil
}
return fmt.Errorf("getting pod's service container: %w", err)
}
// Checking whether the service can be stopped must be done in
// the runtime's work queue to resolve ABBA dead locks in the
// pod->container->servicePods hierarchy.
p.runtime.queueWork(func() {
logrus.Debugf("Pod %s has a service %s: checking if it can be removed", p.ID(), serviceCtr.ID())
canRemove, err := serviceCtr.canRemoveServiceContainerLocked()
canRemove, err := func() (bool, error) { // Anonymous func for easy locking
serviceCtr.lock.Lock()
defer serviceCtr.lock.Unlock()
if err := serviceCtr.syncContainer(); err != nil {
return false, err
}

// Unlink the pod from the service container.
servicePods := make([]string, 0, len(serviceCtr.state.Service.Pods)-1)
for _, id := range serviceCtr.state.Service.Pods {
if id != p.ID() {
servicePods = append(servicePods, id)
}
}
serviceCtr.state.Service.Pods = servicePods
if err := serviceCtr.save(); err != nil {
return false, err
}

return serviceCtr.canRemoveServiceContainer()
}()
if err != nil {
logrus.Errorf("Checking whether service of container %s can be removed: %v", serviceCtr.ID(), err)
if !errors.Is(err, define.ErrNoSuchCtr) {
logrus.Errorf("Checking whether service container %s can be removed: %v", serviceCtr.ID(), err)
}
return
}
if !canRemove {
Expand Down
2 changes: 2 additions & 0 deletions test/system/700-play.bats
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ EOF
# Check for an error when trying to remove the service container
run_podman 125 container rm $service_container
is "$output" "Error: container .* is the service container of pod(s) .* and cannot be removed without removing the pod(s)"
run_podman 125 container rm --force $service_container
is "$output" "Error: container .* is the service container of pod(s) .* and cannot be removed without removing the pod(s)"

# Kill the pod and make sure the service is not running
run_podman pod kill test_pod
Expand Down

0 comments on commit f7c9f93

Please sign in to comment.