Skip to content

Commit

Permalink
container removal: handle already removed containers
Browse files Browse the repository at this point in the history
Since commit d54478d, a container's lock is released before
attempting to stop it via the OCI runtime.  This opened the window
for various kinds of race conditions.  One of them led to containers#9479 where
the removal+cleanup sequences of a `run --rm` session overlapped with
`rm -af`.  Make both execution paths more robust by handling the case of
an already removed container.

Fixes: containers#9479
Signed-off-by: Valentin Rothberg <[email protected]>
  • Loading branch information
vrothberg committed Feb 23, 2021
1 parent 96fc9d9 commit e5ac28f
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 9 deletions.
16 changes: 10 additions & 6 deletions libpod/container_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -954,18 +954,22 @@ func (c *Container) removeAllExecSessions() error {
}
// Delete all exec sessions
if err := c.runtime.state.RemoveContainerExecSessions(c); err != nil {
if lastErr != nil {
logrus.Errorf("Error stopping container %s exec sessions: %v", c.ID(), lastErr)
if errors.Cause(err) != define.ErrCtrRemoved {
if lastErr != nil {
logrus.Errorf("Error stopping container %s exec sessions: %v", c.ID(), lastErr)
}
lastErr = err
}
lastErr = err
}
c.state.ExecSessions = nil
c.state.LegacyExecSessions = nil
if err := c.save(); err != nil {
if lastErr != nil {
logrus.Errorf("Error stopping container %s exec sessions: %v", c.ID(), lastErr)
if errors.Cause(err) != define.ErrCtrRemoved {
if lastErr != nil {
logrus.Errorf("Error stopping container %s exec sessions: %v", c.ID(), lastErr)
}
lastErr = err
}
lastErr = err
}

return lastErr
Expand Down
12 changes: 9 additions & 3 deletions pkg/domain/infra/abi/containers.go
Original file line number Diff line number Diff line change
Expand Up @@ -319,12 +319,18 @@ func (ic *ContainerEngine) ContainerRm(ctx context.Context, namesOrIds []string,

errMap, err := parallelctr.ContainerOp(ctx, ctrs, func(c *libpod.Container) error {
err := ic.Libpod.RemoveContainer(ctx, c, options.Force, options.Volumes)
if err != nil {
if options.Ignore && errors.Cause(err) == define.ErrNoSuchCtr {
if err == nil {
return nil
}
logrus.Debugf("Failed to remove container %s: %s", c.ID(), err.Error())
switch errors.Cause(err) {
case define.ErrNoSuchCtr:
if options.Ignore {
logrus.Debugf("Ignoring error (--allow-missing): %v", err)
return nil
}
logrus.Debugf("Failed to remove container %s: %s", c.ID(), err.Error())
case define.ErrCtrRemoved:
return nil
}
return err
})
Expand Down
7 changes: 7 additions & 0 deletions test/system/055-rm.bats
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ load helpers
run_podman rm $rand $external_cid
}

@test "podman rm <-> run --rm race" {
# Regression test for #9479.
#
# A container's lock is released before attempting to stop it. This opens
# the window for race conditions: the removal+cleanup of a `run --rm`
# container can overlap with a concurrent `rm -af`.
#
# Start a detached container with --rm, then immediately force-remove all
# containers so that both removal paths may hit the same container.
run_podman run --rm -d $IMAGE sleep infinity
run_podman rm -af
}

# I'm sorry! This test takes 13 seconds. There's not much I can do about it,
# please know that I think it's justified: podman 1.5.0 had a strange bug
# in which exit status was not preserved on some code paths with 'rm -f'
Expand Down

0 comments on commit e5ac28f

Please sign in to comment.