Skip to content

Commit

Permalink
Merge pull request #17165 from vrothberg/fix-17142
Browse files Browse the repository at this point in the history
StopContainer: return if cleanup process changed state
  • Loading branch information
openshift-merge-robot authored Jan 19, 2023
2 parents 17f89c9 + 4faa139 commit 3276c36
Showing 1 changed file with 33 additions and 40 deletions.
73 changes: 33 additions & 40 deletions libpod/oci_conmon_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,12 +400,11 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool)
return nil
}

stopSignal := ctr.config.StopSignal
if stopSignal == 0 {
stopSignal = uint(syscall.SIGTERM)
}

if timeout > 0 {
stopSignal := ctr.config.StopSignal
if stopSignal == 0 {
stopSignal = uint(syscall.SIGTERM)
}
if err := r.KillContainer(ctr, stopSignal, all); err != nil {
// Is the container gone?
// If so, it probably died between the first check and
Expand All @@ -428,21 +427,26 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool)
}
}

// If the timeout was set to 0 or if stopping the container with the
// specified signal did not work, use the big hammer with SIGKILL.
if err := r.KillContainer(ctr, uint(unix.SIGKILL), all); err != nil {
// Ignore the error if KillContainer complains about it already
// being stopped or exited. There's an inherent race with the
// cleanup process (see #16142).
if !(errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited)) {
// If the PID is 0, then the container is already stopped.
if ctr.state.PID == 0 {
return nil
}
// Again, check if the container is gone. If it is, exit cleanly.
if aliveErr := unix.Kill(ctr.state.PID, 0); errors.Is(aliveErr, unix.ESRCH) {
return nil
}
return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err)
// There's an inherent race with the cleanup process (see
// #16142, #17142). If the container has already been marked as
// stopped or exited by the cleanup process, we can return
// immediately.
if errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
return nil
}

// If the PID is 0, then the container is already stopped.
if ctr.state.PID == 0 {
return nil
}
// Again, check if the container is gone. If it is, exit cleanly.
if aliveErr := unix.Kill(ctr.state.PID, 0); errors.Is(aliveErr, unix.ESRCH) {
return nil
}
return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err)
}

// Give runtime a few seconds to make it happen
Expand Down Expand Up @@ -949,31 +953,20 @@ func waitContainerStop(ctr *Container, timeout time.Duration) error {

// Wait for a given PID to stop
func waitPidStop(pid int, timeout time.Duration) error {
done := make(chan struct{})
chControl := make(chan struct{})
go func() {
for {
select {
case <-chControl:
return
default:
if err := unix.Kill(pid, 0); err != nil {
if err == unix.ESRCH {
close(done)
return
}
logrus.Errorf("Pinging PID %d with signal 0: %v", pid, err)
timer := time.NewTimer(timeout)
for {
select {
case <-timer.C:
return fmt.Errorf("given PID did not die within timeout")
default:
if err := unix.Kill(pid, 0); err != nil {
if err == unix.ESRCH {
return nil
}
time.Sleep(100 * time.Millisecond)
logrus.Errorf("Pinging PID %d with signal 0: %v", pid, err)
}
time.Sleep(10 * time.Millisecond)
}
}()
select {
case <-done:
return nil
case <-time.After(timeout):
close(chControl)
return fmt.Errorf("given PIDs did not die within timeout")
}
}

Expand Down

0 comments on commit 3276c36

Please sign in to comment.