diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go index d803276b15..95065bf28c 100644 --- a/libpod/oci_conmon_common.go +++ b/libpod/oci_conmon_common.go @@ -356,10 +356,20 @@ func generateResourceFile(res *spec.LinuxResources) (string, []string, error) { // If all is set, send to all PIDs in the container. // All is only supported if the container created cgroups. func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool) error { + if _, err := r.killContainer(ctr, signal, all, false); err != nil { + return err + } + + return nil +} + +// If captureStderr is requested, OCI runtime STDERR will be captured as a +// *bytes.buffer and returned; otherwise, it is set to os.Stderr. +func (r *ConmonOCIRuntime) killContainer(ctr *Container, signal uint, all, captureStderr bool) (*bytes.Buffer, error) { logrus.Debugf("Sending signal %d to container %s", signal, ctr.ID()) runtimeDir, err := util.GetRuntimeDir() if err != nil { - return err + return nil, err } env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} var args []string @@ -369,19 +379,27 @@ func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool) } else { args = append(args, "kill", ctr.ID(), fmt.Sprintf("%d", signal)) } - if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...); err != nil { + var ( + stderr io.Writer = os.Stderr + stderrBuffer *bytes.Buffer + ) + if captureStderr { + stderrBuffer = new(bytes.Buffer) + stderr = stderrBuffer + } + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, stderr, env, r.path, args...); err != nil { // Update container state - there's a chance we failed because // the container exited in the meantime. if err2 := r.UpdateContainerStatus(ctr); err2 != nil { logrus.Infof("Error updating status for container %s: %v", ctr.ID(), err2) } if ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) { - return fmt.Errorf("%w: %s", define.ErrCtrStateInvalid, ctr.state.State) + return stderrBuffer, fmt.Errorf("%w: %s", define.ErrCtrStateInvalid, ctr.state.State) } - return fmt.Errorf("sending signal to container %s: %w", ctr.ID(), err) + return stderrBuffer, fmt.Errorf("sending signal to container %s: %w", ctr.ID(), err) } - return nil + return stderrBuffer, nil } // StopContainer stops a container, first using its given stop signal (or @@ -400,23 +418,65 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool) return nil } - if timeout > 0 { - stopSignal := ctr.config.StopSignal - if stopSignal == 0 { - stopSignal = uint(syscall.SIGTERM) + killCtr := func(signal uint) (bool, error) { + stderr, err := r.killContainer(ctr, signal, all, true) + + // Before handling error from KillContainer, convert STDERR to a []string + // (one string per line of output) and print it, ignoring known OCI runtime + // errors that we don't care about + stderrLines := strings.Split(stderr.String(), "\n") + for _, line := range stderrLines { + if line == "" { + continue + } + if strings.Contains(line, "container not running") || strings.Contains(line, "open pidfd: No such process") { + logrus.Debugf("Failure to kill container (already stopped?): logged %s", line) + continue + } + fmt.Fprintf(os.Stderr, "%s\n", line) } - if err := r.KillContainer(ctr, stopSignal, all); err != nil { + + if err != nil { + // There's an inherent race with the cleanup process (see + // #16142, #17142). If the container has already been marked as + // stopped or exited by the cleanup process, we can return + // immediately. + if errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) { + return true, nil + } + + // If the PID is 0, then the container is already stopped. + if ctr.state.PID == 0 { + return true, nil + } + // Is the container gone? // If so, it probably died between the first check and // our sending the signal // The container is stopped, so exit cleanly err := unix.Kill(ctr.state.PID, 0) if err == unix.ESRCH { - return nil + return true, nil } + return false, err + } + return false, nil + } + + if timeout > 0 { + stopSignal := ctr.config.StopSignal + if stopSignal == 0 { + stopSignal = uint(syscall.SIGTERM) + } + + stopped, err := killCtr(stopSignal) + if err != nil { return err } + if stopped { + return nil + } if err := waitContainerStop(ctr, time.Duration(timeout)*time.Second); err != nil { logrus.Debugf("Timed out stopping container %s with %s, resorting to SIGKILL: %v", ctr.ID(), unix.SignalName(syscall.Signal(stopSignal)), err) @@ -427,27 +487,13 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool) } } - // If the timeout was set to 0 or if stopping the container with the - // specified signal did not work, use the big hammer with SIGKILL. - if err := r.KillContainer(ctr, uint(unix.SIGKILL), all); err != nil { - // There's an inherent race with the cleanup process (see - // #16142, #17142). If the container has already been marked as - // stopped or exited by the cleanup process, we can return - // immediately. - if errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) { - return nil - } - - // If the PID is 0, then the container is already stopped. - if ctr.state.PID == 0 { - return nil - } - // Again, check if the container is gone. If it is, exit cleanly. - if aliveErr := unix.Kill(ctr.state.PID, 0); errors.Is(aliveErr, unix.ESRCH) { - return nil - } + stopped, err := killCtr(uint(unix.SIGKILL)) + if err != nil { return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err) } + if stopped { + return nil + } // Give runtime a few seconds to make it happen if err := waitContainerStop(ctr, killContainerTimeout); err != nil {