libpod: simplify WaitForExit()
The current code did several complicated state checks that simply do not
work properly on a fast restarting container. It used a special case for
--restart=always but forgot to take care of --restart=on-failure, which
would always hang for 20s until it ran into the timeout.

The old logic also used to call CheckConmonRunning(), but it synced the
state beforehand, which means it may have checked a new conmon process
every time and thus missed exits.

The fix is much simpler code: check the conmon pid, and once it is no
longer running, read the exit file and get the exit code.

This is related to #23473, but I am not sure whether it fixes that issue
because we cannot reproduce it.

Signed-off-by: Paul Holzinger <[email protected]>
Luap99 committed Aug 15, 2024
1 parent f456b53 commit 8a94331
Showing 3 changed files with 84 additions and 106 deletions.
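Before the file-by-file diff, here is a minimal, self-contained Go sketch of the waiting technique the new code relies on. It is illustration only, not code from this commit: waitForPid and the sleep child process are made up for the example, while unix.PidfdOpen, unix.Poll, and unix.Kill are the real golang.org/x/sys/unix calls. The idea is to open a pidfd for the process and poll it until the process terminates, falling back to polling kill(pid, 0) when pidfd_open is unavailable (pre Linux 5.3) or fails.

package main

import (
	"fmt"
	"os/exec"
	"time"

	"golang.org/x/sys/unix"
)

// waitForPid blocks until the process with the given pid is gone.
func waitForPid(pid int, pollInterval time.Duration) error {
	// Preferred path: pidfd_open(2) + poll(2). The pidfd becomes readable
	// as soon as the process terminates.
	if fd, err := unix.PidfdOpen(pid, 0); err == nil {
		defer unix.Close(fd)
		for {
			fds := []unix.PollFd{{Fd: int32(fd), Events: unix.POLLIN}}
			if _, err := unix.Poll(fds, -1); err != nil {
				if err == unix.EINTR {
					continue
				}
				return err
			}
			return nil
		}
	}
	// Fallback for kernels without pidfd_open (pre 5.3) or when the process
	// is already gone: kill(pid, 0) sends no signal, it only checks whether
	// the pid still exists.
	for {
		if err := unix.Kill(pid, 0); err != nil {
			if err == unix.ESRCH {
				return nil // no such process anymore
			}
			return err
		}
		time.Sleep(pollInterval)
	}
}

func main() {
	cmd := exec.Command("sleep", "2")
	if err := cmd.Start(); err != nil {
		panic(err)
	}
	// Reap the child in the background so the pid really disappears for the
	// kill-based fallback; a zombie would still count as "existing" there.
	go func() { _ = cmd.Wait() }()

	if err := waitForPid(cmd.Process.Pid, 100*time.Millisecond); err != nil {
		panic(err)
	}
	fmt.Println("process exited")
}

Note that the pidfd path reports termination as soon as the process exits, while the kill(pid, 0) fallback keeps succeeding for an unreaped zombie, which is why the sketch reaps the child in the background.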
160 changes: 69 additions & 91 deletions libpod/container_api.go
@@ -541,119 +541,97 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
 		}
 		return -1, define.ErrCtrRemoved
 	}
-	var conmonTimer time.Timer
-	conmonTimerSet := false
-
-	conmonPidFd := c.getConmonPidFd()
-	if conmonPidFd != -1 {
-		defer unix.Close(conmonPidFd)
-	}
-	conmonPidFdTriggered := false
-
-	getExitCode := func() (bool, int32, error) {
-		containerRemoved := false
-		if !c.batched {
-			c.lock.Lock()
-			defer c.lock.Unlock()
-		}
-
-		if err := c.syncContainer(); err != nil {
-			if !errors.Is(err, define.ErrNoSuchCtr) {
-				return false, -1, err
-			}
-			containerRemoved = true
-		}
-
-		// If conmon is not alive anymore set a timer to make sure
-		// we're returning even if conmon has forcefully been killed.
-		if !conmonTimerSet && !containerRemoved {
-			conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
-			switch {
-			case errors.Is(err, define.ErrNoSuchCtr):
-				// Container has been removed, so we assume the
-				// exit code is present in the DB.
-				containerRemoved = true
-			case err != nil:
-				return false, -1, err
-			case !conmonAlive:
-				// Give the exit code at most 20 seconds to
-				// show up in the DB. That should largely be
-				// enough for the cleanup process.
-				timerDuration := time.Second * 20
-				conmonTimer = *time.NewTimer(timerDuration)
-				conmonTimerSet = true
-			case conmonAlive:
-				// Continue waiting if conmon's still running.
-				return false, -1, nil
-			}
-		}
-
-		timedout := ""
-		if !containerRemoved {
-			// If conmon is dead for more than $timerDuration or if the
-			// container has exited properly, try to look up the exit code.
-			select {
-			case <-conmonTimer.C:
-				logrus.Debugf("Exceeded conmon timeout waiting for container %s to exit", id)
-				timedout = " [exceeded conmon timeout waiting for container to exit]"
-			default:
-				switch c.state.State {
-				case define.ContainerStateExited, define.ContainerStateConfigured:
-					// Container exited, so we can look up the exit code.
-				case define.ContainerStateStopped:
-					// Continue looping unless the restart policy is always.
-					// In this case, the container would never transition to
-					// the exited state, so we need to look up the exit code.
-					if c.config.RestartPolicy != define.RestartPolicyAlways {
-						return false, -1, nil
-					}
-				default:
-					// Continue looping
-					return false, -1, nil
-				}
-			}
-		}
-
-		exitCode, err := c.runtime.state.GetContainerExitCode(id)
-		if err != nil {
-			if errors.Is(err, define.ErrNoSuchExitCode) {
-				// If the container is configured or created, we must assume it never ran.
-				if c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated) {
-					return true, 0, nil
-				}
-			}
-			return true, -1, fmt.Errorf("%w (container in state %s)%s", err, c.state.State, timedout)
-		}
-
-		return true, exitCode, nil
-	}
-
-	for {
-		hasExited, exitCode, err := getExitCode()
-		if hasExited {
-			return exitCode, err
-		}
-		if err != nil {
-			return -1, err
-		}
-		select {
-		case <-ctx.Done():
-			return -1, fmt.Errorf("waiting for exit code of container %s canceled", id)
-		default:
-			if conmonPidFd != -1 && !conmonPidFdTriggered {
-				// If possible (pidfd works), the first cycle we block until conmon dies
-				// If this happens, and we fall back to the old poll delay
-				// There is a deadlock in the cleanup code for "play kube" which causes
-				// conmon to not exit, so unfortunately we have to use the poll interval
-				// timeout here to avoid hanging.
-				fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
-				_, _ = unix.Poll(fds, int(pollInterval.Milliseconds()))
-				conmonPidFdTriggered = true
-			} else {
-				time.Sleep(pollInterval)
-			}
-		}
-	}
+	if !c.batched {
+		c.lock.Lock()
+		defer c.lock.Unlock()
+	}
+
+	// Note this is one of the rare cases where we do not like to use syncContainer() as it does read the exit file
+	// We like to avoid that here because that means we might read it before container cleanup run and possible
+	// removed the container
+	if err := c.runtime.state.UpdateContainer(c); err != nil {
+		if errors.Is(err, define.ErrNoSuchCtr) {
+			// if the container is not valid at this point as it was deleted,
+			// check if the exit code was recorded in the db.
+			exitCode, err := c.runtime.state.GetContainerExitCode(id)
+			if err == nil {
+				return exitCode, nil
+			}
+		}
+		return -1, err
+	}
+
+	conmonPID := c.state.ConmonPID
+	// conmonPID == 0 means container is not running
+	if conmonPID == 0 {
+		exitCode, err := c.runtime.state.GetContainerExitCode(id)
+		if err != nil {
+			if errors.Is(err, define.ErrNoSuchExitCode) {
+				// If the container is configured or created, we must assume it never ran.
+				if c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated) {
+					return 0, nil
+				}
+			}
+			return -1, err
+		}
+		return exitCode, nil
+	}
+
+	conmonPidFd := c.getConmonPidFd()
+	if conmonPidFd > -1 {
+		defer unix.Close(conmonPidFd)
+	}
+
+	// we cannot wait locked as we would hold the lock forever, so we unlock and then lock again
+	c.lock.Unlock()
+	err := waitForConmonExit(conmonPID, conmonPidFd, pollInterval)
+	c.lock.Lock()
+	if err != nil {
+		return -1, fmt.Errorf("failed to wait for conmon to exit: %w", err)
+	}
+
+	// we locked again so we must sync the state
+	if err := c.syncContainer(); err != nil {
+		if errors.Is(err, define.ErrNoSuchCtr) {
+			// if the container is not valid at this point as it was deleted,
+			// check if the exit code was recorded in the db.
+			exitCode, err := c.runtime.state.GetContainerExitCode(id)
+			if err == nil {
+				return exitCode, nil
+			}
+		}
+		return -1, err
+	}
+
+	// syncContainer already did the exit file read so nothing extra for us to do here
+	return c.runtime.state.GetContainerExitCode(id)
 }
+
+func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) error {
+	if conmonPidFd > -1 {
+		for {
+			fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
+			if _, err := unix.Poll(fds, -1); err != nil {
+				if err == unix.EINTR {
+					continue
+				}
+				return err
+			}
+			return nil
+		}
+	}
+	// no pidfd support, we must poll the pid
+	for {
+		if err := unix.Kill(conmonPID, 0); err != nil {
+			if err == unix.ESRCH {
+				break
+			}
+			return err
+		}
+		time.Sleep(pollInterval)
+	}
+	return nil
+}
 
 type waitResult struct {
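One detail in the diff above that is easy to miss: the new code calls syncContainer() again after re-taking the lock, because anything read before the lock was released can be stale by the time the wait returns. Below is a minimal sketch of that unlock, wait, re-lock, re-read shape; containerState, waitExit, and the done channel are hypothetical names for illustration, not libpod types.

package main

import (
	"fmt"
	"sync"
	"time"
)

// containerState is a stand-in for the shared container state that another
// goroutine (libpod's cleanup process) updates while the waiter is blocked.
type containerState struct {
	mu       sync.Mutex
	exitCode int
}

// waitExit shows the shape of the pattern: drop the lock before blocking,
// re-lock afterwards, and re-read the shared state.
func (s *containerState) waitExit(done <-chan struct{}) int {
	s.mu.Lock()
	defer s.mu.Unlock()

	// A value read under the lock *before* waiting; it will be stale.
	early := s.exitCode

	// Blocking while holding the lock would prevent the writer from ever
	// recording the exit code, so release the lock for the duration of the
	// wait and take it again afterwards.
	s.mu.Unlock()
	<-done
	s.mu.Lock()

	// Anything read before the unlock may be outdated; re-reading here is
	// what the syncContainer() call in the diff corresponds to.
	fmt.Println("value read before waiting:", early)
	return s.exitCode
}

func main() {
	s := &containerState{}
	done := make(chan struct{})
	go func() {
		time.Sleep(50 * time.Millisecond)
		s.mu.Lock()
		s.exitCode = 1
		s.mu.Unlock()
		close(done)
	}()
	fmt.Println("exit code after waiting:", s.waitExit(done))
}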
18 changes: 8 additions & 10 deletions libpod/container_internal_linux.go
@@ -694,16 +694,14 @@ func (c *Container) makePlatformBindMounts() error {
 }
 
 func (c *Container) getConmonPidFd() int {
-	if c.state.ConmonPID != 0 {
-		// Track lifetime of conmon precisely using pidfd_open + poll.
-		// There are many cases for this to fail, for instance conmon is dead
-		// or pidfd_open is not supported (pre linux 5.3), so fall back to the
-		// traditional loop with poll + sleep
-		if fd, err := unix.PidfdOpen(c.state.ConmonPID, 0); err == nil {
-			return fd
-		} else if err != unix.ENOSYS && err != unix.ESRCH {
-			logrus.Debugf("PidfdOpen(%d) failed: %v", c.state.ConmonPID, err)
-		}
+	// Track lifetime of conmon precisely using pidfd_open + poll.
+	// There are many cases for this to fail, for instance conmon is dead
+	// or pidfd_open is not supported (pre linux 5.3), so fall back to the
+	// traditional loop with poll + sleep
+	if fd, err := unix.PidfdOpen(c.state.ConmonPID, 0); err == nil {
+		return fd
+	} else if err != unix.ENOSYS && err != unix.ESRCH {
+		logrus.Debugf("PidfdOpen(%d) failed: %v", c.state.ConmonPID, err)
 	}
 	return -1
 }
12 changes: 7 additions & 5 deletions test/system/030-run.bats
@@ -1289,13 +1289,15 @@ EOF
     rm -rf $romount
 }
 
-@test "podman run --restart=always -- wait" {
+@test "podman run --restart=always/on-failure -- wait" {
     # regression test for #18572 to make sure Podman waits less than 20 seconds
     ctr=c_$(safename)
-    run_podman run -d --restart=always --name=$ctr $IMAGE false
-    PODMAN_TIMEOUT=20 run_podman wait $ctr
-    is "$output" "1" "container should exit 1"
-    run_podman rm -f -t0 $ctr
+    for policy in always on-failure; do
+        run_podman run -d --restart=$policy --name=$ctr $IMAGE false
+        PODMAN_TIMEOUT=20 run_podman wait $ctr
+        is "$output" "1" "container should exit 1 (policy: $policy)"
+        run_podman rm -f -t0 $ctr
+    done
 }
 
 @test "podman run - custom static_dir" {
