Skip to content

Commit

Permalink
Merge pull request #23601 from Luap99/wait
Browse files Browse the repository at this point in the history
libpod: simplify WaitForExit()
  • Loading branch information
openshift-merge-bot[bot] authored Aug 15, 2024
2 parents f456b53 + 6fb1042 commit b282902
Show file tree
Hide file tree
Showing 8 changed files with 138 additions and 133 deletions.
11 changes: 7 additions & 4 deletions docs/source/markdown/podman-wait.1.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@ separated by newline in the same order as they were given to the command. An
exit code of -1 is emitted for all conditions other than "stopped" and
"exited".

NOTE: there is an inherent race condition when waiting for containers with a
restart policy of `always` or `on-failure`, such as those created by `podman
kube play`. Such containers may be repeatedly exiting and restarting, possibly
with different exit codes, but `podman wait` can only display and detect one.
When waiting for containers with a restart policy of `always` or `on-failure`,
such as those created by `podman kube play`, the containers may be repeatedly
exiting and restarting, possibly with different exit codes. `podman wait` will
only display and detect the first exit after the wait command was started.

When running a container with `podman run --rm`, wait should also wait for the
container to be fully removed before it returns.

## OPTIONS

Expand Down
160 changes: 69 additions & 91 deletions libpod/container_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -541,119 +541,97 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
}
return -1, define.ErrCtrRemoved
}
var conmonTimer time.Timer
conmonTimerSet := false

conmonPidFd := c.getConmonPidFd()
if conmonPidFd != -1 {
defer unix.Close(conmonPidFd)
}
conmonPidFdTriggered := false

getExitCode := func() (bool, int32, error) {
containerRemoved := false
if !c.batched {
c.lock.Lock()
defer c.lock.Unlock()
}

if err := c.syncContainer(); err != nil {
if !errors.Is(err, define.ErrNoSuchCtr) {
return false, -1, err
}
containerRemoved = true
}

// If conmon is not alive anymore set a timer to make sure
// we're returning even if conmon has forcefully been killed.
if !conmonTimerSet && !containerRemoved {
conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
switch {
case errors.Is(err, define.ErrNoSuchCtr):
// Container has been removed, so we assume the
// exit code is present in the DB.
containerRemoved = true
case err != nil:
return false, -1, err
case !conmonAlive:
// Give the exit code at most 20 seconds to
// show up in the DB. That should largely be
// enough for the cleanup process.
timerDuration := time.Second * 20
conmonTimer = *time.NewTimer(timerDuration)
conmonTimerSet = true
case conmonAlive:
// Continue waiting if conmon's still running.
return false, -1, nil
}
}
if !c.batched {
c.lock.Lock()
defer c.lock.Unlock()

timedout := ""
if !containerRemoved {
// If conmon is dead for more than $timerDuration or if the
// container has exited properly, try to look up the exit code.
select {
case <-conmonTimer.C:
logrus.Debugf("Exceeded conmon timeout waiting for container %s to exit", id)
timedout = " [exceeded conmon timeout waiting for container to exit]"
default:
switch c.state.State {
case define.ContainerStateExited, define.ContainerStateConfigured:
// Container exited, so we can look up the exit code.
case define.ContainerStateStopped:
// Continue looping unless the restart policy is always.
// In this case, the container would never transition to
// the exited state, so we need to look up the exit code.
if c.config.RestartPolicy != define.RestartPolicyAlways {
return false, -1, nil
}
default:
// Continue looping
return false, -1, nil
// Note this is one of the rare cases where we do not like to use syncContainer() as it reads the exit file.
// We like to avoid that here because it means we might read the file before container cleanup ran and possibly
// removed the container.
if err := c.runtime.state.UpdateContainer(c); err != nil {
if errors.Is(err, define.ErrNoSuchCtr) {
// if the container is not valid at this point as it was deleted,
// check if the exit code was recorded in the db.
exitCode, err := c.runtime.state.GetContainerExitCode(id)
if err == nil {
return exitCode, nil
}
}
return -1, err
}
}

conmonPID := c.state.ConmonPID
// conmonPID == 0 means container is not running
if conmonPID == 0 {
exitCode, err := c.runtime.state.GetContainerExitCode(id)
if err != nil {
if errors.Is(err, define.ErrNoSuchExitCode) {
// If the container is configured or created, we must assume it never ran.
if c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated) {
return true, 0, nil
return 0, nil
}
}
return true, -1, fmt.Errorf("%w (container in state %s)%s", err, c.state.State, timedout)
return -1, err
}
return exitCode, nil
}

return true, exitCode, nil
conmonPidFd := c.getConmonPidFd()
if conmonPidFd > -1 {
defer unix.Close(conmonPidFd)
}

for {
hasExited, exitCode, err := getExitCode()
if hasExited {
return exitCode, err
// we cannot wait locked as we would hold the lock forever, so we unlock and then lock again
c.lock.Unlock()
err := waitForConmonExit(conmonPID, conmonPidFd, pollInterval)
c.lock.Lock()
if err != nil {
return -1, fmt.Errorf("failed to wait for conmon to exit: %w", err)
}

// we locked again so we must sync the state
if err := c.syncContainer(); err != nil {
if errors.Is(err, define.ErrNoSuchCtr) {
// if the container is not valid at this point as it was deleted,
// check if the exit code was recorded in the db.
exitCode, err := c.runtime.state.GetContainerExitCode(id)
if err == nil {
return exitCode, nil
}
}
if err != nil {
return -1, err
return -1, err
}

// syncContainer already did the exit file read so nothing extra for us to do here
return c.runtime.state.GetContainerExitCode(id)
}

func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) error {
if conmonPidFd > -1 {
for {
fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
if _, err := unix.Poll(fds, -1); err != nil {
if err == unix.EINTR {
continue
}
return err
}
return nil
}
select {
case <-ctx.Done():
return -1, fmt.Errorf("waiting for exit code of container %s canceled", id)
default:
if conmonPidFd != -1 && !conmonPidFdTriggered {
// If possible (pidfd works), the first cycle we block until conmon dies
// If this happens, and we fall back to the old poll delay
// There is a deadlock in the cleanup code for "play kube" which causes
// conmon to not exit, so unfortunately we have to use the poll interval
// timeout here to avoid hanging.
fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
_, _ = unix.Poll(fds, int(pollInterval.Milliseconds()))
conmonPidFdTriggered = true
} else {
time.Sleep(pollInterval)
}
// no pidfd support, we must poll the pid
for {
if err := unix.Kill(conmonPID, 0); err != nil {
if err == unix.ESRCH {
break
}
return err
}
time.Sleep(pollInterval)
}
return nil
}

type waitResult struct {
Expand Down
5 changes: 5 additions & 0 deletions libpod/container_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -2163,6 +2163,11 @@ func (c *Container) stopPodIfNeeded(ctx context.Context) error {
return nil
}

// Never try to stop the pod when an init container stopped
if c.IsInitCtr() {
return nil
}

pod, err := c.runtime.state.Pod(c.config.Pod)
if err != nil {
return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
Expand Down
18 changes: 8 additions & 10 deletions libpod/container_internal_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -694,16 +694,14 @@ func (c *Container) makePlatformBindMounts() error {
}

func (c *Container) getConmonPidFd() int {
if c.state.ConmonPID != 0 {
// Track lifetime of conmon precisely using pidfd_open + poll.
// There are many cases for this to fail, for instance conmon is dead
// or pidfd_open is not supported (pre linux 5.3), so fall back to the
// traditional loop with poll + sleep
if fd, err := unix.PidfdOpen(c.state.ConmonPID, 0); err == nil {
return fd
} else if err != unix.ENOSYS && err != unix.ESRCH {
logrus.Debugf("PidfdOpen(%d) failed: %v", c.state.ConmonPID, err)
}
// Track lifetime of conmon precisely using pidfd_open + poll.
// There are many cases for this to fail, for instance conmon is dead
// or pidfd_open is not supported (pre linux 5.3), so fall back to the
// traditional loop with poll + sleep
if fd, err := unix.PidfdOpen(c.state.ConmonPID, 0); err == nil {
return fd
} else if err != unix.ENOSYS && err != unix.ESRCH {
logrus.Debugf("PidfdOpen(%d) failed: %v", c.state.ConmonPID, err)
}
return -1
}
Expand Down
7 changes: 0 additions & 7 deletions libpod/runtime_volume.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,6 @@ func (r *Runtime) RemoveVolume(ctx context.Context, v *Volume, force bool, timeo
return define.ErrRuntimeStopped
}

if !v.valid {
if ok, _ := r.state.HasVolume(v.Name()); !ok {
// Volume probably already removed
// Or was never in the runtime to begin with
return nil
}
}
return r.removeVolume(ctx, v, force, timeout, false)
}

Expand Down
21 changes: 14 additions & 7 deletions libpod/runtime_volume_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,13 +357,11 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeo
return define.ErrVolumeRemoved
}

v.lock.Lock()
defer v.lock.Unlock()

// Update volume status to pick up a potential removal from state
if err := v.update(); err != nil {
return err
}
// DANGEROUS: Do not lock here yet because we might need to remove containers first.
// In general we must always acquire the ctr lock before a volume lock so we cannot lock.
// THIS MUST BE DONE to prevent ABBA deadlocks.
// It also means there are several races around creating containers with volumes and removing
// them in parallel. However that problem exists regardless of taking the lock here or not.

deps, err := r.state.VolumeInUse(v)
if err != nil {
Expand Down Expand Up @@ -400,6 +398,15 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeo
}
}

// Ok now we are good to lock.
v.lock.Lock()
defer v.lock.Unlock()

// Update volume status to pick up a potential removal from state
if err := v.update(); err != nil {
return err
}

// If the volume is still mounted - force unmount it
if err := v.unmount(true); err != nil {
if force {
Expand Down
12 changes: 7 additions & 5 deletions test/system/030-run.bats
Original file line number Diff line number Diff line change
Expand Up @@ -1289,13 +1289,15 @@ EOF
rm -rf $romount
}

@test "podman run --restart=always -- wait" {
@test "podman run --restart=always/on-failure -- wait" {
# regression test for #18572 to make sure Podman waits less than 20 seconds
ctr=c_$(safename)
run_podman run -d --restart=always --name=$ctr $IMAGE false
PODMAN_TIMEOUT=20 run_podman wait $ctr
is "$output" "1" "container should exit 1"
run_podman rm -f -t0 $ctr
for policy in always on-failure; do
run_podman run -d --restart=$policy --name=$ctr $IMAGE false
PODMAN_TIMEOUT=20 run_podman wait $ctr
is "$output" "1" "container should exit 1 (policy: $policy)"
run_podman rm -f -t0 $ctr
done
}

@test "podman run - custom static_dir" {
Expand Down
37 changes: 28 additions & 9 deletions test/system/500-networking.bats
Original file line number Diff line number Diff line change
Expand Up @@ -935,6 +935,27 @@ EOF
fi
}

function wait_for_restart_count() {
local cname="$1"
local count="$2"
local tname="$3"

local timeout=10
while :; do
# Previously this would fail as the container would run out of ips after 5 restarts.
run_podman inspect --format "{{.RestartCount}}" $cname
if [[ "$output" == "$2" ]]; then
break
fi

timeout=$((timeout - 1))
if [[ $timeout -eq 0 ]]; then
die "Timed out waiting for RestartCount with $tname"
fi
sleep 0.5
done
}

# Test for https://github.com/containers/podman/issues/18615
@test "podman network cleanup --userns + --restart" {
skip_if_cgroupsv1 "run --uidmap fails on cgroups v1 (issue 15025, wontfix)"
Expand All @@ -955,10 +976,8 @@ EOF
# This will cause 7 containers runs with the restart policy (one more than the on failure limit)
# Since they run sequentially they should run fine without allocating all ips.
run_podman 1 run --name $cname --network $net1 --restart on-failure:6 --userns keep-id $IMAGE false

# Previously this would fail as the container would run out of ips after 5 restarts.
run_podman inspect --format "{{.RestartCount}}" $cname
assert "$output" == "6" "RestartCount for failing container with bridge network"
wait_for_restart_count $cname 6 "custom network"
run_podman wait $cname

# Now make sure we can still run a container with free ips.
run_podman run --rm --network $net1 $IMAGE true
Expand All @@ -969,23 +988,23 @@ EOF
if has_slirp4netns; then
cname2=con2-$(random_string 10)
run_podman 1 run --name $cname2 --network slirp4netns --restart on-failure:2 --userns keep-id $IMAGE false
run_podman inspect --format "{{.RestartCount}}" $cname2
assert "$output" == "2" "RestartCount for failing container with slirp4netns"
wait_for_restart_count $cname2 2 "slirp4netns"
run_podman wait $cname2
fi

if is_rootless; then
# pasta can only run rootless
cname3=con3-$(random_string 10)
run_podman 1 run --name $cname3 --network pasta --restart on-failure:2 --userns keep-id $IMAGE false
run_podman inspect --format "{{.RestartCount}}" $cname3
assert "$output" == "2" "RestartCount for failing container with pasta"
wait_for_restart_count $cname3 2 "pasta"
run_podman wait $cname3
else
# This is racy if other programs modify /run/netns while the test is running.
# However I think the risk is minimal and I think checking for this is important.
assert "$(ls /run/netns | wc -l)" == "$netns_count" "/run/netns has no leaked netns files"
fi

run_podman rm -f -t0 $cname $cname2 $cname3
run_podman rm $cname $cname2 $cname3
run_podman network rm $net1
}

Expand Down

0 comments on commit b282902

Please sign in to comment.