Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid unnecessary timeout of 250msec when waiting on container shutdown #16088

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion libpod/container_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/containers/storage/pkg/archive"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)

// Init creates a container in the OCI runtime, moving a container from
Expand Down Expand Up @@ -515,6 +516,22 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
var conmonTimer time.Timer
conmonTimerSet := false

conmonPidFd := -1
conmonPidFdTriggered := false

if c.state.ConmonPID != 0 {
// Track lifetime of conmon precisely using pidfd_open + poll.
// There are many cases for this to fail, for instance conmon is dead
// or pidfd_open is not supported (pre linux 5.3), so fall back to the
// traditional loop with poll + sleep
if fd, err := unix.PidfdOpen(c.state.ConmonPID, 0); err == nil {
conmonPidFd = fd
defer unix.Close(conmonPidFd)
} else if err != unix.ENOSYS && err != unix.ESRCH {
logrus.Debugf("PidfdOpen(%d) failed: %v", c.state.ConmonPID, err)
}
alexlarsson marked this conversation as resolved.
Show resolved Hide resolved
}

getExitCode := func() (bool, int32, error) {
containerRemoved := false
if !c.batched {
Expand Down Expand Up @@ -582,7 +599,18 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
case <-ctx.Done():
return -1, fmt.Errorf("waiting for exit code of container %s canceled", id)
default:
time.Sleep(pollInterval)
if conmonPidFd != -1 && !conmonPidFdTriggered {
// If possible (pidfd works), the first cycle we block until conmon dies
// If this happens, and we fall back to the old poll delay
// There is a deadlock in the cleanup code for "play kube" which causes
// conmon to not exit, so unfortunately we have to use the poll interval
// timeout here to avoid hanging.
fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
_, _ = unix.Poll(fds, int(pollInterval.Milliseconds()))
conmonPidFdTriggered = true
} else {
time.Sleep(pollInterval)
}
}
}
}
Expand Down