Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix a potential race around the exec cleanup process #13600

Merged
merged 1 commit into from
Mar 23, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 47 additions & 9 deletions libpod/container_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -341,22 +341,60 @@ func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachS
}
lastErr = tmpErr

exitCode, err := c.readExecExitCode(session.ID())
if err != nil {
exitCode, exitCodeErr := c.readExecExitCode(session.ID())

// Lock again.
// Important: we must lock and sync *before* the above error is handled.
// We need info from the database to handle the error.
if !c.batched {
c.lock.Lock()
mheon marked this conversation as resolved.
Show resolved Hide resolved
}
// We can't reuse the old exec session (things may have changed from
// other use, the container was unlocked).
// So re-sync and get a fresh copy.
// If we can't do this, no point in continuing, any attempt to save
// would write garbage to the DB.
if err := c.syncContainer(); err != nil {
if errors.Is(err, define.ErrNoSuchCtr) || errors.Is(err, define.ErrCtrRemoved) {
// We can't save status, but since the container has
// been entirely removed, we don't have to; exit cleanly
return lastErr
}
if lastErr != nil {
logrus.Errorf("Container %s exec session %s error: %v", c.ID(), session.ID(), lastErr)
}
lastErr = err
}
return errors.Wrapf(err, "error syncing container %s state to update exec session %s", c.ID(), sessionID)
}
mheon marked this conversation as resolved.
Show resolved Hide resolved

// Now handle the error from readExecExitCode above.
if exitCodeErr != nil {
newSess, ok := c.state.ExecSessions[sessionID]
if !ok {
// The exec session was removed entirely, probably by
// the cleanup process. When it did so, it should have
// written an event with the exit code.
// Given that, there's nothing more we can do.
logrus.Infof("Container %s exec session %s already removed", c.ID(), session.ID())
return lastErr
}

logrus.Debugf("Container %s exec session %s completed with exit code %d", c.ID(), session.ID(), exitCode)
if newSess.State == define.ExecStateStopped {
// Exec session already cleaned up.
// Exit code should be recorded, so it's OK if we were
// not able to read it.
logrus.Infof("Container %s exec session %s already cleaned up", c.ID(), session.ID())
return lastErr
}

// Lock again
if !c.batched {
c.lock.Lock()
if lastErr != nil {
logrus.Errorf("Container %s exec session %s error: %v", c.ID(), session.ID(), lastErr)
}
lastErr = exitCodeErr
}

if err := writeExecExitCode(c, session.ID(), exitCode); err != nil {
logrus.Debugf("Container %s exec session %s completed with exit code %d", c.ID(), session.ID(), exitCode)

if err := justWriteExecExitCode(c, session.ID(), exitCode); err != nil {
if lastErr != nil {
logrus.Errorf("Container %s exec session %s error: %v", c.ID(), session.ID(), lastErr)
}
Expand Down