Skip to content

Commit

Permalink
Add ExecDied event and use it to retrieve exit codes
Browse files Browse the repository at this point in the history
When making Exec Cleanup processes mandatory, I introduced a race
wherein attached exec sessions could be cleaned up and removed by
the cleanup process before the frontend had a chance to get their
exit code. Fortunately, we've dealt with this issue before in
containers, and the same solution can be applied here. I added an
event for an exec session's process exiting, `exec_died` (Docker
has an identical event, so this actually improves our
compatibility there) that includes the exit code of the exec
session. If the race happens and the exec session no longer
exists when we go to remove it, pick up exit code from the event
and exit cleanly.

Signed-off-by: Matthew Heon <[email protected]>
  • Loading branch information
mheon committed Jun 10, 2021
1 parent 341e6a1 commit 62f4b0a
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 25 deletions.
64 changes: 40 additions & 24 deletions libpod/container_exec.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package libpod

import (
"context"
"io/ioutil"
"net/http"
"os"
Expand Down Expand Up @@ -539,18 +540,7 @@ func (c *Container) ExecStop(sessionID string, timeout *uint) error {
var cleanupErr error

// Retrieve exit code and update status
exitCode, err := c.readExecExitCode(session.ID())
if err != nil {
cleanupErr = err
}
session.ExitCode = exitCode
session.PID = 0
session.State = define.ExecStateStopped

if err := c.save(); err != nil {
if cleanupErr != nil {
logrus.Errorf("Error stopping container %s exec session %s: %v", c.ID(), session.ID(), cleanupErr)
}
if err := retrieveAndWriteExecExitCode(c, session.ID()); err != nil {
cleanupErr = err
}

Expand Down Expand Up @@ -592,15 +582,7 @@ func (c *Container) ExecCleanup(sessionID string) error {
return errors.Wrapf(define.ErrExecSessionStateInvalid, "cannot clean up container %s exec session %s as it is running", c.ID(), session.ID())
}

exitCode, err := c.readExecExitCode(session.ID())
if err != nil {
return err
}
session.ExitCode = exitCode
session.PID = 0
session.State = define.ExecStateStopped

if err := c.save(); err != nil {
if err := retrieveAndWriteExecExitCode(c, session.ID()); err != nil {
return err
}
}
Expand Down Expand Up @@ -637,9 +619,9 @@ func (c *Container) ExecRemove(sessionID string, force bool) error {
return err
}
if !running {
session.State = define.ExecStateStopped
// TODO: should we retrieve exit code here?
// TODO: Might be worth saving state here.
if err := retrieveAndWriteExecExitCode(c, session.ID()); err != nil {
return err
}
}
}

Expand All @@ -653,6 +635,10 @@ func (c *Container) ExecRemove(sessionID string, force bool) error {
return err
}

if err := retrieveAndWriteExecExitCode(c, session.ID()); err != nil {
return err
}

if err := c.cleanupExecBundle(session.ID()); err != nil {
return err
}
Expand Down Expand Up @@ -757,6 +743,18 @@ func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resi

session, err := c.ExecSession(sessionID)
if err != nil {
if errors.Cause(err) == define.ErrNoSuchExecSession {
// TODO: If a proper Context is ever plumbed in here, we
// should use it.
// As things stand, though, it's not worth it - this
// should always terminate quickly since it's not
// streaming.
diedEvent, err := c.runtime.GetExecDiedEvent(context.Background(), c.ID(), sessionID)
if err != nil {
return -1, errors.Wrapf(err, "error retrieving exec session %s exit code", sessionID)
}
return diedEvent.ContainerExitCode, nil
}
return -1, err
}
exitCode := session.ExitCode
Expand Down Expand Up @@ -930,6 +928,8 @@ func (c *Container) getActiveExecSessions() ([]string, error) {
session.PID = 0
session.State = define.ExecStateStopped

c.newExecDiedEvent(session.ID(), exitCode)

needSave = true
}
if err := c.cleanupExecBundle(id); err != nil {
Expand Down Expand Up @@ -1039,6 +1039,22 @@ func writeExecExitCode(c *Container, sessionID string, exitCode int) error {
return errors.Wrapf(err, "error syncing container %s state to remove exec session %s", c.ID(), sessionID)
}

return justWriteExecExitCode(c, sessionID, exitCode)
}

func retrieveAndWriteExecExitCode(c *Container, sessionID string) error {
exitCode, err := c.readExecExitCode(sessionID)
if err != nil {
return err
}

return justWriteExecExitCode(c, sessionID, exitCode)
}

func justWriteExecExitCode(c *Container, sessionID string, exitCode int) error {
// Write an event first
c.newExecDiedEvent(sessionID, exitCode)

session, ok := c.state.ExecSessions[sessionID]
if !ok {
// Exec session already removed.
Expand Down
39 changes: 38 additions & 1 deletion libpod/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,22 @@ func (c *Container) newContainerExitedEvent(exitCode int32) {
e.Type = events.Container
e.ContainerExitCode = int(exitCode)
if err := c.runtime.eventer.Write(e); err != nil {
logrus.Errorf("unable to write pod event: %q", err)
logrus.Errorf("unable to write container exited event: %q", err)
}
}

// newExecDiedEvent creates a new event for an exec session's death
func (c *Container) newExecDiedEvent(sessionID string, exitCode int) {
e := events.NewEvent(events.ExecDied)
e.ID = c.ID()
e.Name = c.Name()
e.Image = c.config.RootfsImageName
e.Type = events.Container
e.ContainerExitCode = exitCode
e.Attributes = make(map[string]string)
e.Attributes["execID"] = sessionID
if err := c.runtime.eventer.Write(e); err != nil {
logrus.Errorf("unable to write exec died event: %q", err)
}
}

Expand Down Expand Up @@ -154,3 +169,25 @@ func (r *Runtime) GetLastContainerEvent(ctx context.Context, nameOrID string, co
// return the last element in the slice
return containerEvents[len(containerEvents)-1], nil
}

// GetExecDiedEvent takes a container name or ID, exec session ID, and returns
// that exec session's Died event (if it has already occurred).
func (r *Runtime) GetExecDiedEvent(ctx context.Context, nameOrID, execSessionID string) (*events.Event, error) {
filters := []string{
fmt.Sprintf("container=%s", nameOrID),
"event=exec_died",
"type=container",
fmt.Sprintf("label=execID=%s", execSessionID),
}

containerEvents, err := r.GetEvents(ctx, filters)
if err != nil {
return nil, err
}
// There *should* only be one event maximum.
// But... just in case... let's not blow up if there's more than one.
if len(containerEvents) < 1 {
return nil, errors.Wrapf(events.ErrEventNotFound, "exec died event for session %s (container %s) not found", execSessionID, nameOrID)
}
return containerEvents[len(containerEvents)-1], nil
}
2 changes: 2 additions & 0 deletions libpod/events/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ const (
Create Status = "create"
// Exec ...
Exec Status = "exec"
// ExecDied indicates that an exec session in a container died.
ExecDied Status = "exec_died"
// Exited indicates that a container's process died
Exited Status = "died"
// Export ...
Expand Down
2 changes: 2 additions & 0 deletions libpod/events/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ func StringToStatus(name string) (Status, error) {
return Create, nil
case Exec.String():
return Exec, nil
case ExecDied.String():
return ExecDied, nil
case Exited.String():
return Exited, nil
case Export.String():
Expand Down

0 comments on commit 62f4b0a

Please sign in to comment.