From 341e6a1628a35198500fcfc1bb65b377ff9b270b Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Wed, 19 May 2021 14:49:26 -0400 Subject: [PATCH 1/2] Always spawn a cleanup process with exec We were previously only doing this for detached exec. I don't know why we did that, but I don't see any reason not to extend it to all exec sessions - it guarantees that we will always clean up exec sessions, even if the original `podman exec` process died. [NO TESTS NEEDED] because I don't really know how to test this one. Signed-off-by: Matthew Heon --- libpod/container_exec.go | 3 +++ pkg/domain/infra/abi/containers.go | 36 +++++++++++++++++------------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/libpod/container_exec.go b/libpod/container_exec.go index c359f1e5df..09a5b95976 100644 --- a/libpod/container_exec.go +++ b/libpod/container_exec.go @@ -761,6 +761,9 @@ func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resi } exitCode := session.ExitCode if err := c.ExecRemove(sessionID, false); err != nil { + if errors.Cause(err) == define.ErrNoSuchExecSession { + return exitCode, nil + } return -1, err } diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index 4908e72f64..5a953c0472 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -595,7 +595,7 @@ func (ic *ContainerEngine) ContainerAttach(ctx context.Context, nameOrID string, return nil } -func makeExecConfig(options entities.ExecOptions) *libpod.ExecConfig { +func makeExecConfig(options entities.ExecOptions, rt *libpod.Runtime) (*libpod.ExecConfig, error) { execConfig := new(libpod.ExecConfig) execConfig.Command = options.Cmd execConfig.Terminal = options.Tty @@ -607,7 +607,20 @@ func makeExecConfig(options entities.ExecOptions) *libpod.ExecConfig { execConfig.PreserveFDs = options.PreserveFDs execConfig.AttachStdin = options.Interactive - return execConfig + // Make an exit command + storageConfig := rt.StorageConfig() + runtimeConfig, err := rt.GetConfig() + if err != nil { + return nil, errors.Wrapf(err, "error retrieving Libpod configuration to build exec exit command") + } + // TODO: Add some ability to toggle syslog + exitCommandArgs, err := generate.CreateExitCommandArgs(storageConfig, runtimeConfig, false, true, true) + if err != nil { + return nil, errors.Wrapf(err, "error constructing exit command for exec session") + } + execConfig.ExitCommand = exitCommandArgs + + return execConfig, nil } func checkExecPreserveFDs(options entities.ExecOptions) error { @@ -647,7 +660,10 @@ func (ic *ContainerEngine) ContainerExec(ctx context.Context, nameOrID string, o } ctr := ctrs[0] - execConfig := makeExecConfig(options) + execConfig, err := makeExecConfig(options, ic.Libpod) + if err != nil { + return ec, err + } ec, err = terminal.ExecAttachCtr(ctx, ctr, execConfig, &streams) return define.TranslateExecErrorToExitCode(ec, err), err @@ -664,20 +680,10 @@ func (ic *ContainerEngine) ContainerExecDetached(ctx context.Context, nameOrID s } ctr := ctrs[0] - execConfig := makeExecConfig(options) - - // Make an exit command - storageConfig := ic.Libpod.StorageConfig() - runtimeConfig, err := ic.Libpod.GetConfig() - if err != nil { - return "", errors.Wrapf(err, "error retrieving Libpod configuration to build exec exit command") - } - // TODO: Add some ability to toggle syslog - exitCommandArgs, err := generate.CreateExitCommandArgs(storageConfig, runtimeConfig, false, true, true) + execConfig, err := makeExecConfig(options, ic.Libpod) if err != nil { - return "", errors.Wrapf(err, "error constructing exit command for exec session") + return "", err } - execConfig.ExitCommand = exitCommandArgs // Create and start the exec session id, err := ctr.ExecCreate(execConfig) From 62f4b0a1955853592c01310a2cf7e0ae041b9566 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Thu, 10 Jun 2021 14:12:30 -0400 Subject: [PATCH 2/2] Add ExecDied event and use it to retrieve exit codes When making Exec Cleanup processes mandatory, I introduced a race wherein attached exec sessions could be cleaned up and removed by the cleanup process before the frontend had a chance to get their exit code. Fortunately, we've dealt with this issue before in containers, and the same solution can be applied here. I added an event for an exec session's process exiting, `exec_died` (Docker has an identical event, so this actually improves our compatibility there) that includes the exit code of the exec session. If the race happens and the exec session no longer exists when we go to remove it, pick up exit code from the event and exit cleanly. Signed-off-by: Matthew Heon --- libpod/container_exec.go | 64 +++++++++++++++++++++++++--------------- libpod/events.go | 39 +++++++++++++++++++++++- libpod/events/config.go | 2 ++ libpod/events/events.go | 2 ++ 4 files changed, 82 insertions(+), 25 deletions(-) diff --git a/libpod/container_exec.go b/libpod/container_exec.go index 09a5b95976..737bf74ad5 100644 --- a/libpod/container_exec.go +++ b/libpod/container_exec.go @@ -1,6 +1,7 @@ package libpod import ( + "context" "io/ioutil" "net/http" "os" @@ -539,18 +540,7 @@ func (c *Container) ExecStop(sessionID string, timeout *uint) error { var cleanupErr error // Retrieve exit code and update status - exitCode, err := c.readExecExitCode(session.ID()) - if err != nil { - cleanupErr = err - } - session.ExitCode = exitCode - session.PID = 0 - session.State = define.ExecStateStopped - - if err := c.save(); err != nil { - if cleanupErr != nil { - logrus.Errorf("Error stopping container %s exec session %s: %v", c.ID(), session.ID(), cleanupErr) - } + if err := retrieveAndWriteExecExitCode(c, session.ID()); err != nil { cleanupErr = err } @@ -592,15 +582,7 @@ func (c *Container) ExecCleanup(sessionID string) error { return errors.Wrapf(define.ErrExecSessionStateInvalid, "cannot clean up container %s exec session %s as it is running", c.ID(), session.ID()) } - exitCode, err := c.readExecExitCode(session.ID()) - if err != nil { - return err - } - session.ExitCode = exitCode - session.PID = 0 - session.State = define.ExecStateStopped - - if err := c.save(); err != nil { + if err := retrieveAndWriteExecExitCode(c, session.ID()); err != nil { return err } } @@ -637,9 +619,9 @@ func (c *Container) ExecRemove(sessionID string, force bool) error { return err } if !running { - session.State = define.ExecStateStopped - // TODO: should we retrieve exit code here? - // TODO: Might be worth saving state here. + if err := retrieveAndWriteExecExitCode(c, session.ID()); err != nil { + return err + } } } @@ -653,6 +635,10 @@ func (c *Container) ExecRemove(sessionID string, force bool) error { return err } + if err := retrieveAndWriteExecExitCode(c, session.ID()); err != nil { + return err + } + if err := c.cleanupExecBundle(session.ID()); err != nil { return err } @@ -757,6 +743,18 @@ func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resi session, err := c.ExecSession(sessionID) if err != nil { + if errors.Cause(err) == define.ErrNoSuchExecSession { + // TODO: If a proper Context is ever plumbed in here, we + // should use it. + // As things stand, though, it's not worth it - this + // should always terminate quickly since it's not + // streaming. + diedEvent, err := c.runtime.GetExecDiedEvent(context.Background(), c.ID(), sessionID) + if err != nil { + return -1, errors.Wrapf(err, "error retrieving exec session %s exit code", sessionID) + } + return diedEvent.ContainerExitCode, nil + } return -1, err } exitCode := session.ExitCode @@ -930,6 +928,8 @@ func (c *Container) getActiveExecSessions() ([]string, error) { session.PID = 0 session.State = define.ExecStateStopped + c.newExecDiedEvent(session.ID(), exitCode) + needSave = true } if err := c.cleanupExecBundle(id); err != nil { @@ -1039,6 +1039,22 @@ func writeExecExitCode(c *Container, sessionID string, exitCode int) error { return errors.Wrapf(err, "error syncing container %s state to remove exec session %s", c.ID(), sessionID) } + return justWriteExecExitCode(c, sessionID, exitCode) +} + +func retrieveAndWriteExecExitCode(c *Container, sessionID string) error { + exitCode, err := c.readExecExitCode(sessionID) + if err != nil { + return err + } + + return justWriteExecExitCode(c, sessionID, exitCode) +} + +func justWriteExecExitCode(c *Container, sessionID string, exitCode int) error { + // Write an event first + c.newExecDiedEvent(sessionID, exitCode) + session, ok := c.state.ExecSessions[sessionID] if !ok { // Exec session already removed. diff --git a/libpod/events.go b/libpod/events.go index 8392296744..22c51aeec8 100644 --- a/libpod/events.go +++ b/libpod/events.go @@ -46,7 +46,22 @@ func (c *Container) newContainerExitedEvent(exitCode int32) { e.Type = events.Container e.ContainerExitCode = int(exitCode) if err := c.runtime.eventer.Write(e); err != nil { - logrus.Errorf("unable to write pod event: %q", err) + logrus.Errorf("unable to write container exited event: %q", err) + } +} + +// newExecDiedEvent creates a new event for an exec session's death +func (c *Container) newExecDiedEvent(sessionID string, exitCode int) { + e := events.NewEvent(events.ExecDied) + e.ID = c.ID() + e.Name = c.Name() + e.Image = c.config.RootfsImageName + e.Type = events.Container + e.ContainerExitCode = exitCode + e.Attributes = make(map[string]string) + e.Attributes["execID"] = sessionID + if err := c.runtime.eventer.Write(e); err != nil { + logrus.Errorf("unable to write exec died event: %q", err) } } @@ -154,3 +169,25 @@ func (r *Runtime) GetLastContainerEvent(ctx context.Context, nameOrID string, co // return the last element in the slice return containerEvents[len(containerEvents)-1], nil } + +// GetExecDiedEvent takes a container name or ID, exec session ID, and returns +// that exec session's Died event (if it has already occurred). +func (r *Runtime) GetExecDiedEvent(ctx context.Context, nameOrID, execSessionID string) (*events.Event, error) { + filters := []string{ + fmt.Sprintf("container=%s", nameOrID), + "event=exec_died", + "type=container", + fmt.Sprintf("label=execID=%s", execSessionID), + } + + containerEvents, err := r.GetEvents(ctx, filters) + if err != nil { + return nil, err + } + // There *should* only be one event maximum. + // But... just in case... let's not blow up if there's more than one. + if len(containerEvents) < 1 { + return nil, errors.Wrapf(events.ErrEventNotFound, "exec died event for session %s (container %s) not found", execSessionID, nameOrID) + } + return containerEvents[len(containerEvents)-1], nil +} diff --git a/libpod/events/config.go b/libpod/events/config.go index 085fa9d527..d88d7b6e33 100644 --- a/libpod/events/config.go +++ b/libpod/events/config.go @@ -127,6 +127,8 @@ const ( Create Status = "create" // Exec ... Exec Status = "exec" + // ExecDied indicates that an exec session in a container died. + ExecDied Status = "exec_died" // Exited indicates that a container's process died Exited Status = "died" // Export ... diff --git a/libpod/events/events.go b/libpod/events/events.go index 01ea6a3862..e03215eff8 100644 --- a/libpod/events/events.go +++ b/libpod/events/events.go @@ -149,6 +149,8 @@ func StringToStatus(name string) (Status, error) { return Create, nil case Exec.String(): return Exec, nil + case ExecDied.String(): + return ExecDied, nil case Exited.String(): return Exited, nil case Export.String():