diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go index c3db6152a9..e5219ab8c9 100644 --- a/libpod/boltdb_state.go +++ b/libpod/boltdb_state.go @@ -5,8 +5,10 @@ import ( "fmt" "net" "os" + "strconv" "strings" "sync" + "time" "github.com/containers/common/libnetwork/types" "github.com/containers/podman/v4/libpod/define" @@ -63,6 +65,13 @@ type BoltState struct { // initially created the database. This must match for any further instances // that access the database, to ensure that state mismatches with // containers/storage do not occur. +// - exitCodeBucket/exitCodeTimeStampBucket: (#14559) exit codes must be part +// of the database to resolve a previous race condition when one process waits +// for the exit file to be written and another process removes it along with +// the container during auto-removal. The same race would happen trying to +// read the exit code from the containers bucket. Hence, exit codes go into +// their own bucket. To avoid the rather expensive JSON (un)marshaling, we +// have two buckets: one for the exit codes, the other for the timestamps. // NewBoltState creates a new bolt-backed state database func NewBoltState(path string, runtime *Runtime) (State, error) { @@ -98,6 +107,8 @@ func NewBoltState(path string, runtime *Runtime) (State, error) { allVolsBkt, execBkt, runtimeConfigBkt, + exitCodeBkt, + exitCodeTimeStampBkt, } // Does the DB need an update? @@ -150,6 +161,10 @@ func (s *BoltState) Refresh() error { return define.ErrDBClosed } + if err := s.PruneContainerExitCodes(); err != nil { + return err + } + db, err := s.getDBCon() if err != nil { return err @@ -1341,6 +1356,166 @@ func (s *BoltState) GetContainerConfig(id string) (*ContainerConfig, error) { return config, nil } +// AddContainerExitCode adds the exit code for the specified container to the database. +func (s *BoltState) AddContainerExitCode(id string, exitCode int32) error { + db, err := s.getDBCon() + if err != nil { + return err + } + defer s.deferredCloseDBCon(db) + + rawID := []byte(id) + rawExitCode := []byte(strconv.Itoa(int(exitCode))) + rawTimeStamp, err := time.Now().MarshalText() + if err != nil { + return fmt.Errorf("marshaling exit-code time stamp: %w", err) + } + + return db.Update(func(tx *bolt.Tx) error { + exitCodeBucket, err := getExitCodeBucket(tx) + if err != nil { + return err + } + timeStampBucket, err := getExitCodeTimeStampBucket(tx) + if err != nil { + return err + } + + if err := exitCodeBucket.Put(rawID, rawExitCode); err != nil { + return fmt.Errorf("adding exit code of container %s to DB: %w", id, err) + } + if err := timeStampBucket.Put(rawID, rawTimeStamp); err != nil { + if rmErr := exitCodeBucket.Delete(rawID); rmErr != nil { + logrus.Errorf("Removing exit code of container %s from DB: %v", id, rmErr) + } + return fmt.Errorf("adding exit-code time stamp of container %s to DB: %w", id, err) + } + + return nil + }) +} + +// GetContainerExitCode returns the exit code for the specified container. +func (s *BoltState) GetContainerExitCode(id string) (int32, error) { + db, err := s.getDBCon() + if err != nil { + return -1, err + } + defer s.deferredCloseDBCon(db) + + rawID := []byte(id) + result := int32(-1) + return result, db.View(func(tx *bolt.Tx) error { + exitCodeBucket, err := getExitCodeBucket(tx) + if err != nil { + return err + } + + rawExitCode := exitCodeBucket.Get(rawID) + if rawExitCode == nil { + return fmt.Errorf("getting exit code of container %s from DB: %w", id, define.ErrNoSuchExitCode) + } + + exitCode, err := strconv.Atoi(string(rawExitCode)) + if err != nil { + return fmt.Errorf("converting raw exit code %v of container %s: %w", rawExitCode, id, err) + } + + result = int32(exitCode) + return nil + }) +} + +// GetContainerExitCodeTimeStamp returns the time stamp when the exit code of +// the specified container was added to the database. +func (s *BoltState) GetContainerExitCodeTimeStamp(id string) (*time.Time, error) { + db, err := s.getDBCon() + if err != nil { + return nil, err + } + defer s.deferredCloseDBCon(db) + + rawID := []byte(id) + var result time.Time + return &result, db.View(func(tx *bolt.Tx) error { + timeStampBucket, err := getExitCodeTimeStampBucket(tx) + if err != nil { + return err + } + + rawTimeStamp := timeStampBucket.Get(rawID) + if rawTimeStamp == nil { + return fmt.Errorf("getting exit-code time stamp of container %s from DB: %w", id, define.ErrNoSuchExitCode) + } + + if err := result.UnmarshalText(rawTimeStamp); err != nil { + return fmt.Errorf("converting raw time stamp %v of container %s from DB: %w", rawTimeStamp, id, err) + } + + return nil + }) +} + +// PrunExitCodes removes exit codes older than 5 minutes. +func (s *BoltState) PruneContainerExitCodes() error { + db, err := s.getDBCon() + if err != nil { + return err + } + defer s.deferredCloseDBCon(db) + + threshold := time.Minute * 5 + return db.View(func(tx *bolt.Tx) error { + timeStampBucket, err := getExitCodeTimeStampBucket(tx) + if err != nil { + return err + } + + return timeStampBucket.ForEach(func(rawID, rawTimeStamp []byte) error { + var timeStamp time.Time + if err := timeStamp.UnmarshalText(rawTimeStamp); err != nil { + return fmt.Errorf("converting raw time stamp %v of container %s from DB: %w", rawTimeStamp, string(rawID), err) + } + if time.Since(timeStamp) > threshold { + // Since the DB connection is locked, pass it + // to remove the exit codes to avoid race + // conditions. + return s.removeContainerExitCode(rawID, db) + } + return nil + }) + }) +} + +// removeContainerExitCode removes the exit code and time stamp of the specified container. +func (s *BoltState) removeContainerExitCode(rawID []byte, db *bolt.DB) error { + return db.Update(func(tx *bolt.Tx) error { + exitCodeBucket, err := getExitCodeBucket(tx) + if err != nil { + return err + } + timeStampBucket, err := getExitCodeTimeStampBucket(tx) + if err != nil { + return err + } + + var finalErr error + if err := exitCodeBucket.Delete(rawID); err != nil { + finalErr = fmt.Errorf("removing exit code of container %s from DB: %w", string(rawID), err) + } + if err := timeStampBucket.Delete(rawID); err != nil { + err = fmt.Errorf("removing exit-code time stamp of container %s from DB: %w", string(rawID), err) + if finalErr != nil { + logrus.Error(err) + } else { + finalErr = err + } + } + + return finalErr + }) +} + // AddExecSession adds an exec session to the state. func (s *BoltState) AddExecSession(ctr *Container, session *ExecSession) error { if !s.valid { diff --git a/libpod/boltdb_state_internal.go b/libpod/boltdb_state_internal.go index d6f035af93..edba78d6dc 100644 --- a/libpod/boltdb_state_internal.go +++ b/libpod/boltdb_state_internal.go @@ -29,6 +29,9 @@ const ( aliasesName = "aliases" runtimeConfigName = "runtime-config" + exitCodeName = "exit-code" + exitCodeTimeStampName = "exit-code-time-stamp" + configName = "config" stateName = "state" dependenciesName = "dependencies" @@ -65,6 +68,9 @@ var ( volDependenciesBkt = []byte(volCtrDependencies) networksBkt = []byte(networksName) + exitCodeBkt = []byte(exitCodeName) + exitCodeTimeStampBkt = []byte(exitCodeTimeStampName) + configKey = []byte(configName) stateKey = []byte(stateName) netNSKey = []byte(netNSName) @@ -362,6 +368,22 @@ func getRuntimeConfigBucket(tx *bolt.Tx) (*bolt.Bucket, error) { return bkt, nil } +func getExitCodeBucket(tx *bolt.Tx) (*bolt.Bucket, error) { + bkt := tx.Bucket(exitCodeBkt) + if bkt == nil { + return nil, errors.Wrapf(define.ErrDBBadConfig, "exit-code container bucket not found in DB") + } + return bkt, nil +} + +func getExitCodeTimeStampBucket(tx *bolt.Tx) (*bolt.Bucket, error) { + bkt := tx.Bucket(exitCodeTimeStampBkt) + if bkt == nil { + return nil, errors.Wrapf(define.ErrDBBadConfig, "exit-code time stamp bucket not found in DB") + } + return bkt, nil +} + func (s *BoltState) getContainerConfigFromDB(id []byte, config *ContainerConfig, ctrsBkt *bolt.Bucket) error { ctrBkt := ctrsBkt.Bucket(id) if ctrBkt == nil { diff --git a/libpod/container_api.go b/libpod/container_api.go index b064d35281..c14fe95b09 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -2,6 +2,7 @@ package libpod import ( "context" + "fmt" "io" "io/ioutil" "net/http" @@ -490,41 +491,84 @@ func (c *Container) RemoveArtifact(name string) error { // Wait blocks until the container exits and returns its exit code. func (c *Container) Wait(ctx context.Context) (int32, error) { - return c.WaitWithInterval(ctx, DefaultWaitInterval) + return c.WaitForExit(ctx, DefaultWaitInterval) } -// WaitWithInterval blocks until the container to exit and returns its exit -// code. The argument is the interval at which checks the container's status. -func (c *Container) WaitWithInterval(ctx context.Context, waitTimeout time.Duration) (int32, error) { +// WaitForExit blocks until the container exits and returns its exit code. The +// argument is the interval at which checks the container's status. +func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration) (int32, error) { if !c.valid { return -1, define.ErrCtrRemoved } - exitFile, err := c.exitFilePath() - if err != nil { - return -1, err - } - chWait := make(chan error, 1) + id := c.ID() + var conmonTimer time.Timer + conmonTimerSet := false - go func() { - <-ctx.Done() - chWait <- define.ErrCanceled - }() + getExitCode := func() (bool, int32, error) { + containerRemoved := false + if !c.batched { + c.lock.Lock() + defer c.lock.Unlock() + } - for { - // ignore errors here (with exception of cancellation), it is only used to avoid waiting - // too long. - _, e := WaitForFile(exitFile, chWait, waitTimeout) - if e == define.ErrCanceled { - return -1, define.ErrCanceled + if err := c.syncContainer(); err != nil { + if !errors.Is(err, define.ErrNoSuchCtr) { + return false, -1, err + } + containerRemoved = true + } + + // If conmon is not alive anymore set a timer to make sure + // we're returning even if conmon has forcefully been killed. + if !conmonTimerSet && !containerRemoved { + conmonAlive, err := c.ociRuntime.CheckConmonRunning(c) + switch { + case errors.Is(err, define.ErrNoSuchCtr): + containerRemoved = true + case err != nil: + return false, -1, err + case !conmonAlive: + timerDuration := time.Second * 20 + conmonTimer = *time.NewTimer(timerDuration) + conmonTimerSet = true + } + } + + if !containerRemoved { + // If conmon is dead for more than $timerDuration or if the + // container has exited properly, try to look up the exit code. + select { + case <-conmonTimer.C: + logrus.Debugf("Exceeded conmon timeout waiting for container %s to exit", id) + default: + if !c.ensureState(define.ContainerStateExited, define.ContainerStateConfigured) { + return false, -1, nil + } + } } - stopped, code, err := c.isStopped() + exitCode, err := c.runtime.state.GetContainerExitCode(id) + if err != nil { + return true, -1, err + } + + return true, exitCode, nil + } + + for { + hasExited, exitCode, err := getExitCode() + if hasExited { + return exitCode, err + } if err != nil { return -1, err } - if stopped { - return code, nil + select { + case <-ctx.Done(): + return -1, fmt.Errorf("waiting for exit code of container %s canceled", id) + default: + time.Sleep(pollInterval) } } } @@ -551,11 +595,12 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou wantedStates := make(map[define.ContainerStatus]bool, len(conditions)) for _, condition := range conditions { - if condition == define.ContainerStateStopped || condition == define.ContainerStateExited { + switch condition { + case define.ContainerStateExited, define.ContainerStateStopped: waitForExit = true - continue + default: + wantedStates[condition] = true } - wantedStates[condition] = true } trySend := func(code int32, err error) { @@ -572,7 +617,7 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou go func() { defer wg.Done() - code, err := c.WaitWithInterval(ctx, waitTimeout) + code, err := c.WaitForExit(ctx, waitTimeout) trySend(code, err) }() } diff --git a/libpod/container_internal.go b/libpod/container_internal.go index ce48987f61..ae61298f37 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -219,7 +219,7 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error { // Write an event for the container's death c.newContainerExitedEvent(c.state.ExitCode) - return nil + return c.runtime.state.AddContainerExitCode(c.ID(), c.state.ExitCode) } func (c *Container) shouldRestart() bool { @@ -784,20 +784,6 @@ func (c *Container) getArtifactPath(name string) string { return filepath.Join(c.config.StaticDir, artifactsDir, name) } -// Used with Wait() to determine if a container has exited -func (c *Container) isStopped() (bool, int32, error) { - if !c.batched { - c.lock.Lock() - defer c.lock.Unlock() - } - err := c.syncContainer() - if err != nil { - return true, -1, err - } - - return !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping), c.state.ExitCode, nil -} - // save container state to the database func (c *Container) save() error { if err := c.runtime.state.SaveContainer(c); err != nil { @@ -1282,13 +1268,6 @@ func (c *Container) stop(timeout uint) error { } } - // Check if conmon is still alive. - // If it is not, we won't be getting an exit file. - conmonAlive, err := c.ociRuntime.CheckConmonRunning(c) - if err != nil { - return err - } - // Set the container state to "stopping" and unlock the container // before handing it over to conmon to unblock other commands. #8501 // demonstrates nicely that a high stop timeout will block even simple @@ -1341,21 +1320,18 @@ func (c *Container) stop(timeout uint) error { } c.newContainerEvent(events.Stop) - - c.state.PID = 0 - c.state.ConmonPID = 0 c.state.StoppedByUser = true + conmonAlive, err := c.ociRuntime.CheckConmonRunning(c) + if err != nil { + return err + } if !conmonAlive { - // Conmon is dead, so we can't expect an exit code. - c.state.ExitCode = -1 - c.state.FinishedTime = time.Now() - c.state.State = define.ContainerStateStopped - if err := c.save(); err != nil { - logrus.Errorf("Saving container %s status: %v", c.ID(), err) + if err := c.checkExitFile(); err != nil { + return err } - return errors.Wrapf(define.ErrConmonDead, "container %s conmon process missing, cannot retrieve exit code", c.ID()) + return c.save() } if err := c.save(); err != nil { @@ -1939,6 +1915,18 @@ func (c *Container) cleanup(ctx context.Context) error { } } + // Prune the exit codes of other container during clean up. + // Since Podman is no daemon, we have to clean them up somewhere. + // Cleanup seems like a good place as it's not performance + // critical. + if err := c.runtime.state.PruneContainerExitCodes(); err != nil { + if lastError == nil { + lastError = err + } else { + logrus.Errorf("Pruning container exit codes: %v", err) + } + } + return lastError } diff --git a/libpod/define/errors.go b/libpod/define/errors.go index f5a7c73e5d..9757a85b13 100644 --- a/libpod/define/errors.go +++ b/libpod/define/errors.go @@ -24,6 +24,10 @@ var ( // not exist. ErrNoSuchExecSession = errors.New("no such exec session") + // ErrNoSuchExitCode indicates that the requested container exit code + // does not exist. + ErrNoSuchExitCode = errors.New("no such exit code") + // ErrDepExists indicates that the current object has dependencies and // cannot be removed before them. ErrDepExists = errors.New("dependency exists") diff --git a/libpod/events.go b/libpod/events.go index f09d8402af..021b3b53c7 100644 --- a/libpod/events.go +++ b/libpod/events.go @@ -151,6 +151,9 @@ func (r *Runtime) GetEvents(ctx context.Context, filters []string) ([]*events.Ev // GetLastContainerEvent takes a container name or ID and an event status and returns // the last occurrence of the container event func (r *Runtime) GetLastContainerEvent(ctx context.Context, nameOrID string, containerEvent events.Status) (*events.Event, error) { + // FIXME: events should be read in reverse order! + // https://github.com/containers/podman/issues/14579 + // check to make sure the event.Status is valid if _, err := events.StringToStatus(containerEvent.String()); err != nil { return nil, err diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index b076609375..d417626dc8 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -264,11 +264,6 @@ func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *Conta // status, but will instead only check for the existence of the conmon exit file // and update state to stopped if it exists. func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error { - exitFile, err := r.ExitFilePath(ctr) - if err != nil { - return err - } - runtimeDir, err := util.GetRuntimeDir() if err != nil { return err @@ -340,22 +335,10 @@ func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error { // Only grab exit status if we were not already stopped // If we were, it should already be in the database if ctr.state.State == define.ContainerStateStopped && oldState != define.ContainerStateStopped { - var fi os.FileInfo - chWait := make(chan error) - defer close(chWait) - - _, err := WaitForFile(exitFile, chWait, time.Second*5) - if err == nil { - fi, err = os.Stat(exitFile) + if _, err := ctr.Wait(context.Background()); err != nil { + logrus.Errorf("Waiting for container %s to exit: %v", ctr.ID(), err) } - if err != nil { - ctr.state.ExitCode = -1 - ctr.state.FinishedTime = time.Now() - logrus.Errorf("No exit file for container %s found: %v", ctr.ID(), err) - return nil - } - - return ctr.handleExitFile(exitFile, fi) + return nil } // Handle ContainerStateStopping - keep it unless the container @@ -1166,7 +1149,6 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co }).Debugf("running conmon: %s", r.conmonPath) cmd := exec.Command(r.conmonPath, args...) - cmd.Dir = ctr.bundlePath() cmd.SysProcAttr = &syscall.SysProcAttr{ Setpgid: true, } diff --git a/libpod/state.go b/libpod/state.go index 4710237693..4fbd3c302a 100644 --- a/libpod/state.go +++ b/libpod/state.go @@ -111,6 +111,15 @@ type State interface { // Return a container config from the database by full ID GetContainerConfig(id string) (*ContainerConfig, error) + // Add the exit code for the specified container to the database. + AddContainerExitCode(id string, exitCode int32) error + + // Return the exit code for the specified container. + GetContainerExitCode(id string) (int32, error) + + // Remove exit codes older than 5 minutes. + PruneContainerExitCodes() error + // Add creates a reference to an exec session in the database. // The container the exec session is attached to will be recorded. // The container state will not be modified. diff --git a/pkg/api/handlers/utils/containers.go b/pkg/api/handlers/utils/containers.go index 8588b49ba2..1795f6ce11 100644 --- a/pkg/api/handlers/utils/containers.go +++ b/pkg/api/handlers/utils/containers.go @@ -191,7 +191,6 @@ func waitDockerCondition(ctx context.Context, containerName string, interval tim var notRunningStates = []define.ContainerStatus{ define.ContainerStateCreated, define.ContainerStateRemoving, - define.ContainerStateStopped, define.ContainerStateExited, define.ContainerStateConfigured, } diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index 1934533cf6..281e448f63 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -16,7 +16,6 @@ import ( "github.com/containers/image/v5/manifest" "github.com/containers/podman/v4/libpod" "github.com/containers/podman/v4/libpod/define" - "github.com/containers/podman/v4/libpod/events" "github.com/containers/podman/v4/libpod/logs" "github.com/containers/podman/v4/pkg/checkpoint" "github.com/containers/podman/v4/pkg/domain/entities" @@ -939,6 +938,7 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri } return reports, errors.Wrapf(err, "unable to start container %s", ctr.ID()) } + exitCode = ic.GetContainerExitCode(ctx, ctr) reports = append(reports, &entities.ContainerStartReport{ Id: ctr.ID(), @@ -1099,25 +1099,11 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta func (ic *ContainerEngine) GetContainerExitCode(ctx context.Context, ctr *libpod.Container) int { exitCode, err := ctr.Wait(ctx) - if err == nil { - return int(exitCode) - } - if errors.Cause(err) != define.ErrNoSuchCtr { - logrus.Errorf("Could not retrieve exit code: %v", err) + if err != nil { + logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err) return define.ExecErrorCodeNotFound } - // Make 4 attempt with 0.25s backoff between each for 1 second total - var event *events.Event - for i := 0; i < 4; i++ { - event, err = ic.Libpod.GetLastContainerEvent(ctx, ctr.ID(), events.Exited) - if err != nil { - time.Sleep(250 * time.Millisecond) - continue - } - return event.ContainerExitCode - } - logrus.Errorf("Could not retrieve exit code from event: %v", err) - return define.ExecErrorCodeNotFound + return int(exitCode) } func (ic *ContainerEngine) ContainerLogs(ctx context.Context, containers []string, options entities.ContainerLogsOptions) error {