libpod: fix wait and exit-code logic
This commit addresses three intertwined bugs to fix an issue when using
the GitLab runner on Podman.  The three bug fixes are not split into
separate commits as tests won't pass otherwise; splitting them would add
avoidable noise when bisecting future issues.

1) Podman conflated states: even when asked to wait for the `exited`
   state, Podman returned as soon as a container transitioned to
   `stopped`.  The issue surfaced in failing GitLab tests [1], as
   `conmon`'s buffers had not (yet) been emptied when attaching to a
   container right after a wait.  The race window was extremely narrow,
   and I only managed to reproduce it with the GitLab runner [1] unit
   tests.

2) The clearer separation between `exited` and `stopped` revealed a race
   condition predating these changes.  If a container is configured for
   auto-removal (e.g., via `run --rm`), the "run" process competes with
   the "cleanup" process running in the background.  The window of the
   race condition was sufficiently large that the "cleanup" process
   could remove the container and its storage before the "run" process
   had read the exit code, leaving "run" waiting indefinitely.

   Address the exit-code race condition by recording exit codes in the
   main libpod database.  Exit codes can now be read from the database.
   When waiting for a container to exit, Podman first waits for the
   container to transition to `exited` and then queries the database
   for its exit code; a short sketch of this flow follows the list
   below.  Outdated exit codes are pruned during cleanup (i.e., at a
   non-performance-critical point) and when refreshing the database
   after a reboot.  An exit code is considered outdated once it is
   older than 5 minutes.

   While the race condition predates this change, the waiting process
   was apparently always fast enough to catch the exit code, thanks to
   issue 1): `exited` and `stopped` were conflated.  The waiting
   process hence caught the exit code after the container transitioned
   to `stopped` but before it `exited` and got removed.

3) With 1) and 2), Podman now properly waits for a container to
   transition to the `exited` state.  Some tests did not pass after 1)
   and 2), which revealed the third bug: `conmon` was executed with its
   working directory pointing to the OCI runtime bundle of the
   container.  The changed working directory broke the resolution of
   relative paths in the "cleanup" process.  The "cleanup" process
   errored out before actually cleaning up the container, and the
   waiting "main" process ran indefinitely, or until hitting a timeout.
   Fix the issue by executing `conmon` with the same working directory
   as Podman; a sketch of this change follows the note below.
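
A minimal sketch of the new wait flow from 2); the interface and helper
names here are simplified assumptions, and the real implementation
lives in libpod/container_api.go and libpod/boltdb_state.go below:

    package sketch

    import (
        "context"
        "fmt"
        "time"
    )

    // exitState is a hypothetical stand-in for the libpod database layer;
    // GetContainerExitCode mirrors the method added in this commit, while
    // HasExited abbreviates the real ensureState(ContainerStateExited) check.
    type exitState interface {
        HasExited(id string) bool
        GetContainerExitCode(id string) (int32, error)
    }

    // waitForExit waits for the `exited` state, then reads the exit code
    // from the database instead of conmon's exit file.
    func waitForExit(ctx context.Context, state exitState, id string, poll time.Duration) (int32, error) {
        for {
            if state.HasExited(id) {
                return state.GetContainerExitCode(id)
            }
            select {
            case <-ctx.Done():
                return -1, fmt.Errorf("waiting for exit code of container %s: %w", id, ctx.Err())
            default:
                time.Sleep(poll)
            }
        }
    }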

Note that fixing 3) *may* address a number of issues we have seen in the
past where, for *some* reason, cleanup processes did not fire.
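
The working-directory change from 3) boils down to how the `conmon`
child process is launched.  A runnable sketch with Go's os/exec; the
conmon invocation is a placeholder, not Podman's actual launch code:

    package main

    import (
        "os"
        "os/exec"
    )

    func main() {
        // Placeholder invocation; Podman passes many more flags to conmon.
        cmd := exec.Command("conmon", "--version")
        // Before the fix, conmon was launched with the OCI runtime bundle
        // as its working directory (roughly cmd.Dir = bundlePath), which
        // broke relative paths in the "cleanup" process.  An empty Dir
        // makes the child inherit Podman's own working directory.
        cmd.Dir = ""
        cmd.Stdout = os.Stdout
        cmd.Stderr = os.Stderr
        if err := cmd.Run(); err != nil {
            os.Exit(1)
        }
    }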

[1] https://gitlab.com/gitlab-org/gitlab-runner/-/issues/27119#note_970712864

Signed-off-by: Valentin Rothberg <[email protected]>

[MH: Minor reword of commit message]

Signed-off-by: Matthew Heon <[email protected]>
vrothberg authored and mheon committed Jun 23, 2022
1 parent 15188dc commit 30e7cbc
Showing 10 changed files with 311 additions and 98 deletions.
175 changes: 175 additions & 0 deletions libpod/boltdb_state.go
@@ -5,8 +5,10 @@ import (
"fmt"
"net"
"os"
"strconv"
"strings"
"sync"
"time"

"github.com/containers/common/libnetwork/types"
"github.com/containers/podman/v4/libpod/define"
@@ -63,6 +65,13 @@ type BoltState struct {
// initially created the database. This must match for any further instances
// that access the database, to ensure that state mismatches with
// containers/storage do not occur.
// - exitCodeBucket/exitCodeTimeStampBucket: (#14559) exit codes must be part
// of the database to resolve a previous race condition when one process waits
// for the exit file to be written and another process removes it along with
// the container during auto-removal. The same race would happen trying to
// read the exit code from the containers bucket. Hence, exit codes go into
// their own bucket. To avoid the rather expensive JSON (un)marshaling, we
// have two buckets: one for the exit codes, the other for the timestamps.

// NewBoltState creates a new bolt-backed state database
func NewBoltState(path string, runtime *Runtime) (State, error) {
@@ -98,6 +107,8 @@ func NewBoltState(path string, runtime *Runtime) (State, error) {
allVolsBkt,
execBkt,
runtimeConfigBkt,
exitCodeBkt,
exitCodeTimeStampBkt,
}

// Does the DB need an update?
@@ -150,6 +161,10 @@ func (s *BoltState) Refresh() error {
return define.ErrDBClosed
}

if err := s.PruneContainerExitCodes(); err != nil {
return err
}

db, err := s.getDBCon()
if err != nil {
return err
@@ -1341,6 +1356,166 @@ func (s *BoltState) GetContainerConfig(id string) (*ContainerConfig, error) {
return config, nil
}

// AddContainerExitCode adds the exit code for the specified container to the database.
func (s *BoltState) AddContainerExitCode(id string, exitCode int32) error {
db, err := s.getDBCon()
if err != nil {
return err
}
defer s.deferredCloseDBCon(db)

rawID := []byte(id)
rawExitCode := []byte(strconv.Itoa(int(exitCode)))
rawTimeStamp, err := time.Now().MarshalText()
if err != nil {
return fmt.Errorf("marshaling exit-code time stamp: %w", err)
}

return db.Update(func(tx *bolt.Tx) error {
exitCodeBucket, err := getExitCodeBucket(tx)
if err != nil {
return err
}
timeStampBucket, err := getExitCodeTimeStampBucket(tx)
if err != nil {
return err
}

if err := exitCodeBucket.Put(rawID, rawExitCode); err != nil {
return fmt.Errorf("adding exit code of container %s to DB: %w", id, err)
}
if err := timeStampBucket.Put(rawID, rawTimeStamp); err != nil {
if rmErr := exitCodeBucket.Delete(rawID); rmErr != nil {
logrus.Errorf("Removing exit code of container %s from DB: %v", id, rmErr)
}
return fmt.Errorf("adding exit-code time stamp of container %s to DB: %w", id, err)
}

return nil
})
}

// GetContainerExitCode returns the exit code for the specified container.
func (s *BoltState) GetContainerExitCode(id string) (int32, error) {
db, err := s.getDBCon()
if err != nil {
return -1, err
}
defer s.deferredCloseDBCon(db)

rawID := []byte(id)
result := int32(-1)
return result, db.View(func(tx *bolt.Tx) error {
exitCodeBucket, err := getExitCodeBucket(tx)
if err != nil {
return err
}

rawExitCode := exitCodeBucket.Get(rawID)
if rawExitCode == nil {
return fmt.Errorf("getting exit code of container %s from DB: %w", id, define.ErrNoSuchExitCode)
}

exitCode, err := strconv.Atoi(string(rawExitCode))
if err != nil {
return fmt.Errorf("converting raw exit code %v of container %s: %w", rawExitCode, id, err)
}

result = int32(exitCode)
return nil
})
}

// GetContainerExitCodeTimeStamp returns the time stamp when the exit code of
// the specified container was added to the database.
func (s *BoltState) GetContainerExitCodeTimeStamp(id string) (*time.Time, error) {
db, err := s.getDBCon()
if err != nil {
return nil, err
}
defer s.deferredCloseDBCon(db)

rawID := []byte(id)
var result time.Time
return &result, db.View(func(tx *bolt.Tx) error {
timeStampBucket, err := getExitCodeTimeStampBucket(tx)
if err != nil {
return err
}

rawTimeStamp := timeStampBucket.Get(rawID)
if rawTimeStamp == nil {
return fmt.Errorf("getting exit-code time stamp of container %s from DB: %w", id, define.ErrNoSuchExitCode)
}

if err := result.UnmarshalText(rawTimeStamp); err != nil {
return fmt.Errorf("converting raw time stamp %v of container %s from DB: %w", rawTimeStamp, id, err)
}

return nil
})
}

// PruneContainerExitCodes removes exit codes older than 5 minutes.
func (s *BoltState) PruneContainerExitCodes() error {
db, err := s.getDBCon()
if err != nil {
return err
}
defer s.deferredCloseDBCon(db)

threshold := time.Minute * 5
return db.View(func(tx *bolt.Tx) error {
timeStampBucket, err := getExitCodeTimeStampBucket(tx)
if err != nil {
return err
}

return timeStampBucket.ForEach(func(rawID, rawTimeStamp []byte) error {
var timeStamp time.Time
if err := timeStamp.UnmarshalText(rawTimeStamp); err != nil {
return fmt.Errorf("converting raw time stamp %v of container %s from DB: %w", rawTimeStamp, string(rawID), err)
}
if time.Since(timeStamp) > threshold {
// Since the DB connection is locked, pass it
// to remove the exit codes to avoid race
// conditions.
return s.removeContainerExitCode(rawID, db)
}
return nil
})
})
}

// removeContainerExitCode removes the exit code and time stamp of the specified container.
func (s *BoltState) removeContainerExitCode(rawID []byte, db *bolt.DB) error {
return db.Update(func(tx *bolt.Tx) error {
exitCodeBucket, err := getExitCodeBucket(tx)
if err != nil {
return err
}
timeStampBucket, err := getExitCodeTimeStampBucket(tx)
if err != nil {
return err
}

var finalErr error
if err := exitCodeBucket.Delete(rawID); err != nil {
finalErr = fmt.Errorf("removing exit code of container %s from DB: %w", string(rawID), err)
}
if err := timeStampBucket.Delete(rawID); err != nil {
err = fmt.Errorf("removing exit-code time stamp of container %s from DB: %w", string(rawID), err)
if finalErr != nil {
logrus.Error(err)
} else {
finalErr = err
}
}

return finalErr
})
}

// AddExecSession adds an exec session to the state.
func (s *BoltState) AddExecSession(ctr *Container, session *ExecSession) error {
if !s.valid {
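
A hypothetical caller of the exit-code API added above; the helper name
recordThenRead is invented for illustration, while AddContainerExitCode
and GetContainerExitCode are the methods from this file:

    // Record the exit code during cleanup, then read it back at wait time.
    func recordThenRead(state State, ctrID string, code int32) (int32, error) {
        if err := state.AddContainerExitCode(ctrID, code); err != nil {
            return -1, err
        }
        // The container and its storage may be removed right after this
        // point, yet the exit code stays readable for up to 5 minutes.
        return state.GetContainerExitCode(ctrID)
    }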
22 changes: 22 additions & 0 deletions libpod/boltdb_state_internal.go
@@ -29,6 +29,9 @@ const (
aliasesName = "aliases"
runtimeConfigName = "runtime-config"

exitCodeName = "exit-code"
exitCodeTimeStampName = "exit-code-time-stamp"

configName = "config"
stateName = "state"
dependenciesName = "dependencies"
@@ -65,6 +68,9 @@ var (
volDependenciesBkt = []byte(volCtrDependencies)
networksBkt = []byte(networksName)

exitCodeBkt = []byte(exitCodeName)
exitCodeTimeStampBkt = []byte(exitCodeTimeStampName)

configKey = []byte(configName)
stateKey = []byte(stateName)
netNSKey = []byte(netNSName)
@@ -362,6 +368,22 @@ func getRuntimeConfigBucket(tx *bolt.Tx) (*bolt.Bucket, error) {
return bkt, nil
}

func getExitCodeBucket(tx *bolt.Tx) (*bolt.Bucket, error) {
bkt := tx.Bucket(exitCodeBkt)
if bkt == nil {
return nil, errors.Wrapf(define.ErrDBBadConfig, "exit-code container bucket not found in DB")
}
return bkt, nil
}

func getExitCodeTimeStampBucket(tx *bolt.Tx) (*bolt.Bucket, error) {
bkt := tx.Bucket(exitCodeTimeStampBkt)
if bkt == nil {
return nil, errors.Wrapf(define.ErrDBBadConfig, "exit-code time stamp bucket not found in DB")
}
return bkt, nil
}

func (s *BoltState) getContainerConfigFromDB(id []byte, config *ContainerConfig, ctrsBkt *bolt.Bucket) error {
ctrBkt := ctrsBkt.Bucket(id)
if ctrBkt == nil {
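
For context, the buckets listed in NewBoltState (shown earlier) are
created when the database is first opened; with bbolt that is typically
a single Update transaction along these lines (sketch, assuming bbolt's
standard CreateBucketIfNotExists API):

    buckets := [][]byte{exitCodeBkt, exitCodeTimeStampBkt} // new in this commit
    err := db.Update(func(tx *bolt.Tx) error {
        for _, bkt := range buckets {
            if _, err := tx.CreateBucketIfNotExists(bkt); err != nil {
                return fmt.Errorf("creating bucket %q: %w", string(bkt), err)
            }
        }
        return nil
    })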
97 changes: 71 additions & 26 deletions libpod/container_api.go
@@ -2,6 +2,7 @@ package libpod

import (
"context"
"fmt"
"io"
"io/ioutil"
"net/http"
@@ -490,41 +491,84 @@ func (c *Container) RemoveArtifact(name string) error {

// Wait blocks until the container exits and returns its exit code.
func (c *Container) Wait(ctx context.Context) (int32, error) {
- return c.WaitWithInterval(ctx, DefaultWaitInterval)
+ return c.WaitForExit(ctx, DefaultWaitInterval)
}

- // WaitWithInterval blocks until the container to exit and returns its exit
- // code. The argument is the interval at which checks the container's status.
- func (c *Container) WaitWithInterval(ctx context.Context, waitTimeout time.Duration) (int32, error) {
+ // WaitForExit blocks until the container exits and returns its exit code.
+ // The argument is the interval at which it checks the container's status.
+ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration) (int32, error) {
if !c.valid {
return -1, define.ErrCtrRemoved
}

- exitFile, err := c.exitFilePath()
- if err != nil {
- return -1, err
- }
- chWait := make(chan error, 1)
+ id := c.ID()
+ var conmonTimer time.Timer
+ conmonTimerSet := false

- go func() {
- <-ctx.Done()
- chWait <- define.ErrCanceled
- }()
+ getExitCode := func() (bool, int32, error) {
+ containerRemoved := false
+ if !c.batched {
+ c.lock.Lock()
+ defer c.lock.Unlock()
+ }

- for {
- // ignore errors here (with exception of cancellation), it is only used to avoid waiting
- // too long.
- _, e := WaitForFile(exitFile, chWait, waitTimeout)
- if e == define.ErrCanceled {
- return -1, define.ErrCanceled
+ if err := c.syncContainer(); err != nil {
+ if !errors.Is(err, define.ErrNoSuchCtr) {
+ return false, -1, err
+ }
+ containerRemoved = true
}

+ // If conmon is not alive anymore set a timer to make sure
+ // we're returning even if conmon has forcefully been killed.
+ if !conmonTimerSet && !containerRemoved {
+ conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
+ switch {
+ case errors.Is(err, define.ErrNoSuchCtr):
+ containerRemoved = true
+ case err != nil:
+ return false, -1, err
+ case !conmonAlive:
+ timerDuration := time.Second * 20
+ conmonTimer = *time.NewTimer(timerDuration)
+ conmonTimerSet = true
+ }
+ }

+ if !containerRemoved {
+ // If conmon is dead for more than $timerDuration or if the
+ // container has exited properly, try to look up the exit code.
+ select {
+ case <-conmonTimer.C:
+ logrus.Debugf("Exceeded conmon timeout waiting for container %s to exit", id)
+ default:
+ if !c.ensureState(define.ContainerStateExited, define.ContainerStateConfigured) {
+ return false, -1, nil
+ }
+ }
+ }

- stopped, code, err := c.isStopped()
+ exitCode, err := c.runtime.state.GetContainerExitCode(id)
+ if err != nil {
+ return true, -1, err
+ }

+ return true, exitCode, nil
+ }

+ for {
+ hasExited, exitCode, err := getExitCode()
+ if hasExited {
+ return exitCode, err
+ }
if err != nil {
return -1, err
}
- if stopped {
- return code, nil
+ select {
+ case <-ctx.Done():
+ return -1, fmt.Errorf("waiting for exit code of container %s canceled", id)
+ default:
+ time.Sleep(pollInterval)
}
}
}
@@ -551,11 +595,12 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
wantedStates := make(map[define.ContainerStatus]bool, len(conditions))

for _, condition := range conditions {
- if condition == define.ContainerStateStopped || condition == define.ContainerStateExited {
+ switch condition {
+ case define.ContainerStateExited, define.ContainerStateStopped:
waitForExit = true
- continue
+ default:
+ wantedStates[condition] = true
}
- wantedStates[condition] = true
}

trySend := func(code int32, err error) {
@@ -572,7 +617,7 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
go func() {
defer wg.Done()

- code, err := c.WaitWithInterval(ctx, waitTimeout)
+ code, err := c.WaitForExit(ctx, waitTimeout)
trySend(code, err)
}()
}
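
End to end, the new semantics of WaitForExit can be exercised through
Wait with a context deadline (usage sketch; assumes ctr is a *Container
obtained from a libpod Runtime, inside a function body):

    ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
    defer cancel()

    // Wait delegates to WaitForExit with DefaultWaitInterval; a canceled
    // context now returns an error instead of blocking on a container
    // that was auto-removed before its exit file could be read.
    code, err := ctr.Wait(ctx)
    if err != nil {
        logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err)
        return
    }
    fmt.Printf("container exited with code %d\n", code)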
(The remaining 7 changed files are not shown.)
