Skip to content

Commit

Permalink
Two fixes for DB exit code handling
Browse files Browse the repository at this point in the history
Firstly: don't prune exit codes after a refresh - instead, clear
the table entirely. We are guaranteed that all containers are
gone after a refresh, we should not worry about exit codes given
this.

Secondly: alter the way pruning was done. We were updating the DB
by calling Update from within an existing View, and stacking an
RW transaction on top of an existing RO one seems dodgy; further,
modifying a bucket while iterating over it with ForEach is
undefined behavior.

Hopefully this will resolve our CI issues.

Signed-off-by: Matthew Heon <[email protected]>
  • Loading branch information
mheon committed Jun 23, 2022
1 parent 30e7cbc commit 3a810b8
Showing 1 changed file with 107 additions and 34 deletions.
141 changes: 107 additions & 34 deletions libpod/boltdb_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,6 @@ func (s *BoltState) Refresh() error {
return define.ErrDBClosed
}

if err := s.PruneContainerExitCodes(); err != nil {
return err
}

db, err := s.getDBCon()
if err != nil {
return err
Expand Down Expand Up @@ -207,6 +203,45 @@ func (s *BoltState) Refresh() error {
return err
}

exitCodeBucket, err := getExitCodeBucket(tx)
if err != nil {
return err
}

timeStampBucket, err := getExitCodeTimeStampBucket(tx)
if err != nil {
return err
}

// Clear all exec exit codes
toRemoveExitCodes := []string{}
err = exitCodeBucket.ForEach(func(id, _ []byte) error {
toRemoveExitCodes = append(toRemoveExitCodes, string(id))
return nil
})
if err != nil {
return errors.Wrapf(err, "error reading exit codes bucket")
}
for _, id := range toRemoveExitCodes {
if err := exitCodeBucket.Delete([]byte(id)); err != nil {
return errors.Wrapf(err, "error removing exit code for ID %s", id)
}
}

toRemoveTimeStamps := []string{}
err = timeStampBucket.ForEach(func(id, _ []byte) error {
toRemoveTimeStamps = append(toRemoveTimeStamps, string(id))
return nil
})
if err != nil {
return errors.Wrapf(err, "reading timestamps bucket")
}
for _, id := range toRemoveTimeStamps {
if err := timeStampBucket.Delete([]byte(id)); err != nil {
return errors.Wrapf(err, "removing timestamp for ID %s", id)
}
}

// Iterate through all IDs. Check if they are containers.
// If they are, unmarshal their state, and then clear
// PID, mountpoint, and state for all of them
Expand Down Expand Up @@ -1358,6 +1393,14 @@ func (s *BoltState) GetContainerConfig(id string) (*ContainerConfig, error) {

// AddContainerExitCode adds the exit code for the specified container to the database.
func (s *BoltState) AddContainerExitCode(id string, exitCode int32) error {
if len(id) == 0 {
return define.ErrEmptyID
}

if !s.valid {
return define.ErrDBClosed
}

db, err := s.getDBCon()
if err != nil {
return err
Expand Down Expand Up @@ -1397,6 +1440,14 @@ func (s *BoltState) AddContainerExitCode(id string, exitCode int32) error {

// GetContainerExitCode returns the exit code for the specified container.
func (s *BoltState) GetContainerExitCode(id string) (int32, error) {
if len(id) == 0 {
return -1, define.ErrEmptyID
}

if !s.valid {
return -1, define.ErrDBClosed
}

db, err := s.getDBCon()
if err != nil {
return -1, err
Expand Down Expand Up @@ -1429,6 +1480,14 @@ func (s *BoltState) GetContainerExitCode(id string) (int32, error) {
// GetContainerExitCodeTimeStamp returns the time stamp when the exit code of
// the specified container was added to the database.
func (s *BoltState) GetContainerExitCodeTimeStamp(id string) (*time.Time, error) {
if len(id) == 0 {
return nil, define.ErrEmptyID
}

if !s.valid {
return nil, define.ErrDBClosed
}

db, err := s.getDBCon()
if err != nil {
return nil, err
Expand Down Expand Up @@ -1456,16 +1515,22 @@ func (s *BoltState) GetContainerExitCodeTimeStamp(id string) (*time.Time, error)
})
}

// PrunExitCodes removes exit codes older than 5 minutes.
// PruneExitCodes removes exit codes older than 5 minutes.
func (s *BoltState) PruneContainerExitCodes() error {
if !s.valid {
return define.ErrDBClosed
}

db, err := s.getDBCon()
if err != nil {
return err
}
defer s.deferredCloseDBCon(db)

toRemoveIDs := []string{}

threshold := time.Minute * 5
return db.View(func(tx *bolt.Tx) error {
err = db.View(func(tx *bolt.Tx) error {
timeStampBucket, err := getExitCodeTimeStampBucket(tx)
if err != nil {
return err
Expand All @@ -1477,43 +1542,51 @@ func (s *BoltState) PruneContainerExitCodes() error {
return fmt.Errorf("converting raw time stamp %v of container %s from DB: %w", rawTimeStamp, string(rawID), err)
}
if time.Since(timeStamp) > threshold {
// Since the DB connection is locked, pass it
// to remove the exit codes to avoid race
// conditions.
return s.removeContainerExitCode(rawID, db)
toRemoveIDs = append(toRemoveIDs, string(rawID))
}
return nil
})
})
}
if err != nil {
return errors.Wrapf(err, "reading exit codes to prune")
}

// removeContainerExitCode removes the exit code and time stamp of the specified container.
func (s *BoltState) removeContainerExitCode(rawID []byte, db *bolt.DB) error {
return db.Update(func(tx *bolt.Tx) error {
exitCodeBucket, err := getExitCodeBucket(tx)
if err != nil {
return err
}
timeStampBucket, err := getExitCodeTimeStampBucket(tx)
if err != nil {
return err
}
if len(toRemoveIDs) > 0 {
err = db.Update(func(tx *bolt.Tx) error {
exitCodeBucket, err := getExitCodeBucket(tx)
if err != nil {
return err
}
timeStampBucket, err := getExitCodeTimeStampBucket(tx)
if err != nil {
return err
}

var finalErr error
if err := exitCodeBucket.Delete(rawID); err != nil {
finalErr = fmt.Errorf("removing exit code of container %s from DB: %w", string(rawID), err)
}
if err := timeStampBucket.Delete(rawID); err != nil {
err = fmt.Errorf("removing exit-code time stamp of container %s from DB: %w", string(rawID), err)
if finalErr != nil {
logrus.Error(err)
} else {
finalErr = err
var finalErr error
for _, id := range toRemoveIDs {
rawID := []byte(id)
if err := exitCodeBucket.Delete(rawID); err != nil {
if finalErr != nil {
logrus.Error(finalErr)
}
finalErr = fmt.Errorf("removing exit code of container %s from DB: %w", id, err)
}
if err := timeStampBucket.Delete(rawID); err != nil {
if finalErr != nil {
logrus.Error(finalErr)
}
finalErr = fmt.Errorf("removing exit code timestamp of container %s from DB: %w", id, err)
}
}

return finalErr
})
if err != nil {
return errors.Wrapf(err, "pruning exit codes")
}
}

return finalErr
})
return nil
}

// AddExecSession adds an exec session to the state.
Expand Down

0 comments on commit 3a810b8

Please sign in to comment.