Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Followon to #14559 #14685

Merged
merged 3 commits into from
Jun 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
248 changes: 248 additions & 0 deletions libpod/boltdb_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ import (
"fmt"
"net"
"os"
"strconv"
"strings"
"sync"
"time"

"github.com/containers/common/libnetwork/types"
"github.com/containers/podman/v4/libpod/define"
Expand Down Expand Up @@ -63,6 +65,13 @@ type BoltState struct {
// initially created the database. This must match for any further instances
// that access the database, to ensure that state mismatches with
// containers/storage do not occur.
// - exitCodeBucket/exitCodeTimeStampBucket: (#14559) exit codes must be part
// of the database to resolve a previous race condition when one process waits
// for the exit file to be written and another process removes it along with
// the container during auto-removal. The same race would happen trying to
// read the exit code from the containers bucket. Hence, exit codes go into
// their own bucket. To avoid the rather expensive JSON (un)marshaling, we
// have two buckets: one for the exit codes, the other for the timestamps.

// NewBoltState creates a new bolt-backed state database
func NewBoltState(path string, runtime *Runtime) (State, error) {
Expand Down Expand Up @@ -98,6 +107,8 @@ func NewBoltState(path string, runtime *Runtime) (State, error) {
allVolsBkt,
execBkt,
runtimeConfigBkt,
exitCodeBkt,
exitCodeTimeStampBkt,
}

// Does the DB need an update?
Expand Down Expand Up @@ -192,6 +203,45 @@ func (s *BoltState) Refresh() error {
return err
}

exitCodeBucket, err := getExitCodeBucket(tx)
if err != nil {
return err
}

timeStampBucket, err := getExitCodeTimeStampBucket(tx)
if err != nil {
return err
}

// Clear all container exit codes and their time stamps
toRemoveExitCodes := []string{}
err = exitCodeBucket.ForEach(func(id, _ []byte) error {
toRemoveExitCodes = append(toRemoveExitCodes, string(id))
return nil
})
if err != nil {
return errors.Wrapf(err, "error reading exit codes bucket")
}
for _, id := range toRemoveExitCodes {
if err := exitCodeBucket.Delete([]byte(id)); err != nil {
return errors.Wrapf(err, "error removing exit code for ID %s", id)
}
}

toRemoveTimeStamps := []string{}
err = timeStampBucket.ForEach(func(id, _ []byte) error {
toRemoveTimeStamps = append(toRemoveTimeStamps, string(id))
return nil
})
if err != nil {
return errors.Wrapf(err, "reading timestamps bucket")
}
for _, id := range toRemoveTimeStamps {
if err := timeStampBucket.Delete([]byte(id)); err != nil {
return errors.Wrapf(err, "removing timestamp for ID %s", id)
}
}

// Iterate through all IDs. Check if they are containers.
// If they are, unmarshal their state, and then clear
// PID, mountpoint, and state for all of them
Expand Down Expand Up @@ -1341,6 +1391,204 @@ func (s *BoltState) GetContainerConfig(id string) (*ContainerConfig, error) {
return config, nil
}

// AddContainerExitCode stores the exit code of the container with the given
// ID in the database, along with a time stamp taken when this method is
// called.
//
// It returns define.ErrEmptyID if id is empty and define.ErrDBClosed if the
// state is no longer valid.
func (s *BoltState) AddContainerExitCode(id string, exitCode int32) error {
	switch {
	case len(id) == 0:
		return define.ErrEmptyID
	case !s.valid:
		return define.ErrDBClosed
	}

	db, err := s.getDBCon()
	if err != nil {
		return err
	}
	defer s.deferredCloseDBCon(db)

	// Encode key and values up front so the write transaction only has to
	// store ready-made byte slices.
	key := []byte(id)
	codeValue := []byte(strconv.Itoa(int(exitCode)))
	stampValue, err := time.Now().MarshalText()
	if err != nil {
		return fmt.Errorf("marshaling exit-code time stamp: %w", err)
	}

	store := func(tx *bolt.Tx) error {
		codes, err := getExitCodeBucket(tx)
		if err != nil {
			return err
		}
		stamps, err := getExitCodeTimeStampBucket(tx)
		if err != nil {
			return err
		}

		if putErr := codes.Put(key, codeValue); putErr != nil {
			return fmt.Errorf("adding exit code of container %s to DB: %w", id, putErr)
		}

		putErr := stamps.Put(key, stampValue)
		if putErr == nil {
			return nil
		}

		// The time stamp could not be written: drop the exit code again so
		// the two buckets do not end up with mismatching entries.
		if rmErr := codes.Delete(key); rmErr != nil {
			logrus.Errorf("Removing exit code of container %s from DB: %v", id, rmErr)
		}
		return fmt.Errorf("adding exit-code time stamp of container %s to DB: %w", id, putErr)
	}

	return db.Update(store)
}

// GetContainerExitCode returns the exit code stored in the database for the
// container with the given ID.
//
// On any error the returned exit code is -1. An empty ID yields
// define.ErrEmptyID, an invalid state yields define.ErrDBClosed, and a missing
// entry yields an error wrapping define.ErrNoSuchExitCode.
func (s *BoltState) GetContainerExitCode(id string) (int32, error) {
	if len(id) == 0 {
		return -1, define.ErrEmptyID
	}

	if !s.valid {
		return -1, define.ErrDBClosed
	}

	db, err := s.getDBCon()
	if err != nil {
		return -1, err
	}
	defer s.deferredCloseDBCon(db)

	rawID := []byte(id)
	result := int32(-1)

	// Run the transaction to completion before building the return values.
	// Reading `result` in the same return statement as the call that mutates
	// it would depend on an evaluation order the Go spec leaves unspecified.
	err = db.View(func(tx *bolt.Tx) error {
		exitCodeBucket, err := getExitCodeBucket(tx)
		if err != nil {
			return err
		}

		rawExitCode := exitCodeBucket.Get(rawID)
		if rawExitCode == nil {
			return fmt.Errorf("getting exit code of container %s from DB: %w", id, define.ErrNoSuchExitCode)
		}

		// Parse with a 32-bit bound so that out-of-range values are reported
		// as errors instead of being silently truncated by the conversion.
		exitCode, err := strconv.ParseInt(string(rawExitCode), 10, 32)
		if err != nil {
			// %q prints the stored bytes as text rather than as a list of
			// numeric byte values.
			return fmt.Errorf("converting raw exit code %q of container %s: %w", rawExitCode, id, err)
		}

		result = int32(exitCode)
		return nil
	})

	return result, err
}

// GetContainerExitCodeTimeStamp returns the time stamp when the exit code of
// the specified container was added to the database.
//
// The returned pointer is nil whenever the error is non-nil. An empty ID
// yields define.ErrEmptyID, an invalid state yields define.ErrDBClosed, and a
// missing entry yields an error wrapping define.ErrNoSuchExitCode.
func (s *BoltState) GetContainerExitCodeTimeStamp(id string) (*time.Time, error) {
	if len(id) == 0 {
		return nil, define.ErrEmptyID
	}

	if !s.valid {
		return nil, define.ErrDBClosed
	}

	db, err := s.getDBCon()
	if err != nil {
		return nil, err
	}
	defer s.deferredCloseDBCon(db)

	rawID := []byte(id)
	var result time.Time
	err = db.View(func(tx *bolt.Tx) error {
		timeStampBucket, err := getExitCodeTimeStampBucket(tx)
		if err != nil {
			return err
		}

		rawTimeStamp := timeStampBucket.Get(rawID)
		if rawTimeStamp == nil {
			return fmt.Errorf("getting exit-code time stamp of container %s from DB: %w", id, define.ErrNoSuchExitCode)
		}

		if err := result.UnmarshalText(rawTimeStamp); err != nil {
			// %q prints the stored bytes as text rather than as a list of
			// numeric byte values.
			return fmt.Errorf("converting raw time stamp %q of container %s from DB: %w", rawTimeStamp, id, err)
		}

		return nil
	})
	if err != nil {
		// Do not hand out a pointer to a zero or partially decoded time
		// alongside an error.
		return nil, err
	}

	return &result, nil
}

// PruneExitCodes removes exit codes older than 5 minutes.
func (s *BoltState) PruneContainerExitCodes() error {
if !s.valid {
return define.ErrDBClosed
}

db, err := s.getDBCon()
if err != nil {
return err
}
defer s.deferredCloseDBCon(db)

toRemoveIDs := []string{}

threshold := time.Minute * 5
err = db.View(func(tx *bolt.Tx) error {
timeStampBucket, err := getExitCodeTimeStampBucket(tx)
if err != nil {
return err
}

return timeStampBucket.ForEach(func(rawID, rawTimeStamp []byte) error {
var timeStamp time.Time
if err := timeStamp.UnmarshalText(rawTimeStamp); err != nil {
return fmt.Errorf("converting raw time stamp %v of container %s from DB: %w", rawTimeStamp, string(rawID), err)
}
if time.Since(timeStamp) > threshold {
toRemoveIDs = append(toRemoveIDs, string(rawID))
}
return nil
})
})
if err != nil {
return errors.Wrapf(err, "reading exit codes to prune")
}

if len(toRemoveIDs) > 0 {
err = db.Update(func(tx *bolt.Tx) error {
exitCodeBucket, err := getExitCodeBucket(tx)
if err != nil {
return err
}
timeStampBucket, err := getExitCodeTimeStampBucket(tx)
if err != nil {
return err
}

var finalErr error
for _, id := range toRemoveIDs {
rawID := []byte(id)
if err := exitCodeBucket.Delete(rawID); err != nil {
if finalErr != nil {
logrus.Error(finalErr)
}
finalErr = fmt.Errorf("removing exit code of container %s from DB: %w", id, err)
}
if err := timeStampBucket.Delete(rawID); err != nil {
if finalErr != nil {
logrus.Error(finalErr)
}
vrothberg marked this conversation as resolved.
Show resolved Hide resolved
finalErr = fmt.Errorf("removing exit code timestamp of container %s from DB: %w", id, err)
}
}

return finalErr
})
if err != nil {
return errors.Wrapf(err, "pruning exit codes")
}
}

return nil
}

// AddExecSession adds an exec session to the state.
func (s *BoltState) AddExecSession(ctr *Container, session *ExecSession) error {
if !s.valid {
Expand Down
22 changes: 22 additions & 0 deletions libpod/boltdb_state_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ const (
aliasesName = "aliases"
runtimeConfigName = "runtime-config"

exitCodeName = "exit-code"
exitCodeTimeStampName = "exit-code-time-stamp"

configName = "config"
stateName = "state"
dependenciesName = "dependencies"
Expand Down Expand Up @@ -65,6 +68,9 @@ var (
volDependenciesBkt = []byte(volCtrDependencies)
networksBkt = []byte(networksName)

exitCodeBkt = []byte(exitCodeName)
exitCodeTimeStampBkt = []byte(exitCodeTimeStampName)

configKey = []byte(configName)
stateKey = []byte(stateName)
netNSKey = []byte(netNSName)
Expand Down Expand Up @@ -362,6 +368,22 @@ func getRuntimeConfigBucket(tx *bolt.Tx) (*bolt.Bucket, error) {
return bkt, nil
}

// getExitCodeBucket looks up the bucket holding container exit codes in the
// given transaction. It returns an error wrapping define.ErrDBBadConfig if
// the bucket does not exist.
func getExitCodeBucket(tx *bolt.Tx) (*bolt.Bucket, error) {
	if bucket := tx.Bucket(exitCodeBkt); bucket != nil {
		return bucket, nil
	}
	return nil, errors.Wrapf(define.ErrDBBadConfig, "exit-code container bucket not found in DB")
}

// getExitCodeTimeStampBucket looks up the bucket holding exit-code time
// stamps in the given transaction. It returns an error wrapping
// define.ErrDBBadConfig if the bucket does not exist.
func getExitCodeTimeStampBucket(tx *bolt.Tx) (*bolt.Bucket, error) {
	if bucket := tx.Bucket(exitCodeTimeStampBkt); bucket != nil {
		return bucket, nil
	}
	return nil, errors.Wrapf(define.ErrDBBadConfig, "exit-code time stamp bucket not found in DB")
}

func (s *BoltState) getContainerConfigFromDB(id []byte, config *ContainerConfig, ctrsBkt *bolt.Bucket) error {
ctrBkt := ctrsBkt.Bucket(id)
if ctrBkt == nil {
Expand Down
Loading