Skip to content

Commit

Permalink
fix race conditions in start/attach logic
Browse files Browse the repository at this point in the history
The current code did something like this:
lock()
getState()
unlock()

if state != running
  lock()
  getState() == running -> error
  unlock()

This of course is wrong because between the first unlock() and second
lock() call another process could have modified the state. This meant
that sometimes you would get a weird error on start because the internal
setup errored as the container was already running.

In general any state check without holding the lock is incorrect and
will result in race conditions. As such refactor the code to combine
both StartAndAttach and Attach() into one function that can handle both.
With that we can move the running check into the locked code.

Also use typed error for this specific error case then the callers can
check and ignore the specific error when needed. This also allows us to
fix races in the compat API that did a similar racy state check.

This commit changes slightly how we output the result, previously a
start on already running container would never print the id/name of the
container which is confusing and sort of breaks idempotence. Now it will
include the output except when --all is used. Then it only reports the
ids that were actually started.

Fixes containers#23246

Signed-off-by: Paul Holzinger <[email protected]>
  • Loading branch information
Luap99 committed Jul 12, 2024
1 parent 360c4f3 commit 3280da0
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 123 deletions.
97 changes: 32 additions & 65 deletions libpod/container_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
"github.com/containers/common/pkg/resize"
"github.com/containers/podman/v5/libpod/define"
"github.com/containers/podman/v5/libpod/events"
"github.com/containers/podman/v5/pkg/signal"
"github.com/containers/storage/pkg/archive"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -142,13 +141,12 @@ func (c *Container) Update(resources *spec.LinuxResources, restartPolicy *string
return c.update(resources, restartPolicy, restartRetries)
}

// StartAndAttach starts a container and attaches to it.
// This acts as a combination of the Start and Attach APIs, ensuring proper
// Attach to a container.
// The last parameter "start" can be used to also start the container.
// This will then Start and Attach APIs, ensuring proper
// ordering of the two such that no output from the container is lost (e.g. the
// Attach call occurs before Start).
// In overall functionality, it is identical to the Start call, with the added
// side effect that an attach session will also be started.
func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachStreams, keys string, resize <-chan resize.TerminalSize, recursive bool) (retChan <-chan error, finalErr error) {
func (c *Container) Attach(ctx context.Context, streams *define.AttachStreams, keys string, resize <-chan resize.TerminalSize, start bool) (retChan <-chan error, finalErr error) {
defer func() {
if finalErr != nil {
// Have to re-lock.
Expand All @@ -175,9 +173,27 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
}
}

if err := c.prepareToStart(ctx, recursive); err != nil {
return nil, err
if c.state.State != define.ContainerStateRunning {
if !start {
return nil, errors.New("you can only attach to running containers")
}

if err := c.prepareToStart(ctx, true); err != nil {
return nil, err
}
}

if !start {
// A bit awkward, technically passthrough never supports attach. We only pretend
// it does as we leak the stdio fds down into the container but that of course only
// works if we are the process that started conmon with the right fds.
if c.LogDriver() == define.PassthroughLogging {
return nil, fmt.Errorf("this container is using the 'passthrough' log driver, cannot attach: %w", define.ErrNoLogs)
} else if c.LogDriver() == define.PassthroughTTYLogging {
return nil, fmt.Errorf("this container is using the 'passthrough-tty' log driver, cannot attach: %w", define.ErrNoLogs)
}
}

attachChan := make(chan error)

// We need to ensure that we don't return until start() fired in attach.
Expand All @@ -194,7 +210,7 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
opts := new(AttachOptions)
opts.Streams = streams
opts.DetachKeys = &keys
opts.Start = true
opts.Start = start
opts.Started = startedChan

// attach and start the container on a different thread. waitForHealthy must
Expand All @@ -213,7 +229,13 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
c.newContainerEvent(events.Attach)
}

return attachChan, c.waitForHealthy(ctx)
if start {
if err := c.waitForHealthy(ctx); err != nil {
return nil, err
}
}

return attachChan, nil
}

// RestartWithTimeout restarts a running container and takes a given timeout in uint
Expand Down Expand Up @@ -315,61 +337,6 @@ func (c *Container) Kill(signal uint) error {
return c.save()
}

// Attach attaches to a container.
// This function returns when the attach finishes. It does not hold the lock for
// the duration of its runtime, only using it at the beginning to verify state.
func (c *Container) Attach(streams *define.AttachStreams, keys string, resize <-chan resize.TerminalSize) error {
if c.LogDriver() == define.PassthroughLogging {
return fmt.Errorf("this container is using the 'passthrough' log driver, cannot attach: %w", define.ErrNoLogs)
}
if c.LogDriver() == define.PassthroughTTYLogging {
return fmt.Errorf("this container is using the 'passthrough-tty' log driver, cannot attach: %w", define.ErrNoLogs)
}
if !c.batched {
c.lock.Lock()
if err := c.syncContainer(); err != nil {
c.lock.Unlock()
return err
}
// We are NOT holding the lock for the duration of the function.
c.lock.Unlock()
}

if !c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning) {
return fmt.Errorf("can only attach to created or running containers: %w", define.ErrCtrStateInvalid)
}

// HACK: This is really gross, but there isn't a better way without
// splitting attach into separate versions for StartAndAttach and normal
// attaching, and I really do not want to do that right now.
// Send a SIGWINCH after attach succeeds so that most programs will
// redraw the screen for the new attach session.
attachRdy := make(chan bool, 1)
if c.Terminal() {
go func() {
<-attachRdy
c.lock.Lock()
defer c.lock.Unlock()
if err := c.ociRuntime.KillContainer(c, uint(signal.SIGWINCH), false); err != nil {
logrus.Warnf("Unable to send SIGWINCH to container %s after attach: %v", c.ID(), err)
}
}()
}

// Start resizing
if c.LogDriver() != define.PassthroughLogging && c.LogDriver() != define.PassthroughTTYLogging {
registerResizeFunc(resize, c.bundlePath())
}

opts := new(AttachOptions)
opts.Streams = streams
opts.DetachKeys = &keys
opts.AttachReady = attachRdy

c.newContainerEvent(events.Attach)
return c.ociRuntime.Attach(c, opts)
}

// HTTPAttach forwards an attach session over a hijacked HTTP session.
// HTTPAttach will consume and close the included httpCon, which is expected to
// be sourced from a hijacked HTTP connection.
Expand Down
5 changes: 5 additions & 0 deletions libpod/container_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,11 @@ func (c *Container) save() error {
func (c *Container) prepareToStart(ctx context.Context, recursive bool) (retErr error) {
// Container must be created or stopped to be started
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) {
// Special case: Let the caller know that is is already running,
// the caller can then decide to ignore/handle the error the way it needs.
if c.state.State == define.ContainerStateRunning {
return fmt.Errorf("container %s: %w", c.ID(), define.ErrCtrStateRunning)
}
return fmt.Errorf("container %s must be in Created or Stopped state to be started: %w", c.ID(), define.ErrCtrStateInvalid)
}

Expand Down
2 changes: 2 additions & 0 deletions libpod/define/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ var (
// ErrCtrStateInvalid indicates a container is in an improper state for
// the requested operation
ErrCtrStateInvalid = errors.New("container state improper")
// ErrCtrStateRunning indicates a container is running.
ErrCtrStateRunning = errors.New("container is running")
// ErrExecSessionStateInvalid indicates that an exec session is in an
// improper state for the requested operation
ErrExecSessionStateInvalid = errors.New("exec session state improper")
Expand Down
1 change: 1 addition & 0 deletions libpod/oci_conmon_attach_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
}
params.Started <- true
}
close(params.Started)

if passthrough {
return nil
Expand Down
14 changes: 5 additions & 9 deletions pkg/api/handlers/compat/containers_start.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package compat

import (
"errors"
"net/http"

api "github.com/containers/podman/v5/pkg/api/types"
Expand Down Expand Up @@ -33,16 +34,11 @@ func StartContainer(w http.ResponseWriter, r *http.Request) {
utils.ContainerNotFound(w, name, err)
return
}
state, err := con.State()
if err != nil {
utils.InternalServerError(w, err)
return
}
if state == define.ContainerStateRunning {
utils.WriteResponse(w, http.StatusNotModified, nil)
return
}
if err := con.Start(r.Context(), true); err != nil {
if errors.Is(err, define.ErrCtrStateRunning) {
utils.WriteResponse(w, http.StatusNotModified, nil)
return
}
utils.InternalServerError(w, err)
return
}
Expand Down
75 changes: 31 additions & 44 deletions pkg/domain/infra/abi/containers.go
Original file line number Diff line number Diff line change
Expand Up @@ -795,13 +795,6 @@ func (ic *ContainerEngine) ContainerAttach(ctx context.Context, nameOrID string,
}

ctr := containers[0]
conState, err := ctr.State()
if err != nil {
return fmt.Errorf("unable to determine state of %s: %w", ctr.ID(), err)
}
if conState != define.ContainerStateRunning {
return fmt.Errorf("you can only attach to running containers")
}

// If the container is in a pod, also set to recursively start dependencies
err = terminal.StartAttachCtr(ctx, ctr.Container, options.Stdout, options.Stderr, options.Stdin, options.DetachKeys, options.SigProxy, false)
Expand Down Expand Up @@ -939,14 +932,9 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
// There can only be one container if attach was used
for i := range containers {
ctr := containers[i]
ctrState, err := ctr.State()
if err != nil {
return nil, err
}
ctrRunning := ctrState == define.ContainerStateRunning

if options.Attach {
err = terminal.StartAttachCtr(ctx, ctr.Container, options.Stdout, options.Stderr, options.Stdin, options.DetachKeys, options.SigProxy, !ctrRunning)
err = terminal.StartAttachCtr(ctx, ctr.Container, options.Stdout, options.Stderr, options.Stdin, options.DetachKeys, options.SigProxy, true)
if errors.Is(err, define.ErrDetach) {
// User manually detached
// Exit cleanly immediately
Expand All @@ -970,16 +958,6 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
return reports, fmt.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID())
}

if ctrRunning {
reports = append(reports, &entities.ContainerStartReport{
Id: ctr.ID(),
RawInput: ctr.rawInput,
Err: nil,
ExitCode: 0,
})
return reports, err
}

if err != nil {
reports = append(reports, &entities.ContainerStartReport{
Id: ctr.ID(),
Expand Down Expand Up @@ -1008,34 +986,43 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
return reports, nil
} // end attach

// Start the container if it's not running already.
if !ctrRunning {
// Handle non-attach start
// If the container is in a pod, also set to recursively start dependencies
report := &entities.ContainerStartReport{
Id: ctr.ID(),
RawInput: ctr.rawInput,
ExitCode: 125,
}
if err := ctr.Start(ctx, true); err != nil {
report.Err = err
if errors.Is(err, define.ErrWillDeadlock) {
report.Err = fmt.Errorf("please run 'podman system renumber' to resolve deadlocks: %w", err)
// Handle non-attach start
// If the container is in a pod, also set to recursively start dependencies
report := &entities.ContainerStartReport{
Id: ctr.ID(),
RawInput: ctr.rawInput,
ExitCode: 125,
}
if err := ctr.Start(ctx, true); err != nil {
// Already running is no error for the start command as it is idempotent.
if errors.Is(err, define.ErrCtrStateRunning) {
// If all is set we only want to output the actual started containers
// so do not include the entry in the result.
if !options.All {
report.ExitCode = 0
reports = append(reports, report)
continue
}
report.Err = fmt.Errorf("unable to start container %q: %w", ctr.ID(), err)
continue
}

report.Err = err
if errors.Is(err, define.ErrWillDeadlock) {
report.Err = fmt.Errorf("please run 'podman system renumber' to resolve deadlocks: %w", err)
reports = append(reports, report)
if ctr.AutoRemove() {
if _, _, err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil {
logrus.Errorf("Removing container %s: %v", ctr.ID(), err)
}
}
continue
}
report.ExitCode = 0
report.Err = fmt.Errorf("unable to start container %q: %w", ctr.ID(), err)
if ctr.AutoRemove() {
if _, _, err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil {
logrus.Errorf("Removing container %s: %v", ctr.ID(), err)
}
}
reports = append(reports, report)
continue
}
// no error set exit code to 0
report.ExitCode = 0
reports = append(reports, report)
}
return reports, nil
}
Expand Down
6 changes: 1 addition & 5 deletions pkg/domain/infra/abi/terminal/terminal_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,7 @@ func StartAttachCtr(ctx context.Context, ctr *libpod.Container, stdout, stderr,
ProxySignals(ctr)
}

if !startContainer {
return ctr.Attach(streams, detachKeys, resize)
}

attachChan, err := ctr.StartAndAttach(ctx, streams, detachKeys, resize, true)
attachChan, err := ctr.Attach(ctx, streams, detachKeys, resize, startContainer)
if err != nil {
return err
}
Expand Down

0 comments on commit 3280da0

Please sign in to comment.