Allow containers to --restart on-failure with --rm #8263

Merged · 1 commit · Nov 23, 2020

In short: podman run --rm --restart on-failure is now accepted, and a container whose restart policy will fire again is not auto-removed on exit or during cleanup.

2 changes: 1 addition & 1 deletion cmd/podman/common/createparse.go
@@ -9,7 +9,7 @@ import (
 // by validate must not need any state information on the flag (i.e. changed)
 func (c *ContainerCLIOpts) validate() error {
 	var ()
-	if c.Rm && c.Restart != "" && c.Restart != "no" {
+	if c.Rm && (c.Restart != "" && c.Restart != "no" && c.Restart != "on-failure") {
Member: It would be nice to have "on-failure" defined in a variable somewhere rather than as a bare string scattered about. Not for this PR, though.

Member: It is defined in the libpod package as RestartPolicyOnFailure; the same goes for "no", which should be RestartPolicyNo:

	RestartPolicyOnFailure = "on-failure"

Member: @rhatdan, are you going to change this per @Luap99's suggestion?

 		return errors.Errorf(`the --rm option conflicts with --restart, when the restartPolicy is not "" and "no"`)
 	}

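To make the review suggestion concrete, here is a self-contained sketch of the updated check written with named constants instead of bare strings; the helper name, error text, and redeclared constants are editorial, not podman's code:

package main

import "fmt"

// The libpod restart-policy names referenced in the review; the values are
// redeclared here only so the sketch compiles on its own.
const (
	RestartPolicyNone      = ""
	RestartPolicyNo        = "no"
	RestartPolicyOnFailure = "on-failure"
)

// validateRmRestart mirrors the updated check: --rm now conflicts with
// --restart only for policies other than "", "no", and "on-failure".
func validateRmRestart(rm bool, restart string) error {
	if rm && restart != RestartPolicyNone && restart != RestartPolicyNo && restart != RestartPolicyOnFailure {
		return fmt.Errorf("the --rm option conflicts with --restart %q", restart)
	}
	return nil
}

func main() {
	fmt.Println(validateRmRestart(true, RestartPolicyOnFailure)) // <nil>: now allowed
	fmt.Println(validateRmRestart(true, "always"))               // error: still rejected
}
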
14 changes: 14 additions & 0 deletions libpod/container_api.go
@@ -714,3 +714,17 @@ func (c *Container) Restore(ctx context.Context, options ContainerCheckpointOpti
 	defer c.newContainerEvent(events.Restore)
 	return c.restore(ctx, options)
 }
+
+// Indicate whether or not the container should restart
+func (c *Container) ShouldRestart(ctx context.Context) bool {
Member: Did you duplicate all of this logic? It looks like it was copied from container_internal.

Member (author): It is; I will use the same function in both places.

+	logrus.Debugf("Checking if container %s should restart", c.ID())
+	if !c.batched {
+		c.lock.Lock()
+		defer c.lock.Unlock()
+
+		if err := c.syncContainer(); err != nil {
+			return false
+		}
+	}
+	return c.shouldRestart()
+}
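
The locking preamble above follows the usual pattern for public Container methods: take the lock and re-sync state from the database unless the caller is already inside a batched section. A rough, self-contained sketch of that pattern (all names editorial):

package main

import (
	"fmt"
	"sync"
)

// ctr stands in for libpod's Container; mu, batched, and sync are
// stand-ins for c.lock, c.batched, and c.syncContainer().
type ctr struct {
	mu      sync.Mutex
	batched bool
	state   bool // pretend persisted "should restart" state
}

func (c *ctr) sync() error { return nil } // placeholder for the DB refresh

// shouldRestart shows the public-method pattern: lock and re-sync state
// unless the caller already holds the lock in a batched operation.
func (c *ctr) shouldRestart() bool {
	if !c.batched {
		c.mu.Lock()
		defer c.mu.Unlock()
		if err := c.sync(); err != nil {
			return false
		}
	}
	return c.state
}

func main() {
	c := &ctr{state: true}
	fmt.Println(c.shouldRestart()) // true
}
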
28 changes: 15 additions & 13 deletions libpod/container_internal.go
@@ -206,37 +206,39 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
 	return nil
 }
 
-// Handle container restart policy.
-// This is called when a container has exited, and was not explicitly stopped by
-// an API call to stop the container or pod it is in.
-func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr error) {
-	// If we did not get a restart policy match, exit immediately.
+func (c *Container) shouldRestart() bool {
+	// If we did not get a restart policy match, return false
 	// Do the same if we're not a policy that restarts.
 	if !c.state.RestartPolicyMatch ||
 		c.config.RestartPolicy == RestartPolicyNo ||
 		c.config.RestartPolicy == RestartPolicyNone {
-		return false, nil
+		return false
 	}
 
 	// If we're RestartPolicyOnFailure, we need to check retries and exit
 	// code.
 	if c.config.RestartPolicy == RestartPolicyOnFailure {
 		if c.state.ExitCode == 0 {
-			return false, nil
+			return false
 		}
 
 		// If we don't have a max retries set, continue
 		if c.config.RestartRetries > 0 {
-			if c.state.RestartCount < c.config.RestartRetries {
-				logrus.Debugf("Container %s restart policy trigger: on retry %d (of %d)",
-					c.ID(), c.state.RestartCount, c.config.RestartRetries)
-			} else {
-				logrus.Debugf("Container %s restart policy trigger: retries exhausted", c.ID())
-				return false, nil
+			if c.state.RestartCount >= c.config.RestartRetries {
+				return false
 			}
 		}
 	}
+	return true
+}
+
+// Handle container restart policy.
+// This is called when a container has exited, and was not explicitly stopped by
+// an API call to stop the container or pod it is in.
+func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr error) {
+	if !c.shouldRestart() {
+		return false, nil
+	}
 	logrus.Debugf("Restarting container %s due to restart policy %s", c.ID(), c.config.RestartPolicy)
 
 	// Need to check if dependencies are alive.
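
To spell out the decision table that shouldRestart implements, here is a self-contained sketch with the container state flattened into plain arguments (the function shape is editorial; the policy strings match the constants discussed above):

package main

import "fmt"

// shouldRestart mirrors the logic in the diff: no restart without a policy
// match or under "no"/""; under "on-failure", restart only after a failed
// exit and while the retry budget (if any) is not exhausted.
func shouldRestart(policyMatch bool, policy string, exitCode int, restartCount, retries uint) bool {
	if !policyMatch || policy == "no" || policy == "" {
		return false
	}
	if policy == "on-failure" {
		if exitCode == 0 {
			return false // clean exit never restarts under on-failure
		}
		if retries > 0 && restartCount >= retries {
			return false // retry budget exhausted
		}
	}
	return true
}

func main() {
	fmt.Println(shouldRestart(true, "on-failure", 1, 0, 3)) // true: failed, retries left
	fmt.Println(shouldRestart(true, "on-failure", 0, 0, 3)) // false: exited cleanly
	fmt.Println(shouldRestart(true, "on-failure", 1, 3, 3)) // false: retries exhausted
	fmt.Println(shouldRestart(true, "always", 1, 9, 0))     // true: always restarts
}
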
24 changes: 24 additions & 0 deletions pkg/api/handlers/libpod/containers.go
@@ -344,3 +344,27 @@ func InitContainer(w http.ResponseWriter, r *http.Request) {
 	}
 	utils.WriteResponse(w, http.StatusNoContent, "")
 }
+
+func ShouldRestart(w http.ResponseWriter, r *http.Request) {
+	runtime := r.Context().Value("runtime").(*libpod.Runtime)
+	// Now use the ABI implementation to prevent us from having duplicate
+	// code.
+	containerEngine := abi.ContainerEngine{Libpod: runtime}
+
+	name := utils.GetName(r)
+	report, err := containerEngine.ShouldRestart(r.Context(), name)
+	if err != nil {
+		if errors.Cause(err) == define.ErrNoSuchCtr {
+			utils.ContainerNotFound(w, name, err)
+			return
+		}
+		utils.InternalServerError(w, err)
+		return
+
+	}
+	if report.Value {
+		utils.WriteResponse(w, http.StatusNoContent, "")
+	} else {
+		utils.ContainerNotFound(w, name, define.ErrNoSuchCtr)
+	}
+}
12 changes: 12 additions & 0 deletions pkg/bindings/containers/containers.go
@@ -390,3 +390,15 @@ func ContainerInit(ctx context.Context, nameOrID string) error {
 	}
 	return response.Process(nil)
 }
+
+func ShouldRestart(ctx context.Context, nameOrID string) (bool, error) {
Member: Why do we need this in the API and bindings? This should be local-only; podman container cleanup should never be invoked remotely.

Member (author): In pkg/domain/infra/tunnel/containers.go, when we run a non-detached container, we call Remove on the container when it completes. I added this ShouldRestart check to make sure that does not happen.

Member: I don't know if it's worth adding a completely new API endpoint to handle this. Maybe we should just skip removing the container in that case, when it detects that the container has this restart policy?

Member (author): Then we end up with the race conditions again, but for only a small subset of calls.

Member (author): I removed the bindings and just stopped removing the container in run if the restart policy is set to something other than "no".

Member (author): Removing the bindings caused test failures, so I added them back.

+	conn, err := bindings.GetClient(ctx)
+	if err != nil {
+		return false, err
+	}
+	response, err := conn.DoRequest(nil, http.MethodPost, "/containers/%s/shouldrestart", nil, nil, nameOrID)
+	if err != nil {
+		return false, err
+	}
+	return response.IsSuccess(), nil
+}
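
The handler answers 204 No Content when the container will restart and 404 otherwise, so response.IsSuccess() collapses the answer into a bool. A hypothetical caller of the new binding (the socket URI, container name, and the v2-era import paths are assumptions):

package main

import (
	"context"
	"fmt"
	"os"

	"github.com/containers/podman/v2/pkg/bindings"
	"github.com/containers/podman/v2/pkg/bindings/containers"
)

func main() {
	// NewConnection returns a context that carries the client; the socket
	// URI is a placeholder for wherever the service listens.
	ctx, err := bindings.NewConnection(context.Background(), "unix:///run/podman/podman.sock")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	restart, err := containers.ShouldRestart(ctx, "mycontainer")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("should restart:", restart)
}
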
15 changes: 13 additions & 2 deletions pkg/domain/infra/abi/containers.go
@@ -911,7 +911,7 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta
 	} else {
 		report.ExitCode = int(ecode)
 	}
-	if opts.Rm {
+	if opts.Rm && !ctr.ShouldRestart(ctx) {
 		if err := ic.Libpod.RemoveContainer(ctx, ctr, false, true); err != nil {
 			if errors.Cause(err) == define.ErrNoSuchCtr ||
 				errors.Cause(err) == define.ErrCtrRemoved {
@@ -992,7 +992,7 @@ func (ic *ContainerEngine) ContainerCleanup(ctx context.Context, namesOrIds []st
 			return []*entities.ContainerCleanupReport{}, nil
 		}
 
-		if options.Remove {
+		if options.Remove && !ctr.ShouldRestart(ctx) {
 			err = ic.Libpod.RemoveContainer(ctx, ctr, false, true)
 			if err != nil {
 				report.RmErr = errors.Wrapf(err, "failed to cleanup and remove container %v", ctr.ID())
@@ -1015,6 +1015,7 @@ func (ic *ContainerEngine) ContainerCleanup(ctx context.Context, namesOrIds []st
 			_, err = ic.Libpod.RemoveImage(ctx, ctrImage, false)
 			report.RmiErr = err
 		}
+
 		reports = append(reports, &report)
 	}
 	return reports, nil
@@ -1314,3 +1315,13 @@ func (ic *ContainerEngine) ContainerStats(ctx context.Context, namesOrIds []stri

 	return statsChan, nil
 }
+
+// ShouldRestart returns whether the container should be restarted
+func (ic *ContainerEngine) ShouldRestart(ctx context.Context, nameOrID string) (*entities.BoolReport, error) {
+	ctr, err := ic.Libpod.LookupContainer(nameOrID)
+	if err != nil {
+		return nil, err
+	}
+
+	return &entities.BoolReport{Value: ctr.ShouldRestart(ctx)}, nil
+}
25 changes: 19 additions & 6 deletions pkg/domain/infra/tunnel/containers.go
@@ -595,12 +595,20 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta
 		// Defer the removal, so we can return early if needed and
 		// de-spaghetti the code.
 		defer func() {
-			if err := containers.Remove(ic.ClientCxt, con.ID, bindings.PFalse, bindings.PTrue); err != nil {
-				if errorhandling.Contains(err, define.ErrNoSuchCtr) ||
-					errorhandling.Contains(err, define.ErrCtrRemoved) {
-					logrus.Warnf("Container %s does not exist: %v", con.ID, err)
-				} else {
-					logrus.Errorf("Error removing container %s: %v", con.ID, err)
+			shouldRestart, err := containers.ShouldRestart(ic.ClientCxt, con.ID)
+			if err != nil {
+				logrus.Errorf("Failed to check if %s should restart: %v", con.ID, err)
+				return
+			}
+
+			if !shouldRestart {
+				if err := containers.Remove(ic.ClientCxt, con.ID, bindings.PFalse, bindings.PTrue); err != nil {
+					if errorhandling.Contains(err, define.ErrNoSuchCtr) ||
+						errorhandling.Contains(err, define.ErrCtrRemoved) {
+						logrus.Warnf("Container %s does not exist: %v", con.ID, err)
+					} else {
+						logrus.Errorf("Error removing container %s: %v", con.ID, err)
+					}
 				}
 			}
 		}()
@@ -737,3 +745,8 @@ func (ic *ContainerEngine) ContainerStats(ctx context.Context, namesOrIds []stri
 	}
 	return containers.Stats(ic.ClientCxt, namesOrIds, &options.Stream)
 }
+
+// ShouldRestart reports back whether the container will restart
+func (ic *ContainerEngine) ShouldRestart(_ context.Context, id string) (bool, error) {
+	return containers.ShouldRestart(ic.ClientCxt, id)
+}
4 changes: 1 addition & 3 deletions test/e2e/run_test.go
@@ -75,11 +75,9 @@ var _ = Describe("Podman run", func() {
 		session.WaitWithDefaultTimeout()
 		Expect(session.ExitCode()).To(Equal(0))
 
-		// the --rm option conflicts with --restart, when the restartPolicy is not "" and "no"
-		// so the exitCode should not equal 0
 		session = podmanTest.Podman([]string{"run", "--rm", "--restart", "on-failure", ALPINE})
 		session.WaitWithDefaultTimeout()
-		Expect(session.ExitCode()).To(Not(Equal(0)))
+		Expect(session.ExitCode()).To(Equal(0))
 
 		session = podmanTest.Podman([]string{"run", "--rm", "--restart", "always", ALPINE})
 		session.WaitWithDefaultTimeout()