Skip to content

Commit

Permalink
Merge pull request #15687 from vrothberg/RUN-1639
Browse files Browse the repository at this point in the history
health check: add on-failure actions
  • Loading branch information
openshift-merge-robot authored Sep 9, 2022
2 parents b239966 + aad29e7 commit 7e7db23
Show file tree
Hide file tree
Showing 19 changed files with 340 additions and 39 deletions.
5 changes: 5 additions & 0 deletions cmd/podman/common/completion.go
Original file line number Diff line number Diff line change
Expand Up @@ -1641,3 +1641,8 @@ func AutocompleteSSH(cmd *cobra.Command, args []string, toComplete string) ([]st
}
return []string{string(ssh.GolangMode), string(ssh.NativeMode)}, cobra.ShellCompDirectiveNoFileComp
}

// AutocompleteHealthOnFailure - action to take once the container turns unhealthy.
func AutocompleteHealthOnFailure(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
return define.SupportedHealthCheckOnFailureActions, cobra.ShellCompDirectiveNoFileComp
}
8 changes: 8 additions & 0 deletions cmd/podman/common/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,14 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
)
_ = cmd.RegisterFlagCompletionFunc(healthTimeoutFlagName, completion.AutocompleteNone)

healthOnFailureFlagName := "health-on-failure"
createFlags.StringVar(
&cf.HealthOnFailure,
healthOnFailureFlagName, "none",
"action to take once the container turns unhealthy",
)
_ = cmd.RegisterFlagCompletionFunc(healthOnFailureFlagName, AutocompleteHealthOnFailure)

createFlags.BoolVar(
&cf.HTTPProxy,
"http-proxy", containerConfig.Containers.HTTPProxy,
Expand Down
8 changes: 8 additions & 0 deletions docs/source/markdown/options/health-on-failure.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#### **--health-on-failure**=*action*

Action to take once the container transitions to an unhealthy state. The default is **none**.

- **none**: Take no action.
- **kill**: Kill the container.
- **restart**: Restart the container. Do not combine the `restart` action with the `--restart` flag. When running inside of a systemd unit, consider using the `kill` or `stop` action instead to make use of systemd's restart policy.
- **stop**: Stop the container.
2 changes: 2 additions & 0 deletions docs/source/markdown/podman-create.1.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ Read in a line delimited file of environment variables. See **Environment** note

@@option health-interval

@@option health-on-failure

@@option health-retries

@@option health-start-period
Expand Down
2 changes: 2 additions & 0 deletions docs/source/markdown/podman-run.1.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ Read in a line delimited file of environment variables. See **Environment** note

@@option health-interval

@@option health-on-failure

@@option health-retries

@@option health-start-period
Expand Down
3 changes: 3 additions & 0 deletions libpod/container_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"github.com/containers/common/libnetwork/types"
"github.com/containers/common/pkg/secrets"
"github.com/containers/image/v5/manifest"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/pkg/namespaces"
"github.com/containers/podman/v4/pkg/specgen"
"github.com/containers/storage"
Expand Down Expand Up @@ -392,6 +393,8 @@ type ContainerMiscConfig struct {
Systemd *bool `json:"systemd,omitempty"`
// HealthCheckConfig has the health check command and related timings
HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"`
// HealthCheckOnFailureAction defines an action to take once the container turns unhealthy.
HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"healthcheck_on_failure_action"`
// PreserveFDs is a number of additional file descriptors (in addition
// to 0, 1, 2) that will be passed to the executed process. The total FDs
// passed will be 3 + PreserveFDs.
Expand Down
2 changes: 2 additions & 0 deletions libpod/container_inspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,8 @@ func (c *Container) generateInspectContainerConfig(spec *spec.Spec) *define.Insp
// leak.
ctrConfig.Healthcheck = c.config.HealthCheckConfig

ctrConfig.HealthcheckOnFailureAction = c.config.HealthCheckOnFailureAction.String()

ctrConfig.CreateCommand = c.config.CreateCommand

ctrConfig.Timezone = c.config.Timezone
Expand Down
4 changes: 4 additions & 0 deletions libpod/container_validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,5 +137,9 @@ func (c *Container) validate() error {
if c.config.SdNotifyMode == define.SdNotifyModeIgnore && len(c.config.SdNotifySocket) > 0 {
return fmt.Errorf("cannot set sd-notify socket %q with sd-notify mode %q", c.config.SdNotifySocket, c.config.SdNotifyMode)
}

if c.config.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone && c.config.HealthCheckConfig == nil {
return fmt.Errorf("cannot set on-failure action to %s without a health check", c.config.HealthCheckOnFailureAction.String())
}
return nil
}
2 changes: 2 additions & 0 deletions libpod/define/container_inspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ type InspectContainerConfig struct {
StopSignal uint `json:"StopSignal"`
// Configured healthcheck for the container
Healthcheck *manifest.Schema2HealthConfig `json:"Healthcheck,omitempty"`
// HealthcheckOnFailureAction defines an action to take once the container turns unhealthy.
HealthcheckOnFailureAction string `json:"HealthcheckOnFailureAction,omitempty"`
// CreateCommand is the full command plus arguments of the process the
// container has been created with.
CreateCommand []string `json:"CreateCommand,omitempty"`
Expand Down
74 changes: 74 additions & 0 deletions libpod/define/healthchecks.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
package define

import (
"fmt"
"strings"
)

const (
// HealthCheckHealthy describes a healthy container
HealthCheckHealthy string = "healthy"
Expand Down Expand Up @@ -57,3 +62,72 @@ const (
// HealthConfigTestCmdShell runs commands with the system's default shell
HealthConfigTestCmdShell = "CMD-SHELL"
)

// HealthCheckOnFailureAction defines how Podman reacts when a container's health
// status turns unhealthy.
type HealthCheckOnFailureAction int

// Healthcheck on-failure actions.
const (
// HealthCheckOnFailureActionNonce instructs Podman to not react on an unhealthy status.
HealthCheckOnFailureActionNone = iota // Must be first iota for backwards compatibility
// HealthCheckOnFailureActionInvalid denotes an invalid on-failure policy.
HealthCheckOnFailureActionInvalid = iota
// HealthCheckOnFailureActionNonce instructs Podman to kill the container on an unhealthy status.
HealthCheckOnFailureActionKill = iota
// HealthCheckOnFailureActionNonce instructs Podman to restart the container on an unhealthy status.
HealthCheckOnFailureActionRestart = iota
// HealthCheckOnFailureActionNonce instructs Podman to stop the container on an unhealthy status.
HealthCheckOnFailureActionStop = iota
)

// String representations for on-failure actions.
const (
strHealthCheckOnFailureActionNone = "none"
strHealthCheckOnFailureActionInvalid = "invalid"
strHealthCheckOnFailureActionKill = "kill"
strHealthCheckOnFailureActionRestart = "restart"
strHealthCheckOnFailureActionStop = "stop"
)

// SupportedHealthCheckOnFailureActions lists all supported healthcheck restart policies.
var SupportedHealthCheckOnFailureActions = []string{
strHealthCheckOnFailureActionNone,
strHealthCheckOnFailureActionKill,
strHealthCheckOnFailureActionRestart,
strHealthCheckOnFailureActionStop,
}

// String returns the string representation of the HealthCheckOnFailureAction.
func (h HealthCheckOnFailureAction) String() string {
switch h {
case HealthCheckOnFailureActionNone:
return strHealthCheckOnFailureActionNone
case HealthCheckOnFailureActionKill:
return strHealthCheckOnFailureActionKill
case HealthCheckOnFailureActionRestart:
return strHealthCheckOnFailureActionRestart
case HealthCheckOnFailureActionStop:
return strHealthCheckOnFailureActionStop
default:
return strHealthCheckOnFailureActionInvalid
}
}

// ParseHealthCheckOnFailureAction parses the specified string into a HealthCheckOnFailureAction.
// An error is returned for an invalid input.
func ParseHealthCheckOnFailureAction(s string) (HealthCheckOnFailureAction, error) {
switch s {
case "", strHealthCheckOnFailureActionNone:
return HealthCheckOnFailureActionNone, nil
case strHealthCheckOnFailureActionKill:
return HealthCheckOnFailureActionKill, nil
case strHealthCheckOnFailureActionRestart:
return HealthCheckOnFailureActionRestart, nil
case strHealthCheckOnFailureActionStop:
return HealthCheckOnFailureActionStop, nil
default:
err := fmt.Errorf("invalid on-failure action %q for health check: supported actions are %s", s, strings.Join(SupportedHealthCheckOnFailureActions, ","))
return HealthCheckOnFailureActionInvalid, err
}
}
41 changes: 40 additions & 1 deletion libpod/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package libpod

import (
"bufio"
"context"
"errors"
"fmt"
"io/ioutil"
Expand All @@ -12,6 +13,7 @@ import (

"github.com/containers/podman/v4/libpod/define"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)

const (
Expand All @@ -29,9 +31,14 @@ func (r *Runtime) HealthCheck(name string) (define.HealthCheckStatus, error) {
if err != nil {
return define.HealthCheckContainerNotFound, fmt.Errorf("unable to look up %s to perform a health check: %w", name, err)
}

hcStatus, err := checkHealthCheckCanBeRun(container)
if err == nil {
return container.runHealthCheck()
hcStatus, err := container.runHealthCheck()
if err := container.processHealthCheckStatus(hcStatus); err != nil {
return hcStatus, err
}
return hcStatus, err
}
return hcStatus, err
}
Expand Down Expand Up @@ -127,13 +134,45 @@ func (c *Container) runHealthCheck() (define.HealthCheckStatus, error) {
hcResult = define.HealthCheckFailure
hcErr = fmt.Errorf("healthcheck command exceeded timeout of %s", c.HealthCheckConfig().Timeout.String())
}

hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
if err := c.updateHealthCheckLog(hcl, inStartPeriod); err != nil {
return hcResult, fmt.Errorf("unable to update health check log %s for %s: %w", c.healthCheckLogPath(), c.ID(), err)
}

return hcResult, hcErr
}

func (c *Container) processHealthCheckStatus(status define.HealthCheckStatus) error {
if status == define.HealthCheckSuccess {
return nil
}

switch c.config.HealthCheckOnFailureAction {
case define.HealthCheckOnFailureActionNone: // Nothing to do

case define.HealthCheckOnFailureActionKill:
if err := c.Kill(uint(unix.SIGKILL)); err != nil {
return fmt.Errorf("killing container health-check turned unhealthy: %w", err)
}

case define.HealthCheckOnFailureActionRestart:
if err := c.RestartWithTimeout(context.Background(), c.config.StopTimeout); err != nil {
return fmt.Errorf("restarting container after health-check turned unhealthy: %w", err)
}

case define.HealthCheckOnFailureActionStop:
if err := c.Stop(); err != nil {
return fmt.Errorf("stopping container after health-check turned unhealthy: %w", err)
}

default: // Should not happen but better be safe than sorry
return fmt.Errorf("unsupported on-failure action %d", c.config.HealthCheckOnFailureAction)
}

return nil
}

func checkHealthCheckCanBeRun(c *Container) (define.HealthCheckStatus, error) {
cstate, err := c.State()
if err != nil {
Expand Down
11 changes: 11 additions & 0 deletions libpod/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -1473,6 +1473,17 @@ func WithHealthCheck(healthCheck *manifest.Schema2HealthConfig) CtrCreateOption
}
}

// WithHealthCheckOnFailureAction adds an on-failure action to health-check config
func WithHealthCheckOnFailureAction(action define.HealthCheckOnFailureAction) CtrCreateOption {
return func(ctr *Container) error {
if ctr.valid {
return define.ErrCtrFinalized
}
ctr.config.HealthCheckOnFailureAction = action
return nil
}
}

// WithPreserveFDs forwards from the process running Libpod into the container
// the given number of extra FDs (starting after the standard streams) to the created container
func WithPreserveFDs(fd uint) CtrCreateOption {
Expand Down
1 change: 1 addition & 0 deletions pkg/domain/entities/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ type ContainerCreateOptions struct {
HealthRetries uint
HealthStartPeriod string
HealthTimeout string
HealthOnFailure string
Hostname string `json:"hostname,omitempty"`
HTTPProxy bool
HostUsers []string
Expand Down
4 changes: 4 additions & 0 deletions pkg/specgen/generate/container_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,10 @@ func createContainerOptions(rt *libpod.Runtime, s *specgen.SpecGenerator, pod *l
logrus.Debugf("New container has a health check")
}

if s.ContainerHealthCheckConfig.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone {
options = append(options, libpod.WithHealthCheckOnFailureAction(s.ContainerHealthCheckConfig.HealthCheckOnFailureAction))
}

if len(s.Secrets) != 0 {
manager, err := rt.SecretsManager()
if err != nil {
Expand Down
4 changes: 3 additions & 1 deletion pkg/specgen/specgen.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/containers/common/libimage"
nettypes "github.com/containers/common/libnetwork/types"
"github.com/containers/image/v5/manifest"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/storage/types"
spec "github.com/opencontainers/runtime-spec/specs-go"
)
Expand Down Expand Up @@ -533,7 +534,8 @@ type ContainerResourceConfig struct {
// ContainerHealthCheckConfig describes a container healthcheck with attributes
// like command, retries, interval, start period, and timeout.
type ContainerHealthCheckConfig struct {
HealthConfig *manifest.Schema2HealthConfig `json:"healthconfig,omitempty"`
HealthConfig *manifest.Schema2HealthConfig `json:"healthconfig,omitempty"`
HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"health_check_on_failure_action,omitempty"`
}

// SpecGenerator creates an OCI spec and Libpod configuration options to create
Expand Down
7 changes: 7 additions & 0 deletions pkg/specgenutil/specgen.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,13 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions
Test: []string{"NONE"},
}
}

onFailureAction, err := define.ParseHealthCheckOnFailureAction(c.HealthOnFailure)
if err != nil {
return err
}
s.HealthCheckOnFailureAction = onFailureAction

if err := setNamespaces(s, c); err != nil {
return err
}
Expand Down
Loading

0 comments on commit 7e7db23

Please sign in to comment.