From 0cfd12786fd155261b1b60a556ead17014964742 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Tue, 25 Jul 2023 09:45:00 +0200 Subject: [PATCH] add "healthy" sdnotify policy Add a new "healthy" sdnotify policy that instructs Podman to send the READY message once the container has turned healthy. Fixes: #6160 Signed-off-by: Valentin Rothberg --- cmd/podman/common/completion.go | 2 +- docs/source/markdown/options/sdnotify.md | 4 +- libpod/container_api.go | 2 +- libpod/container_internal.go | 37 +++++++++++++-- libpod/define/sdnotify.go | 7 +-- libpod/oci_conmon_attach_common.go | 3 +- pkg/specgen/container_validate.go | 2 - pkg/specgen/generate/container_create.go | 7 +++ test/system/260-sdnotify.bats | 59 ++++++++++++++++++++++++ 9 files changed, 109 insertions(+), 14 deletions(-) diff --git a/cmd/podman/common/completion.go b/cmd/podman/common/completion.go index 819f1807d8..c2a1a75d4c 100644 --- a/cmd/podman/common/completion.go +++ b/cmd/podman/common/completion.go @@ -1545,7 +1545,7 @@ func AutocompleteLogLevel(cmd *cobra.Command, args []string, toComplete string) // AutocompleteSDNotify - Autocomplete sdnotify options. // -> "container", "conmon", "ignore" func AutocompleteSDNotify(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - types := []string{define.SdNotifyModeContainer, define.SdNotifyModeContainer, define.SdNotifyModeIgnore} + types := []string{define.SdNotifyModeConmon, define.SdNotifyModeContainer, define.SdNotifyModeHealthy, define.SdNotifyModeIgnore} return types, cobra.ShellCompDirectiveNoFileComp } diff --git a/docs/source/markdown/options/sdnotify.md b/docs/source/markdown/options/sdnotify.md index c3905007ae..3b441cb858 100644 --- a/docs/source/markdown/options/sdnotify.md +++ b/docs/source/markdown/options/sdnotify.md @@ -2,7 +2,7 @@ ####> podman create, run ####> If file is edited, make sure the changes ####> are applicable to all of those. -#### **--sdnotify**=**container** | *conmon* | *ignore* +#### **--sdnotify**=**container** | *conmon* | *healthy* | *ignore* Determines how to use the NOTIFY_SOCKET, as passed with systemd and Type=notify. @@ -10,5 +10,7 @@ Default is **container**, which means allow the OCI runtime to proxy the socket container to receive ready notification. Podman sets the MAINPID to conmon's pid. The **conmon** option sets MAINPID to conmon's pid, and sends READY when the container has started. The socket is never passed to the runtime or the container. +The **healthy** option sets MAINPID to conmon's pid, and sends READY when the container +has turned healthy; requires a healthcheck to be set. The socket is never passed to the runtime or the container. The **ignore** option removes NOTIFY_SOCKET from the environment for itself and child processes, for the case where some other process above Podman uses NOTIFY_SOCKET and Podman does not use it. diff --git a/libpod/container_api.go b/libpod/container_api.go index cfec2fed21..1c630081f5 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -113,7 +113,7 @@ func (c *Container) Start(ctx context.Context, recursive bool) (finalErr error) } // Start the container - return c.start() + return c.start(ctx) } // Update updates the given container. diff --git a/libpod/container_internal.go b/libpod/container_internal.go index be0560efd9..e5972ab263 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -308,7 +308,7 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err return false, err } } - if err := c.start(); err != nil { + if err := c.start(ctx); err != nil { return false, err } return true, nil @@ -1198,11 +1198,11 @@ func (c *Container) initAndStart(ctx context.Context) (retErr error) { } // Now start the container - return c.start() + return c.start(ctx) } // Internal, non-locking function to start a container -func (c *Container) start() error { +func (c *Container) start(ctx context.Context) error { if c.config.Spec.Process != nil { logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args) } @@ -1214,9 +1214,11 @@ func (c *Container) start() error { c.state.State = define.ContainerStateRunning + // Unless being ignored, set the MAINPID to conmon. if c.config.SdNotifyMode != define.SdNotifyModeIgnore { payload := fmt.Sprintf("MAINPID=%d", c.state.ConmonPID) if c.config.SdNotifyMode == define.SdNotifyModeConmon { + // Also send the READY message for the "conmon" policy. payload += "\n" payload += daemon.SdNotifyReady } @@ -1241,7 +1243,32 @@ func (c *Container) start() error { defer c.newContainerEvent(events.Start) - return c.save() + if err := c.save(); err != nil { + return err + } + + if c.config.SdNotifyMode != define.SdNotifyModeHealthy { + return nil + } + + // Wait for the container to turn healthy before sending the READY + // message. This implies that we need to unlock and re-lock the + // container. + if !c.batched { + c.lock.Unlock() + defer c.lock.Lock() + } + + if _, err := c.WaitForConditionWithInterval(ctx, DefaultWaitInterval, define.HealthCheckHealthy); err != nil { + return err + } + + if err := notifyproxy.SendMessage(c.config.SdNotifySocket, daemon.SdNotifyReady); err != nil { + logrus.Errorf("Sending READY message after turning healthy: %s", err.Error()) + } else { + logrus.Debugf("Notify sent successfully") + } + return nil } // Internal, non-locking function to stop container @@ -1487,7 +1514,7 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr return err } } - return c.start() + return c.start(ctx) } // mountStorage sets up the container's root filesystem diff --git a/libpod/define/sdnotify.go b/libpod/define/sdnotify.go index 1d548c764e..f188a36507 100644 --- a/libpod/define/sdnotify.go +++ b/libpod/define/sdnotify.go @@ -4,17 +4,18 @@ import "fmt" // Strings used for --sdnotify option to podman const ( - SdNotifyModeContainer = "container" SdNotifyModeConmon = "conmon" + SdNotifyModeContainer = "container" + SdNotifyModeHealthy = "healthy" SdNotifyModeIgnore = "ignore" ) // ValidateSdNotifyMode validates the specified mode. func ValidateSdNotifyMode(mode string) error { switch mode { - case "", SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore: + case "", SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore, SdNotifyModeHealthy: return nil default: - return fmt.Errorf("%w: invalid sdnotify value %q: must be %s, %s or %s", ErrInvalidArg, mode, SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore) + return fmt.Errorf("%w: invalid sdnotify value %q: must be %s, %s, %s or %s", ErrInvalidArg, mode, SdNotifyModeConmon, SdNotifyModeContainer, SdNotifyModeHealthy, SdNotifyModeIgnore) } } diff --git a/libpod/oci_conmon_attach_common.go b/libpod/oci_conmon_attach_common.go index 8d5a7f813b..0ff0a2ef02 100644 --- a/libpod/oci_conmon_attach_common.go +++ b/libpod/oci_conmon_attach_common.go @@ -4,6 +4,7 @@ package libpod import ( + "context" "errors" "fmt" "io" @@ -86,7 +87,7 @@ func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error { // If starting was requested, start the container and notify when that's // done. if params.Start { - if err := c.start(); err != nil { + if err := c.start(context.TODO()); err != nil { return err } params.Started <- true diff --git a/pkg/specgen/container_validate.go b/pkg/specgen/container_validate.go index 536c2ff9a4..c6a8baddd7 100644 --- a/pkg/specgen/container_validate.go +++ b/pkg/specgen/container_validate.go @@ -14,8 +14,6 @@ var ( ErrInvalidSpecConfig = errors.New("invalid configuration") // SystemDValues describes the only values that SystemD can be SystemDValues = []string{"true", "false", "always"} - // SdNotifyModeValues describes the only values that SdNotifyMode can be - SdNotifyModeValues = []string{define.SdNotifyModeContainer, define.SdNotifyModeConmon, define.SdNotifyModeIgnore} // ImageVolumeModeValues describes the only values that ImageVolumeMode can be ImageVolumeModeValues = []string{"ignore", define.TypeTmpfs, "anonymous"} ) diff --git a/pkg/specgen/generate/container_create.go b/pkg/specgen/generate/container_create.go index f9e663001e..3db8a772f8 100644 --- a/pkg/specgen/generate/container_create.go +++ b/pkg/specgen/generate/container_create.go @@ -601,18 +601,25 @@ func createContainerOptions(rt *libpod.Runtime, s *specgen.SpecGenerator, pod *l } options = append(options, libpod.WithRestartRetries(retries), libpod.WithRestartPolicy(restartPolicy)) + healthCheckSet := false if s.ContainerHealthCheckConfig.HealthConfig != nil { options = append(options, libpod.WithHealthCheck(s.ContainerHealthCheckConfig.HealthConfig)) logrus.Debugf("New container has a health check") + healthCheckSet = true } if s.ContainerHealthCheckConfig.StartupHealthConfig != nil { options = append(options, libpod.WithStartupHealthcheck(s.ContainerHealthCheckConfig.StartupHealthConfig)) + healthCheckSet = true } if s.ContainerHealthCheckConfig.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone { options = append(options, libpod.WithHealthCheckOnFailureAction(s.ContainerHealthCheckConfig.HealthCheckOnFailureAction)) } + if s.SdNotifyMode == define.SdNotifyModeHealthy && !healthCheckSet { + return nil, fmt.Errorf("%w: sdnotify policy %q requires a healthcheck to be set", define.ErrInvalidArg, s.SdNotifyMode) + } + if len(s.Secrets) != 0 { manager, err := rt.SecretsManager() if err != nil { diff --git a/test/system/260-sdnotify.bats b/test/system/260-sdnotify.bats index 71894ed1cb..ae1f828afb 100644 --- a/test/system/260-sdnotify.bats +++ b/test/system/260-sdnotify.bats @@ -184,6 +184,65 @@ READY=1" _stop_socat } +# These tests can fail in dev. environment because of SELinux. +# quick fix: chcon -t container_runtime_exec_t ./bin/podman +@test "sdnotify : healthy" { + export NOTIFY_SOCKET=$PODMAN_TMPDIR/container.sock + _start_socat + + wait_file="$PODMAN_TMPDIR/$(random_string).wait_for_me" + run_podman 125 create --sdnotify=healthy $IMAGE + is "$output" "Error: invalid argument: sdnotify policy \"healthy\" requires a healthcheck to be set" + + # Create a container with a simple `/bin/true` healthcheck that we need to + # run manually. + ctr=$(random_string) + run_podman create --name $ctr \ + --health-cmd=/bin/true \ + --health-retries=1 \ + --health-interval=disable \ + --sdnotify=healthy \ + $IMAGE sleep infinity + + # Start the container in the background which will block until the + # container turned healthy. After that, create the wait_file which + # indicates that start has returned. + (timeout --foreground -v --kill=5 20 $PODMAN start $ctr && touch $wait_file) & + + run_podman wait --condition=running $ctr + + # Make sure that the MAINPID is set but without the READY message. + run_podman container inspect $ctr --format "{{.State.ConmonPid}}" + mainPID="$output" + # With container, READY=1 isn't necessarily the last message received; + # just look for it anywhere in received messages + run cat $_SOCAT_LOG + # The 'echo's help us debug failed runs + echo "socat log:" + echo "$output" + + is "$output" "MAINPID=$mainPID" "Container is not healthy yet, so we only know the main PID" + + # Now run the healthcheck and look for the READY message. + run_podman healthcheck run $ctr + is "$output" "" "output from 'podman healthcheck run'" + + # Wait for start to return. At that point the READY message must have been + # sent. + wait_for_file $wait_file + run cat $_SOCAT_LOG + echo "socat log:" + echo "$output" + is "$output" "MAINPID=$mainPID +READY=1" + + run_podman container inspect --format "{{.State.Status}}" $ctr + is "$output" "running" "make sure container is still running" + + run_podman rm -f -t0 $ctr + _stop_socat +} + @test "sdnotify : play kube - no policies" { # Create the YAMl file yaml_source="$PODMAN_TMPDIR/test.yaml"