From 4dd67272ed17cfc4c0c1621a969ed66c432677c0 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Tue, 11 Oct 2022 13:01:07 +0200 Subject: [PATCH] healthcheck: fix --on-failure=stop Fix the "stop" on-failure action by not removing the transient systemd timer and service during container stop. Removing the service will in turn cause systemd to terminate the Podman process attempting to stop the container and hence leave it in the "stopping" state. Instead move the removal into the restart sequence. Signed-off-by: Valentin Rothberg --- libpod/container_internal.go | 11 +++++------ test/system/220-healthcheck.bats | 2 ++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 9bf93412d2..7f8f467543 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -1285,12 +1285,6 @@ func (c *Container) stop(timeout uint) error { c.lock.Unlock() } - if c.config.HealthCheckConfig != nil { - if err := c.removeTransientFiles(context.Background()); err != nil { - logrus.Error(err.Error()) - } - } - stopErr := c.ociRuntime.StopContainer(c, timeout, all) if !c.batched { @@ -1415,6 +1409,11 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr if err := c.stop(timeout); err != nil { return err } + if c.config.HealthCheckConfig != nil { + if err := c.removeTransientFiles(context.Background()); err != nil { + logrus.Error(err.Error()) + } + } // Old versions of conmon have a bug where they create the exit file before // closing open file descriptors causing a race condition when restarting // containers with open ports since we cannot bind the ports as they're not diff --git a/test/system/220-healthcheck.bats b/test/system/220-healthcheck.bats index 8e14adcecc..21e2a50def 100644 --- a/test/system/220-healthcheck.bats +++ b/test/system/220-healthcheck.bats @@ -123,6 +123,8 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\" # kill and stop yield the container into a non-running state is "$output" ".* $policy" "container was stopped/killed" assert "$output" != "running $policy" + # also make sure that it's not stuck in the stopping state + assert "$output" != "stopping $policy" fi run_podman rm -f -t0 $ctr