Skip to content

Commit

Permalink
Don't update health check status during initialDelaySeconds
Browse files Browse the repository at this point in the history
When InitialDelaySeconds in the kube yaml is set for a healthcheck,
don't update the healthcheck status until those initial delay seconds are over.
We were waiting to update for a failing healthcheck, but when the healthcheck
was successful during the initial delay time, the status was being updated as healthy
immediately.
This is misleading to the users wondering why their healthcheck takes
much longer to fail for a failing case while it is quick to succeed for
a healthy case. It also doesn't match what the k8s InitialDelaySeconds
does. This change is only for kube play, podman healthcheck run is
unaffected.

Signed-off-by: Urvashi Mohnani <[email protected]>
  • Loading branch information
umohnani8 authored and openshift-cherrypick-robot committed Feb 29, 2024
1 parent 8d2b55d commit 7f2106b
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 2 deletions.
4 changes: 4 additions & 0 deletions libpod/define/annotations.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ const (
// of the container
UlimitAnnotation = "io.podman.annotations.ulimit"

// KubeHealthCheckAnnotation is used by kube play to tell podman that any health checks should follow
// the k8s behavior of waiting for the initialDelaySeconds to be over before updating the status
KubeHealthCheckAnnotation = "io.podman.annotations.kube.health.check"

// MaxKubeAnnotation is the max length of annotations allowed by Kubernetes.
MaxKubeAnnotation = 63
)
Expand Down
11 changes: 9 additions & 2 deletions libpod/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define.
}

hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
logStatus, err := c.updateHealthCheckLog(hcl, inStartPeriod)
logStatus, err := c.updateHealthCheckLog(hcl, inStartPeriod, isStartup)
if err != nil {
return hcResult, "", fmt.Errorf("unable to update health check log %s for %s: %w", c.healthCheckLogPath(), c.ID(), err)
}
Expand Down Expand Up @@ -375,10 +375,17 @@ func (c *Container) isUnhealthy() (bool, error) {
}

// updateHealthCheckLog parses the health check results and writes the log
func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPeriod bool) (string, error) {
func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPeriod, isStartup bool) (string, error) {
c.lock.Lock()
defer c.lock.Unlock()

// If we are playing a kube yaml then let's honor the start period time for
// both failing and succeeding cases to match kube behavior.
// So don't update the health check log till the start period is over
if _, ok := c.config.Spec.Annotations[define.KubeHealthCheckAnnotation]; ok && inStartPeriod && !isStartup {
return "", nil
}

healthCheck, err := c.getHealthCheckLog()
if err != nil {
return "", err
Expand Down
2 changes: 2 additions & 0 deletions pkg/specgen/generate/kube/kube.go
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,8 @@ func ToSpecGen(ctx context.Context, opts *CtrSpecGenOptions) (*specgen.SpecGener
s.Annotations[define.InspectAnnotationPublishAll] = publishAll
}

s.Annotations[define.KubeHealthCheckAnnotation] = "true"

// Environment Variables
envs := map[string]string{}
for _, env := range imageData.Config.Env {
Expand Down
106 changes: 106 additions & 0 deletions test/system/700-play.bats
Original file line number Diff line number Diff line change
Expand Up @@ -829,3 +829,109 @@ EOF

run_podman rmi $local_image
}

# Plays a pod whose liveness probe succeeds (the container creates
# /tmp/healthy and the probe cats it) with initialDelaySeconds=3, then
# verifies that the reported health status stays 'starting' during the
# initial delay and only afterwards becomes 'healthy'.
@test "podman kube play healthcheck should wait initialDelaySeconds before updating status (healthy)" {
    fname="$PODMAN_TMPDIR/play_kube_healthy_$(random_string 6).yaml"
    echo "
apiVersion: v1
kind: Pod
metadata:
labels:
name: liveness-exec
spec:
containers:
- name: liveness
image: $IMAGE
args:
- /bin/sh
- -c
- touch /tmp/healthy && sleep 100
livenessProbe:
exec:
command:
- cat
- /tmp/healthy
initialDelaySeconds: 3
failureThreshold: 1
periodSeconds: 1
" > $fname

    run_podman kube play $fname
    ctrName="liveness-exec-liveness"

    # Keep checking status. For the first 2 seconds it must be 'starting'
    t0=$SECONDS
    while [[ $SECONDS -le $((t0 + 2)) ]]; do
        run_podman inspect $ctrName --format "1-{{.State.Health.Status}}"
        assert "$output" == "1-starting" "Health.Status at $((SECONDS - t0))"
        sleep 0.5
    done

    # After 3 seconds it may take another second to go healthy. Wait.
    t0=$SECONDS
    while [[ $SECONDS -le $((t0 + 3)) ]]; do
        run_podman inspect $ctrName --format "2-{{.State.Health.Status}}"
        if [[ "$output" = "2-healthy" ]]; then
            break;
        fi
        sleep 0.5
    done
    # Quote $output: unquoted, an empty or whitespace-containing value would
    # be word-split and shift the arguments handed to assert.
    assert "$output" == "2-healthy" "After 3 seconds"

    # Clean up the pod and any leftover containers.
    run_podman kube down $fname
    run_podman pod rm -a
    run_podman rm -a
}

# Plays a pod whose liveness probe fails (the probe cats /tmp/randomfile,
# which the container command never creates) with initialDelaySeconds=3,
# then verifies that the reported health status stays 'starting' during the
# initial delay and only afterwards becomes 'unhealthy'.
@test "podman kube play healthcheck should wait initialDelaySeconds before updating status (unhealthy)" {
    fname="$PODMAN_TMPDIR/play_kube_unhealthy_$(random_string 6).yaml"
    echo "
apiVersion: v1
kind: Pod
metadata:
labels:
name: liveness-exec
spec:
containers:
- name: liveness
image: $IMAGE
args:
- /bin/sh
- -c
- touch /tmp/healthy && sleep 100
livenessProbe:
exec:
command:
- cat
- /tmp/randomfile
initialDelaySeconds: 3
failureThreshold: 1
periodSeconds: 1
" > $fname

    run_podman kube play $fname
    ctrName="liveness-exec-liveness"

    # Keep checking status. For the first 2 seconds it must be 'starting'
    t0=$SECONDS
    while [[ $SECONDS -le $((t0 + 2)) ]]; do
        run_podman inspect $ctrName --format "1-{{.State.Health.Status}}"
        assert "$output" == "1-starting" "Health.Status at $((SECONDS - t0))"
        sleep 0.5
    done

    # After 3 seconds it may take another second to go unhealthy. Wait.
    t0=$SECONDS
    while [[ $SECONDS -le $((t0 + 3)) ]]; do
        run_podman inspect $ctrName --format "2-{{.State.Health.Status}}"
        if [[ "$output" = "2-unhealthy" ]]; then
            break;
        fi
        sleep 0.5
    done
    # Quote $output: unquoted, an empty or whitespace-containing value would
    # be word-split and shift the arguments handed to assert.
    assert "$output" == "2-unhealthy" "After 3 seconds"

    # Clean up the pod and any leftover containers.
    run_podman kube down $fname
    run_podman pod rm -a
    run_podman rm -a
}

0 comments on commit 7f2106b

Please sign in to comment.