From e2a1e8750cf1d7e93f7215d6207ce668417e2116 Mon Sep 17 00:00:00 2001 From: Thorarinn Sigurdsson Date: Thu, 7 Mar 2019 14:12:30 +0100 Subject: [PATCH] fix: increase init delay for liveness probe The initial delay in the liveness probe config for k8s health checks was too short. This was due to the mistaken assumption that k8s would only start running the liveness probe after the readiness probe succeeded. For services that take a long time to start up, the old config would sometimes lead to the liveness probe failing and thus the container being restarted - repeatedly, in some situations. The initial delay and period of the readiness probe were also reduced, so the readiness probe now starts sooner and runs more frequently. Later, we should aim to make initialDelaySeconds configurable for the liveness probe. --- .../plugins/kubernetes/container/deployment.ts | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/garden-service/src/plugins/kubernetes/container/deployment.ts b/garden-service/src/plugins/kubernetes/container/deployment.ts index 8a55e235a0..10a261778c 100644 --- a/garden-service/src/plugins/kubernetes/container/deployment.ts +++ b/garden-service/src/plugins/kubernetes/container/deployment.ts @@ -269,16 +269,23 @@ function deploymentConfig(service: Service, configuredReplicas: number, namespac function configureHealthCheck(container, spec): void { + const readinessPeriodSeconds = 1 + const readinessFailureThreshold = 90 + container.readinessProbe = { - initialDelaySeconds: 10, - periodSeconds: 5, + initialDelaySeconds: 2, + periodSeconds: readinessPeriodSeconds, timeoutSeconds: 3, successThreshold: 2, - failureThreshold: 5, + failureThreshold: readinessFailureThreshold, } + /* + * We wait for the effective failure duration (period * threshold) of the readiness probe before starting the + * liveness probe. 
+ */ container.livenessProbe = { - initialDelaySeconds: 15, + initialDelaySeconds: readinessPeriodSeconds * readinessFailureThreshold, periodSeconds: 5, timeoutSeconds: 3, successThreshold: 1,