fix: increase init delay for liveness probe

The initial delay in the liveness probe config for k8s health checks was too short. This was due to the mistaken assumption that k8s would only start running the liveness probe after the readiness probe succeeded. For services that take a long time to start up, the old config would sometimes lead to the liveness probe failing and thus the container being restarted - repeatedly, in some situations. The initial delay and period of the readiness probe were also reduced, so the readiness probe now starts sooner and runs more frequently. Later, we should aim to make initialDelaySeconds configurable for the liveness probe.
garden-io · Mar 8, 2019 · e2a1e87 · e2a1e87
1 parent a9f15d0
commit e2a1e87
Showing 1 changed file with 11 additions and 4 deletions.
diff --git a/garden-service/src/plugins/kubernetes/container/deployment.ts b/garden-service/src/plugins/kubernetes/container/deployment.ts
@@ -269,16 +269,23 @@ function deploymentConfig(service: Service, configuredReplicas: number, namespac
 
 function configureHealthCheck(container, spec): void {
 
+  const readinessPeriodSeconds = 1
+  const readinessFailureThreshold = 90
+
   container.readinessProbe = {
-    initialDelaySeconds: 10,
-    periodSeconds: 5,
+    initialDelaySeconds: 2,
+    periodSeconds: readinessPeriodSeconds,
     timeoutSeconds: 3,
     successThreshold: 2,
-    failureThreshold: 5,
+    failureThreshold: readinessFailureThreshold,
   }
 
+  /*
+   * We wait for the effective failure duration (period * threshold) of the readiness probe before starting the
+   * liveness probe.
+   */
   container.livenessProbe = {
-    initialDelaySeconds: 15,
+    initialDelaySeconds: readinessPeriodSeconds * readinessFailureThreshold,
     periodSeconds: 5,
     timeoutSeconds: 3,
     successThreshold: 1,