Skip to content

Commit

Permalink
Add liveness probe to Celery workers (#25561)
Browse files Browse the repository at this point in the history
  • Loading branch information
jedcunningham authored Aug 26, 2022
1 parent 90aba8a commit 02a81ac
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 0 deletions.
16 changes: 16 additions & 0 deletions chart/templates/workers/worker-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,22 @@ spec:
{{- end }}
resources:
{{ toYaml .Values.workers.resources | indent 12 }}
{{- if .Values.workers.livenessProbe.enabled }}
# Worker liveness probe: rendered only when workers.livenessProbe.enabled is truthy.
# Timing fields are copied directly from workers.livenessProbe in the chart values.
livenessProbe:
initialDelaySeconds: {{ .Values.workers.livenessProbe.initialDelaySeconds }}
timeoutSeconds: {{ .Values.workers.livenessProbe.timeoutSeconds }}
failureThreshold: {{ .Values.workers.livenessProbe.failureThreshold }}
periodSeconds: {{ .Values.workers.livenessProbe.periodSeconds }}
exec:
command:
{{- if .Values.workers.livenessProbe.command }}
{{ toYaml .Values.workers.livenessProbe.command | nindent 16 }}
{{- else}}
# Default when no custom command is supplied: ping this worker's own Celery node
# (celery@<hostname>) through the Airflow Celery app.
# NOTE(review): CONNECTION_CHECK_MAX_COUNT=0 presumably disables the entrypoint's
# connection-wait loop so the probe starts immediately - confirm against /entrypoint.
- sh
- -c
- CONNECTION_CHECK_MAX_COUNT=0 exec /entrypoint python -m celery --app airflow.executors.celery_executor.app inspect ping -d celery@$(hostname)
{{- end }}
{{- end }}
ports:
- name: worker-logs
containerPort: {{ .Values.ports.workerLogs }}
Expand Down
42 changes: 42 additions & 0 deletions chart/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1199,6 +1199,48 @@
"exec \\\nairflow {{ semverCompare \">=2.0.0\" .Values.airflowVersion | ternary \"celery worker\" \"worker\" }}"
]
},
"livenessProbe": {
"description": "Liveness probe configuration for worker containers.",
"type": "object",
"additionalProperties": false,
"properties": {
"enabled": {
"description": "Enable liveness probe for celery workers.",
"type": "boolean",
"default": true
},
"initialDelaySeconds": {
"description": "Number of seconds after the container has started before liveness probes are initiated.",
"type": "integer",
"default": 10
},
"timeoutSeconds": {
"description": "Number of seconds after which the probe times out. Minimum value is 1 second.",
"type": "integer",
"default": 20
},
"failureThreshold": {
"description": "Minimum consecutive failures for the probe to be considered failed after having succeeded. Minimum value is 1.",
"type": "integer",
"default": 5
},
"periodSeconds": {
"description": "How often (in seconds) to perform the probe. Minimum value is 1.",
"type": "integer",
"default": 60
},
"command": {
"description": "Command for the liveness probe. When null, the chart's default Celery ping command is used.",
"type": [
"array",
"null"
],
"items": {
"type": "string"
}
}
}
},
"updateStrategy": {
"description": "Specifies the strategy used to replace old Pods by new ones when deployed as a StatefulSet.",
"type": [
Expand Down
10 changes: 10 additions & 0 deletions chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,16 @@ workers:
exec \
airflow {{ semverCompare ">=2.0.0" .Values.airflowVersion | ternary "celery worker" "worker" }}
# If the worker stops responding for 5 minutes (failureThreshold * periodSeconds = 5 * 60s),
# kill the worker and let Kubernetes restart it
livenessProbe:
enabled: true
initialDelaySeconds: 10
timeoutSeconds: 20
failureThreshold: 5
periodSeconds: 60
command: ~

# Update Strategy when worker is deployed as a StatefulSet
updateStrategy: ~
# Update Strategy when worker is deployed as a Deployment
Expand Down
38 changes: 38 additions & 0 deletions tests/charts/test_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,44 @@ def test_should_create_default_affinity(self):
docs[0],
)

def test_livenessprobe_values_are_configurable(self):
    """Overridden probe timings and command appear unchanged on the first worker container."""
    probe_overrides = {
        "initialDelaySeconds": 111,
        "timeoutSeconds": 222,
        "failureThreshold": 333,
        "periodSeconds": 444,
        "command": ["sh", "-c", "echo", "wow such test"],
    }
    # The rendered probe nests the command under "exec"; everything else maps one-to-one.
    expected_probe = {
        "initialDelaySeconds": 111,
        "timeoutSeconds": 222,
        "failureThreshold": 333,
        "periodSeconds": 444,
        "exec": {
            "command": ["sh", "-c", "echo", "wow such test"],
        },
    }

    rendered = render_chart(
        values={"workers": {"livenessProbe": probe_overrides}},
        show_only=["templates/workers/worker-deployment.yaml"],
    )
    worker_deployment = rendered[0]

    actual_probe = jmespath.search(
        "spec.template.spec.containers[0].livenessProbe", worker_deployment
    )
    assert actual_probe == expected_probe

def test_disable_livenessprobe(self):
    """Setting workers.livenessProbe.enabled to False leaves the first container without a probe."""
    overrides = {"workers": {"livenessProbe": {"enabled": False}}}
    rendered = render_chart(
        values=overrides,
        show_only=["templates/workers/worker-deployment.yaml"],
    )
    worker_deployment = rendered[0]

    # jmespath.search yields None when the livenessProbe key is absent from the container spec.
    probe = jmespath.search("spec.template.spec.containers[0].livenessProbe", worker_deployment)
    assert probe is None

@parameterized.expand(
[
({"enabled": False}, {"emptyDir": {}}),
Expand Down

0 comments on commit 02a81ac

Please sign in to comment.