Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add liveness probe to Celery workers #25561

Merged
merged 3 commits into from
Aug 26, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions chart/templates/workers/worker-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,20 @@ spec:
{{- end }}
resources:
{{ toYaml .Values.workers.resources | indent 12 }}
livenessProbe:
initialDelaySeconds: {{ .Values.workers.livenessProbe.initialDelaySeconds }}
timeoutSeconds: {{ .Values.workers.livenessProbe.timeoutSeconds }}
failureThreshold: {{ .Values.workers.livenessProbe.failureThreshold }}
periodSeconds: {{ .Values.workers.livenessProbe.periodSeconds }}
exec:
command:
{{- if .Values.workers.livenessProbe.command }}
{{ toYaml .Values.workers.livenessProbe.command | nindent 16 }}
{{- else}}
- sh
- -c
- CONNECTION_CHECK_MAX_COUNT=0 exec /entrypoint python -m celery --app airflow.executors.celery_executor.app inspect ping -d celery@$(hostname)
{{- end }}
ports:
- name: worker-logs
containerPort: {{ .Values.ports.workerLogs }}
Expand Down
37 changes: 37 additions & 0 deletions chart/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1199,6 +1199,43 @@
"exec \\\nairflow {{ semverCompare \">=2.0.0\" .Values.airflowVersion | ternary \"celery worker\" \"worker\" }}"
]
},
"livenessProbe": {
"description": "Liveness probe configuration for worker containers.",
"type": "object",
"additionalProperties": false,
"properties": {
"initialDelaySeconds": {
"description": "Number of seconds after the container has started before liveness probes are initiated.",
"type": "integer",
"default": 10
},
"timeoutSeconds": {
"description": "Number of seconds after which the probe times out. Minimum value is 1 second.",
"type": "integer",
"default": 20
},
"failureThreshold": {
"description": "Minimum consecutive failures for the probe to be considered failed after having succeeded. Minimum value is 1.",
"type": "integer",
"default": 5
},
"periodSeconds": {
"description": "How often (in seconds) to perform the probe. Minimum value is 1.",
"type": "integer",
"default": 60
},
"command": {
"description": "Command for livenessProbe",
"type": [
"array",
"null"
],
"items": {
"type": "string"
}
}
}
},
"updateStrategy": {
"description": "Specifies the strategy used to replace old Pods by new ones when deployed as a StatefulSet.",
"type": [
Expand Down
9 changes: 9 additions & 0 deletions chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,15 @@ workers:
exec \
airflow {{ semverCompare ">=2.0.0" .Values.airflowVersion | ternary "celery worker" "worker" }}

# If the worker stops responding for 5 minutes (5*60s), kill the
# worker and let Kubernetes restart it
livenessProbe:
initialDelaySeconds: 10
timeoutSeconds: 20
failureThreshold: 5
periodSeconds: 60
command: ~

# Update Strategy when worker is deployed as a StatefulSet
updateStrategy: ~
# Update Strategy when worker is deployed as a Deployment
Expand Down
27 changes: 27 additions & 0 deletions tests/charts/test_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,33 @@ def test_should_create_default_affinity(self):
docs[0],
)

def test_livenessprobe_values_are_configurable(self):
    """Verify that ``workers.livenessProbe`` overrides reach the rendered probe.

    Renders only the worker deployment template with custom timing values
    and a custom command, then checks that the first container's
    ``livenessProbe`` carries those values, with the command under ``exec``.
    """
    probe_command = ["sh", "-c", "echo", "wow such test"]
    probe_timings = {
        "initialDelaySeconds": 111,
        "timeoutSeconds": 222,
        "failureThreshold": 333,
        "periodSeconds": 444,
    }

    rendered_docs = render_chart(
        values={"workers": {"livenessProbe": {**probe_timings, "command": list(probe_command)}}},
        show_only=["templates/workers/worker-deployment.yaml"],
    )

    rendered_probe = jmespath.search(
        "spec.template.spec.containers[0].livenessProbe",
        rendered_docs[0],
    )

    # The timing keys are passed through unchanged; the command is nested
    # under the probe's ``exec`` handler.
    expected_probe = {**probe_timings, "exec": {"command": probe_command}}
    assert rendered_probe == expected_probe

@parameterized.expand(
[
({"enabled": False}, {"emptyDir": {}}),
Expand Down