From c399578b23bd62740caf2eb1bcc89f2ea927ea9e Mon Sep 17 00:00:00 2001 From: Lianhao Lu Date: Fri, 11 Oct 2024 10:08:21 +0800 Subject: [PATCH] helm/manifest: Sync HPA related K8S probe settings (#459) Sync the mismatch between helm chart and GMC manifests introduced by PR #386 Signed-off-by: Lianhao Lu --- helm-charts/chatqna/gaudi-values.yaml | 17 +++++++++++ .../chatqna/guardrails-gaudi-values.yaml | 30 +++++++++++++++++++ helm-charts/chatqna/nv-values.yaml | 13 ++++++++ helm-charts/codegen/gaudi-values.yaml | 13 ++++++++ helm-charts/codetrans/gaudi-values.yaml | 13 ++++++++ helm-charts/common/tei/gaudi-values.yaml | 5 ++++ helm-charts/common/tei/values.yaml | 2 ++ helm-charts/common/teirerank/values.yaml | 10 ++++--- helm-charts/common/tgi/gaudi-values.yaml | 14 +++++++++ helm-charts/common/tgi/nv-values.yaml | 14 +++++++++ helm-charts/common/tgi/values.yaml | 15 ++++++---- helm-charts/docsum/gaudi-values.yaml | 13 ++++++++ .../config/manifests/tei.yaml | 6 ++-- .../config/manifests/tei_gaudi.yaml | 6 ++-- .../config/manifests/teirerank.yaml | 6 ++-- .../config/manifests/tgi.yaml | 10 +++---- .../config/manifests/tgi_gaudi.yaml | 5 +++- .../config/manifests/tgi_nv.yaml | 5 +++- 18 files changed, 169 insertions(+), 28 deletions(-) diff --git a/helm-charts/chatqna/gaudi-values.yaml b/helm-charts/chatqna/gaudi-values.yaml index 455368cb..161968f8 100644 --- a/helm-charts/chatqna/gaudi-values.yaml +++ b/helm-charts/chatqna/gaudi-values.yaml @@ -11,6 +11,10 @@ tei: habana.ai/gaudi: 1 securityContext: readOnlyRootFilesystem: false + livenessProbe: + timeoutSeconds: 1 + readinessProbe: + timeoutSeconds: 1 # To override values in subchart tgi tgi: @@ -24,3 +28,16 @@ tgi: MAX_INPUT_LENGTH: "1024" MAX_TOTAL_TOKENS: "2048" CUDA_GRAPHS: "" + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/chatqna/guardrails-gaudi-values.yaml b/helm-charts/chatqna/guardrails-gaudi-values.yaml index f09083b6..40d866a4 100644 --- a/helm-charts/chatqna/guardrails-gaudi-values.yaml +++ b/helm-charts/chatqna/guardrails-gaudi-values.yaml @@ -22,6 +22,10 @@ tei: habana.ai/gaudi: 1 securityContext: readOnlyRootFilesystem: false + livenessProbe: + timeoutSeconds: 1 + readinessProbe: + timeoutSeconds: 1 tgi: accelDevice: "gaudi" @@ -34,6 +38,19 @@ tgi: MAX_INPUT_LENGTH: "1024" MAX_TOTAL_TOKENS: "2048" CUDA_GRAPHS: "" + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 tgi-guardrails: accelDevice: "gaudi" @@ -47,3 +64,16 @@ tgi-guardrails: MAX_INPUT_LENGTH: "1024" MAX_TOTAL_TOKENS: "2048" CUDA_GRAPHS: "" + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/chatqna/nv-values.yaml b/helm-charts/chatqna/nv-values.yaml index 63e3eaf4..67c4e3ac 100644 --- a/helm-charts/chatqna/nv-values.yaml +++ b/helm-charts/chatqna/nv-values.yaml @@ -10,3 +10,16 @@ tgi: resources: limits: nvidia.com/gpu: 1 + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/codegen/gaudi-values.yaml b/helm-charts/codegen/gaudi-values.yaml index 4115f5a3..b37ccc6b 100644 --- a/helm-charts/codegen/gaudi-values.yaml +++ b/helm-charts/codegen/gaudi-values.yaml @@ -12,3 +12,16 @@ tgi: MAX_INPUT_LENGTH: "1024" MAX_TOTAL_TOKENS: "2048" CUDA_GRAPHS: "" + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/codetrans/gaudi-values.yaml b/helm-charts/codetrans/gaudi-values.yaml index 4115f5a3..b37ccc6b 100644 --- a/helm-charts/codetrans/gaudi-values.yaml +++ b/helm-charts/codetrans/gaudi-values.yaml @@ -12,3 +12,16 @@ tgi: MAX_INPUT_LENGTH: "1024" MAX_TOTAL_TOKENS: "2048" CUDA_GRAPHS: "" + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/common/tei/gaudi-values.yaml b/helm-charts/common/tei/gaudi-values.yaml index 17358ea6..c5141505 100644 --- a/helm-charts/common/tei/gaudi-values.yaml +++ b/helm-charts/common/tei/gaudi-values.yaml @@ -17,3 +17,8 @@ securityContext: resources: limits: habana.ai/gaudi: 1 + +livenessProbe: + timeoutSeconds: 1 +readinessProbe: + timeoutSeconds: 1 diff --git a/helm-charts/common/tei/values.yaml b/helm-charts/common/tei/values.yaml index 486b9adb..15f43850 100644 --- a/helm-charts/common/tei/values.yaml +++ b/helm-charts/common/tei/values.yaml @@ -69,12 +69,14 @@ livenessProbe: initialDelaySeconds: 5 periodSeconds: 5 failureThreshold: 24 + timeoutSeconds: 2 readinessProbe: httpGet: path: /health port: http initialDelaySeconds: 5 periodSeconds: 5 + timeoutSeconds: 2 startupProbe: httpGet: path: /health diff --git a/helm-charts/common/teirerank/values.yaml b/helm-charts/common/teirerank/values.yaml index 526a3af4..176dea10 100644 --- a/helm-charts/common/teirerank/values.yaml +++ b/helm-charts/common/teirerank/values.yaml @@ -66,15 +66,17 @@ livenessProbe: httpGet: path: /health port: http - initialDelaySeconds: 5 - periodSeconds: 5 + initialDelaySeconds: 8 + periodSeconds: 8 + timeoutSeconds: 4 failureThreshold: 24 readinessProbe: httpGet: path: /health port: http - initialDelaySeconds: 5 - periodSeconds: 5 + initialDelaySeconds: 8 + periodSeconds: 8 + timeoutSeconds: 4 startupProbe: httpGet: path: /health diff --git a/helm-charts/common/tgi/gaudi-values.yaml b/helm-charts/common/tgi/gaudi-values.yaml index b18ead91..7bb81354 100644 --- a/helm-charts/common/tgi/gaudi-values.yaml +++ b/helm-charts/common/tgi/gaudi-values.yaml @@ -18,3 +18,17 @@ CUDA_GRAPHS: "" resources: limits: habana.ai/gaudi: 1 + +livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 +readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 +startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/common/tgi/nv-values.yaml b/helm-charts/common/tgi/nv-values.yaml index 798af895..d073ffb9 100644 --- a/helm-charts/common/tgi/nv-values.yaml +++ b/helm-charts/common/tgi/nv-values.yaml @@ -16,3 +16,17 @@ resources: nvidia.com/gpu: 1 CUDA_GRAPHS: "" + +livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 +readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 +startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/common/tgi/values.yaml b/helm-charts/common/tgi/values.yaml index 97ef2e59..e3e72e6c 100644 --- a/helm-charts/common/tgi/values.yaml +++ b/helm-charts/common/tgi/values.yaml @@ -72,20 +72,23 @@ resources: {} livenessProbe: tcpSocket: port: http - initialDelaySeconds: 5 - periodSeconds: 5 + initialDelaySeconds: 8 + periodSeconds: 8 + timeoutSeconds: 4 failureThreshold: 24 readinessProbe: tcpSocket: port: http - initialDelaySeconds: 5 - periodSeconds: 5 + initialDelaySeconds: 16 + periodSeconds: 8 + timeoutSeconds: 4 startupProbe: tcpSocket: port: http - initialDelaySeconds: 5 + initialDelaySeconds: 10 periodSeconds: 5 - failureThreshold: 120 + failureThreshold: 180 + timeoutSeconds: 2 #livenessProbe: # httpGet: # path: /health diff --git a/helm-charts/docsum/gaudi-values.yaml b/helm-charts/docsum/gaudi-values.yaml index 4115f5a3..b37ccc6b 100644 --- a/helm-charts/docsum/gaudi-values.yaml +++ b/helm-charts/docsum/gaudi-values.yaml @@ -12,3 +12,16 @@ tgi: MAX_INPUT_LENGTH: "1024" MAX_TOTAL_TOKENS: "2048" CUDA_GRAPHS: "" + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/microservices-connector/config/manifests/tei.yaml b/microservices-connector/config/manifests/tei.yaml index 2f67a57b..0354653b 100644 --- a/microservices-connector/config/manifests/tei.yaml +++ b/microservices-connector/config/manifests/tei.yaml @@ -64,8 +64,6 @@ metadata: app.kubernetes.io/version: "cpu-1.5" app.kubernetes.io/managed-by: Helm spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 selector: matchLabels: app.kubernetes.io/name: tei @@ -138,10 +136,10 @@ spec: sizeLimit: 1Gi - name: tmp emptyDir: {} - # extra time to finish processing buffered requests before pod is forcibly terminated + # extra time to finish processing buffered requests on CPU before pod is forcibly terminated terminationGracePeriodSeconds: 60 --- -# Source: tei/templates/horizontalPodAutoscaler.yaml +# Source: tei/templates/horizontal-pod-autoscaler.yaml # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- diff --git a/microservices-connector/config/manifests/tei_gaudi.yaml b/microservices-connector/config/manifests/tei_gaudi.yaml index 0af2958e..a3529fe7 100644 --- a/microservices-connector/config/manifests/tei_gaudi.yaml +++ b/microservices-connector/config/manifests/tei_gaudi.yaml @@ -64,8 +64,6 @@ metadata: app.kubernetes.io/version: "cpu-1.5" app.kubernetes.io/managed-by: Helm spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 selector: matchLabels: app.kubernetes.io/name: tei @@ -110,12 +108,14 @@ spec: port: http initialDelaySeconds: 5 periodSeconds: 5 + timeoutSeconds: 1 readinessProbe: httpGet: path: /health port: http initialDelaySeconds: 5 periodSeconds: 5 + timeoutSeconds: 1 startupProbe: failureThreshold: 120 httpGet: @@ -138,7 +138,7 @@ spec: - name: tmp emptyDir: {} --- -# Source: tei/templates/horizontalPodAutoscaler.yaml +# Source: tei/templates/horizontal-pod-autoscaler.yaml # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- diff --git a/microservices-connector/config/manifests/teirerank.yaml b/microservices-connector/config/manifests/teirerank.yaml index 20510a18..639350d5 100644 --- a/microservices-connector/config/manifests/teirerank.yaml +++ b/microservices-connector/config/manifests/teirerank.yaml @@ -63,8 +63,6 @@ metadata: app.kubernetes.io/version: "cpu-1.5" app.kubernetes.io/managed-by: Helm spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 selector: matchLabels: app.kubernetes.io/name: teirerank @@ -137,10 +135,10 @@ spec: sizeLimit: 1Gi - name: tmp emptyDir: {} - # extra time to finish processing buffered requests before pod is forcibly terminated + # extra time to finish processing buffered requests on CPU before pod is forcibly terminated terminationGracePeriodSeconds: 60 --- -# Source: teirerank/templates/horizontalPodAutoscaler.yaml +# Source: teirerank/templates/horizontal-pod-autoscaler.yaml # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- diff --git a/microservices-connector/config/manifests/tgi.yaml b/microservices-connector/config/manifests/tgi.yaml index cece9855..0d781393 100644 --- a/microservices-connector/config/manifests/tgi.yaml +++ b/microservices-connector/config/manifests/tgi.yaml @@ -104,22 +104,22 @@ spec: failureThreshold: 24 initialDelaySeconds: 8 periodSeconds: 8 - timeoutSeconds: 4 tcpSocket: port: http + timeoutSeconds: 4 readinessProbe: initialDelaySeconds: 16 periodSeconds: 8 - timeoutSeconds: 4 tcpSocket: port: http + timeoutSeconds: 4 startupProbe: failureThreshold: 180 initialDelaySeconds: 10 periodSeconds: 5 - timeoutSeconds: 2 tcpSocket: port: http + timeoutSeconds: 2 resources: {} volumes: @@ -133,10 +133,10 @@ spec: sizeLimit: 1Gi - name: tmp emptyDir: {} - # extra time to finish processing buffered requests before pod is forcibly terminated + # extra time to finish processing buffered requests on CPU before pod is forcibly terminated terminationGracePeriodSeconds: 120 --- -# Source: tgi/templates/horizontalPorAutoscaler.yaml +# Source: tgi/templates/horizontal-pod-autoscaler.yaml # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- diff --git a/microservices-connector/config/manifests/tgi_gaudi.yaml b/microservices-connector/config/manifests/tgi_gaudi.yaml index 3205ea90..c090507c 100644 --- a/microservices-connector/config/manifests/tgi_gaudi.yaml +++ b/microservices-connector/config/manifests/tgi_gaudi.yaml @@ -107,17 +107,20 @@ spec: periodSeconds: 5 tcpSocket: port: http + timeoutSeconds: 1 readinessProbe: initialDelaySeconds: 5 periodSeconds: 5 tcpSocket: port: http + timeoutSeconds: 1 startupProbe: failureThreshold: 120 initialDelaySeconds: 5 periodSeconds: 5 tcpSocket: port: http + timeoutSeconds: 1 resources: limits: habana.ai/gaudi: 1 @@ -133,7 +136,7 @@ spec: - name: tmp emptyDir: {} --- -# Source: tgi/templates/horizontalPorAutoscaler.yaml +# Source: tgi/templates/horizontal-pod-autoscaler.yaml # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- diff --git a/microservices-connector/config/manifests/tgi_nv.yaml b/microservices-connector/config/manifests/tgi_nv.yaml index d99a2fb9..2917e9ee 100644 --- a/microservices-connector/config/manifests/tgi_nv.yaml +++ b/microservices-connector/config/manifests/tgi_nv.yaml @@ -105,17 +105,20 @@ spec: periodSeconds: 5 tcpSocket: port: http + timeoutSeconds: 1 readinessProbe: initialDelaySeconds: 5 periodSeconds: 5 tcpSocket: port: http + timeoutSeconds: 1 startupProbe: failureThreshold: 120 initialDelaySeconds: 5 periodSeconds: 5 tcpSocket: port: http + timeoutSeconds: 1 resources: limits: nvidia.com/gpu: 1 @@ -131,7 +134,7 @@ spec: - name: tmp emptyDir: {} --- -# Source: tgi/templates/horizontalPorAutoscaler.yaml +# Source: tgi/templates/horizontal-pod-autoscaler.yaml # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 ---