Skip to content

Commit

Permalink
helm: Add K8S probes to retriever-usvc (#244)
Browse files Browse the repository at this point in the history
- Add K8S probes to retriever-usvc
- Remove redundant values in xx-values.yaml
Signed-off-by: Lianhao Lu <[email protected]>
  • Loading branch information
lianhao authored Jul 31, 2024
1 parent aa2730a commit af47b3c
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 105 deletions.
46 changes: 5 additions & 41 deletions helm-charts/chatqna/gaudi-values.yaml
Original file line number Diff line number Diff line change
@@ -1,34 +1,6 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Default values for chatqna.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 1

image:
repository: opea/chatqna:latest
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
# tag: "1.0"

port: 8888
service:
type: ClusterIP
port: 8888

securityContext:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
runAsNonRoot: true
runAsUser: 1000
capabilities:
drop:
- ALL
seccompProfile:
type: RuntimeDefault

tei:
image:
repository: ghcr.io/huggingface/tei-gaudi
Expand All @@ -39,22 +11,14 @@ tei:

# To override values in subchart tgi
tgi:
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
# LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
image:
repository: ghcr.io/huggingface/tgi-gaudi
tag: "2.0.1"
resources:
limits:
habana.ai/gaudi: 1

global:
http_proxy:
https_proxy:
no_proxy:
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
LANGCHAIN_TRACING_V2: false
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
# set modelUseHostPath to host directory if you want to use hostPath volume for model storage
# comment out modelUseHostPath if you want to download the model from huggingface
modelUseHostPath: /mnt/opea-models
extraArgs:
- "--max-input-length"
- "1024"
- "--max-total-tokens"
- "2048"
22 changes: 10 additions & 12 deletions helm-charts/common/retriever-usvc/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,17 @@ spec:
volumeMounts:
- mountPath: /tmp
name: tmp
{{- if not .Values.noProbe }}
{{- if .Values.livenessProbe }}
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 12 }}
{{- end }}
{{- if .Values.readinessProbe }}
readinessProbe:
{{- toYaml .Values.readinessProbe | nindent 12 }}
{{- end }}
{{- if .Values.startupProbe }}
startupProbe:
exec:
command:
- curl
{{- if .Values.TEI_EMBEDDING_ENDPOINT }}
- {{ .Values.TEI_EMBEDDING_ENDPOINT }}
{{- else }}
- http://{{ .Release.Name }}-tei
{{- end }}
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
{{- toYaml .Values.startupProbe | nindent 12 }}
{{- end }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
Expand Down
11 changes: 8 additions & 3 deletions helm-charts/common/retriever-usvc/templates/tests/test-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,17 @@ spec:
- name: curl
#image: alpine/curl
image: python:3.10.14
command: ['sh', '-c']
command: ['bash', '-c']
args:
- |
your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)");
curl http://{{ include "retriever-usvc.fullname" . }}:{{ .Values.service.port }}/v1/retrieval -sS --fail-with-body \
max_retry=20;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "retriever-usvc.fullname" . }}:{{ .Values.service.port }}/v1/retrieval -sS --fail-with-body \
-X POST \
-d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
-H 'Content-Type: application/json'
-H 'Content-Type: application/json' && break;
sleep 10;
done;
if [ $i -gt $max_retry ]; then echo "retriever test failed."; exit 1; fi
restartPolicy: Never
21 changes: 21 additions & 0 deletions helm-charts/common/retriever-usvc/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,27 @@ resources: {}
# cpu: 100m
# memory: 128Mi

livenessProbe:
httpGet:
path: v1/health_check
port: retriever-usvc
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 24
readinessProbe:
httpGet:
path: v1/health_check
port: retriever-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
httpGet:
path: v1/health_check
port: retriever-usvc
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120

nodeSelector: {}

tolerations: []
Expand Down
46 changes: 0 additions & 46 deletions helm-charts/common/tgi/nv-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,56 +5,10 @@
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 1

port: 2080

image:
repository: ghcr.io/huggingface/text-generation-inference
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: "2.0"

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

podAnnotations: {}

podSecurityContext: {}
# fsGroup: 2000

securityContext:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
runAsNonRoot: true
runAsUser: 1000
capabilities:
drop:
- ALL
seccompProfile:
type: RuntimeDefault

service:
type: ClusterIP

resources:
limits:
nvidia.com/gpu: 1

nodeSelector: {}

tolerations: []

affinity: {}

LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

global:
http_proxy: ""
https_proxy: ""
no_proxy: ""
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
# set modelUseHostPath to host directory if you want to use hostPath volume for model storage
# comment out modelUseHostPath if you want to download the model from huggingface
modelUseHostPath: /mnt/opea-models
20 changes: 20 additions & 0 deletions manifests/common/retriever-usvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,26 @@ spec:
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: retriever-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: retriever-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: retriever-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
Expand Down
20 changes: 17 additions & 3 deletions manifests/common/tgi_nv.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,7 @@ metadata:
data:
MODEL_ID: "Intel/neural-chat-7b-v3-3"
PORT: "2080"
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
HF_TOKEN: "insert-your-huggingface-token-here"
MAX_INPUT_TOKENS: "1024"
MAX_TOTAL_TOKENS: "4096"
http_proxy: ""
https_proxy: ""
no_proxy: ""
Expand Down Expand Up @@ -102,6 +99,23 @@ spec:
- name: http
containerPort: 2080
protocol: TCP
livenessProbe:
failureThreshold: 24
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
startupProbe:
failureThreshold: 120
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
resources:
limits:
nvidia.com/gpu: 1
Expand Down

0 comments on commit af47b3c

Please sign in to comment.