diff --git a/ChatQnA/benchmark/performance/helm_charts/README.md b/ChatQnA/benchmark/performance/helm_charts/README.md index 9813d8806d..f6df9ce4fe 100644 --- a/ChatQnA/benchmark/performance/helm_charts/README.md +++ b/ChatQnA/benchmark/performance/helm_charts/README.md @@ -10,29 +10,27 @@ This document guides you through deploying ChatQnA pipelines using Helm charts. # on k8s-master node cd GenAIExamples/ChatQnA/benchmark/performance/helm_charts -# Replace with your actual Hugging Face token and run the following command: -HUGGINGFACE_TOKEN= -find . -name '*.yaml' -type f -exec sed -i "s#\${HF_TOKEN}#${HUGGINGFACE_TOKEN}#g" {} \; - -# Replace the following placeholders with the desired model IDs: -LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 -EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5 -RERANK_MODEL_ID=BAAI/bge-reranker-base -find . -name '*.yaml' -type f -exec sed -i "s#\$(LLM_MODEL_ID)#${LLM_MODEL_ID}#g" {} \; -find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING_MODEL_ID}#g" {} \; -find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \; - +# Replace the value of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token: +# vim customize.yaml +HUGGINGFACEHUB_API_TOKEN: hf_xxxxx ``` -### ChatQnA Installation +### Deploy your ChatQnA ```bash # Deploy a ChatQnA pipeline using the specified YAML configuration. # To deploy with different configurations, simply provide a different YAML file. -helm install chatqna helm_charts/ -f helm_charts/oob_single_node.yaml - -# Tips: To display rendered manifests according to the given yaml. -helm template chatqna helm_charts/ -f helm_charts/oob_single_node.yaml +helm install chatqna helm_charts/ -f customize.yaml ``` Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool. + +## Customize your own ChatQnA pipelines. 
(Optional) + +There are two yaml configs you can specify. + +- customize.yaml + This file can specify image names, the number of replicas and CPU cores to manage your pods. + +- values.yaml + This file contains the default microservice configurations for ChatQnA. Please review and understand each parameter before making any changes. diff --git a/ChatQnA/benchmark/performance/helm_charts/customize.yaml b/ChatQnA/benchmark/performance/helm_charts/customize.yaml new file mode 100644 index 0000000000..718dd2de80 --- /dev/null +++ b/ChatQnA/benchmark/performance/helm_charts/customize.yaml @@ -0,0 +1,71 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + +podSpecs: + - name: chatqna-backend-server-deploy + spec: + image_name: opea/chatqna-no-wrapper + image_tag: latest + replicas: 2 + resources: + limits: + cpu: "8" + memory: "8000Mi" + requests: + cpu: "8" + memory: "8000Mi" + + - name: embedding-dependency-deploy + spec: + image_name: ghcr.io/huggingface/text-embeddings-inference + image_tag: cpu-1.5 + replicas: 1 + resources: + limits: + cpu: "80" + memory: "20000Mi" + requests: + cpu: "80" + memory: "20000Mi" + + - name: reranking-dependency-deploy + spec: + image_name: opea/tei-gaudi + image_tag: latest + replicas: 1 + resources: + limits: + habana.ai/gaudi: 1 + + - name: llm-dependency-deploy + spec: + image_name: ghcr.io/huggingface/tgi-gaudi + image_tag: 2.0.4 + replicas: 7 + resources: + limits: + habana.ai/gaudi: 1 + + - name: dataprep-deploy + spec: + image_name: opea/dataprep-redis + image_tag: latest + replicas: 1 + + - name: vector-db + spec: + image_name: redis/redis-stack + image_tag: 7.2.0-v9 + replicas: 1 + + - name: retriever-deploy + spec: + image_name: opea/retriever-redis + image_tag: latest + replicas: 2 + resources: + requests: + cpu: "4" + memory: "4000Mi" diff --git a/ChatQnA/benchmark/performance/helm_charts/oob_single_node.yaml 
b/ChatQnA/benchmark/performance/helm_charts/oob_single_node.yaml deleted file mode 100644 index 2a526dd482..0000000000 --- a/ChatQnA/benchmark/performance/helm_charts/oob_single_node.yaml +++ /dev/null @@ -1,237 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -config: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - INDEX_NAME: rag-redis - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - LLM_SERVER_HOST_IP: llm-dependency-svc - NODE_SELECTOR: chatqna-opea - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - RERANK_MODEL_ID: BAAI/bge-reranker-base - RERANK_SERVER_HOST_IP: reranking-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - -deployments: - - name: chatqna-backend-server-deploy - spec: - image_name: opea/chatqna-no-wrapper - image_tag: latest - replicas: 1 - ports: - - containerPort: 8888 - - - name: dataprep-deploy - spec: - image_name: opea/dataprep-redis - image_tag: latest - replicas: 1 - ports: - - containerPort: 6007 - - - name: vector-db - spec: - image_name: redis/redis-stack - image_tag: 7.2.0-v9 - replicas: 1 - ports: - - containerPort: 6379 - - containerPort: 8001 - - - name: retriever-deploy - spec: - image_name: opea/retriever-redis - image_tag: latest - replicas: 1 - ports: - - containerPort: 7000 - - - name: embedding-dependency-deploy - spec: - image_name: ghcr.io/huggingface/text-embeddings-inference - image_tag: cpu-1.5 - replicas: 1 - ports: - - containerPort: 80 - args: - - name: "--model-id" - value: $(EMBEDDING_MODEL_ID) - - name: "--auto-truncate" - volumeMounts: - - 
mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm - - - name: reranking-dependency-deploy - spec: - image_name: opea/tei-gaudi - image_tag: latest - replicas: 1 - resources: - limits: - habana.ai/gaudi: 1 - args: - - name: "--model-id" - - value: $(RERANK_MODEL_ID) - - name: "--auto-truncate" - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: "true" - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: "512" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm - - - name: llm-dependency-deploy - spec: - image_name: ghcr.io/huggingface/tgi-gaudi - image_tag: 2.0.4 - replicas: 7 - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - args: - - name: "--model-id" - value: $(LLM_MODEL_ID) - - name: "--max-input-length" - value: "2048" - - name: "--max-total-tokens" - value: "4096" - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: "true" - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm - -services: - - name: chatqna-backend-server-svc - spec: - ports: - - name: service - nodePort: 30888 - port: 8888 - targetPort: 8888 - selector: - app: 
chatqna-backend-server-deploy - type: NodePort - - - name: dataprep-svc - spec: - ports: - - name: port1 - port: 6007 - targetPort: 6007 - selector: - app: dataprep-deploy - type: ClusterIP - - - name: embedding-dependency-svc - spec: - ports: - - name: service - port: 6006 - targetPort: 80 - selector: - app: embedding-dependency-deploy - type: ClusterIP - - - name: llm-dependency-svc - spec: - ports: - - name: service - port: 9009 - targetPort: 80 - selector: - app: llm-dependency-deploy - type: ClusterIP - - - name: reranking-dependency-svc - spec: - ports: - - name: service - port: 8808 - targetPort: 80 - selector: - app: reranking-dependency-deploy - type: ClusterIP - - - name: retriever-svc - spec: - ports: - - name: service - port: 7000 - targetPort: 7000 - selector: - app: retriever-deploy - type: ClusterIP - - - name: vector-db - spec: - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - selector: - app: vector-db - type: ClusterIP diff --git a/ChatQnA/benchmark/performance/helm_charts/templates/configmap.yaml b/ChatQnA/benchmark/performance/helm_charts/templates/configmap.yaml index 5d1247a114..2ce795a1ef 100644 --- a/ChatQnA/benchmark/performance/helm_charts/templates/configmap.yaml +++ b/ChatQnA/benchmark/performance/helm_charts/templates/configmap.yaml @@ -8,18 +8,18 @@ metadata: namespace: default data: EMBEDDING_MODEL_ID: {{ .Values.config.EMBEDDING_MODEL_ID }} - EMBEDDING_SERVER_HOST_IP: {{ .Values.config.EMBEDDING_SERVER_HOST_IP }} - HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN }} - INDEX_NAME: {{ .Values.config.INDEX_NAME }} + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }} + INDEX_NAME: rag-redis LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }} - LLM_SERVER_HOST_IP: {{ .Values.config.LLM_SERVER_HOST_IP }} - NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }} - REDIS_URL: {{ 
.Values.config.REDIS_URL }} + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID }} - RERANK_SERVER_HOST_IP: {{ .Values.config.RERANK_SERVER_HOST_IP }} - RETRIEVER_SERVICE_HOST_IP: {{ .Values.config.RETRIEVER_SERVICE_HOST_IP }} - TEI_EMBEDDING_ENDPOINT: {{ .Values.config.TEI_EMBEDDING_ENDPOINT }} - TEI_ENDPOINT: {{ .Values.config.TEI_ENDPOINT }} - TEI_RERANKING_ENDPOINT: {{ .Values.config.TEI_RERANKING_ENDPOINT }} - TGI_LLM_ENDPOINT: {{ .Values.config.TGI_LLM_ENDPOINT }} + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 --- diff --git a/ChatQnA/benchmark/performance/helm_charts/templates/deployment.yaml b/ChatQnA/benchmark/performance/helm_charts/templates/deployment.yaml index b33e3a1b0b..d751d7dfe5 100644 --- a/ChatQnA/benchmark/performance/helm_charts/templates/deployment.yaml +++ b/ChatQnA/benchmark/performance/helm_charts/templates/deployment.yaml @@ -1,14 +1,17 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +{{- $global := .Values }} {{- range $deployment := .Values.deployments }} +{{- range $podSpec := $global.podSpecs }} +{{- if eq $podSpec.name $deployment.name }} apiVersion: apps/v1 kind: Deployment metadata: name: {{ $deployment.name }} namespace: default spec: - replicas: {{ $deployment.spec.replicas }} + replicas: {{ $podSpec.spec.replicas }} selector: matchLabels: app: {{ $deployment.name }} @@ -43,9 +46,9 @@ spec: {{- end }} {{- end }} - image: {{ $deployment.spec.image_name }}:{{ $deployment.spec.image_tag }} + image: 
{{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }} imagePullPolicy: IfNotPresent - name: {{ $deployment.name }} + name: {{ $podSpec.name }} {{- if $deployment.spec.ports }} ports: @@ -56,9 +59,10 @@ spec: {{- end }} {{- end }} - {{- if $deployment.spec.resources }} + + {{- if $podSpec.spec.resources }} resources: - {{- range $resourceType, $resource := $deployment.spec.resources }} + {{- range $resourceType, $resource := $podSpec.spec.resources }} {{ $resourceType }}: {{- range $limitType, $limit := $resource }} {{ $limitType }}: {{ $limit }} @@ -103,6 +107,7 @@ spec: {{- end }} {{- end }} - --- {{- end }} +{{- end }} +{{- end }} diff --git a/ChatQnA/benchmark/performance/helm_charts/tuned_single_node.yaml b/ChatQnA/benchmark/performance/helm_charts/tuned_single_node.yaml deleted file mode 100644 index 5163d7694e..0000000000 --- a/ChatQnA/benchmark/performance/helm_charts/tuned_single_node.yaml +++ /dev/null @@ -1,259 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -config: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - INDEX_NAME: rag-redis - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - LLM_SERVER_HOST_IP: llm-dependency-svc - NODE_SELECTOR: chatqna-opea - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - RERANK_MODEL_ID: BAAI/bge-reranker-base - RERANK_SERVER_HOST_IP: reranking-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - -deployments: - - name: chatqna-backend-server-deploy - spec: - image_name: opea/chatqna-no-wrapper - image_tag: latest - replicas: 2 - ports: - - 
containerPort: 8888 - resources: - limits: - cpu: "8" - memory: "8000Mi" - requests: - cpu: "8" - memory: "8000Mi" - - - name: dataprep-deploy - spec: - image_name: opea/dataprep-redis - image_tag: latest - replicas: 1 - ports: - - containerPort: 6007 - - - name: vector-db - spec: - image_name: redis/redis-stack - image_tag: 7.2.0-v9 - replicas: 1 - ports: - - containerPort: 6379 - - containerPort: 8001 - - - name: retriever-deploy - spec: - image_name: opea/retriever-redis - image_tag: latest - replicas: 2 - ports: - - containerPort: 7000 - resources: - requests: - cpu: "4" - memory: "4000Mi" - - - name: embedding-dependency-deploy - spec: - image_name: ghcr.io/huggingface/text-embeddings-inference - image_tag: cpu-1.5 - replicas: 1 - ports: - - containerPort: 80 - args: - - name: "--model-id" - value: $(EMBEDDING_MODEL_ID) - - name: "--auto-truncate" - resources: - limits: - cpu: "80" - memory: "20000Mi" - requests: - cpu: "80" - memory: "20000Mi" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm - - - name: reranking-dependency-deploy - spec: - image_name: opea/tei-gaudi - image_tag: latest - replicas: 1 - resources: - limits: - habana.ai/gaudi: 1 - args: - - name: "--model-id" - - value: $(RERANK_MODEL_ID) - - name: "--auto-truncate" - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: "true" - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: "512" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm - - - name: 
llm-dependency-deploy - spec: - image_name: ghcr.io/huggingface/tgi-gaudi - image_tag: 2.0.4 - replicas: 7 - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - args: - - name: "--model-id" - value: $(LLM_MODEL_ID) - - name: "--max-input-length" - value: "1280" - - name: "--max-total-tokens" - value: "2048" - - name: "--max-batch-total-tokens" - value: "65536" - - name: "--max-batch-prefill-tokens" - value: "4096" - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: "true" - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm - -services: - - name: chatqna-backend-server-svc - spec: - ports: - - name: service - nodePort: 30888 - port: 8888 - targetPort: 8888 - selector: - app: chatqna-backend-server-deploy - type: NodePort - - - name: dataprep-svc - spec: - ports: - - name: port1 - port: 6007 - targetPort: 6007 - selector: - app: dataprep-deploy - type: ClusterIP - - - name: embedding-dependency-svc - spec: - ports: - - name: service - port: 6006 - targetPort: 80 - selector: - app: embedding-dependency-deploy - type: ClusterIP - - - name: llm-dependency-svc - spec: - ports: - - name: service - port: 9009 - targetPort: 80 - selector: - app: llm-dependency-deploy - type: ClusterIP - - - name: reranking-dependency-svc - spec: - ports: - - name: service - port: 8808 - targetPort: 80 - selector: - app: reranking-dependency-deploy - type: ClusterIP - - - name: retriever-svc - spec: - ports: - - name: service - port: 7000 - targetPort: 7000 - selector: - app: retriever-deploy - type: ClusterIP - - - name: vector-db - spec: - ports: - - name: vector-db-service - port: 6379 - 
targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - selector: - app: vector-db - type: ClusterIP diff --git a/ChatQnA/benchmark/performance/helm_charts/values.yaml b/ChatQnA/benchmark/performance/helm_charts/values.yaml index 2a526dd482..7041e0e8f2 100644 --- a/ChatQnA/benchmark/performance/helm_charts/values.yaml +++ b/ChatQnA/benchmark/performance/helm_charts/values.yaml @@ -1,62 +1,37 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +namespace: default + config: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - LLM_SERVER_HOST_IP: llm-dependency-svc - NODE_SELECTOR: chatqna-opea - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - RERANK_SERVER_HOST_IP: reranking-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 deployments: - name: chatqna-backend-server-deploy spec: - image_name: opea/chatqna-no-wrapper - image_tag: latest - replicas: 1 ports: - containerPort: 8888 - name: dataprep-deploy spec: - image_name: opea/dataprep-redis - image_tag: latest - replicas: 1 ports: - containerPort: 6007 - name: vector-db spec: - image_name: redis/redis-stack - image_tag: 7.2.0-v9 - replicas: 1 ports: - containerPort: 6379 - containerPort: 8001 - name: retriever-deploy spec: - image_name: opea/retriever-redis - image_tag: latest - replicas: 1 ports: - containerPort: 7000 - name: embedding-dependency-deploy spec: - image_name: ghcr.io/huggingface/text-embeddings-inference - image_tag: 
cpu-1.5 - replicas: 1 ports: - containerPort: 80 args: @@ -80,12 +55,6 @@ deployments: - name: reranking-dependency-deploy spec: - image_name: opea/tei-gaudi - image_tag: latest - replicas: 1 - resources: - limits: - habana.ai/gaudi: 1 args: - name: "--model-id" - value: $(RERANK_MODEL_ID) @@ -120,9 +89,6 @@ deployments: - name: llm-dependency-deploy spec: - image_name: ghcr.io/huggingface/tgi-gaudi - image_tag: 2.0.4 - replicas: 7 ports: - containerPort: 80 resources: