[ChatQnA] manage your own ChatQnA pipelines. #878

Merged
9 commits merged on Sep 30, 2024
32 changes: 15 additions & 17 deletions ChatQnA/benchmark/performance/helm_charts/README.md
@@ -10,29 +10,27 @@ This document guides you through deploying ChatQnA pipelines using Helm charts.
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/performance/helm_charts

# Replace <your token> with your actual Hugging Face token and run the following command:
HUGGINGFACE_TOKEN=<your token>
find . -name '*.yaml' -type f -exec sed -i "s#\${HF_TOKEN}#${HUGGINGFACE_TOKEN}#g" {} \;

# Replace the following placeholders with the desired model IDs:
LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
RERANK_MODEL_ID=BAAI/bge-reranker-base
find . -name '*.yaml' -type f -exec sed -i "s#\$(LLM_MODEL_ID)#${LLM_MODEL_ID}#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING_MODEL_ID}#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \;

# Replace the value of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
# vim customize.yaml
HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
```
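
The sed commands above rewrite placeholders in place, so a mistyped variable silently leaves the placeholder behind. The following optional check is not part of the original guide; it simply greps for any substitution that was missed before you install the chart.

```bash
# Optional sanity check (not from the original guide): any output here means a
# token or model-ID placeholder was not replaced in the chart's YAML files.
grep -rnE '\$\{HF_TOKEN\}|\$\((LLM|EMBEDDING|RERANK)_MODEL_ID\)' --include='*.yaml' .
```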

### ChatQnA Installation
### Deploy your ChatQnA

```bash
# Deploy a ChatQnA pipeline using the specified YAML configuration.
# To deploy with different configurations, simply provide a different YAML file.
helm install chatqna helm_charts/ -f helm_charts/oob_single_node.yaml

# Tip: to display the rendered manifests for the given YAML file:
helm template chatqna helm_charts/ -f helm_charts/oob_single_node.yaml
helm install chatqna helm_charts/ -f customize.yaml
```
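
After `helm install` returns, it can help to confirm that the release actually deployed and that the pods defined in customize.yaml come up. A minimal sketch, assuming the release name `chatqna` and the default namespace used throughout this guide:

```bash
# Show the release status recorded by Helm.
helm status chatqna

# Watch the ChatQnA pods (backend server, embedding, reranking, LLM, dataprep,
# vector DB, retriever) until they reach the Running state.
kubectl get pods -n default -w
```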

Note: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single-, two-, and four-node Kubernetes clusters were generated using this tool.

## Customize your own ChatQnA pipelines (optional)

There are two YAML configuration files you can specify:

- customize.yaml
  This file sets the image names, replica counts, and CPU/memory resources for your pods; a sketch of how to re-apply it after editing follows this list.

- values.yaml
  This file contains the default microservice configurations for ChatQnA. Please review and understand each parameter before making any changes.
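
If you edit customize.yaml after the initial install, for example to change a replica count or a resource limit, the change is not picked up automatically. A sketch of the edit-and-apply loop, assuming the release name `chatqna` from the installation step above:

```bash
# Preview the manifests Helm would render from the edited customize.yaml.
helm template chatqna helm_charts/ -f customize.yaml

# Apply the edited configuration to the running release in place.
helm upgrade chatqna helm_charts/ -f customize.yaml
```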
71 changes: 71 additions & 0 deletions ChatQnA/benchmark/performance/helm_charts/customize.yaml
@@ -0,0 +1,71 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}

podSpecs:
- name: chatqna-backend-server-deploy
spec:
image_name: opea/chatqna-no-wrapper
image_tag: latest
replicas: 2
resources:
limits:
cpu: "8"
memory: "8000Mi"
requests:
cpu: "8"
memory: "8000Mi"

- name: embedding-dependency-deploy
spec:
image_name: ghcr.io/huggingface/text-embeddings-inference
image_tag: cpu-1.5
replicas: 1
resources:
limits:
cpu: "80"
memory: "20000Mi"
requests:
cpu: "80"
memory: "20000Mi"

- name: reranking-dependency-deploy
spec:
image_name: opea/tei-gaudi
image_tag: latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1

- name: llm-dependency-deploy
spec:
image_name: ghcr.io/huggingface/tgi-gaudi
image_tag: 2.0.4
replicas: 7
resources:
limits:
habana.ai/gaudi: 1

- name: dataprep-deploy
spec:
image_name: opea/dataprep-redis
image_tag: latest
replicas: 1

- name: vector-db
spec:
image_name: redis/redis-stack
image_tag: 7.2.0-v9
replicas: 1

- name: retriever-deploy
spec:
image_name: opea/retriever-redis
image_tag: latest
replicas: 2
resources:
requests:
cpu: "4"
memory: "4000Mi"
237 changes: 0 additions & 237 deletions ChatQnA/benchmark/performance/helm_charts/oob_single_node.yaml

This file was deleted.

@@ -8,18 +8,18 @@ metadata:
namespace: default
data:
EMBEDDING_MODEL_ID: {{ .Values.config.EMBEDDING_MODEL_ID }}
EMBEDDING_SERVER_HOST_IP: {{ .Values.config.EMBEDDING_SERVER_HOST_IP }}
HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN }}
INDEX_NAME: {{ .Values.config.INDEX_NAME }}
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }}
INDEX_NAME: rag-redis
LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
LLM_SERVER_HOST_IP: {{ .Values.config.LLM_SERVER_HOST_IP }}
NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
REDIS_URL: {{ .Values.config.REDIS_URL }}
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID }}
RERANK_SERVER_HOST_IP: {{ .Values.config.RERANK_SERVER_HOST_IP }}
RETRIEVER_SERVICE_HOST_IP: {{ .Values.config.RETRIEVER_SERVICE_HOST_IP }}
TEI_EMBEDDING_ENDPOINT: {{ .Values.config.TEI_EMBEDDING_ENDPOINT }}
TEI_ENDPOINT: {{ .Values.config.TEI_ENDPOINT }}
TEI_RERANKING_ENDPOINT: {{ .Values.config.TEI_RERANKING_ENDPOINT }}
TGI_LLM_ENDPOINT: {{ .Values.config.TGI_LLM_ENDPOINT }}
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
---
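
The ConfigMap now hardcodes in-cluster service DNS names instead of templated `.Values.config` entries. One way to verify that these endpoints resolve and respond, sketched here rather than taken from the PR, is to call one of them from a throwaway pod; the TEI `/embed` route used below is an assumption based on the text-embeddings-inference image referenced in customize.yaml.

```bash
# Illustrative in-cluster check: call the embedding endpoint the ConfigMap
# points at and print the raw response.
kubectl run tei-check --rm -it --restart=Never --image=curlimages/curl -- \
  curl -s http://embedding-dependency-svc.default.svc.cluster.local:6006/embed \
  -H 'Content-Type: application/json' \
  -d '{"inputs":"hello world"}'
```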