From 9ab8de026af52f854e758e2486c2f05ff2d1de3d Mon Sep 17 00:00:00 2001
From: dolpher
Date: Thu, 20 Jun 2024 17:00:44 +0800
Subject: [PATCH] Add DocSum llm service manifests (#111)

DocSum manifest support.
Regenerate the CodeTrans yaml file.

Signed-off-by: Dolpher Du
---
 .github/workflows/scripts/e2e/manifest_test.sh               |  62 +++++-
 helm-charts/common/llm-uservice/values.yaml                  |   1 -
 manifests/CodeTrans/README.md                                |  23 ++-
 manifests/CodeTrans/gaudi/llm.yaml                           |  56 ++++--
 manifests/CodeTrans/xeon/llm.yaml                            |  56 ++++--
 manifests/DocSum/README.md                                   | 117 +++--------
 manifests/DocSum/gaudi/llm.yaml                              | 182 ++++++++++++++++++
 .../DocSum/manifests/backend-service.yaml                    |  58 ------
 .../inference-serving-tgi-gaudi.yaml                         |  77 --------
 .../manifests/inference-serving-tgi.yaml                     |  69 -------
 manifests/DocSum/manifests/web-ui.yaml                       |  54 ------
 manifests/DocSum/xeon/llm.yaml                               | 181 +++++++++++++++++
 12 files changed, 548 insertions(+), 388 deletions(-)
 create mode 100644 manifests/DocSum/gaudi/llm.yaml
 delete mode 100644 manifests/DocSum/manifests/backend-service.yaml
 delete mode 100644 manifests/DocSum/manifests/inference-serving-tgi-gaudi.yaml
 delete mode 100644 manifests/DocSum/manifests/inference-serving-tgi.yaml
 delete mode 100644 manifests/DocSum/manifests/web-ui.yaml
 create mode 100644 manifests/DocSum/xeon/llm.yaml

diff --git a/.github/workflows/scripts/e2e/manifest_test.sh b/.github/workflows/scripts/e2e/manifest_test.sh
index 715acd5f9..e510f9f2b 100755
--- a/.github/workflows/scripts/e2e/manifest_test.sh
+++ b/.github/workflows/scripts/e2e/manifest_test.sh
@@ -9,6 +9,16 @@ MOUNT_DIR=/home/$USER_ID/charts-mnt
 # IMAGE_REPO is $OPEA_IMAGE_REPO, or else ""
 IMAGE_REPO=${OPEA_IMAGE_REPO:-""}
 
+function init_docsum() {
+    # executed under path manifests/DocSum/xeon
+    # replace the mount dir "path: /mnt" with "path: $MOUNT_DIR"
+    find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt#path: $MOUNT_DIR#g" {} \;
+    # replace the repository "image: opea/*" with "image: ${IMAGE_REPO}opea/"
+    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}opea/#g" {} \;
+    # set huggingface token
+    find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
+}
+
 function init_codetrans() {
     # executed under path manifest/codetrans/xeon
     # replace the mount dir "path: /mnt/model" with "path: $CHART_MOUNT"
@@ -29,6 +39,11 @@ function init_codegen() {
     find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
 }
 
+function install_docsum {
+    echo "namespace is $NAMESPACE"
+    kubectl apply -f . -n $NAMESPACE
+}
+
 function install_codetrans {
     echo "namespace is $NAMESPACE"
     kubectl apply -f . -n $NAMESPACE
@@ -61,6 +76,35 @@ function install_chatqna {
     kubectl apply -f chaqna-xeon-backend-server.yaml -n $NAMESPACE
 }
 
+function validate_docsum() {
+    ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
+    port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
+    echo "try to curl http://${ip_address}:${port}/v1/chat/docsum..."
+    # Curl the DocSum LLM Service
+    curl http://${ip_address}:${port}/v1/chat/docsum \
+        -X POST \
+        -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
+        -H 'Content-Type: application/json' > $LOG_PATH/curl_docsum.log
+    exit_code=$?
+    if [ $exit_code -ne 0 ]; then
+        echo "LLM for docsum failed, please check the logs in ${LOG_PATH}!"
+        exit 1
+    fi
+
+    echo "Checking response results, make sure the output is reasonable."
+    local status=false
+    if [[ -f $LOG_PATH/curl_docsum.log ]] && \
+       [[ $(grep -c "TEI" $LOG_PATH/curl_docsum.log) != 0 ]]; then
+        status=true
+    fi
+
+    if [ $status == false ]; then
+        echo "Response check failed, please check the logs in artifacts!"
+    else
+        echo "Response check succeeded!"
+    fi
+}
+
 function validate_codetrans() {
     ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
     port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
@@ -158,6 +202,11 @@ if [ $# -eq 0 ]; then
 fi
 
 case "$1" in
+    init_docsum)
+        pushd manifests/DocSum/xeon
+        init_docsum
+        popd
+        ;;
     init_codetrans)
        pushd manifests/CodeTrans/xeon
        init_codetrans
        popd
        ;;
@@ -173,6 +222,12 @@ case "$1" in
         init_chatqna
         popd
         ;;
+    install_docsum)
+        pushd manifests/DocSum/xeon
+        NAMESPACE=$2
+        install_docsum
+        popd
+        ;;
     install_codetrans)
         pushd manifests/CodeTrans/xeon
         NAMESPACE=$2
         install_codetrans
         popd
         ;;
@@ -191,9 +246,14 @@ case "$1" in
         install_chatqna
         popd
         ;;
+    validate_docsum)
+        NAMESPACE=$2
+        SERVICE_NAME=docsum-llm-uservice
+        validate_docsum
+        ;;
     validate_codetrans)
         NAMESPACE=$2
-        SERVICE_NAME=llm-llm-uservice
+        SERVICE_NAME=codetrans-llm-uservice
         validate_codetrans
         ;;
     validate_codegen)
diff --git a/helm-charts/common/llm-uservice/values.yaml b/helm-charts/common/llm-uservice/values.yaml
index 8bfb02412..884a95965 100644
--- a/helm-charts/common/llm-uservice/values.yaml
+++ b/helm-charts/common/llm-uservice/values.yaml
@@ -58,7 +58,6 @@ affinity: {}
 tgi:
   LLM_MODEL_ID: m-a-p/OpenCodeInterpreter-DS-6.7B
   # LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
-  port: 80
 
   image:
     repository: ghcr.io/huggingface/text-generation-inference
diff --git a/manifests/CodeTrans/README.md b/manifests/CodeTrans/README.md
index f85230b5e..935ad18b8 100644
--- a/manifests/CodeTrans/README.md
+++ b/manifests/CodeTrans/README.md
@@ -3,13 +3,11 @@
 > [NOTE]
 > The following values must be set before you can deploy:
 > HUGGINGFACEHUB_API_TOKEN
-> You can also customize the "MODEL_ID" and "model-volume"
-> The manifest llm.yaml is generated from helm chart.
 
 ## Deploy On Xeon
 
 ```
-cd GenAIExamples/CodeTrans/kubernetes/manifests/xeon
+cd GenAIInfra/manifests/CodeTrans/xeon
 export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
 sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" llm.yaml
 kubectl apply -f llm.yaml
@@ -18,7 +16,7 @@ kubectl apply -f llm.yaml
 ## Deploy On Gaudi
 
 ```
-cd GenAIExamples/CodeTrans/kubernetes/manifests/gaudi
+cd GenAIInfra/manifests/CodeTrans/gaudi
 export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
 sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" llm.yaml
 kubectl apply -f llm.yaml
@@ -30,8 +28,23 @@ Make sure all the pods are running, and restart the llm-xxxx pod if necessary.
 
 ```
 kubectl get pods
-curl http://llm-llm-uservice:9000/v1/chat/completions\
+curl http://codetrans-llm-uservice:9000/v1/chat/completions \
   -X POST \
   -d '{"query":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:"}' \
   -H 'Content-Type: application/json'
 ```
+
+## Generate the llm file from helm chart
+
+The llm.yaml file is generated automatically from the llm-uservice helm chart.
+
+Here are the exact commands:
+
+```
+cd GenAIInfra/manifests/CodeTrans
+export HF_TOKEN="insert-your-huggingface-token-here"
+export MODELDIR="/mnt"
+helm template codetrans ../../helm-charts/common/llm-uservice --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set image.repository="opea/llm-tgi:latest" --set tgi.volume=${MODELDIR} --set tgi.LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" --values ../../helm-charts/common/llm-uservice/values.yaml > xeon/llm.yaml
+helm template codetrans ../../helm-charts/common/llm-uservice --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set image.repository="opea/llm-tgi:latest" --set tgi.volume=${MODELDIR} --set tgi.LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" --values ../../helm-charts/common/llm-uservice/gaudi-values.yaml > gaudi/llm.yaml
+
+```
diff --git a/manifests/CodeTrans/gaudi/llm.yaml b/manifests/CodeTrans/gaudi/llm.yaml
index be54bfe0b..52aaa862e 100644
--- a/manifests/CodeTrans/gaudi/llm.yaml
+++ b/manifests/CodeTrans/gaudi/llm.yaml
@@ -6,11 +6,11 @@
 apiVersion: v1
 kind: Service
 metadata:
-  name: llm-tgi
+  name: codetrans-tgi
   labels:
     helm.sh/chart: tgi-0.1.0
     app.kubernetes.io/name: tgi
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.4"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -22,16 +22,20 @@ spec:
     name: tgi
   selector:
     app.kubernetes.io/name: tgi
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
 ---
+# Source: llm-uservice/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: v1
 kind: Service
 metadata:
-  name: llm-llm-uservice
+  name: codetrans-llm-uservice
   labels:
     helm.sh/chart: llm-uservice-0.1.0
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.0.0"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -43,16 +47,20 @@ spec:
     name: llm-uservice
   selector:
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
 ---
+# Source: llm-uservice/charts/tgi/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: llm-tgi
+  name: codetrans-tgi
   labels:
     helm.sh/chart: tgi-0.1.0
     app.kubernetes.io/name: tgi
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.4"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -60,12 +68,12 @@ spec:
   selector:
     matchLabels:
       app.kubernetes.io/name: tgi
-      app.kubernetes.io/instance: llm
+      app.kubernetes.io/instance: codetrans
   template:
     metadata:
       labels:
         app.kubernetes.io/name: tgi
-        app.kubernetes.io/instance: llm
+        app.kubernetes.io/instance: codetrans
     spec:
       securityContext:
         {}
@@ -76,6 +84,10 @@ spec:
             value: HuggingFaceH4/mistral-7b-grok
           - name: PORT
             value: "80"
+          - name: HUGGING_FACE_HUB_TOKEN
+            value: "insert-your-huggingface-token-here"
+          - name: HF_TOKEN
+            value: "insert-your-huggingface-token-here"
           - name: http_proxy
             value:
           - name: https_proxy
            value:
@@ -102,14 +114,18 @@ spec:
           path: /mnt
           type: Directory
 ---
+# Source: llm-uservice/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: llm-llm-uservice
+  name: codetrans-llm-uservice
   labels:
     helm.sh/chart: llm-uservice-0.1.0
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.0.0"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -117,20 +133,20 @@ spec:
   selector:
     matchLabels:
       app.kubernetes.io/name: llm-uservice
-      app.kubernetes.io/instance: llm
+      app.kubernetes.io/instance: codetrans
   template:
     metadata:
       labels:
         app.kubernetes.io/name: llm-uservice
-        app.kubernetes.io/instance: llm
+        app.kubernetes.io/instance: codetrans
     spec:
       securityContext:
         {}
       containers:
-        - name: llm
+        - name: codetrans
           env:
             - name: TGI_LLM_ENDPOINT
-              value: "http://llm-tgi:80"
+              value: "http://codetrans-tgi"
             - name: HUGGINGFACEHUB_API_TOKEN
               value: "insert-your-huggingface-token-here"
             - name: http_proxy
@@ -139,6 +155,12 @@ spec:
               value:
             - name: no_proxy
               value:
+            - name: LANGCHAIN_TRACING_V2
+              value: "false"
+            - name: LANGCHAIN_API_KEY
+              value: insert-your-langchain-key-here
+            - name: LANGCHAIN_PROJECT
+              value: "opea-llm-service"
 
           securityContext:
             {}
@@ -152,7 +174,7 @@ spec:
         exec:
           command:
           - curl
-          - http://llm-tgi:80
+          - http://codetrans-tgi
       initialDelaySeconds: 5
       periodSeconds: 5
       failureThreshold: 120
diff --git a/manifests/CodeTrans/xeon/llm.yaml b/manifests/CodeTrans/xeon/llm.yaml
index 6acb31d99..eab1cd025 100644
--- a/manifests/CodeTrans/xeon/llm.yaml
+++ b/manifests/CodeTrans/xeon/llm.yaml
@@ -6,11 +6,11 @@
 apiVersion: v1
 kind: Service
 metadata:
-  name: llm-tgi
+  name: codetrans-tgi
   labels:
     helm.sh/chart: tgi-0.1.0
     app.kubernetes.io/name: tgi
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.4"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -22,16 +22,20 @@ spec:
     name: tgi
   selector:
     app.kubernetes.io/name: tgi
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
 ---
+# Source: llm-uservice/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: v1
 kind: Service
 metadata:
-  name: llm-llm-uservice
+  name: codetrans-llm-uservice
   labels:
     helm.sh/chart: llm-uservice-0.1.0
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.0.0"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -43,16 +47,20 @@ spec:
     name: llm-uservice
   selector:
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
 ---
+# Source: llm-uservice/charts/tgi/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: llm-tgi
+  name: codetrans-tgi
   labels:
     helm.sh/chart: tgi-0.1.0
     app.kubernetes.io/name: tgi
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.4"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -60,12 +68,12 @@ spec:
   selector:
     matchLabels:
       app.kubernetes.io/name: tgi
-      app.kubernetes.io/instance: llm
+      app.kubernetes.io/instance: codetrans
   template:
     metadata:
       labels:
         app.kubernetes.io/name: tgi
-        app.kubernetes.io/instance: llm
+        app.kubernetes.io/instance: codetrans
     spec:
       securityContext:
         {}
@@ -76,6 +84,10 @@ spec:
             value: HuggingFaceH4/mistral-7b-grok
           - name: PORT
             value: "80"
+          - name: HUGGING_FACE_HUB_TOKEN
+            value: "insert-your-huggingface-token-here"
+          - name: HF_TOKEN
+            value: "insert-your-huggingface-token-here"
           - name: http_proxy
             value:
           - name: https_proxy
             value:
@@ -101,14 +113,18 @@ spec:
           path: /mnt
           type: Directory
 ---
+# Source: llm-uservice/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: llm-llm-uservice
+  name: codetrans-llm-uservice
   labels:
     helm.sh/chart: llm-uservice-0.1.0
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.0.0"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -116,20 +132,20 @@ spec:
   selector:
     matchLabels:
       app.kubernetes.io/name: llm-uservice
-      app.kubernetes.io/instance: llm
+      app.kubernetes.io/instance: codetrans
   template:
     metadata:
       labels:
         app.kubernetes.io/name: llm-uservice
-        app.kubernetes.io/instance: llm
+        app.kubernetes.io/instance: codetrans
     spec:
       securityContext:
         {}
       containers:
-        - name: llm
+        - name: codetrans
           env:
             - name: TGI_LLM_ENDPOINT
-              value: "http://llm-tgi:80"
+              value: "http://codetrans-tgi"
             - name: HUGGINGFACEHUB_API_TOKEN
               value: "insert-your-huggingface-token-here"
             - name: http_proxy
@@ -138,6 +154,12 @@ spec:
               value:
             - name: no_proxy
               value:
+            - name: LANGCHAIN_TRACING_V2
+              value: "false"
+            - name: LANGCHAIN_API_KEY
+              value: insert-your-langchain-key-here
+            - name: LANGCHAIN_PROJECT
+              value: "opea-llm-service"
 
           securityContext:
             {}
@@ -151,7 +173,7 @@ spec:
         exec:
           command:
          - curl
-          - http://llm-tgi:80
+          - http://codetrans-tgi
       initialDelaySeconds: 5
       periodSeconds: 5
       failureThreshold: 120
diff --git a/manifests/DocSum/README.md b/manifests/DocSum/README.md
index a1a026791..e4b04a4f2 100644
--- a/manifests/DocSum/README.md
+++ b/manifests/DocSum/README.md
@@ -1,111 +1,50 @@
-# Deploy DocSum in Kubernetes Cluster
-
-## Prebuilt images
-
-You should have prebuilt images
-
-- TGI: ghcr.io/huggingface/text-generation-inference:1.4
-- TGI-Gaudi: ghcr.io/huggingface/tgi-gaudi:1.2.1
-- Doc_Summary: intel/gen-ai-examples:document-summarize
-- UI: ${docker_registry}/gen-ai-examples/doc-sum-ui:v1.0
-
-> [NOTE]
-> Please refer this OPEA repo https://github.com/opea-project/GenAIExamples/tree/main/DocSum to build UI
-
-## Deploy Services by Yaml files(Option 1)
+# Deploy DocSum llm-uservice in Kubernetes Cluster
 
 > [NOTE]
-> Be sure to set the ${http_proxy} and ${https_proxy} in each yaml file properly.
-> Be sure to modify the image name in web-ui.yaml by your own value
-> Be sure to modify the ${HUGGINGFACEHUB_API_TOKEN} in backend-service.yaml
+> The following values must be set before you can deploy:
+> HUGGINGFACEHUB_API_TOKEN
 
-### 1. Deploy Inference Service
+## Deploy On Xeon
 
 ```
-$ cd ${RepoPath}/manifests/DocSum/manifests
+cd GenAIInfra/manifests/DocSum/xeon
+export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
+sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" llm.yaml
+kubectl apply -f llm.yaml
 ```
-
-#### 1.1 (Option 1) Deploy TGI on Xeon
-
-If you don't have Habana hardware, you could used TGI instead of TGI-Gaudi to serve as inference service.
+## Deploy On Gaudi
 
 ```
-# deloy tgi
-$ kubectl apply -f inference-serving-tgi.yaml
-
-# verify tgi
-$ tgi_svc_ip=`k get svc|grep tgi-deploy|awk '{print $3}'`
-$ curl ${tgi_svc_ip}:8180/generate_stream -X POST -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' -H 'Content-Type: application/json' --noproxy "*"
-# the output should be like:
-data:{"index":1,"token":{"id":89554,"text":" Deep","logprob":-0.9719473,"special":false},"generated_text":null,"details":null}
-
-data:{"index":2,"token":{"id":89950,"text":" Learning","logprob":-0.39028463,"special":false},"generated_text":null,"details":null}
-
-data:{"index":3,"token":{"id":632,"text":" is","logprob":-0.56862223,"special":false},"generated_text":null,"details":null}
-
-data:{"index":4,"token":{"id":267,"text":" a","logprob":-0.7765873,"special":false},"generated_text":null,"details":null}
-
-```
-
-#### 1.2 (Option 2) Deploy TGI-Gaudi on Gaudi
-
-```
-# deloy tgi-gaudi
-$ kubectl apply -f inference-serving-tgi-gaudi.yaml
-
-# verify tgi-gaudi
-$ tgi_svc_ip=`k get svc|grep tgi-deploy|awk '{print $3}'`
-$ curl ${tgi_svc_ip}:8180/generate_stream -X POST -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' -H 'Content-Type: application/json' --noproxy "*"
-# the output should be like:
-data:{"index":1,"token":{"id":89554,"text":" Deep","logprob":-0.9719473,"special":false},"generated_text":null,"details":null}
+cd GenAIInfra/manifests/DocSum/gaudi
+export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
+sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" llm.yaml
+kubectl apply -f llm.yaml
 ```
-data:{"index":2,"token":{"id":89950,"text":" Learning","logprob":-0.39028463,"special":false},"generated_text":null,"details":null}
+## Verify llm Services
-data:{"index":3,"token":{"id":632,"text":" is","logprob":-0.56862223,"special":false},"generated_text":null,"details":null}
+Make sure all the pods are running, and restart the llm-xxxx pod if necessary.
-data:{"index":4,"token":{"id":267,"text":" a","logprob":-0.7765873,"special":false},"generated_text":null,"details":null}
 ```
-
-### 2. Deploy Document Summary Service
-
+kubectl get pods
+curl http://docsum-llm-uservice:9000/v1/chat/docsum \
+  -X POST \
+  -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
+  -H 'Content-Type: application/json'
 ```
-# deloy doc summary backend service
-$ kubectl apply -f backend-service.yaml
-
-# verify doc summary backend service
-$ docsum_svc_ip=`k get svc|grep doc-sum|awk '{print $3}'`
-$ curl ${docsum_svc_ip}:8080/v1/text_summarize -X POST -H 'Content-Type: application/json' -d '{"text":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' --noproxy "*"
-# the output should be like:
-data: {"ops":[{"op":"replace","path":"","value":{"id":"3ec5836a-6715-4289-961e-4a0bcb5f5937","streamed_output":[],"final_output":null,"logs":{},"name":"MapReduceDocumentsChain","type":"chain"}}]}
+## Generate the llm file from helm chart
-data: {"ops":[{"op":"add","path":"/logs/LLMChain","value":{"id":"7c4116cd-00a1-4958-919f-b43ecb3ad515","name":"LLMChain","type":"chain","tags":[],"metadata":{},"start_time":"2024-04-15T08:11:25.573+00:00","streamed_output":[],"streamed_output_str":[],"final_output":null,"end_time":null}}]}
+The llm.yaml file is generated automatically from the llm-uservice helm chart.
-data: {"ops":[{"op":"add","path":"/logs/HuggingFaceEndpoint","value":{"id":"a1032421-ee98-422d-83c5-6f8377640cc3","name":"HuggingFaceEndpoint","type":"llm","tags":[],"metadata":{},"start_time":"2024-04-15T08:11:25.576+00:00","streamed_output":[],"streamed_output_str":[],"final_output":null,"end_time":null}}]}
-
-data: {"ops":[{"op":"add","path":"/logs/HuggingFaceEndpoint/streamed_output_str/-","value":"\n\n"},{"op":"add","path":"/logs/HuggingFaceEndpoint/streamed_output/-","value":"\n\n"}]}
+Here are the exact commands:
-
-data: {"ops":[{"op":"add","path":"/logs/HuggingFaceEndpoint/streamed_output_str/-","value":"The"},{"op":"add","path":"/logs/HuggingFaceEndpoint/streamed_output/-","value":"The"}]}
 ```
-
-### 3. Deploy UI Service
+cd GenAIInfra/manifests/DocSum
+export HF_TOKEN="insert-your-huggingface-token-here"
+export MODELDIR="/mnt"
+helm template docsum ../../helm-charts/common/llm-uservice --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set image.repository="opea/llm-docsum-tgi:latest" --set tgi.volume=${MODELDIR} --set tgi.LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" --values ../../helm-charts/common/llm-uservice/values.yaml > xeon/llm.yaml
+helm template docsum ../../helm-charts/common/llm-uservice --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set image.repository="opea/llm-docsum-tgi:latest" --set tgi.volume=${MODELDIR} --set tgi.LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" --values ../../helm-charts/common/llm-uservice/gaudi-values.yaml > gaudi/llm.yaml
 ```
-# deloy ui service
-$ kubectl apply -f web-ui.yaml
-
-# verify ui service
-$ ui_svc_ip=`k get svc|grep ui-deploy|awk '{print $3}'`
-$ curl ${ui_svc_ip}:5176 --noproxy "*"
-```
-
-### 4. Access the UI
-
-1. Be sure you could access the ui service by nodeport from your local pc - http://${nodeip}:30176
-2. Be sure you could access the doc summary service by nodeport from your local pc - http://${nodeip}:30123
-
-## Deploy Services by helm chart(Option 2)
-
-Under Construction ...
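+
+As a quick sanity check of the regenerated manifests (a sketch, not part of the chart output; it assumes kubectl is installed and only validates the rendered YAML client-side without touching a cluster):
+
+```
+kubectl apply --dry-run=client -f xeon/llm.yaml
+kubectl apply --dry-run=client -f gaudi/llm.yaml
+```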
diff --git a/manifests/DocSum/gaudi/llm.yaml b/manifests/DocSum/gaudi/llm.yaml
new file mode 100644
index 000000000..a9a952940
--- /dev/null
+++ b/manifests/DocSum/gaudi/llm.yaml
@@ -0,0 +1,182 @@
+---
+# Source: llm-uservice/charts/tgi/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: docsum-tgi
+  labels:
+    helm.sh/chart: tgi-0.1.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.4"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 80
+      protocol: TCP
+      name: tgi
+  selector:
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: docsum
+---
+# Source: llm-uservice/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: docsum-llm-uservice
+  labels:
+    helm.sh/chart: llm-uservice-0.1.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 9000
+      targetPort: 9000
+      protocol: TCP
+      name: llm-uservice
+  selector:
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: docsum
+---
+# Source: llm-uservice/charts/tgi/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: docsum-tgi
+  labels:
+    helm.sh/chart: tgi-0.1.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.4"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: tgi
+      app.kubernetes.io/instance: docsum
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: tgi
+        app.kubernetes.io/instance: docsum
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: tgi
+          env:
+            - name: MODEL_ID
+              value: Intel/neural-chat-7b-v3-3
+            - name: PORT
+              value: "80"
+            - name: HUGGING_FACE_HUB_TOKEN
+              value: "insert-your-huggingface-token-here"
+            - name: HF_TOKEN
+              value: "insert-your-huggingface-token-here"
+            - name: http_proxy
+              value:
+            - name: https_proxy
+              value:
+            - name: no_proxy
+              value:
+          securityContext:
+            {}
+          image: "ghcr.io/huggingface/tgi-gaudi:1.2.1"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+          ports:
+            - name: http
+              containerPort: 80
+              protocol: TCP
+          resources:
+            limits:
+              habana.ai/gaudi: 1
+      volumes:
+        - name: model-volume
+          hostPath:
+            path: /mnt
+            type: Directory
+---
+# Source: llm-uservice/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: docsum-llm-uservice
+  labels:
+    helm.sh/chart: llm-uservice-0.1.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: llm-uservice
+      app.kubernetes.io/instance: docsum
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: llm-uservice
+        app.kubernetes.io/instance: docsum
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: docsum
+          env:
+            - name: TGI_LLM_ENDPOINT
+              value: "http://docsum-tgi"
+            - name: HUGGINGFACEHUB_API_TOKEN
+              value: "insert-your-huggingface-token-here"
+            - name: http_proxy
+              value:
+            - name: https_proxy
+              value:
+            - name: no_proxy
+              value:
+            - name: LANGCHAIN_TRACING_V2
+              value: "false"
+            - name: LANGCHAIN_API_KEY
+              value: insert-your-langchain-key-here
+            - name: LANGCHAIN_PROJECT
+              value: "opea-llm-service"
+
+          securityContext:
+            {}
+          image: "opea/llm-docsum-tgi:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: llm-uservice
+              containerPort: 9000
+              protocol: TCP
+          startupProbe:
+            exec:
+              command:
+              - curl
+              - http://docsum-tgi
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            failureThreshold: 120
+          resources:
+            {}
diff --git a/manifests/DocSum/manifests/backend-service.yaml b/manifests/DocSum/manifests/backend-service.yaml
deleted file mode 100644
index 6051be2b9..000000000
--- a/manifests/DocSum/manifests/backend-service.yaml
+++ /dev/null
@@ -1,58 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: doc-sum
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: doc-sum
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: "true"
-      labels:
-        app: doc-sum
-    spec:
-      containers:
-        - env:
-            - name: http_proxy
-              value: ${http_proxy}
-            - name: https_proxy
-              value: ${https_proxy}
-            - name: HUGGINGFACEHUB_API_TOKEN
-              value: ${HUGGINGFACEHUB_API_TOKEN}
-            - name: TGI_ENDPOINT
-              value: http://tgi-deploy.default.svc.cluster.local:8180
-          image: intel/gen-ai-examples:document-summarize
-          name: doc-sum
-          command: ["nohup", "python", "app/server.py", "&"]
-          ports:
-            - containerPort: 8000
-          resources:
-            limits:
-              cpu: 8000m
-              memory: 26Gi
-            requests:
-              cpu: 8000m
-              memory: 26Gi
-      serviceAccountName: default
-
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: doc-sum
-spec:
-  type: NodePort
-  selector:
-    app: doc-sum
-  ports:
-    - name: http-service
-      port: 8080
-      targetPort: 8000
-      nodePort: 30123
diff --git a/manifests/DocSum/manifests/inference-serving-tgi-gaudi.yaml b/manifests/DocSum/manifests/inference-serving-tgi-gaudi.yaml
deleted file mode 100644
index 35123a2a0..000000000
--- a/manifests/DocSum/manifests/inference-serving-tgi-gaudi.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: tgi-gaudi-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: tgi-gaudi-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: "true"
-      labels:
-        app: tgi-gaudi-deploy
-    spec:
-      hostIPC: true
-      containers:
-        - env:
-            - name: http_proxy
-              value: ${http_proxy}
-            - name: https_proxy
-              value: ${https_proxy}
-            - name: runtime
-              value: "habana"
-            - name: HABANA_VISIBLE_DEVICES
-              value: "all"
-            - name: OMPI_MCA_btl_vader_single_copy_mechanism
-              value: "none"
-            - name: model-id
-              value: "HuggingFaceH4/zephyr-7b-beta"
-          image: ghcr.io/huggingface/tgi-gaudi:1.2.1
-          name: tgi-gaudi-deploy
-          volumeMounts:
-            - mountPath: /data
-              name: model-volume
-          ports:
-            - containerPort: 80
-          resources:
-            limits:
-              habana.ai/gaudi: 3
-              memory: 409Gi
-            # limits:
-            #   cpu: 8000m
-            #   memory: 26Gi
-            # requests:
-            #   cpu: 8000m
-            #   memory: 26Gi
-      serviceAccountName: default
-      volumes:
-        - name: model-volume
-          hostPath:
-            # directory location on host
-            path: /home/cloud/yulu/data
-            # this field is optional
-            type: Directory
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: tgi-gaudi-deploy
-  labels:
-    app: tgi-gaudi-deploy
-spec:
-  type: NodePort
-  selector:
-    app: tgi-gaudi-deploy
-  ports:
-    - name: metric-service
-      port: 8080
-      targetPort: 80
-      nodePort: 30031
diff --git a/manifests/DocSum/manifests/inference-serving-tgi.yaml b/manifests/DocSum/manifests/inference-serving-tgi.yaml
deleted file mode 100644
index 90ef0e731..000000000
--- a/manifests/DocSum/manifests/inference-serving-tgi.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: tgi-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: tgi-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: "true"
-      labels:
-        app: tgi-deploy
-    spec:
-      containers:
-        - env:
-            - name: http_proxy
-              value: ${http_proxy}
-            - name: https_proxy
-              value: ${https_proxy}
-            - name: shm-size
-              value: "1g"
-            - name: model-id
-              value: "HuggingFaceH4/zephyr-7b-beta"
-            - name: disable-custom-kernels
-              value: ""
-          image: ghcr.io/huggingface/text-generation-inference:1.4
-          name: tgi-deploy-demo
-          volumeMounts:
-            - mountPath: /data
-              name: model-volume
-          ports:
-            - containerPort: 8180
-          resources:
-            limits:
-              cpu: 56000m
-              memory: 26Gi
-            requests:
-              cpu: 56000m
-              memory: 26Gi
-      serviceAccountName: default
-      volumes:
-        - name: model-volume
-          hostPath:
-            # directory location on host
-            path: /mnt/model
-            # this field is optional
-            type: Directory
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: tgi-deploy
-spec:
-  type: NodePort
-  selector:
-    app: tgi-deploy
-  ports:
-    - name: service
-      port: 8180
-      targetPort: 80
-      nodePort: 30180
diff --git a/manifests/DocSum/manifests/web-ui.yaml b/manifests/DocSum/manifests/web-ui.yaml
deleted file mode 100644
index d3341cb75..000000000
--- a/manifests/DocSum/manifests/web-ui.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: ui-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: ui-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: "true"
-      labels:
-        app: ui-deploy
-    spec:
-      containers:
-        - env:
-            - name: BASIC_URL
-              value: "http://{node_ip}:30123/v1"
-            - name: SVC_PORT
-              value: "5176"
-          image: ${docker_registry}/gen-ai-examples/ui:v1.0
-          name: ui-deploy-demo
-          command: ["npm", "run", "dev", "--", "--port", "5176", "--host"]
-          ports:
-            - containerPort: 5176
-          resources:
-            limits:
-              cpu: 2000m
-              memory: 1Gi
-            requests:
-              cpu: 1000m
-              memory: 1Gi
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: ui-deploy
-spec:
-  type: NodePort
-  selector:
-    app: ui-deploy
-  ports:
-    - name: service
-      port: 5176
-      targetPort: 5176
-      nodePort: 30176
diff --git a/manifests/DocSum/xeon/llm.yaml b/manifests/DocSum/xeon/llm.yaml
new file mode 100644
index 000000000..86a2bad29
--- /dev/null
+++ b/manifests/DocSum/xeon/llm.yaml
@@ -0,0 +1,181 @@
+---
+# Source: llm-uservice/charts/tgi/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: docsum-tgi
+  labels:
+    helm.sh/chart: tgi-0.1.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.4"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 80
+      protocol: TCP
+      name: tgi
+  selector:
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: docsum
+---
+# Source: llm-uservice/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: docsum-llm-uservice
+  labels:
+    helm.sh/chart: llm-uservice-0.1.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 9000
+      targetPort: 9000
+      protocol: TCP
+      name: llm-uservice
+  selector:
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: docsum
+---
+# Source: llm-uservice/charts/tgi/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: docsum-tgi
+  labels:
+    helm.sh/chart: tgi-0.1.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.4"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: tgi
+      app.kubernetes.io/instance: docsum
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: tgi
+        app.kubernetes.io/instance: docsum
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: tgi
+          env:
+            - name: MODEL_ID
+              value: Intel/neural-chat-7b-v3-3
+            - name: PORT
+              value: "80"
+            - name: HUGGING_FACE_HUB_TOKEN
+              value: "insert-your-huggingface-token-here"
+            - name: HF_TOKEN
+              value: "insert-your-huggingface-token-here"
+            - name: http_proxy
+              value:
+            - name: https_proxy
+              value:
+            - name: no_proxy
+              value:
+          securityContext:
+            {}
+          image: "ghcr.io/huggingface/text-generation-inference:1.4"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+          ports:
+            - name: http
+              containerPort: 80
+              protocol: TCP
+          resources:
+            {}
+      volumes:
+        - name: model-volume
+          hostPath:
+            path: /mnt
+            type: Directory
+---
+# Source: llm-uservice/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: docsum-llm-uservice
+  labels:
+    helm.sh/chart: llm-uservice-0.1.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: llm-uservice
+      app.kubernetes.io/instance: docsum
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: llm-uservice
+        app.kubernetes.io/instance: docsum
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: docsum
+          env:
+            - name: TGI_LLM_ENDPOINT
+              value: "http://docsum-tgi"
+            - name: HUGGINGFACEHUB_API_TOKEN
+              value: "insert-your-huggingface-token-here"
+            - name: http_proxy
+              value:
+            - name: https_proxy
+              value:
+            - name: no_proxy
+              value:
+            - name: LANGCHAIN_TRACING_V2
+              value: "false"
+            - name: LANGCHAIN_API_KEY
+              value: insert-your-langchain-key-here
+            - name: LANGCHAIN_PROJECT
+              value: "opea-llm-service"
+
+          securityContext:
+            {}
+          image: "opea/llm-docsum-tgi:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: llm-uservice
+              containerPort: 9000
+              protocol: TCP
+          startupProbe:
+            exec:
+              command:
+              - curl
+              - http://docsum-tgi
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            failureThreshold: 120
+          resources:
+            {}
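
The new DocSum entries in manifest_test.sh are meant to be driven one step at a time, mirroring the existing CodeTrans/CodeGen flow. A minimal local sketch of exercising them (assumptions, not part of this patch: a working kubeconfig with routable access to cluster ClusterIPs, a HuggingFace token at /home/$USER_ID/.cache/huggingface/token, and the repo root as the working directory; the namespace name here is arbitrary):

```
export LOG_PATH=/tmp/docsum-e2e          # validate_docsum writes curl output here
mkdir -p $LOG_PATH
kubectl create namespace docsum-e2e
./.github/workflows/scripts/e2e/manifest_test.sh init_docsum
./.github/workflows/scripts/e2e/manifest_test.sh install_docsum docsum-e2e
./.github/workflows/scripts/e2e/manifest_test.sh validate_docsum docsum-e2e
```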