From 9ab8de026af52f854e758e2486c2f05ff2d1de3d Mon Sep 17 00:00:00 2001
From: dolpher
Date: Thu, 20 Jun 2024 17:00:44 +0800
Subject: [PATCH] Add DocSum llm service manifests (#111)

DocSum manifest support.
Regenerate the CodeTrans yaml file.

Signed-off-by: Dolpher Du
---
 .github/workflows/scripts/e2e/manifest_test.sh               |  62 +++++-
 helm-charts/common/llm-uservice/values.yaml                  |   1 -
 manifests/CodeTrans/README.md                                |  23 ++-
 manifests/CodeTrans/gaudi/llm.yaml                           |  56 ++++--
 manifests/CodeTrans/xeon/llm.yaml                            |  56 ++++--
 manifests/DocSum/README.md                                   | 117 +++--------
 manifests/DocSum/gaudi/llm.yaml                              | 182 ++++++++++++++++++
 .../DocSum/manifests/backend-service.yaml                    |  58 ------
 .../inference-serving-tgi-gaudi.yaml                         |  77 --------
 .../manifests/inference-serving-tgi.yaml                     |  69 -------
 manifests/DocSum/manifests/web-ui.yaml                       |  54 ------
 manifests/DocSum/xeon/llm.yaml                               | 181 +++++++++++++++++
 12 files changed, 548 insertions(+), 388 deletions(-)
 create mode 100644 manifests/DocSum/gaudi/llm.yaml
 delete mode 100644 manifests/DocSum/manifests/backend-service.yaml
 delete mode 100644 manifests/DocSum/manifests/inference-serving-tgi-gaudi.yaml
 delete mode 100644 manifests/DocSum/manifests/inference-serving-tgi.yaml
 delete mode 100644 manifests/DocSum/manifests/web-ui.yaml
 create mode 100644 manifests/DocSum/xeon/llm.yaml

diff --git a/.github/workflows/scripts/e2e/manifest_test.sh b/.github/workflows/scripts/e2e/manifest_test.sh
index 715acd5f9..e510f9f2b 100755
--- a/.github/workflows/scripts/e2e/manifest_test.sh
+++ b/.github/workflows/scripts/e2e/manifest_test.sh
@@ -9,6 +9,16 @@ MOUNT_DIR=/home/$USER_ID/charts-mnt
 # IMAGE_REPO is $OPEA_IMAGE_REPO, or else ""
 IMAGE_REPO=${OPEA_IMAGE_REPO:-""}
 
+function init_docsum() {
+    # executed under path manifests/DocSum/xeon
+    # replace the mount dir "path: /mnt" with "path: $MOUNT_DIR"
+    find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt#path: $MOUNT_DIR#g" {} \;
+    # replace the repository "image: opea/*" with "image: ${IMAGE_REPO}opea/"
+    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}opea/#g" {} \;
+    # set huggingface token
+    find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
+}
+
 function init_codetrans() {
     # executed under path manifest/codetrans/xeon
     # replace the mount dir "path: /mnt/model" with "path: $CHART_MOUNT"
@@ -29,6 +39,11 @@ function init_codegen() {
     find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
 }
 
+function install_docsum {
+    echo "namespace is $NAMESPACE"
+    kubectl apply -f . -n $NAMESPACE
+}
+
 function install_codetrans {
     echo "namespace is $NAMESPACE"
     kubectl apply -f . -n $NAMESPACE
@@ -61,6 +76,35 @@ function install_chatqna {
     kubectl apply -f chaqna-xeon-backend-server.yaml -n $NAMESPACE
 }
 
+function validate_docsum() {
+    ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
+    port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
+    echo "try to curl http://${ip_address}:${port}/v1/chat/docsum..."
+    # Curl the DocSum LLM Service
+    curl http://${ip_address}:${port}/v1/chat/docsum \
+        -X POST \
+        -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
+        -H 'Content-Type: application/json' > $LOG_PATH/curl_docsum.log
+    exit_code=$?
+    if [ $exit_code -ne 0 ]; then
+        echo "LLM for docsum failed, please check the logs in ${LOG_PATH}!"
+        exit 1
+    fi
+
+    echo "Checking response results, make sure the output is reasonable."
+    local status=false
+    if [[ -f $LOG_PATH/curl_docsum.log ]] && \
+       [[ $(grep -c "TEI" $LOG_PATH/curl_docsum.log) != 0 ]]; then
+        status=true
+    fi
+
+    if [ $status == false ]; then
+        echo "Response check failed, please check the logs in artifacts!"
+    else
+        echo "Response check succeeded!"
+    fi
+}
+
 function validate_codetrans() {
     ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
     port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
@@ -158,6 +202,11 @@ if [ $# -eq 0 ]; then
 fi
 
 case "$1" in
+    init_docsum)
+        pushd manifests/DocSum/xeon
+        init_docsum
+        popd
+        ;;
     init_codetrans)
        pushd manifests/CodeTrans/xeon
        init_codetrans
        popd
        ;;
@@ -173,6 +222,12 @@ case "$1" in
         init_chatqna
         popd
         ;;
+    install_docsum)
+        pushd manifests/DocSum/xeon
+        NAMESPACE=$2
+        install_docsum
+        popd
+        ;;
     install_codetrans)
         pushd manifests/CodeTrans/xeon
         NAMESPACE=$2
         install_codetrans
         popd
         ;;
@@ -191,9 +246,14 @@ case "$1" in
         install_chatqna
         popd
         ;;
+    validate_docsum)
+        NAMESPACE=$2
+        SERVICE_NAME=docsum-llm-uservice
+        validate_docsum
+        ;;
     validate_codetrans)
         NAMESPACE=$2
-        SERVICE_NAME=llm-llm-uservice
+        SERVICE_NAME=codetrans-llm-uservice
         validate_codetrans
         ;;
     validate_codegen)
diff --git a/helm-charts/common/llm-uservice/values.yaml b/helm-charts/common/llm-uservice/values.yaml
index 8bfb02412..884a95965 100644
--- a/helm-charts/common/llm-uservice/values.yaml
+++ b/helm-charts/common/llm-uservice/values.yaml
@@ -58,7 +58,6 @@ affinity: {}
 tgi:
   LLM_MODEL_ID: m-a-p/OpenCodeInterpreter-DS-6.7B
   # LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
-  port: 80
 
   image:
     repository: ghcr.io/huggingface/text-generation-inference
diff --git a/manifests/CodeTrans/README.md b/manifests/CodeTrans/README.md
index f85230b5e..935ad18b8 100644
--- a/manifests/CodeTrans/README.md
+++ b/manifests/CodeTrans/README.md
@@ -3,13 +3,11 @@
 > [NOTE]
 > The following values must be set before you can deploy:
 > HUGGINGFACEHUB_API_TOKEN
-> You can also customize the "MODEL_ID" and "model-volume"
-> The manifest llm.yaml is generated from helm chart.
 
 ## Deploy On Xeon
 
 ```
-cd GenAIExamples/CodeTrans/kubernetes/manifests/xeon
+cd GenAIInfra/manifests/CodeTrans/xeon
 export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
 sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" llm.yaml
 kubectl apply -f llm.yaml
@@ -18,7 +16,7 @@ kubectl apply -f llm.yaml
 ## Deploy On Gaudi
 
 ```
-cd GenAIExamples/CodeTrans/kubernetes/manifests/gaudi
+cd GenAIInfra/manifests/CodeTrans/gaudi
 export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
 sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" llm.yaml
 kubectl apply -f llm.yaml
@@ -30,8 +28,23 @@ Make sure all the pods are running, and restart the llm-xxxx pod if necessary.
 
 ```
 kubectl get pods
-curl http://llm-llm-uservice:9000/v1/chat/completions\
+curl http://codetrans-llm-uservice:9000/v1/chat/completions \
   -X POST \
   -d '{"query":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:"}' \
   -H 'Content-Type: application/json'
 ```
+
+## Generate the llm file from helm chart
+
+The llm.yaml file is generated automatically from the llm-uservice helm chart.
+
+Here are the exact commands:
+
+```
+cd GenAIInfra/manifests/CodeTrans
+export HF_TOKEN="insert-your-huggingface-token-here"
+export MODELDIR="/mnt"
+helm template codetrans ../../helm-charts/common/llm-uservice --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set image.repository="opea/llm-tgi:latest" --set tgi.volume=${MODELDIR} --set tgi.LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" --values ../../helm-charts/common/llm-uservice/values.yaml > xeon/llm.yaml
+helm template codetrans ../../helm-charts/common/llm-uservice --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set image.repository="opea/llm-tgi:latest" --set tgi.volume=${MODELDIR} --set tgi.LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" --values ../../helm-charts/common/llm-uservice/gaudi-values.yaml > gaudi/llm.yaml
+
+```
diff --git a/manifests/CodeTrans/gaudi/llm.yaml b/manifests/CodeTrans/gaudi/llm.yaml
index be54bfe0b..52aaa862e 100644
--- a/manifests/CodeTrans/gaudi/llm.yaml
+++ b/manifests/CodeTrans/gaudi/llm.yaml
@@ -6,11 +6,11 @@
 apiVersion: v1
 kind: Service
 metadata:
-  name: llm-tgi
+  name: codetrans-tgi
   labels:
     helm.sh/chart: tgi-0.1.0
     app.kubernetes.io/name: tgi
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.4"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -22,16 +22,20 @@ spec:
     name: tgi
   selector:
     app.kubernetes.io/name: tgi
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
 ---
+# Source: llm-uservice/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: v1
 kind: Service
 metadata:
-  name: llm-llm-uservice
+  name: codetrans-llm-uservice
   labels:
     helm.sh/chart: llm-uservice-0.1.0
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.0.0"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -43,16 +47,20 @@ spec:
     name: llm-uservice
   selector:
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
 ---
+# Source: llm-uservice/charts/tgi/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: llm-tgi
+  name: codetrans-tgi
   labels:
     helm.sh/chart: tgi-0.1.0
     app.kubernetes.io/name: tgi
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.4"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -60,12 +68,12 @@ spec:
   selector:
     matchLabels:
       app.kubernetes.io/name: tgi
-      app.kubernetes.io/instance: llm
+      app.kubernetes.io/instance: codetrans
   template:
     metadata:
       labels:
         app.kubernetes.io/name: tgi
-        app.kubernetes.io/instance: llm
+        app.kubernetes.io/instance: codetrans
     spec:
       securityContext:
         {}
@@ -76,6 +84,10 @@ spec:
             value: HuggingFaceH4/mistral-7b-grok
           - name: PORT
             value: "80"
+          - name: HUGGING_FACE_HUB_TOKEN
+            value: "insert-your-huggingface-token-here"
+          - name: HF_TOKEN
+            value: "insert-your-huggingface-token-here"
           - name: http_proxy
             value:
           - name: https_proxy
            value:
@@ -102,14 +114,18 @@ spec:
           path: /mnt
           type: Directory
 ---
+# Source: llm-uservice/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: llm-llm-uservice
+  name: codetrans-llm-uservice
   labels:
     helm.sh/chart: llm-uservice-0.1.0
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.0.0"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -117,20 +133,20 @@ spec:
   selector:
     matchLabels:
       app.kubernetes.io/name: llm-uservice
-      app.kubernetes.io/instance: llm
+      app.kubernetes.io/instance: codetrans
   template:
     metadata:
       labels:
         app.kubernetes.io/name: llm-uservice
-        app.kubernetes.io/instance: llm
+        app.kubernetes.io/instance: codetrans
     spec:
       securityContext:
         {}
       containers:
-        - name: llm
+        - name: codetrans
           env:
             - name: TGI_LLM_ENDPOINT
-              value: "http://llm-tgi:80"
+              value: "http://codetrans-tgi"
             - name: HUGGINGFACEHUB_API_TOKEN
               value: "insert-your-huggingface-token-here"
             - name: http_proxy
@@ -139,6 +155,12 @@ spec:
               value:
             - name: no_proxy
               value:
+            - name: LANGCHAIN_TRACING_V2
+              value: "false"
+            - name: LANGCHAIN_API_KEY
+              value: insert-your-langchain-key-here
+            - name: LANGCHAIN_PROJECT
+              value: "opea-llm-service"
 
           securityContext:
             {}
@@ -152,7 +174,7 @@ spec:
         exec:
           command:
           - curl
-          - http://llm-tgi:80
+          - http://codetrans-tgi
       initialDelaySeconds: 5
       periodSeconds: 5
       failureThreshold: 120
diff --git a/manifests/CodeTrans/xeon/llm.yaml b/manifests/CodeTrans/xeon/llm.yaml
index 6acb31d99..eab1cd025 100644
--- a/manifests/CodeTrans/xeon/llm.yaml
+++ b/manifests/CodeTrans/xeon/llm.yaml
@@ -6,11 +6,11 @@
 apiVersion: v1
 kind: Service
 metadata:
-  name: llm-tgi
+  name: codetrans-tgi
   labels:
     helm.sh/chart: tgi-0.1.0
     app.kubernetes.io/name: tgi
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.4"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -22,16 +22,20 @@ spec:
     name: tgi
   selector:
     app.kubernetes.io/name: tgi
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
 ---
+# Source: llm-uservice/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: v1
 kind: Service
 metadata:
-  name: llm-llm-uservice
+  name: codetrans-llm-uservice
   labels:
     helm.sh/chart: llm-uservice-0.1.0
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.0.0"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -43,16 +47,20 @@ spec:
     name: llm-uservice
   selector:
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
 ---
+# Source: llm-uservice/charts/tgi/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: llm-tgi
+  name: codetrans-tgi
   labels:
     helm.sh/chart: tgi-0.1.0
     app.kubernetes.io/name: tgi
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.4"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -60,12 +68,12 @@ spec:
   selector:
     matchLabels:
       app.kubernetes.io/name: tgi
-      app.kubernetes.io/instance: llm
+      app.kubernetes.io/instance: codetrans
   template:
     metadata:
       labels:
         app.kubernetes.io/name: tgi
-        app.kubernetes.io/instance: llm
+        app.kubernetes.io/instance: codetrans
     spec:
       securityContext:
         {}
@@ -76,6 +84,10 @@ spec:
             value: HuggingFaceH4/mistral-7b-grok
           - name: PORT
             value: "80"
+          - name: HUGGING_FACE_HUB_TOKEN
+            value: "insert-your-huggingface-token-here"
+          - name: HF_TOKEN
+            value: "insert-your-huggingface-token-here"
           - name: http_proxy
             value:
           - name: https_proxy
             value:
@@ -101,14 +113,18 @@ spec:
           path: /mnt
           type: Directory
 ---
+# Source: llm-uservice/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: llm-llm-uservice
+  name: codetrans-llm-uservice
   labels:
     helm.sh/chart: llm-uservice-0.1.0
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: llm
+    app.kubernetes.io/instance: codetrans
     app.kubernetes.io/version: "1.0.0"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -116,20 +132,20 @@ spec:
   selector:
     matchLabels:
       app.kubernetes.io/name: llm-uservice
-      app.kubernetes.io/instance: llm
+      app.kubernetes.io/instance: codetrans
   template:
     metadata:
       labels:
         app.kubernetes.io/name: llm-uservice
-        app.kubernetes.io/instance: llm
+        app.kubernetes.io/instance: codetrans
     spec:
       securityContext:
         {}
       containers:
-        - name: llm
+        - name: codetrans
           env:
             - name: TGI_LLM_ENDPOINT
-              value: "http://llm-tgi:80"
+              value: "http://codetrans-tgi"
             - name: HUGGINGFACEHUB_API_TOKEN
               value: "insert-your-huggingface-token-here"
             - name: http_proxy
@@ -138,6 +154,12 @@ spec:
               value:
             - name: no_proxy
               value:
+            - name: LANGCHAIN_TRACING_V2
+              value: "false"
+            - name: LANGCHAIN_API_KEY
+              value: insert-your-langchain-key-here
+            - name: LANGCHAIN_PROJECT
+              value: "opea-llm-service"
 
           securityContext:
             {}
@@ -151,7 +173,7 @@ spec:
         exec:
           command:
          - curl
-          - http://llm-tgi:80
+          - http://codetrans-tgi
       initialDelaySeconds: 5
       periodSeconds: 5
       failureThreshold: 120
diff --git a/manifests/DocSum/README.md b/manifests/DocSum/README.md
index a1a026791..e4b04a4f2 100644
--- a/manifests/DocSum/README.md
+++ b/manifests/DocSum/README.md
@@ -1,111 +1,50 @@
-# Deploy DocSum in Kubernetes Cluster
-
-## Prebuilt images
-
-You should have prebuilt images
-
-- TGI: ghcr.io/huggingface/text-generation-inference:1.4
-- TGI-Gaudi: ghcr.io/huggingface/tgi-gaudi:1.2.1
-- Doc_Summary: intel/gen-ai-examples:document-summarize
-- UI: ${docker_registry}/gen-ai-examples/doc-sum-ui:v1.0
-
-> [NOTE]
-> Please refer this OPEA repo https://github.com/opea-project/GenAIExamples/tree/main/DocSum to build UI
-
-## Deploy Services by Yaml files(Option 1)
+# Deploy DocSum llm-uservice in Kubernetes Cluster
 
 > [NOTE]
-> Be sure to set the ${http_proxy} and ${https_proxy} in each yaml file properly.
-> Be sure to modify the image name in web-ui.yaml by your own value
-> Be sure to modify the ${HUGGINGFACEHUB_API_TOKEN} in backend-service.yaml
+> The following values must be set before you can deploy:
+> HUGGINGFACEHUB_API_TOKEN
 
-### 1. Deploy Inference Service
+## Deploy On Xeon
 
 ```
-$ cd ${RepoPath}/manifests/DocSum/manifests
+cd GenAIInfra/manifests/DocSum/xeon
+export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
+sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" llm.yaml
+kubectl apply -f llm.yaml
 ```
-
-#### 1.1 (Option 1) Deploy TGI on Xeon
-
-If you don't have Habana hardware, you could used TGI instead of TGI-Gaudi to serve as inference service.
+## Deploy On Gaudi
 
 ```
-# deloy tgi
-$ kubectl apply -f inference-serving-tgi.yaml
-
-# verify tgi
-$ tgi_svc_ip=`k get svc|grep tgi-deploy|awk '{print $3}'`
-$ curl ${tgi_svc_ip}:8180/generate_stream -X POST -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' -H 'Content-Type: application/json' --noproxy "*"
-# the output should be like:
-data:{"index":1,"token":{"id":89554,"text":" Deep","logprob":-0.9719473,"special":false},"generated_text":null,"details":null}
-
-data:{"index":2,"token":{"id":89950,"text":" Learning","logprob":-0.39028463,"special":false},"generated_text":null,"details":null}
-
-data:{"index":3,"token":{"id":632,"text":" is","logprob":-0.56862223,"special":false},"generated_text":null,"details":null}
-
-data:{"index":4,"token":{"id":267,"text":" a","logprob":-0.7765873,"special":false},"generated_text":null,"details":null}
-
-```
-
-#### 1.2 (Option 2) Deploy TGI-Gaudi on Gaudi
-
-```
-# deloy tgi-gaudi
-$ kubectl apply -f inference-serving-tgi-gaudi.yaml
-
-# verify tgi-gaudi
-$ tgi_svc_ip=`k get svc|grep tgi-deploy|awk '{print $3}'`
-$ curl ${tgi_svc_ip}:8180/generate_stream -X POST -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' -H 'Content-Type: application/json' --noproxy "*"
-# the output should be like:
-data:{"index":1,"token":{"id":89554,"text":" Deep","logprob":-0.9719473,"special":false},"generated_text":null,"details":null}
+cd GenAIInfra/manifests/DocSum/gaudi
+export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
+sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" llm.yaml
+kubectl apply -f llm.yaml
 ```
-data:{"index":2,"token":{"id":89950,"text":" Learning","logprob":-0.39028463,"special":false},"generated_text":null,"details":null}
+## Verify llm Services
-data:{"index":3,"token":{"id":632,"text":" is","logprob":-0.56862223,"special":false},"generated_text":null,"details":null}
+Make sure all the pods are running, and restart the llm-xxxx pod if necessary.
-data:{"index":4,"token":{"id":267,"text":" a","logprob":-0.7765873,"special":false},"generated_text":null,"details":null}
 ```
-
-### 2. Deploy Document Summary Service
-
+kubectl get pods
+curl http://docsum-llm-uservice:9000/v1/chat/docsum \
+  -X POST \
+  -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
+  -H 'Content-Type: application/json'
 ```
-# deloy doc summary backend service
-$ kubectl apply -f backend-service.yaml
-
-# verify doc summary backend service
-$ docsum_svc_ip=`k get svc|grep doc-sum|awk '{print $3}'`
-$ curl ${docsum_svc_ip}:8080/v1/text_summarize -X POST -H 'Content-Type: application/json' -d '{"text":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' --noproxy "*"
-# the output should be like:
-data: {"ops":[{"op":"replace","path":"","value":{"id":"3ec5836a-6715-4289-961e-4a0bcb5f5937","streamed_output":[],"final_output":null,"logs":{},"name":"MapReduceDocumentsChain","type":"chain"}}]}
+## Generate the llm file from helm chart
-data: {"ops":[{"op":"add","path":"/logs/LLMChain","value":{"id":"7c4116cd-00a1-4958-919f-b43ecb3ad515","name":"LLMChain","type":"chain","tags":[],"metadata":{},"start_time":"2024-04-15T08:11:25.573+00:00","streamed_output":[],"streamed_output_str":[],"final_output":null,"end_time":null}}]}
+The llm.yaml file is generated automatically from the llm-uservice helm chart.
-data: {"ops":[{"op":"add","path":"/logs/HuggingFaceEndpoint","value":{"id":"a1032421-ee98-422d-83c5-6f8377640cc3","name":"HuggingFaceEndpoint","type":"llm","tags":[],"metadata":{},"start_time":"2024-04-15T08:11:25.576+00:00","streamed_output":[],"streamed_output_str":[],"final_output":null,"end_time":null}}]}
-
-data: {"ops":[{"op":"add","path":"/logs/HuggingFaceEndpoint/streamed_output_str/-","value":"\n\n"},{"op":"add","path":"/logs/HuggingFaceEndpoint/streamed_output/-","value":"\n\n"}]}
+Here are the exact commands:
-
-data: {"ops":[{"op":"add","path":"/logs/HuggingFaceEndpoint/streamed_output_str/-","value":"The"},{"op":"add","path":"/logs/HuggingFaceEndpoint/streamed_output/-","value":"The"}]}
 ```
-
-### 3. Deploy UI Service
+cd GenAIInfra/manifests/DocSum
+export HF_TOKEN="insert-your-huggingface-token-here"
+export MODELDIR="/mnt"
+helm template docsum ../../helm-charts/common/llm-uservice --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set image.repository="opea/llm-docsum-tgi:latest" --set tgi.volume=${MODELDIR} --set tgi.LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" --values ../../helm-charts/common/llm-uservice/values.yaml > xeon/llm.yaml
+helm template docsum ../../helm-charts/common/llm-uservice --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set image.repository="opea/llm-docsum-tgi:latest" --set tgi.volume=${MODELDIR} --set tgi.LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" --values ../../helm-charts/common/llm-uservice/gaudi-values.yaml > gaudi/llm.yaml
 ```
-# deloy ui service
-$ kubectl apply -f web-ui.yaml
-
-# verify ui service
-$ ui_svc_ip=`k get svc|grep ui-deploy|awk '{print $3}'`
-$ curl ${ui_svc_ip}:5176 --noproxy "*"
-```
-
-### 4. Access the UI
-
-1. Be sure you could access the ui service by nodeport from your local pc - http://${nodeip}:30176
-2. Be sure you could access the doc summary service by nodeport from your local pc - http://${nodeip}:30123
-
-## Deploy Services by helm chart(Option 2)
-
-Under Construction ...
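+
+As a quick sanity check of the regenerated manifests (a sketch, not part of the chart output; it assumes kubectl is installed and only validates the rendered YAML client-side without touching a cluster):
+
+```
+kubectl apply --dry-run=client -f xeon/llm.yaml
+kubectl apply --dry-run=client -f gaudi/llm.yaml
+```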
diff --git a/manifests/DocSum/gaudi/llm.yaml b/manifests/DocSum/gaudi/llm.yaml
new file mode 100644
index 000000000..a9a952940
--- /dev/null
+++ b/manifests/DocSum/gaudi/llm.yaml
@@ -0,0 +1,182 @@
+---
+# Source: llm-uservice/charts/tgi/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: docsum-tgi
+  labels:
+    helm.sh/chart: tgi-0.1.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.4"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 80
+      protocol: TCP
+      name: tgi
+  selector:
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: docsum
+---
+# Source: llm-uservice/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: docsum-llm-uservice
+  labels:
+    helm.sh/chart: llm-uservice-0.1.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 9000
+      targetPort: 9000
+      protocol: TCP
+      name: llm-uservice
+  selector:
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: docsum
+---
+# Source: llm-uservice/charts/tgi/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: docsum-tgi
+  labels:
+    helm.sh/chart: tgi-0.1.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.4"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: tgi
+      app.kubernetes.io/instance: docsum
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: tgi
+        app.kubernetes.io/instance: docsum
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: tgi
+          env:
+            - name: MODEL_ID
+              value: Intel/neural-chat-7b-v3-3
+            - name: PORT
+              value: "80"
+            - name: HUGGING_FACE_HUB_TOKEN
+              value: "insert-your-huggingface-token-here"
+            - name: HF_TOKEN
+              value: "insert-your-huggingface-token-here"
+            - name: http_proxy
+              value:
+            - name: https_proxy
+              value:
+            - name: no_proxy
+              value:
+          securityContext:
+            {}
+          image: "ghcr.io/huggingface/tgi-gaudi:1.2.1"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+          ports:
+            - name: http
+              containerPort: 80
+              protocol: TCP
+          resources:
+            limits:
+              habana.ai/gaudi: 1
+      volumes:
+        - name: model-volume
+          hostPath:
+            path: /mnt
+            type: Directory
+---
+# Source: llm-uservice/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: docsum-llm-uservice
+  labels:
+    helm.sh/chart: llm-uservice-0.1.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: llm-uservice
+      app.kubernetes.io/instance: docsum
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: llm-uservice
+        app.kubernetes.io/instance: docsum
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: docsum
+          env:
+            - name: TGI_LLM_ENDPOINT
+              value: "http://docsum-tgi"
+            - name: HUGGINGFACEHUB_API_TOKEN
+              value: "insert-your-huggingface-token-here"
+            - name: http_proxy
+              value:
+            - name: https_proxy
+              value:
+            - name: no_proxy
+              value:
+            - name: LANGCHAIN_TRACING_V2
+              value: "false"
+            - name: LANGCHAIN_API_KEY
+              value: insert-your-langchain-key-here
+            - name: LANGCHAIN_PROJECT
+              value: "opea-llm-service"
+
+          securityContext:
+            {}
+          image: "opea/llm-docsum-tgi:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: llm-uservice
+              containerPort: 9000
+              protocol: TCP
+          startupProbe:
+            exec:
+              command:
+              - curl
+              - http://docsum-tgi
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            failureThreshold: 120
+          resources:
+            {}
diff --git a/manifests/DocSum/manifests/backend-service.yaml b/manifests/DocSum/manifests/backend-service.yaml
deleted file mode 100644
index 6051be2b9..000000000
--- a/manifests/DocSum/manifests/backend-service.yaml
+++ /dev/null
@@ -1,58 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: doc-sum
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: doc-sum
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: "true"
-      labels:
-        app: doc-sum
-    spec:
-      containers:
-        - env:
-            - name: http_proxy
-              value: ${http_proxy}
-            - name: https_proxy
-              value: ${https_proxy}
-            - name: HUGGINGFACEHUB_API_TOKEN
-              value: ${HUGGINGFACEHUB_API_TOKEN}
-            - name: TGI_ENDPOINT
-              value: http://tgi-deploy.default.svc.cluster.local:8180
-          image: intel/gen-ai-examples:document-summarize
-          name: doc-sum
-          command: ["nohup", "python", "app/server.py", "&"]
-          ports:
-            - containerPort: 8000
-          resources:
-            limits:
-              cpu: 8000m
-              memory: 26Gi
-            requests:
-              cpu: 8000m
-              memory: 26Gi
-      serviceAccountName: default
-
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: doc-sum
-spec:
-  type: NodePort
-  selector:
-    app: doc-sum
-  ports:
-    - name: http-service
-      port: 8080
-      targetPort: 8000
-      nodePort: 30123
diff --git a/manifests/DocSum/manifests/inference-serving-tgi-gaudi.yaml b/manifests/DocSum/manifests/inference-serving-tgi-gaudi.yaml
deleted file mode 100644
index 35123a2a0..000000000
--- a/manifests/DocSum/manifests/inference-serving-tgi-gaudi.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: tgi-gaudi-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: tgi-gaudi-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: "true"
-      labels:
-        app: tgi-gaudi-deploy
-    spec:
-      hostIPC: true
-      containers:
-        - env:
-            - name: http_proxy
-              value: ${http_proxy}
-            - name: https_proxy
-              value: ${https_proxy}
-            - name: runtime
-              value: "habana"
-            - name: HABANA_VISIBLE_DEVICES
-              value: "all"
-            - name: OMPI_MCA_btl_vader_single_copy_mechanism
-              value: "none"
-            - name: model-id
-              value: "HuggingFaceH4/zephyr-7b-beta"
-          image: ghcr.io/huggingface/tgi-gaudi:1.2.1
-          name: tgi-gaudi-deploy
-          volumeMounts:
-            - mountPath: /data
-              name: model-volume
-          ports:
-            - containerPort: 80
-          resources:
-            limits:
-              habana.ai/gaudi: 3
-              memory: 409Gi
-            # limits:
-            #   cpu: 8000m
-            #   memory: 26Gi
-            # requests:
-            #   cpu: 8000m
-            #   memory: 26Gi
-      serviceAccountName: default
-      volumes:
-        - name: model-volume
-          hostPath:
-            # directory location on host
-            path: /home/cloud/yulu/data
-            # this field is optional
-            type: Directory
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: tgi-gaudi-deploy
-  labels:
-    app: tgi-gaudi-deploy
-spec:
-  type: NodePort
-  selector:
-    app: tgi-gaudi-deploy
-  ports:
-    - name: metric-service
-      port: 8080
-      targetPort: 80
-      nodePort: 30031
diff --git a/manifests/DocSum/manifests/inference-serving-tgi.yaml b/manifests/DocSum/manifests/inference-serving-tgi.yaml
deleted file mode 100644
index 90ef0e731..000000000
--- a/manifests/DocSum/manifests/inference-serving-tgi.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: tgi-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: tgi-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: "true"
-      labels:
-        app: tgi-deploy
-    spec:
-      containers:
-        - env:
-            - name: http_proxy
-              value: ${http_proxy}
-            - name: https_proxy
-              value: ${https_proxy}
-            - name: shm-size
-              value: "1g"
-            - name: model-id
-              value: "HuggingFaceH4/zephyr-7b-beta"
-            - name: disable-custom-kernels
-              value: ""
-          image: ghcr.io/huggingface/text-generation-inference:1.4
-          name: tgi-deploy-demo
-          volumeMounts:
-            - mountPath: /data
-              name: model-volume
-          ports:
-            - containerPort: 8180
-          resources:
-            limits:
-              cpu: 56000m
-              memory: 26Gi
-            requests:
-              cpu: 56000m
-              memory: 26Gi
-      serviceAccountName: default
-      volumes:
-        - name: model-volume
-          hostPath:
-            # directory location on host
-            path: /mnt/model
-            # this field is optional
-            type: Directory
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: tgi-deploy
-spec:
-  type: NodePort
-  selector:
-    app: tgi-deploy
-  ports:
-    - name: service
-      port: 8180
-      targetPort: 80
-      nodePort: 30180
diff --git a/manifests/DocSum/manifests/web-ui.yaml b/manifests/DocSum/manifests/web-ui.yaml
deleted file mode 100644
index d3341cb75..000000000
--- a/manifests/DocSum/manifests/web-ui.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: ui-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: ui-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: "true"
-      labels:
-        app: ui-deploy
-    spec:
-      containers:
-        - env:
-            - name: BASIC_URL
-              value: "http://{node_ip}:30123/v1"
-            - name: SVC_PORT
-              value: "5176"
-          image: ${docker_registry}/gen-ai-examples/ui:v1.0
-          name: ui-deploy-demo
-          command: ["npm", "run", "dev", "--", "--port", "5176", "--host"]
-          ports:
-            - containerPort: 5176
-          resources:
-            limits:
-              cpu: 2000m
-              memory: 1Gi
-            requests:
-              cpu: 1000m
-              memory: 1Gi
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: ui-deploy
-spec:
-  type: NodePort
-  selector:
-    app: ui-deploy
-  ports:
-    - name: service
-      port: 5176
-      targetPort: 5176
-      nodePort: 30176
diff --git a/manifests/DocSum/xeon/llm.yaml b/manifests/DocSum/xeon/llm.yaml
new file mode 100644
index 000000000..86a2bad29
--- /dev/null
+++ b/manifests/DocSum/xeon/llm.yaml
@@ -0,0 +1,181 @@
+---
+# Source: llm-uservice/charts/tgi/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: docsum-tgi
+  labels:
+    helm.sh/chart: tgi-0.1.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.4"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 80
+      protocol: TCP
+      name: tgi
+  selector:
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: docsum
+---
+# Source: llm-uservice/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: docsum-llm-uservice
+  labels:
+    helm.sh/chart: llm-uservice-0.1.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 9000
+      targetPort: 9000
+      protocol: TCP
+      name: llm-uservice
+  selector:
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: docsum
+---
+# Source: llm-uservice/charts/tgi/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: docsum-tgi
+  labels:
+    helm.sh/chart: tgi-0.1.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.4"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: tgi
+      app.kubernetes.io/instance: docsum
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: tgi
+        app.kubernetes.io/instance: docsum
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: tgi
+          env:
+            - name: MODEL_ID
+              value: Intel/neural-chat-7b-v3-3
+            - name: PORT
+              value: "80"
+            - name: HUGGING_FACE_HUB_TOKEN
+              value: "insert-your-huggingface-token-here"
+            - name: HF_TOKEN
+              value: "insert-your-huggingface-token-here"
+            - name: http_proxy
+              value:
+            - name: https_proxy
+              value:
+            - name: no_proxy
+              value:
+          securityContext:
+            {}
+          image: "ghcr.io/huggingface/text-generation-inference:1.4"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+          ports:
+            - name: http
+              containerPort: 80
+              protocol: TCP
+          resources:
+            {}
+      volumes:
+        - name: model-volume
+          hostPath:
+            path: /mnt
+            type: Directory
+---
+# Source: llm-uservice/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: docsum-llm-uservice
+  labels:
+    helm.sh/chart: llm-uservice-0.1.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/version: "1.0.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: llm-uservice
+      app.kubernetes.io/instance: docsum
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: llm-uservice
+        app.kubernetes.io/instance: docsum
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: docsum
+          env:
+            - name: TGI_LLM_ENDPOINT
+              value: "http://docsum-tgi"
+            - name: HUGGINGFACEHUB_API_TOKEN
+              value: "insert-your-huggingface-token-here"
+            - name: http_proxy
+              value:
+            - name: https_proxy
+              value:
+            - name: no_proxy
+              value:
+            - name: LANGCHAIN_TRACING_V2
+              value: "false"
+            - name: LANGCHAIN_API_KEY
+              value: insert-your-langchain-key-here
+            - name: LANGCHAIN_PROJECT
+              value: "opea-llm-service"
+
+          securityContext:
+            {}
+          image: "opea/llm-docsum-tgi:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: llm-uservice
+              containerPort: 9000
+              protocol: TCP
+          startupProbe:
+            exec:
+              command:
+              - curl
+              - http://docsum-tgi
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            failureThreshold: 120
+          resources:
+            {}
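
The new DocSum entries in manifest_test.sh are meant to be driven one step at a time, mirroring the existing CodeTrans/CodeGen flow. A minimal local sketch of exercising them (assumptions, not part of this patch: a working kubeconfig with routable access to cluster ClusterIPs, a HuggingFace token at /home/$USER_ID/.cache/huggingface/token, and the repo root as the working directory; the namespace name here is arbitrary):

```
export LOG_PATH=/tmp/docsum-e2e          # validate_docsum writes curl output here
mkdir -p $LOG_PATH
kubectl create namespace docsum-e2e
./.github/workflows/scripts/e2e/manifest_test.sh init_docsum
./.github/workflows/scripts/e2e/manifest_test.sh install_docsum docsum-e2e
./.github/workflows/scripts/e2e/manifest_test.sh validate_docsum docsum-e2e
```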