-
Notifications
You must be signed in to change notification settings - Fork 200
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add e2e test for example of SearchQnA in GenAIExample.
Signed-off-by: zhlsunshine <[email protected]>
- Loading branch information
1 parent
21b7d11
commit 2cd9073
Showing
5 changed files
with
362 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
<h1 align="center" id="title">Deploy Translation in a Kubernetes Cluster</h1> | ||
|
||
This document outlines the deployment process for a Translation application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.
|
||
Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install. | ||
|
||
If you have only Intel Xeon machines, use the `translation_xeon.yaml` file; if you have a Gaudi cluster, use `translation_gaudi.yaml` instead.
The example below illustrates the deployment on Xeon.
|
||
## Deploy the Translation application
|
||
1. Create the desired namespace if it does not already exist and deploy the application | ||
```bash | ||
export APP_NAMESPACE=ct
kubectl create ns $APP_NAMESPACE | ||
sed -i "s|namespace: translation|namespace: $APP_NAMESPACE|g" ./translation_xeon.yaml | ||
kubectl apply -f ./translation_xeon.yaml | ||
``` | ||
|
||
2. Check if the application is up and ready | ||
```bash | ||
kubectl get pods -n $APP_NAMESPACE | ||
``` | ||
|
||
3. Deploy a client pod for testing | ||
```bash | ||
kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity | ||
``` | ||
|
||
4. Check that client pod is ready | ||
```bash | ||
kubectl get pods -n $APP_NAMESPACE | ||
``` | ||
|
||
5. Send request to application | ||
```bash | ||
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) | ||
export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='translation')].status.accessUrl}") | ||
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_translation.log | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# GMConnector describing the Translation pipeline for Gaudi nodes.
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: gaudi
  name: translation
  # Rewritten by the install step (sed) to the target namespace.
  namespace: translation
spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    root:
      routerType: Sequence
      steps:
      - name: Llm
        data: $response
        internalService:
          serviceName: llm-service
          config:
            endpoint: /v1/chat/completions
            TGI_LLM_ENDPOINT: tgi-gaudi-svc
      - name: TgiGaudi
        internalService:
          serviceName: tgi-gaudi-svc
          config:
            MODEL_ID: haoranxu/ALMA-13B
            endpoint: /generate
          isDownstreamService: true
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# GMConnector describing the Translation pipeline for Xeon nodes.
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: xeon
  name: translation
  # Rewritten by the install step (sed) to the target namespace.
  namespace: translation
spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    root:
      routerType: Sequence
      steps:
      - name: Llm
        data: $response
        internalService:
          serviceName: llm-service
          config:
            endpoint: /v1/chat/completions
            TGI_LLM_ENDPOINT: tgi-service
      - name: Tgi
        internalService:
          serviceName: tgi-service
          config:
            MODEL_ID: haoranxu/ALMA-13B
            endpoint: /generate
          isDownstreamService: true
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
#!/bin/bash | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Trace every command (-x) and abort on the first unhandled failure (-e).
set -xe
USER_ID=$(whoami)
# Directory that receives the test logs. Defaults to the invoking user's
# home-based log directory but may be overridden from the environment.
LOG_PATH=${LOG_PATH:-/home/$USER_ID/logs}
# MOUNT_DIR and IMAGE_REPO are not referenced elsewhere in this script.
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
IMAGE_REPO=${IMAGE_REPO:-}
|
||
#######################################
# Deploy the Translation GMC pipeline from the Gaudi manifest and wait for
# its router pod.
# Globals:
#   APP_NAMESPACE (read) - namespace to deploy into; must be set.
# Arguments:
#   None. Must be run from the directory holding translation_gaudi.yaml.
# Outputs:
#   Progress messages and the final pod listing on stdout.
# Returns:
#   Exits non-zero when APP_NAMESPACE is unset/empty.
#######################################
function install_translation() {
    : "${APP_NAMESPACE:?APP_NAMESPACE must be set}"

    # Re-runs must not abort just because the namespace already exists.
    if ! kubectl get ns "$APP_NAMESPACE" >/dev/null 2>&1; then
        kubectl create ns "$APP_NAMESPACE"
    fi
    # The manifest ships with "namespace: translation"; retarget it in place.
    sed -i "s|namespace: translation|namespace: $APP_NAMESPACE|g" ./translation_gaudi.yaml
    kubectl apply -f ./translation_gaudi.yaml

    # Wait until the router service is ready
    echo "Waiting for the translation router service to be ready..."
    wait_until_pod_ready "translation router" "$APP_NAMESPACE" "router-service"
    # Printed directly so the pod table keeps its line breaks in the log.
    kubectl get pods -n "$APP_NAMESPACE"
}
|
||
#######################################
# Send one translation request through the deployed pipeline from a
# throw-away client pod and check the logged response.
# Globals:
#   APP_NAMESPACE (read)    - namespace of the deployed pipeline; must be set.
#   LOG_PATH      (read)    - directory that receives gmc_translation.log.
#   CLIENT_POD    (written) - exported name of the client pod.
# Arguments:
#   None
# Outputs:
#   Progress messages on stdout; the raw response is written to
#   $LOG_PATH/gmc_translation.log.
# Returns:
#   Exits 1 if the request fails or the response lacks the literal "[DONE]".
#######################################
function validate_translation() {
    : "${APP_NAMESPACE:?APP_NAMESPACE must be set}"
    local log_file="$LOG_PATH/gmc_translation.log"
    local access_url
    local exit_code=0

    # The output redirect below fails if the log directory is missing.
    mkdir -p "$LOG_PATH"

    # deploy client pod for testing
    kubectl create deployment client-test -n "$APP_NAMESPACE" --image=python:3.8.13 -- sleep infinity

    # wait for client pod ready
    wait_until_pod_ready "client-test" "$APP_NAMESPACE" "client-test"
    # fixed grace period before the request is sent
    sleep 60

    kubectl get pods -n "$APP_NAMESPACE"
    # send request to translation
    # Assignment and export are split so a kubectl failure is not masked.
    CLIENT_POD=$(kubectl get pod -n "$APP_NAMESPACE" -l app=client-test -o jsonpath='{.items..metadata.name}')
    export CLIENT_POD
    echo "$CLIENT_POD"
    access_url=$(kubectl get gmc -n "$APP_NAMESPACE" -o jsonpath="{.items[?(@.metadata.name=='translation')].status.accessUrl}")
    if [[ -z "$access_url" ]]; then
        echo "translation failed: GMC resource 'translation' reports no accessUrl in namespace ${APP_NAMESPACE}!"
        exit 1
    fi

    # "|| exit_code=$?" keeps set -e from aborting before the failure is reported.
    kubectl exec "$CLIENT_POD" -n "$APP_NAMESPACE" -- curl "$access_url" -X POST -d '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' -H 'Content-Type: application/json' > "$log_file" || exit_code=$?
    if [[ $exit_code -ne 0 ]]; then
        echo "translation failed, please check the logs in ${LOG_PATH}!"
        exit 1
    fi

    echo "Checking response results, make sure the output is reasonable. "
    # -F matches the literal text "[DONE]"; as a regex it would be a bracket
    # expression matching any single D, O, N or E.
    if grep -qF "[DONE]" "$log_file"; then
        echo "Response check succeed!"
    else
        echo "Response check failed, please check the logs in artifacts!"
        if [[ -f "$log_file" ]]; then
            cat "$log_file"
        fi
        exit 1
    fi
}
|
||
#######################################
# Poll until a pod reports Ready, giving up after 30 retries 10 s apart.
# Arguments:
#   $1 - human-readable description used in log messages
#   $2 - namespace to look in
#   $3 - value of the pod's "app" label (passed on to is_pod_ready)
# Outputs:
#   Progress messages and intermediate pod listings on stdout.
# Returns:
#   0 once the pod is ready; exits the script with 1 on timeout after
#   dumping the GMC controller logs.
#######################################
function wait_until_pod_ready() {
    local description=$1
    local namespace=$2
    local app_label=$3
    local max_retries=30
    local retry_count=0

    echo "Waiting for the $description to be ready..."
    while ! is_pod_ready "$namespace" "$app_label"; do
        if [[ $retry_count -ge $max_retries ]]; then
            echo "$description is not ready after waiting for a significant amount of time"
            # Best effort: the script exits with 1 right after either way.
            get_gmc_controller_logs || true
            exit 1
        fi
        echo "$description is not ready yet. Retrying in 10 seconds..."
        sleep 10
        # Diagnostic listing only; a transient kubectl error must not end the wait.
        kubectl get pods -n "$namespace" || true
        retry_count=$((retry_count + 1))
    done
}
|
||
#######################################
# Report whether the selected pod's Ready condition is "True".
# Arguments:
#   $1 - namespace to query
#   $2 - value of the "app" label to select on; the special value
#        "gmc-controller" queries the namespace without a label selector
# Returns:
#   0 if kubectl reports exactly "True", 1 otherwise.
#######################################
function is_pod_ready() {
    local namespace=$1
    local app_label=$2
    local -a selector=()
    local pod_status

    if [[ "$app_label" != "gmc-controller" ]]; then
        selector=(-l "app=$app_label")
    fi
    pod_status=$(kubectl get pods -n "$namespace" "${selector[@]}" -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}')
    [[ "$pod_status" == "True" ]]
}
|
||
#######################################
# Print the logs of the GMC controller pod.
# Globals:
#   SYSTEM_NAMESPACE (read) - namespace searched for the controller pod;
#                             not set in this script, so it is expected
#                             from the environment.
# Arguments:
#   None
# Outputs:
#   The controller pod's logs (or a "not found" message) on stdout.
# Returns:
#   1 if no pod labelled control-plane=gmc-controller could be found,
#   otherwise the status of "kubectl logs".
#######################################
function get_gmc_controller_logs() {
    local pod_name

    # Fetch the name of the pod labelled control-plane=gmc-controller.
    # A failed lookup is mapped to an empty name so that, under set -e, the
    # "not found" branch below is reached instead of the script aborting here.
    pod_name=$(kubectl get pods -n "$SYSTEM_NAMESPACE" -l control-plane=gmc-controller -o jsonpath='{.items[0].metadata.name}') || pod_name=""

    # Check if the pod name was found
    if [[ -z "$pod_name" ]]; then
        echo "No pod found with app-name gmc-controller in namespace $SYSTEM_NAMESPACE"
        return 1
    fi

    # Get the logs of the found pod
    echo "Fetching logs for pod $pod_name in namespace $SYSTEM_NAMESPACE..."
    kubectl logs "$pod_name" -n "$SYSTEM_NAMESPACE"
}
|
||
# Entry point: the first argument selects which step to run. Both steps run
# from Translation/kubernetes, where the manifests live.
if [[ $# -eq 0 ]]; then
    echo "Usage: $0 <function_name>" >&2
    exit 1
fi

case "$1" in
    install_Translation)
        pushd Translation/kubernetes
        install_translation
        popd
        ;;
    validate_Translation)
        pushd Translation/kubernetes
        validate_translation
        popd
        ;;
    *)
        # Fail loudly so a mistyped step name cannot pass as a green run.
        echo "Unknown function: $1" >&2
        exit 1
        ;;
esac
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
#!/bin/bash | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Trace every command (-x) and abort on the first unhandled failure (-e).
set -xe
USER_ID=$(whoami)
# Directory that receives the test logs. Defaults to the invoking user's
# home-based log directory but may be overridden from the environment.
LOG_PATH=${LOG_PATH:-/home/$USER_ID/logs}
# MOUNT_DIR and IMAGE_REPO are not referenced elsewhere in this script.
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
IMAGE_REPO=${IMAGE_REPO:-}
|
||
#######################################
# Deploy the Translation GMC pipeline from the Xeon manifest and wait for
# its router pod.
# Globals:
#   APP_NAMESPACE (read) - namespace to deploy into; must be set.
# Arguments:
#   None. Must be run from the directory holding translation_xeon.yaml.
# Outputs:
#   Progress messages and the final pod listing on stdout.
# Returns:
#   Exits non-zero when APP_NAMESPACE is unset/empty.
#######################################
function install_translation() {
    : "${APP_NAMESPACE:?APP_NAMESPACE must be set}"

    # Re-runs must not abort just because the namespace already exists.
    if ! kubectl get ns "$APP_NAMESPACE" >/dev/null 2>&1; then
        kubectl create ns "$APP_NAMESPACE"
    fi
    # The manifest ships with "namespace: translation"; retarget it in place.
    sed -i "s|namespace: translation|namespace: $APP_NAMESPACE|g" ./translation_xeon.yaml
    kubectl apply -f ./translation_xeon.yaml

    # Wait until the router service is ready
    echo "Waiting for the translation router service to be ready..."
    wait_until_pod_ready "translation router" "$APP_NAMESPACE" "router-service"
    # Printed directly so the pod table keeps its line breaks in the log.
    kubectl get pods -n "$APP_NAMESPACE"
}
|
||
#######################################
# Send one translation request through the deployed pipeline from a
# throw-away client pod and check the logged response.
# Globals:
#   APP_NAMESPACE (read)    - namespace of the deployed pipeline; must be set.
#   LOG_PATH      (read)    - directory that receives gmc_translation.log.
#   CLIENT_POD    (written) - exported name of the client pod.
# Arguments:
#   None
# Outputs:
#   Progress messages on stdout; the raw response is written to
#   $LOG_PATH/gmc_translation.log.
# Returns:
#   Exits 1 if the request fails or the response lacks the literal "[DONE]".
#######################################
function validate_translation() {
    : "${APP_NAMESPACE:?APP_NAMESPACE must be set}"
    local log_file="$LOG_PATH/gmc_translation.log"
    local access_url
    local exit_code=0

    # The output redirect below fails if the log directory is missing.
    mkdir -p "$LOG_PATH"

    # deploy client pod for testing
    kubectl create deployment client-test -n "$APP_NAMESPACE" --image=python:3.8.13 -- sleep infinity

    # wait for client pod ready
    wait_until_pod_ready "client-test" "$APP_NAMESPACE" "client-test"
    # fixed grace period before the request is sent
    sleep 60

    kubectl get pods -n "$APP_NAMESPACE"
    # send request to translation
    # Assignment and export are split so a kubectl failure is not masked.
    CLIENT_POD=$(kubectl get pod -n "$APP_NAMESPACE" -l app=client-test -o jsonpath='{.items..metadata.name}')
    export CLIENT_POD
    echo "$CLIENT_POD"
    access_url=$(kubectl get gmc -n "$APP_NAMESPACE" -o jsonpath="{.items[?(@.metadata.name=='translation')].status.accessUrl}")
    if [[ -z "$access_url" ]]; then
        echo "translation failed: GMC resource 'translation' reports no accessUrl in namespace ${APP_NAMESPACE}!"
        exit 1
    fi

    # "|| exit_code=$?" keeps set -e from aborting before the failure is reported.
    kubectl exec "$CLIENT_POD" -n "$APP_NAMESPACE" -- curl "$access_url" -X POST -d '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' -H 'Content-Type: application/json' > "$log_file" || exit_code=$?
    if [[ $exit_code -ne 0 ]]; then
        echo "translation failed, please check the logs in ${LOG_PATH}!"
        exit 1
    fi

    echo "Checking response results, make sure the output is reasonable. "
    # -F matches the literal text "[DONE]"; as a regex it would be a bracket
    # expression matching any single D, O, N or E.
    if grep -qF "[DONE]" "$log_file"; then
        echo "Response check succeed!"
    else
        echo "Response check failed, please check the logs in artifacts!"
        if [[ -f "$log_file" ]]; then
            cat "$log_file"
        fi
        exit 1
    fi
}
|
||
#######################################
# Poll until a pod reports Ready, giving up after 30 retries 10 s apart.
# Arguments:
#   $1 - human-readable description used in log messages
#   $2 - namespace to look in
#   $3 - value of the pod's "app" label (passed on to is_pod_ready)
# Outputs:
#   Progress messages and intermediate pod listings on stdout.
# Returns:
#   0 once the pod is ready; exits the script with 1 on timeout after
#   dumping the GMC controller logs.
#######################################
function wait_until_pod_ready() {
    local description=$1
    local namespace=$2
    local app_label=$3
    local max_retries=30
    local retry_count=0

    echo "Waiting for the $description to be ready..."
    while ! is_pod_ready "$namespace" "$app_label"; do
        if [[ $retry_count -ge $max_retries ]]; then
            echo "$description is not ready after waiting for a significant amount of time"
            # Best effort: the script exits with 1 right after either way.
            get_gmc_controller_logs || true
            exit 1
        fi
        echo "$description is not ready yet. Retrying in 10 seconds..."
        sleep 10
        # Diagnostic listing only; a transient kubectl error must not end the wait.
        kubectl get pods -n "$namespace" || true
        retry_count=$((retry_count + 1))
    done
}
|
||
#######################################
# Report whether the selected pod's Ready condition is "True".
# Arguments:
#   $1 - namespace to query
#   $2 - value of the "app" label to select on; the special value
#        "gmc-controller" queries the namespace without a label selector
# Returns:
#   0 if kubectl reports exactly "True", 1 otherwise.
#######################################
function is_pod_ready() {
    local namespace=$1
    local app_label=$2
    local -a selector=()
    local pod_status

    if [[ "$app_label" != "gmc-controller" ]]; then
        selector=(-l "app=$app_label")
    fi
    pod_status=$(kubectl get pods -n "$namespace" "${selector[@]}" -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}')
    [[ "$pod_status" == "True" ]]
}
|
||
#######################################
# Print the logs of the GMC controller pod.
# Globals:
#   SYSTEM_NAMESPACE (read) - namespace searched for the controller pod;
#                             not set in this script, so it is expected
#                             from the environment.
# Arguments:
#   None
# Outputs:
#   The controller pod's logs (or a "not found" message) on stdout.
# Returns:
#   1 if no pod labelled control-plane=gmc-controller could be found,
#   otherwise the status of "kubectl logs".
#######################################
function get_gmc_controller_logs() {
    local pod_name

    # Fetch the name of the pod labelled control-plane=gmc-controller.
    # A failed lookup is mapped to an empty name so that, under set -e, the
    # "not found" branch below is reached instead of the script aborting here.
    pod_name=$(kubectl get pods -n "$SYSTEM_NAMESPACE" -l control-plane=gmc-controller -o jsonpath='{.items[0].metadata.name}') || pod_name=""

    # Check if the pod name was found
    if [[ -z "$pod_name" ]]; then
        echo "No pod found with app-name gmc-controller in namespace $SYSTEM_NAMESPACE"
        return 1
    fi

    # Get the logs of the found pod
    echo "Fetching logs for pod $pod_name in namespace $SYSTEM_NAMESPACE..."
    kubectl logs "$pod_name" -n "$SYSTEM_NAMESPACE"
}
|
||
# Entry point: the first argument selects which step to run. Both steps run
# from Translation/kubernetes, where the manifests live.
if [[ $# -eq 0 ]]; then
    echo "Usage: $0 <function_name>" >&2
    exit 1
fi

case "$1" in
    install_Translation)
        pushd Translation/kubernetes
        install_translation
        popd
        ;;
    validate_Translation)
        pushd Translation/kubernetes
        validate_translation
        popd
        ;;
    *)
        # Fail loudly so a mistyped step name cannot pass as a green run.
        echo "Unknown function: $1" >&2
        exit 1
        ;;
esac