forked from opea-project/GenAIExamples
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
EdgeCraftRAG: Add E2E test cases for EdgeCraftRAG - local LLM and vllm (
opea-project#1137) Signed-off-by: Zhang, Rui <[email protected]> Signed-off-by: Mingyuan Qi <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Mingyuan Qi <[email protected]> Signed-off-by: Chingis Yundunov <[email protected]>
- Loading branch information
1 parent
4a3c3d4
commit 6b16daf
Showing
8 changed files
with
524 additions
and
3 deletions.
There are no files selected for viewing
92 changes: 92 additions & 0 deletions
92
EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# EdgeCraftRAG deployment with a vLLM (OpenVINO, GPU) inference container.
# Services: server (pipeline), ecrag (mega service), ui, vllm-openvino-server.
# Values are taken from the invoking environment; `${VAR:-default}` supplies
# a fallback when VAR is unset or empty.

services:
  # Pipeline service; published on PIPELINE_SERVICE_PORT (default 16010).
  server:
    image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest}
    container_name: edgecraftrag-server
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_ENDPOINT: ${HF_ENDPOINT}
      vLLM_ENDPOINT: ${vLLM_ENDPOINT}
    volumes:
      - ${MODEL_PATH:-${PWD}}:/home/user/models
      - ${DOC_PATH:-${PWD}}:/home/user/docs
      - ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
      - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
    ports:
      - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010}
    # Expose the host DRI nodes and add the matching groups to the container.
    devices:
      - /dev/dri:/dev/dri
    group_add:
      - ${VIDEOGROUPID:-44}
      - ${RENDERGROUPID:-109}

  # Mega service; published on MEGA_SERVICE_PORT (default 16011).
  ecrag:
    image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest}
    container_name: edgecraftrag
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
      PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
      PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
    ports:
      - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011}
    depends_on:
      - server

  # Web UI; published on UI_SERVICE_PORT (default 8082).
  ui:
    image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest}
    container_name: edgecraftrag-ui
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
      PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
      PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
      UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082}
      UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0}
    volumes:
      - ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
    ports:
      - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082}
    restart: always
    depends_on:
      - server
      - ecrag

  # vLLM OpenAI-compatible API server; container port 80 is published on
  # VLLM_SERVICE_PORT (default 8008).
  vllm-openvino-server:
    container_name: vllm-openvino-server
    image: opea/vllm-arc:latest
    ports:
      - ${VLLM_SERVICE_PORT:-8008}:80
    environment:
      HTTPS_PROXY: ${https_proxy}
      # Plain-HTTP traffic must use the HTTP proxy, not the HTTPS one.
      HTTP_PROXY: ${http_proxy}
      VLLM_OPENVINO_DEVICE: GPU
      HF_ENDPOINT: ${HF_ENDPOINT}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    volumes:
      - /dev/dri/by-path:/dev/dri/by-path
      - $HOME/.cache/huggingface:/root/.cache/huggingface
    devices:
      - /dev/dri
    group_add:
      - ${VIDEOGROUPID:-44}
      - ${RENDERGROUPID:-109}
    # Multi-line plain scalar: YAML folds the lines into one command string.
    # LLM_MODEL is interpolated by Compose before the container starts.
    entrypoint: /bin/bash -c "\
      cd / && \
      export VLLM_CPU_KVCACHE_SPACE=50 && \
      export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
      python3 -m vllm.entrypoints.openai.api_server \
      --model '${LLM_MODEL}' \
      --max_model_len=4096 \
      --host 0.0.0.0 \
      --port 80"

networks:
  default:
    driver: bridge
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#!/bin/bash | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
#######################################
# POST a payload to a service endpoint and verify HTTP status and body.
# Globals:
#   LOG_PATH (read) - directory that receives <SERVICE_NAME>.log
# Arguments:
#   $1 - URL to POST to
#   $2 - pattern (grep basic regex) that must occur in the response body
#   $3 - service name; used in messages and as the log file base name
#   $4 - container whose `docker logs` output is appended to the log on failure
#   $5 - request body passed to `curl -d`
# Outputs:
#   Progress messages on stdout; response body in ${LOG_PATH}/<SERVICE_NAME>.log
# Returns:
#   0 when the status is 200 and the body matches; otherwise the calling
#   shell is terminated with exit status 1.
#######################################
function validate_services() {
  local URL="$1"
  local EXPECTED_RESULT="$2"
  local SERVICE_NAME="$3"
  local DOCKER_NAME="$4"
  local INPUT_DATA="$5"
  local LOG_FILE="${LOG_PATH}/${SERVICE_NAME}.log"

  echo "[ $SERVICE_NAME ] Validating $SERVICE_NAME service..."

  # Start from an empty log: the file is then guaranteed to exist even if
  # curl never writes to it, and a body left by an earlier run cannot be
  # mistaken for this response.
  if ! : > "$LOG_FILE"; then
    echo "[ $SERVICE_NAME ] Cannot write to $LOG_FILE"
    exit 1
  fi

  # Declared separately from the assignment so curl's exit status is not
  # masked by `local`; the status is captured instead of tripping `set -e`,
  # so the failure branch below can still collect the container logs.
  local RESPONSE
  local CURL_STATUS=0
  RESPONSE=$(curl -s -w "%{http_code}" -o "$LOG_FILE" -X POST -d "$INPUT_DATA" \
    -H 'Content-Type: application/json' "$URL") || CURL_STATUS=$?

  # With -o, stdout carries only the -w output: the three-digit status code.
  local HTTP_STATUS="${RESPONSE: -3}"

  # String comparison: an empty or malformed status must not raise a
  # "integer expression expected" error.
  if [[ "$HTTP_STATUS" == "200" ]]; then
    echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
    local CONTENT
    CONTENT=$(<"$LOG_FILE")
    if grep -q -- "$EXPECTED_RESULT" <<<"$CONTENT"; then
      echo "[ $SERVICE_NAME ] Content is as expected."
    else
      echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
      # Best effort: a missing container must not hide the real failure.
      docker logs "${DOCKER_NAME}" >> "$LOG_FILE" 2>&1 || true
      exit 1
    fi
  else
    echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
    if (( CURL_STATUS != 0 )); then
      echo "[ $SERVICE_NAME ] curl exited with status $CURL_STATUS"
    fi
    docker logs "${DOCKER_NAME}" >> "$LOG_FILE" 2>&1 || true
    exit 1
  fi
  sleep 1s
}
|
||
#######################################
# Record GPU statistics reported by xpu-smi and fail when the GPU memory in
# use is below 1024 MiB.
# Globals:
#   LOG_PATH (read) - directory that receives gpu.log
# Arguments:
#   None
# Outputs:
#   Timestamp and parsed statistics in ${LOG_PATH}/gpu.log
# Returns:
#   0 when at least 1024 MiB are in use; otherwise the calling shell is
#   terminated with exit status 1 (also when the value cannot be parsed).
#######################################
function check_gpu_usage() {
  local pci_address gpu_stats gpu_utilization memory_used memory_util
  local gpu_log="${LOG_PATH}/gpu.log"

  # Run `date`; the previous `$date` expanded an unset variable and wrote
  # an empty line.
  date > "$gpu_log"

  # NOTE(review): '56a0' is presumably the PCI device ID of the target GPU
  # and only shows up when lspci prints numeric IDs - confirm on the runner.
  # Only the first matching device is queried.
  pci_address=$(lspci | grep -i '56a0' | awk '{print $1; exit}')
  gpu_stats=$(sudo xpu-smi stats -d 0000:"$pci_address") #TODO need sudo

  # Rows are parsed as "| <name> | <value> |": the value is the third
  # '|'-separated field.
  gpu_utilization=$(grep -i "GPU Utilization" <<<"$gpu_stats" | awk -F'|' '{print $3}' | awk '{print $1}')
  memory_used=$(grep -i "GPU Memory Used" <<<"$gpu_stats" | awk -F'|' '{print $3}' | awk '{print $1}')
  memory_util=$(grep -i "GPU Memory Util" <<<"$gpu_stats" | awk -F'|' '{print $3}' | awk '{print $1}')

  {
    echo "GPU Utilization (%): $gpu_utilization"
    echo "GPU Memory Used (MiB): $memory_used"
    echo "GPU Memory Util (%): $memory_util"
  } >> "$gpu_log"

  # Compare on the integer part and reject anything non-numeric. An empty
  # or fractional value used to make `[ -lt ]` error out, which let the
  # check pass silently.
  local memory_used_int="${memory_used%%.*}"
  if [[ ! "$memory_used_int" =~ ^[0-9]+$ ]]; then
    echo "Unable to parse GPU Memory Used from xpu-smi output: '$memory_used'"
    exit 1
  fi

  # 10# forces base 10 so a leading zero is not read as octal.
  if (( 10#$memory_used_int < 1024 )); then
    echo "GPU Memory Used is less than 1G. Please check."
    exit 1
  fi
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"text": "A test case for the rag pipeline. The test id is 1234567890. There are several tests in this test case. The first test is for node parser. There are 3 types of node parsers. Their names are Aa, Bb and Cc. The second test is for indexer. The indexer will do the indexing for the given nodes. The last test is for retriever. Retrieving text is based on similarity search." | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
{ | ||
"name": "rag_test_local_llm", | ||
"node_parser": { | ||
"chunk_size": 400, | ||
"chunk_overlap": 48, | ||
"parser_type": "simple" | ||
}, | ||
"indexer": { | ||
"indexer_type": "faiss_vector", | ||
"embedding_model": { | ||
"model_id": "BAAI/bge-small-en-v1.5", | ||
"model_path": "./models/BAAI/bge-small-en-v1.5", | ||
"device": "auto", | ||
"weight": "INT4" | ||
} | ||
}, | ||
"retriever": { | ||
"retriever_type": "vectorsimilarity", | ||
"retrieve_topk": 30 | ||
}, | ||
"postprocessor": [ | ||
{ | ||
"processor_type": "reranker", | ||
"top_n": 2, | ||
"reranker_model": { | ||
"model_id": "BAAI/bge-reranker-large", | ||
"model_path": "./models/BAAI/bge-reranker-large", | ||
"device": "auto", | ||
"weight": "INT4" | ||
} | ||
} | ||
], | ||
"generator": { | ||
"model": { | ||
"model_id": "Qwen/Qwen2-7B-Instruct", | ||
"model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights", | ||
"device": "auto", | ||
"weight": "INT4" | ||
}, | ||
"prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt", | ||
"inference_type": "local" | ||
}, | ||
"active": "True" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
{ | ||
"name": "rag_test_local_llm", | ||
"node_parser": { | ||
"chunk_size": 400, | ||
"chunk_overlap": 48, | ||
"parser_type": "simple" | ||
}, | ||
"indexer": { | ||
"indexer_type": "faiss_vector", | ||
"embedding_model": { | ||
"model_id": "BAAI/bge-small-en-v1.5", | ||
"model_path": "./models/BAAI/bge-small-en-v1.5", | ||
"device": "auto", | ||
"weight": "INT4" | ||
} | ||
}, | ||
"retriever": { | ||
"retriever_type": "vectorsimilarity", | ||
"retrieve_topk": 30 | ||
}, | ||
"postprocessor": [ | ||
{ | ||
"processor_type": "reranker", | ||
"top_n": 2, | ||
"reranker_model": { | ||
"model_id": "BAAI/bge-reranker-large", | ||
"model_path": "./models/BAAI/bge-reranker-large", | ||
"device": "auto", | ||
"weight": "INT4" | ||
} | ||
} | ||
], | ||
"generator": { | ||
"model": { | ||
"model_id": "Qwen/Qwen2-7B-Instruct", | ||
"model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights", | ||
"device": "auto", | ||
"weight": "INT4" | ||
}, | ||
"prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt", | ||
"inference_type": "vllm" | ||
}, | ||
"active": "True" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
#!/bin/bash | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Stop at the first failing command and pull in the shared helpers from
# ./common.sh (resolved against the current directory).
set -e
. ./common.sh

# Image coordinates; fall back to "opea" / "latest" when unset or empty.
: "${IMAGE_REPO:=opea}"
: "${IMAGE_TAG:=latest}"
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY="${IMAGE_REPO}" TAG="${IMAGE_TAG}"

# The parent of the current directory is the example root; logs go to tests/.
WORKPATH=$(dirname "$PWD")
LOG_PATH="${WORKPATH}/tests"

# First address reported by `hostname -I`.
HOST_IP=$(hostname -I | awk '{print $1}')
ip_address=$HOST_IP

COMPOSE_FILE="compose.yaml"
EC_RAG_SERVICE_PORT=16010
#MODEL_PATH="$WORKPATH/models"
MODEL_PATH="/home/media/models"
HF_ENDPOINT=https://hf-mirror.com
|
||
|
||
#######################################
# Build the server, ui and ecrag images from docker_image_build/build.yaml
# without using the build cache.
# Globals:
#   WORKPATH (read) - example root containing docker_image_build/
#   LOG_PATH (read) - directory that receives docker_image_build.log
# Outputs:
#   Build output in ${LOG_PATH}/docker_image_build.log; image list on stdout
# Returns:
#   Terminates the calling shell with status 1 if the build directory cannot
#   be entered. Changes the current directory as a side effect.
#######################################
function build_docker_images() {
  # Quoted so a checkout path containing whitespace still works.
  cd "$WORKPATH/docker_image_build" || exit 1
  echo "Build all the images with --no-cache, check docker_image_build.log for details..."
  local -a service_list=(server ui ecrag)
  docker compose -f build.yaml build "${service_list[@]}" --no-cache \
    > "${LOG_PATH}/docker_image_build.log"

  docker images && sleep 1s
}
|
||
#######################################
# Export the settings the compose file interpolates and start the stack in
# detached mode.
# Globals:
#   MODEL_PATH, HOST_IP, LLM_MODEL, HF_ENDPOINT, vLLM_ENDPOINT,
#   HUGGINGFACEHUB_API_TOKEN (read, exported)
#   no_proxy (written, exported)
#   WORKPATH, COMPOSE_FILE, LOG_PATH (read)
# Outputs:
#   Compose output in ${LOG_PATH}/start_services_with_compose.log
# Returns:
#   Terminates the calling shell with status 1 if the compose directory
#   cannot be entered. Changes the current directory as a side effect.
#######################################
function start_services() {
  export MODEL_PATH="${MODEL_PATH}"
  export HOST_IP="${HOST_IP}"
  export LLM_MODEL="${LLM_MODEL}"
  export HF_ENDPOINT="${HF_ENDPOINT}"
  export vLLM_ENDPOINT="${vLLM_ENDPOINT}"
  export HUGGINGFACEHUB_API_TOKEN="${HUGGINGFACEHUB_API_TOKEN}"
  export no_proxy="localhost, 127.0.0.1, 192.168.1.1"

  # Quoted so paths and file names containing whitespace still work.
  cd "$WORKPATH/docker_compose/intel/gpu/arc" || exit 1

  # Start Docker Containers
  docker compose -f "$COMPOSE_FILE" up -d > "${LOG_PATH}/start_services_with_compose.log"
  # Fixed grace period for the containers to come up before validation.
  sleep 20
}
|
||
#######################################
# Exercise the pipeline service end to end: create the pipeline, upload the
# test document, then ask a question whose answer is in that document.
# Globals:
#   WORKPATH, HOST_IP, EC_RAG_SERVICE_PORT (read)
# Returns:
#   Terminates the calling shell with status 1 if the tests directory cannot
#   be entered; otherwise whatever validate_services does on failure.
#######################################
function validate_rag() {
  # The '@configs/...' payloads below are relative paths, so the working
  # directory matters. Quoted so a path containing whitespace still works.
  cd "$WORKPATH/tests" || exit 1

  # setup pipeline
  validate_services \
    "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/settings/pipelines" \
    "active" \
    "pipeline" \
    "edgecraftrag-server" \
    '@configs/test_pipeline_local_llm.json'

  # add data
  validate_services \
    "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/data" \
    "Done" \
    "data" \
    "edgecraftrag-server" \
    '@configs/test_data.json'

  # query
  # NOTE(review): logs are collected from "vllm-openvino-server" although the
  # pipeline config posted above is the local-LLM one - confirm that this
  # container exists in the compose file in use.
  validate_services \
    "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/chatqna" \
    "1234567890" \
    "query" \
    "vllm-openvino-server" \
    '{"messages":"What is the test id?"}'
}
|
||
#######################################
# Send the test question to the mega service on port 16011 and require the
# test id in the answer.
# Globals:
#   HOST_IP (read)
#######################################
function validate_megaservice() {
  local endpoint="${HOST_IP}:16011/v1/chatqna"
  local expected="1234567890"
  local payload='{"messages":"What is the test id?"}'

  # Curl the Mega Service
  validate_services "$endpoint" "$expected" "query" "vllm-openvino-server" "$payload"
}
|
||
#######################################
# Tear down the stack described by the compose file.
# Globals:
#   WORKPATH, COMPOSE_FILE (read)
# Returns:
#   Terminates the calling shell with status 1 if the compose directory
#   cannot be entered. Changes the current directory as a side effect.
#######################################
function stop_docker() {
  # Quoted so paths and file names containing whitespace still work.
  cd "$WORKPATH/docker_compose/intel/gpu/arc" || exit 1
  docker compose -f "$COMPOSE_FILE" down
}
|
||
|
||
#######################################
# Run the end-to-end test: clean up any previous stack, build images when
# the default "opea" repository is used, start the services, validate them,
# then tear everything down and prune unused Docker data.
# Globals:
#   LOG_PATH, IMAGE_REPO (read)
#######################################
function main() {
  # Quoted so a log path containing whitespace creates a single directory.
  mkdir -p "$LOG_PATH"

  stop_docker
  if [[ "$IMAGE_REPO" == "opea" ]]; then
    build_docker_images
  fi
  start_services
  echo "EC_RAG service started" && sleep 1s

  validate_rag
  validate_megaservice

  stop_docker
  # -f skips the confirmation prompt instead of piping "y" into it.
  docker system prune -f
}
|
||
main |
Oops, something went wrong.