From 7949045176ace01ee419de8953b0b4ba6f587de7 Mon Sep 17 00:00:00 2001
From: rui2zhang
Date: Sun, 17 Nov 2024 18:22:32 +0800
Subject: [PATCH] EdgeCraftRAG: Add E2E test cases for EdgeCraftRAG - local LLM and vllm (#1137)

Signed-off-by: Zhang, Rui
Signed-off-by: Mingyuan Qi
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Mingyuan Qi
---
 .../intel/gpu/arc/compose_vllm.yaml           |  92 ++++++++++
 EdgeCraftRAG/tests/common.sh                  |  53 ++++++
 EdgeCraftRAG/tests/configs/test_data.json     |   3 +
 .../configs/test_pipeline_local_llm.json      |  44 +++++
 .../tests/configs/test_pipeline_vllm.json     |  44 +++++
 EdgeCraftRAG/tests/test_compose_on_arc.sh     | 113 ++++++++++++
 .../tests/test_compose_vllm_on_arc.sh         | 166 ++++++++++++++++++
 .../tests/test_pipeline_local_llm.json        |  12 +-
 8 files changed, 524 insertions(+), 3 deletions(-)
 create mode 100644 EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml
 create mode 100644 EdgeCraftRAG/tests/common.sh
 create mode 100644 EdgeCraftRAG/tests/configs/test_data.json
 create mode 100644 EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json
 create mode 100644 EdgeCraftRAG/tests/configs/test_pipeline_vllm.json
 create mode 100755 EdgeCraftRAG/tests/test_compose_on_arc.sh
 create mode 100755 EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh

diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml
new file mode 100644
index 000000000..6ba7c4da2
--- /dev/null
+++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml
@@ -0,0 +1,92 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  server:
+    image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest}
+    container_name: edgecraftrag-server
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HF_ENDPOINT: ${HF_ENDPOINT}
+      vLLM_ENDPOINT: ${vLLM_ENDPOINT}
+    volumes:
+      - ${MODEL_PATH:-${PWD}}:/home/user/models
+      - ${DOC_PATH:-${PWD}}:/home/user/docs
+      - ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
+      - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
+    ports:
+      - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010}
+    devices:
+      - /dev/dri:/dev/dri
+    group_add:
+      - ${VIDEOGROUPID:-44}
+      - ${RENDERGROUPID:-109}
+  ecrag:
+    image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest}
+    container_name: edgecraftrag
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
+      PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
+      PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
+    ports:
+      - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011}
+    depends_on:
+      - server
+  ui:
+    image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest}
+    container_name: edgecraftrag-ui
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
+      PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
+      PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
+      UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082}
+      UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0}
+    volumes:
+      - ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
+    ports:
+      - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082}
+    restart: always
+    depends_on:
+      - server
+      - ecrag
+  vllm-openvino-server:
+    container_name: vllm-openvino-server
+    image: opea/vllm-arc:latest
+    ports:
+      - ${VLLM_SERVICE_PORT:-8008}:80
+    environment:
+      HTTPS_PROXY: ${https_proxy}
+      HTTP_PROXY: ${http_proxy}
+      VLLM_OPENVINO_DEVICE: GPU
+      HF_ENDPOINT: ${HF_ENDPOINT}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+    volumes:
+      - /dev/dri/by-path:/dev/dri/by-path
+      - $HOME/.cache/huggingface:/root/.cache/huggingface
+    devices:
+      - /dev/dri
+    group_add:
+      - ${VIDEOGROUPID:-44}
+      - ${RENDERGROUPID:-109}
+    entrypoint: /bin/bash -c "\
+      cd / && \
+      export VLLM_CPU_KVCACHE_SPACE=50 && \
+      export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
+      python3 -m vllm.entrypoints.openai.api_server \
+        --model '${LLM_MODEL}' \
+        --max_model_len=4096 \
+        --host 0.0.0.0 \
+        --port 80"
+networks:
+  default:
+    driver: bridge
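
For reference, bringing this compose file up by hand needs roughly the following environment; the values here are placeholders, and the authoritative export list is in start_services() of test_compose_vllm_on_arc.sh further below:

    # Minimal sketch -- adjust paths, model and token to your setup.
    export HOST_IP=$(hostname -I | awk '{print $1}')
    export MODEL_PATH=/home/media/models
    export LLM_MODEL="Qwen/Qwen2-7B-Instruct"
    export VLLM_SERVICE_PORT=8008
    export vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}"
    export HUGGINGFACEHUB_API_TOKEN="your_hf_token_here"  # placeholder
    docker compose -f compose_vllm.yaml up -d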
diff --git a/EdgeCraftRAG/tests/common.sh b/EdgeCraftRAG/tests/common.sh
new file mode 100644
index 000000000..b28978ca9
--- /dev/null
+++ b/EdgeCraftRAG/tests/common.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+function validate_services() {
+    local URL="$1"
+    local EXPECTED_RESULT="$2"
+    local SERVICE_NAME="$3"
+    local DOCKER_NAME="$4"
+    local INPUT_DATA="$5"
+
+    echo "[ $SERVICE_NAME ] Validating $SERVICE_NAME service..."
+    local RESPONSE=$(curl -s -w "%{http_code}" -o ${LOG_PATH}/${SERVICE_NAME}.log -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
+    while [ ! -f ${LOG_PATH}/${SERVICE_NAME}.log ]; do
+        sleep 1
+    done
+    local HTTP_STATUS="${RESPONSE: -3}"
+    local CONTENT=$(cat ${LOG_PATH}/${SERVICE_NAME}.log)
+
+    if [ "$HTTP_STATUS" -eq 200 ]; then
+        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
+        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
+            echo "[ $SERVICE_NAME ] Content is as expected."
+        else
+            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
+            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
+            exit 1
+        fi
+    else
+        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
+        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
+        exit 1
+    fi
+    sleep 1s
+}
+
+function check_gpu_usage() {
+    echo "$(date)" > ${LOG_PATH}/gpu.log
+    pci_address=$(lspci | grep -i '56a0' | awk '{print $1}')
+    gpu_stats=$(sudo xpu-smi stats -d 0000:"$pci_address") # TODO: requires sudo
+    gpu_utilization=$(echo "$gpu_stats" | grep -i "GPU Utilization" | awk -F'|' '{print $3}' | awk '{print $1}')
+    memory_used=$(echo "$gpu_stats" | grep -i "GPU Memory Used" | awk -F'|' '{print $3}' | awk '{print $1}')
+    memory_util=$(echo "$gpu_stats" | grep -i "GPU Memory Util" | awk -F'|' '{print $3}' | awk '{print $1}')
+
+    echo "GPU Utilization (%): $gpu_utilization" >> ${LOG_PATH}/gpu.log
+    echo "GPU Memory Used (MiB): $memory_used" >> ${LOG_PATH}/gpu.log
+    echo "GPU Memory Util (%): $memory_util" >> ${LOG_PATH}/gpu.log
+
+    if [ "$memory_used" -lt 1024 ]; then
+        echo "GPU Memory Used is less than 1G. Please check."
+        exit 1
+    fi
+}
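
A minimal sketch of how these helpers are meant to be consumed (the real calls are in the test scripts below; the arguments are the URL, the substring expected in the response, the log file name, the container whose logs are dumped on failure, and the curl payload):

    source ./common.sh
    LOG_PATH=$PWD
    HOST_IP=$(hostname -I | awk '{print $1}')
    validate_services \
        "${HOST_IP}:16010/v1/settings/pipelines" \
        "active" \
        "pipeline" \
        "edgecraftrag-server" \
        '@configs/test_pipeline_local_llm.json'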
diff --git a/EdgeCraftRAG/tests/configs/test_data.json b/EdgeCraftRAG/tests/configs/test_data.json
new file mode 100644
index 000000000..648ae9624
--- /dev/null
+++ b/EdgeCraftRAG/tests/configs/test_data.json
@@ -0,0 +1,3 @@
+{
+  "text": "A test case for the rag pipeline. The test id is 1234567890. There are several tests in this test case. The first test is for node parser. There are 3 types of node parsers. Their names are Aa, Bb and Cc. The second test is for indexer. The indexer will do the indexing for the given nodes. The last test is for retriever. Retrieving text is based on similarity search."
+}
diff --git a/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json b/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json
new file mode 100644
index 000000000..261459e83
--- /dev/null
+++ b/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json
@@ -0,0 +1,44 @@
+{
+  "name": "rag_test_local_llm",
+  "node_parser": {
+    "chunk_size": 400,
+    "chunk_overlap": 48,
+    "parser_type": "simple"
+  },
+  "indexer": {
+    "indexer_type": "faiss_vector",
+    "embedding_model": {
+      "model_id": "BAAI/bge-small-en-v1.5",
+      "model_path": "./models/BAAI/bge-small-en-v1.5",
+      "device": "auto",
+      "weight": "INT4"
+    }
+  },
+  "retriever": {
+    "retriever_type": "vectorsimilarity",
+    "retrieve_topk": 30
+  },
+  "postprocessor": [
+    {
+      "processor_type": "reranker",
+      "top_n": 2,
+      "reranker_model": {
+        "model_id": "BAAI/bge-reranker-large",
+        "model_path": "./models/BAAI/bge-reranker-large",
+        "device": "auto",
+        "weight": "INT4"
+      }
+    }
+  ],
+  "generator": {
+    "model": {
+      "model_id": "Qwen/Qwen2-7B-Instruct",
+      "model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights",
+      "device": "auto",
+      "weight": "INT4"
+    },
+    "prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
+    "inference_type": "local"
+  },
+  "active": "True"
+}
diff --git a/EdgeCraftRAG/tests/configs/test_pipeline_vllm.json b/EdgeCraftRAG/tests/configs/test_pipeline_vllm.json
new file mode 100644
index 000000000..05809c8e1
--- /dev/null
+++ b/EdgeCraftRAG/tests/configs/test_pipeline_vllm.json
@@ -0,0 +1,44 @@
+{
+  "name": "rag_test_vllm",
+  "node_parser": {
+    "chunk_size": 400,
+    "chunk_overlap": 48,
+    "parser_type": "simple"
+  },
+  "indexer": {
+    "indexer_type": "faiss_vector",
+    "embedding_model": {
+      "model_id": "BAAI/bge-small-en-v1.5",
+      "model_path": "./models/BAAI/bge-small-en-v1.5",
+      "device": "auto",
+      "weight": "INT4"
+    }
+  },
+  "retriever": {
+    "retriever_type": "vectorsimilarity",
+    "retrieve_topk": 30
+  },
+  "postprocessor": [
+    {
+      "processor_type": "reranker",
+      "top_n": 2,
+      "reranker_model": {
+        "model_id": "BAAI/bge-reranker-large",
+        "model_path": "./models/BAAI/bge-reranker-large",
+        "device": "auto",
+        "weight": "INT4"
+      }
+    }
+  ],
+  "generator": {
+    "model": {
+      "model_id": "Qwen/Qwen2-7B-Instruct",
+      "model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights",
+      "device": "auto",
+      "weight": "INT4"
+    },
+    "prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
+    "inference_type": "vllm"
+  },
+  "active": "True"
+}
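
These configs are driven through the pipeline server's REST API; a hand-run equivalent of what the scripts below automate would look roughly like this (16010 is the default PIPELINE_SERVICE_PORT, and HOST_IP must be set):

    # Activate a pipeline, ingest the test document, then query it.
    curl -X POST "http://${HOST_IP}:16010/v1/settings/pipelines" \
        -H 'Content-Type: application/json' -d @configs/test_pipeline_local_llm.json
    curl -X POST "http://${HOST_IP}:16010/v1/data" \
        -H 'Content-Type: application/json' -d @configs/test_data.json
    curl -X POST "http://${HOST_IP}:16010/v1/chatqna" \
        -H 'Content-Type: application/json' -d '{"messages":"What is the test id?"}'
    # The answer is expected to contain the id 1234567890 embedded in test_data.json.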
diff --git a/EdgeCraftRAG/tests/test_compose_on_arc.sh b/EdgeCraftRAG/tests/test_compose_on_arc.sh
new file mode 100755
index 000000000..cd38fbbdb
--- /dev/null
+++ b/EdgeCraftRAG/tests/test_compose_on_arc.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -e
+source ./common.sh
+
+IMAGE_REPO=${IMAGE_REPO:-"opea"}
+IMAGE_TAG=${IMAGE_TAG:-"latest"}
+echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
+echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
+export REGISTRY=${IMAGE_REPO}
+export TAG=${IMAGE_TAG}
+
+WORKPATH=$(dirname "$PWD")
+LOG_PATH="$WORKPATH/tests"
+
+ip_address=$(hostname -I | awk '{print $1}')
+HOST_IP=$ip_address
+
+COMPOSE_FILE="compose.yaml"
+EC_RAG_SERVICE_PORT=16010
+#MODEL_PATH="$WORKPATH/models"
+MODEL_PATH="/home/media/models"
+HF_ENDPOINT=https://hf-mirror.com
+
+
+function build_docker_images() {
+    cd $WORKPATH/docker_image_build
+    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
+    service_list="server ui ecrag"
+    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
+
+    docker images && sleep 1s
+}
+
+function start_services() {
+    export MODEL_PATH=${MODEL_PATH}
+    export HOST_IP=${HOST_IP}
+    export LLM_MODEL=${LLM_MODEL}
+    export HF_ENDPOINT=${HF_ENDPOINT}
+    export vLLM_ENDPOINT=${vLLM_ENDPOINT}
+    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export no_proxy="localhost, 127.0.0.1, 192.168.1.1"
+
+    cd $WORKPATH/docker_compose/intel/gpu/arc
+
+    # Start Docker Containers
+    docker compose -f $COMPOSE_FILE up -d > ${LOG_PATH}/start_services_with_compose.log
+    sleep 20
+}
+
+function validate_rag() {
+    cd $WORKPATH/tests
+
+    # setup pipeline
+    validate_services \
+        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/settings/pipelines" \
+        "active" \
+        "pipeline" \
+        "edgecraftrag-server" \
+        '@configs/test_pipeline_local_llm.json'
+
+    # add data
+    validate_services \
+        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/data" \
+        "Done" \
+        "data" \
+        "edgecraftrag-server" \
+        '@configs/test_data.json'
+
+    # query (local LLM pipeline, so failure logs come from the pipeline server)
+    validate_services \
+        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/chatqna" \
+        "1234567890" \
+        "query" \
+        "edgecraftrag-server" \
+        '{"messages":"What is the test id?"}'
+}
+
+function validate_megaservice() {
+    # Curl the Mega Service
+    validate_services \
+        "${HOST_IP}:16011/v1/chatqna" \
+        "1234567890" \
+        "query" \
+        "edgecraftrag" \
+        '{"messages":"What is the test id?"}'
+}
+
+function stop_docker() {
+    cd $WORKPATH/docker_compose/intel/gpu/arc
+    docker compose -f $COMPOSE_FILE down
+}
+
+
+function main() {
+    mkdir -p $LOG_PATH
+
+    stop_docker
+    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+    start_services
+    echo "EC_RAG service started" && sleep 1s
+
+    validate_rag
+    validate_megaservice
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main
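
To run this test outside CI, invoking it from the tests directory should be enough; with IMAGE_REPO left at its default of opea the images are rebuilt first, while any other registry value skips the build and uses prebuilt images:

    cd EdgeCraftRAG/tests
    IMAGE_REPO=opea IMAGE_TAG=latest ./test_compose_on_arc.sh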
diff --git a/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh b/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh
new file mode 100755
index 000000000..1d65057be
--- /dev/null
+++ b/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh
@@ -0,0 +1,166 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -e
+source ./common.sh
+
+IMAGE_REPO=${IMAGE_REPO:-"opea"}
+IMAGE_TAG=${IMAGE_TAG:-"latest"}
+echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
+echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
+export REGISTRY=${IMAGE_REPO}
+export TAG=${IMAGE_TAG}
+
+WORKPATH=$(dirname "$PWD")
+LOG_PATH="$WORKPATH/tests"
+
+ip_address=$(hostname -I | awk '{print $1}')
+HOST_IP=$ip_address
+
+COMPOSE_FILE="compose_vllm.yaml"
+EC_RAG_SERVICE_PORT=16010
+#MODEL_PATH="$WORKPATH/models"
+MODEL_PATH="/home/media/models"
+#HF_ENDPOINT=https://hf-mirror.com
+LLM_MODEL="Qwen/Qwen2-7B-Instruct"
+VLLM_SERVICE_PORT=8008
+vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}"
+
+
+function build_docker_images() {
+    cd $WORKPATH/docker_image_build
+    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
+    service_list="server ui ecrag"
+    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
+
+    echo "Build vllm_openvino image from GenAIComps..."
+    cd $WORKPATH && git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}"
+    cd comps/llms/text-generation/vllm/langchain/dependency
+    bash ./build_docker_vllm_openvino.sh gpu
+
+    docker images && sleep 1s
+}
+
+function start_services() {
+    export MODEL_PATH=${MODEL_PATH}
+    export HOST_IP=${HOST_IP}
+    export LLM_MODEL=${LLM_MODEL}
+    export HF_ENDPOINT=${HF_ENDPOINT}
+    export vLLM_ENDPOINT=${vLLM_ENDPOINT}
+    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export no_proxy="localhost, 127.0.0.1, 192.168.1.1"
+
+    cd $WORKPATH/docker_compose/intel/gpu/arc
+
+    # Start Docker Containers
+    docker compose -f $COMPOSE_FILE up -d > ${LOG_PATH}/start_services_with_compose.log
+    n=0
+    until [[ "$n" -ge 100 ]]; do
+        docker logs vllm-openvino-server > ${LOG_PATH}/vllm_service_start.log
+        if grep -q "metrics.py" ${LOG_PATH}/vllm_service_start.log; then
+            break
+        fi
+        sleep 5s
+        n=$((n+1))
+    done
+}
+
+function validate_services() {
+    local URL="$1"
+    local EXPECTED_RESULT="$2"
+    local SERVICE_NAME="$3"
+    local DOCKER_NAME="$4"
+    local INPUT_DATA="$5"
+
+    echo "[ $SERVICE_NAME ] Validating $SERVICE_NAME service..."
+    local RESPONSE=$(curl -s -w "%{http_code}" -o ${LOG_PATH}/${SERVICE_NAME}.log -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
+    while [ ! -f ${LOG_PATH}/${SERVICE_NAME}.log ]; do
+        sleep 1
+    done
+    local HTTP_STATUS="${RESPONSE: -3}"
+    local CONTENT=$(cat ${LOG_PATH}/${SERVICE_NAME}.log)
+
+    if [ "$HTTP_STATUS" -eq 200 ]; then
+        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
+
+
+        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
+            echo "[ $SERVICE_NAME ] Content is as expected."
+        else
+            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
+            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
+            exit 1
+        fi
+    else
+        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
+        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
+        exit 1
+    fi
+    sleep 1s
+}
+
+function validate_rag() {
+    cd $WORKPATH/tests
+
+    # setup pipeline
+    validate_services \
+        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/settings/pipelines" \
+        "active" \
+        "pipeline" \
+        "edgecraftrag-server" \
+        '@configs/test_pipeline_vllm.json'
+
+    # add data
+    validate_services \
+        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/data" \
+        "Done" \
+        "data" \
+        "edgecraftrag-server" \
+        '@configs/test_data.json'
+
+    # query
+    validate_services \
+        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/chatqna" \
+        "1234567890" \
+        "query" \
+        "vllm-openvino-server" \
+        '{"messages":"What is the test id?"}'
+}
+
+function validate_megaservice() {
+    # Curl the Mega Service
+    validate_services \
+        "${HOST_IP}:16011/v1/chatqna" \
+        "1234567890" \
+        "query" \
+        "vllm-openvino-server" \
+        '{"messages":"What is the test id?"}'
+}
+
+function stop_docker() {
+    cd $WORKPATH/docker_compose/intel/gpu/arc
+    docker compose -f $COMPOSE_FILE down
+}
+
+
+function main() {
+    mkdir -p "$LOG_PATH"
+
+    stop_docker
+    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+    start_time=$(date +%s)
+    start_services
+    end_time=$(date +%s)
+    duration=$((end_time-start_time))
+    echo "EC_RAG service start duration is $duration s" && sleep 1s
+
+    validate_rag
+    validate_megaservice
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main
diff --git a/EdgeCraftRAG/tests/test_pipeline_local_llm.json b/EdgeCraftRAG/tests/test_pipeline_local_llm.json
index 13485cebc..261459e83 100644
--- a/EdgeCraftRAG/tests/test_pipeline_local_llm.json
+++ b/EdgeCraftRAG/tests/test_pipeline_local_llm.json
@@ -9,7 +9,9 @@
     "indexer_type": "faiss_vector",
     "embedding_model": {
       "model_id": "BAAI/bge-small-en-v1.5",
-      "device": "auto"
+      "model_path": "./models/BAAI/bge-small-en-v1.5",
+      "device": "auto",
+      "weight": "INT4"
     }
   },
   "retriever": {
@@ -22,14 +24,18 @@
       "top_n": 2,
       "reranker_model": {
         "model_id": "BAAI/bge-reranker-large",
-        "device": "auto"
+        "model_path": "./models/BAAI/bge-reranker-large",
+        "device": "auto",
+        "weight": "INT4"
       }
     }
   ],
   "generator": {
     "model": {
       "model_id": "Qwen/Qwen2-7B-Instruct",
-      "device": "cpu"
+      "model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights",
+      "device": "auto",
+      "weight": "INT4"
     },
     "prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
     "inference_type": "local"
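
As a quick sanity check before or during an E2E run, the vLLM container can also be probed directly through its OpenAI-compatible API; this sketch assumes the default 8008:80 port mapping from compose_vllm.yaml and the megaservice port 16011 used above:

    # List the served model; the response should mention Qwen/Qwen2-7B-Instruct.
    curl "http://${HOST_IP}:8008/v1/models"
    # The same megaservice query that validate_megaservice issues:
    curl -X POST "http://${HOST_IP}:16011/v1/chatqna" \
        -H 'Content-Type: application/json' -d '{"messages":"What is the test id?"}'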