From e15610101cbfafd17796e1fd210de3837ef480c5 Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Thu, 8 Aug 2024 17:14:46 +0800 Subject: [PATCH 01/15] Support llava-next using TGI (#423) Signed-off-by: lvliang-intel --- comps/cores/mega/gateway.py | 44 ++++++++++++++++++++++++ comps/lvms/Dockerfile_tgi | 19 +++++++++++ comps/lvms/lvm_tgi.py | 47 +++++++++++++++++++++++++ comps/lvms/requirements.txt | 1 + tests/test_lvms_tgi_llava_next.sh | 57 +++++++++++++++++++++++++++++++ 5 files changed, 168 insertions(+) create mode 100644 comps/lvms/Dockerfile_tgi create mode 100644 comps/lvms/lvm_tgi.py create mode 100644 tests/test_lvms_tgi_llava_next.sh diff --git a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index 862205414..dd05453fc 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -439,3 +439,47 @@ async def handle_request(self, request: Request): ) ) return ChatCompletionResponse(model="faqgen", choices=choices, usage=usage) + + +class VisualQnAGateway(Gateway): + def __init__(self, megaservice, host="0.0.0.0", port=8888): + super().__init__( + megaservice, host, port, str(MegaServiceEndpoint.VISUAL_QNA), ChatCompletionRequest, ChatCompletionResponse + ) + + async def handle_request(self, request: Request): + data = await request.json() + stream_opt = data.get("stream", True) + chat_request = ChatCompletionRequest.parse_obj(data) + prompt = self._handle_message(chat_request.messages) + parameters = LLMParams( + max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, + top_k=chat_request.top_k if chat_request.top_k else 10, + top_p=chat_request.top_p if chat_request.top_p else 0.95, + temperature=chat_request.temperature if chat_request.temperature else 0.01, + repetition_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 1.03, + streaming=stream_opt, + ) + result_dict, runtime_graph = await self.megaservice.schedule( + initial_inputs={"query": prompt}, llm_parameters=parameters + ) + for node, response in result_dict.items(): + # Here it suppose the last microservice in the megaservice is LVM. 
+ if ( + isinstance(response, StreamingResponse) + and node == list(self.megaservice.services.keys())[-1] + and self.megaservice.services[node].service_type == ServiceType.LVM + ): + return response + last_node = runtime_graph.all_leaves()[-1] + response = result_dict[last_node]["text"] + choices = [] + usage = UsageInfo() + choices.append( + ChatCompletionResponseChoice( + index=0, + message=ChatMessage(role="assistant", content=response), + finish_reason="stop", + ) + ) + return ChatCompletionResponse(model="visualqna", choices=choices, usage=usage) diff --git a/comps/lvms/Dockerfile_tgi b/comps/lvms/Dockerfile_tgi new file mode 100644 index 000000000..c6412ac5e --- /dev/null +++ b/comps/lvms/Dockerfile_tgi @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +# Set environment variables +ENV LANG=en_US.UTF-8 + +COPY comps /home/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/comps/lvms/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home + +WORKDIR /home/comps/lvms + +ENTRYPOINT ["python", "lvm_tgi.py"] + diff --git a/comps/lvms/lvm_tgi.py b/comps/lvms/lvm_tgi.py new file mode 100644 index 000000000..7a51b562c --- /dev/null +++ b/comps/lvms/lvm_tgi.py @@ -0,0 +1,47 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time + +from huggingface_hub import InferenceClient + +from comps import ( + LVMDoc, + ServiceType, + TextDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) + + +@register_microservice( + name="opea_service@lvm_tgi", + service_type=ServiceType.LVM, + endpoint="/v1/lvm", + host="0.0.0.0", + port=9399, + input_datatype=LVMDoc, + output_datatype=TextDoc, +) +@register_statistics(names=["opea_service@lvm_tgi"]) +async def lvm(request: LVMDoc): + start = time.time() + img_b64_str = request.image + prompt = request.prompt + max_new_tokens = request.max_new_tokens + + image = f"data:image/png;base64,{img_b64_str}" + image_prompt = f"![]({image})\nUSER: {prompt}\nASSISTANT:" + generated_str = lvm_client.text_generation(image_prompt, max_new_tokens=max_new_tokens) + statistics_dict["opea_service@lvm_tgi"].append_latency(time.time() - start, None) + return TextDoc(text=generated_str) + + +if __name__ == "__main__": + lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399") + lvm_client = InferenceClient(lvm_endpoint) + print("[LVM] LVM initialized.") + opea_microservices["opea_service@lvm_tgi"].start() diff --git a/comps/lvms/requirements.txt b/comps/lvms/requirements.txt index 3651937bd..556dfb0c1 100644 --- a/comps/lvms/requirements.txt +++ b/comps/lvms/requirements.txt @@ -1,6 +1,7 @@ datasets docarray[full] fastapi +huggingface_hub opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/tests/test_lvms_tgi_llava_next.sh b/tests/test_lvms_tgi_llava_next.sh new file mode 100644 index 000000000..970e3004f --- /dev/null +++ b/tests/test_lvms_tgi_llava_next.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + git clone https://github.com/yuanwu2017/tgi-gaudi.git && cd tgi-gaudi && git checkout v2.0.4 + docker build -t opea/llava-tgi:latest . + cd .. + docker build --no-cache -t opea/lvm-tgi:latest -f comps/lvms/Dockerfile_tgi . 
+} + +function start_service() { + unset http_proxy + model="llava-hf/llava-v1.6-mistral-7b-hf" + docker run -d --name="test-comps-lvm-llava-tgi" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 8399:80 --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e SKIP_TOKENIZER_IN_TGI=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host opea/llava-tgi:latest --model-id $model --max-input-tokens 4096 --max-total-tokens 8192 + docker run -d --name="test-comps-lvm-tgi" -e LVM_ENDPOINT=http://$ip_address:8399 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9399:9399 --ipc=host opea/lvm-tgi:latest + sleep 3m +} + +function validate_microservice() { + result=$(http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') + if [[ $result == *"yellow"* ]]; then + echo "Result correct." + else + echo "Result wrong." + exit 1 + fi + +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-lvm*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main From f8d45e53c6ac87815c6580c0b45d2b96a8c2706a Mon Sep 17 00:00:00 2001 From: lkk <33276950+lkk12014402@users.noreply.github.com> Date: Thu, 8 Aug 2024 20:45:15 +0800 Subject: [PATCH 02/15] unify port in one microservice. (#424) * unify port in one microservice. * update ut. --------- Co-authored-by: sdp --- comps/cores/mega/micro_service.py | 35 ++++++++++--------- comps/dataprep/redis/README.md | 12 +++---- .../redis/langchain/prepare_doc_redis.py | 6 ++-- tests/test_dataprep_redis_langchain.sh | 10 ++---- 4 files changed, 29 insertions(+), 34 deletions(-) diff --git a/comps/cores/mega/micro_service.py b/comps/cores/mega/micro_service.py index e83a2836b..e1276716c 100644 --- a/comps/cores/mega/micro_service.py +++ b/comps/cores/mega/micro_service.py @@ -156,23 +156,24 @@ def register_microservice( provider_endpoint: Optional[str] = None, ): def decorator(func): - micro_service = MicroService( - name=name, - service_role=service_role, - service_type=service_type, - protocol=protocol, - host=host, - port=port, - ssl_keyfile=ssl_keyfile, - ssl_certfile=ssl_certfile, - endpoint=endpoint, - input_datatype=input_datatype, - output_datatype=output_datatype, - provider=provider, - provider_endpoint=provider_endpoint, - ) - micro_service.app.router.add_api_route(endpoint, func, methods=["POST"]) - opea_microservices[name] = micro_service + if name not in opea_microservices: + micro_service = MicroService( + name=name, + service_role=service_role, + service_type=service_type, + protocol=protocol, + host=host, + port=port, + ssl_keyfile=ssl_keyfile, + ssl_certfile=ssl_certfile, + endpoint=endpoint, + input_datatype=input_datatype, + output_datatype=output_datatype, + provider=provider, + provider_endpoint=provider_endpoint, + ) + opea_microservices[name] = micro_service + opea_microservices[name].app.router.add_api_route(endpoint, func, methods=["POST"]) return func return decorator diff --git a/comps/dataprep/redis/README.md b/comps/dataprep/redis/README.md index 8d1d29a97..1afb6e8e0 100644 --- a/comps/dataprep/redis/README.md +++ b/comps/dataprep/redis/README.md @@ -136,13 +136,13 @@ docker build -t 
opea/dataprep-on-ray-redis:latest --build-arg https_proxy=$https - option 1: Start single-process version (for 1-10 files processing) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 -p 6008:6008 -p 6009:6009 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-redis:latest +docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-redis:latest ``` - option 2: Start multi-process version (for >10 files processing) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 -p 6008:6008 -p 6009:6009 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:latest +docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:latest ``` ## 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) @@ -250,7 +250,7 @@ To get uploaded file structures, use the following command: ```bash curl -X POST \ -H "Content-Type: application/json" \ - http://localhost:6008/v1/dataprep/get_file + http://localhost:6007/v1/dataprep/get_file ``` Then you will get the response JSON like this: @@ -283,17 +283,17 @@ The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API. 
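For reference, the unified-port file-management flow documented in this README can also be exercised from Python. The sketch below is illustrative only and is not part of the patch: it assumes the dataprep service is reachable at `localhost:6007`, and the endpoint paths, the sample file name `uploaded_file_1.txt`, and the special value `all` mirror the curl examples in this README; everything else is a hypothetical client.

```python
import requests

# Assumption: dataprep microservice exposed locally on the unified port 6007.
BASE = "http://localhost:6007/v1/dataprep"

# List uploaded files/links; each entry's "id" is what delete_file expects as file_path.
files = requests.post(f"{BASE}/get_file", headers={"Content-Type": "application/json"}).json()
print(files)

# Delete one uploaded file (name taken from the README example), then clear everything.
for target in ["uploaded_file_1.txt", "all"]:
    resp = requests.post(f"{BASE}/delete_file", json={"file_path": target})
    print(target, resp.status_code, resp.text)
```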
curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "https://www.ces.tech/.txt"}' \ - http://10.165.57.68:6009/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete_file # delete file curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "uploaded_file_1.txt"}' \ - http://10.165.57.68:6009/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete_file # delete all files and links curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "all"}' \ - http://10.165.57.68:6009/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete_file ``` diff --git a/comps/dataprep/redis/langchain/prepare_doc_redis.py b/comps/dataprep/redis/langchain/prepare_doc_redis.py index aff3da605..13af980ab 100644 --- a/comps/dataprep/redis/langchain/prepare_doc_redis.py +++ b/comps/dataprep/redis/langchain/prepare_doc_redis.py @@ -261,7 +261,7 @@ async def ingest_documents( @register_microservice( - name="opea_service@prepare_doc_redis_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6008 + name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 ) @traceable(run_type="tool") async def rag_get_file_structure(): @@ -276,7 +276,7 @@ async def rag_get_file_structure(): @register_microservice( - name="opea_service@prepare_doc_redis_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6009 + name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 ) @traceable(run_type="tool") async def delete_single_file(file_path: str = Body(..., embed=True)): @@ -334,5 +334,3 @@ async def delete_single_file(file_path: str = Body(..., embed=True)): if __name__ == "__main__": create_upload_folder(upload_folder) opea_microservices["opea_service@prepare_doc_redis"].start() - opea_microservices["opea_service@prepare_doc_redis_file"].start() - opea_microservices["opea_service@prepare_doc_redis_del"].start() diff --git a/tests/test_dataprep_redis_langchain.sh b/tests/test_dataprep_redis_langchain.sh index 760e7aa3d..1a4b06ef7 100644 --- a/tests/test_dataprep_redis_langchain.sh +++ b/tests/test_dataprep_redis_langchain.sh @@ -18,10 +18,8 @@ function start_service() { REDIS_PORT=6380 docker run -d --name="test-comps-dataprep-redis-langchain" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $REDIS_PORT:6379 -p 8002:8001 --ipc=host redis/redis-stack:7.2.0-v9 dataprep_service_port=5013 - dataprep_file_service_port=5016 - dataprep_del_service_port=5020 REDIS_URL="redis://${ip_address}:${REDIS_PORT}" - docker run -d --name="test-comps-dataprep-redis-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e REDIS_HOST=$ip_address -e REDIS_PORT=$REDIS_PORT -p ${dataprep_service_port}:6007 -p ${dataprep_file_service_port}:6008 -p ${dataprep_del_service_port}:6009 --ipc=host opea/dataprep-redis:comps + docker run -d --name="test-comps-dataprep-redis-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e REDIS_HOST=$ip_address -e REDIS_PORT=$REDIS_PORT -p ${dataprep_service_port}:6007 --ipc=host opea/dataprep-redis:comps sleep 1m } @@ -72,8 +70,7 @@ function validate_microservice() { fi # test /v1/dataprep/get_file - dataprep_file_service_port=5016 - URL="http://${ip_address}:$dataprep_file_service_port/v1/dataprep/get_file" + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_file" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST 
"$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -94,8 +91,7 @@ function validate_microservice() { fi # test /v1/dataprep/delete_file - dataprep_del_service_port=5020 - URL="http://${ip_address}:$dataprep_del_service_port/v1/dataprep/delete_file" + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete_file" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') From c5a034403e66bc6d508012e84fe41f37c0c41802 Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Fri, 9 Aug 2024 09:51:45 +0800 Subject: [PATCH 03/15] Support streaming output for LVM microservice (#430) * Support llava-next using TGI Signed-off-by: lvliang-intel * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update test script Signed-off-by: lvliang-intel * fix ci issues Signed-off-by: lvliang-intel * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Support streeaming output Signed-off-by: lvliang-intel * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: lvliang-intel Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- comps/__init__.py | 1 + comps/cores/mega/gateway.py | 44 +++++++++++++++++++++------ comps/cores/mega/orchestrator.py | 5 ++- comps/cores/proto/docarray.py | 6 ++++ comps/lvms/lvm_tgi.py | 52 ++++++++++++++++++++++++++++---- 5 files changed, 92 insertions(+), 16 deletions(-) diff --git a/comps/__init__.py b/comps/__init__.py index b483f46a7..cb7ed7a28 100644 --- a/comps/__init__.py +++ b/comps/__init__.py @@ -37,6 +37,7 @@ SearchQnAGateway, AudioQnAGateway, FaqGenGateway, + VisualQnAGateway, ) # Telemetry diff --git a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index dd05453fc..324f7081e 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -1,6 +1,9 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import base64 + +import requests from fastapi import Request from fastapi.responses import StreamingResponse @@ -75,6 +78,8 @@ def _handle_message(self, messages): prompt = messages else: messages_dict = {} + system_prompt = "" + prompt = "" for message in messages: msg_role = message["role"] if msg_role == "system": @@ -84,20 +89,41 @@ def _handle_message(self, messages): text = "" text_list = [item["text"] for item in message["content"] if item["type"] == "text"] text += "\n".join(text_list) - messages_dict[msg_role] = text + image_list = [ + item["image_url"]["url"] for item in message["content"] if item["type"] == "image_url" + ] + if image_list: + messages_dict[msg_role] = (text, image_list) + else: + messages_dict[msg_role] = text else: messages_dict[msg_role] = message["content"] elif msg_role == "assistant": messages_dict[msg_role] = message["content"] else: raise ValueError(f"Unknown role: {msg_role}") - prompt = system_prompt + "\n" + if system_prompt: + prompt = system_prompt + "\n" + images = [] for role, message in messages_dict.items(): - if message: - prompt += role + ": " + message + "\n" + if isinstance(message, tuple): + text, image_list = message 
+ if text: + prompt += role + ": " + text + "\n" + else: + prompt += role + ":" + for img in image_list: + response = requests.get(img) + images.append(base64.b64encode(response.content).decode("utf-8")) else: - prompt += role + ":" - return prompt + if message: + prompt += role + ": " + message + "\n" + else: + prompt += role + ":" + if images: + return prompt, images + else: + return prompt class ChatQnAGateway(Gateway): @@ -449,9 +475,9 @@ def __init__(self, megaservice, host="0.0.0.0", port=8888): async def handle_request(self, request: Request): data = await request.json() - stream_opt = data.get("stream", True) + stream_opt = data.get("stream", False) chat_request = ChatCompletionRequest.parse_obj(data) - prompt = self._handle_message(chat_request.messages) + prompt, images = self._handle_message(chat_request.messages) parameters = LLMParams( max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, top_k=chat_request.top_k if chat_request.top_k else 10, @@ -461,7 +487,7 @@ async def handle_request(self, request: Request): streaming=stream_opt, ) result_dict, runtime_graph = await self.megaservice.schedule( - initial_inputs={"query": prompt}, llm_parameters=parameters + initial_inputs={"prompt": prompt, "image": images[0]}, llm_parameters=parameters ) for node, response in result_dict.items(): # Here it suppose the last microservice in the megaservice is LVM. diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py index d4f3ac9b7..616af41c8 100644 --- a/comps/cores/mega/orchestrator.py +++ b/comps/cores/mega/orchestrator.py @@ -117,7 +117,10 @@ async def execute( if inputs.get(field) != value: inputs[field] = value - if self.services[cur_node].service_type == ServiceType.LLM and llm_parameters.streaming: + if ( + self.services[cur_node].service_type == ServiceType.LLM + or self.services[cur_node].service_type == ServiceType.LVM + ) and llm_parameters.streaming: # Still leave to sync requests.post for StreamingResponse response = requests.post( url=endpoint, data=json.dumps(inputs), proxies={"http": None}, stream=True, timeout=1000 diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py index 6a4e55d4c..9e07d618d 100644 --- a/comps/cores/proto/docarray.py +++ b/comps/cores/proto/docarray.py @@ -130,3 +130,9 @@ class LVMDoc(BaseDoc): image: str prompt: str max_new_tokens: conint(ge=0, le=1024) = 512 + top_k: int = 10 + top_p: float = 0.95 + typical_p: float = 0.95 + temperature: float = 0.01 + repetition_penalty: float = 1.03 + streaming: bool = False diff --git a/comps/lvms/lvm_tgi.py b/comps/lvms/lvm_tgi.py index 7a51b562c..b7383fa0c 100644 --- a/comps/lvms/lvm_tgi.py +++ b/comps/lvms/lvm_tgi.py @@ -4,7 +4,8 @@ import os import time -from huggingface_hub import InferenceClient +from fastapi.responses import StreamingResponse +from huggingface_hub import AsyncInferenceClient from comps import ( LVMDoc, @@ -29,19 +30,58 @@ @register_statistics(names=["opea_service@lvm_tgi"]) async def lvm(request: LVMDoc): start = time.time() + stream_gen_time = [] img_b64_str = request.image prompt = request.prompt max_new_tokens = request.max_new_tokens + streaming = request.streaming + repetition_penalty = request.repetition_penalty + temperature = request.temperature + top_k = request.top_k + top_p = request.top_p image = f"data:image/png;base64,{img_b64_str}" - image_prompt = f"![]({image})\nUSER: {prompt}\nASSISTANT:" - generated_str = lvm_client.text_generation(image_prompt, max_new_tokens=max_new_tokens) - 
statistics_dict["opea_service@lvm_tgi"].append_latency(time.time() - start, None) - return TextDoc(text=generated_str) + image_prompt = f"![]({image})\n{prompt}\nASSISTANT:" + + if streaming: + + async def stream_generator(): + chat_response = "" + text_generation = await lvm_client.text_generation( + prompt=prompt, + stream=streaming, + max_new_tokens=max_new_tokens, + repetition_penalty=repetition_penalty, + temperature=temperature, + top_k=top_k, + top_p=top_p, + ) + async for text in text_generation: + stream_gen_time.append(time.time() - start) + chat_response += text + chunk_repr = repr(text.encode("utf-8")) + print(f"[llm - chat_stream] chunk:{chunk_repr}") + yield f"data: {chunk_repr}\n\n" + print(f"[llm - chat_stream] stream response: {chat_response}") + statistics_dict["opea_service@lvm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0]) + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + generated_str = await lvm_client.text_generation( + image_prompt, + max_new_tokens=max_new_tokens, + repetition_penalty=repetition_penalty, + temperature=temperature, + top_k=top_k, + top_p=top_p, + ) + statistics_dict["opea_service@lvm_tgi"].append_latency(time.time() - start, None) + return TextDoc(text=generated_str) if __name__ == "__main__": lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399") - lvm_client = InferenceClient(lvm_endpoint) + lvm_client = AsyncInferenceClient(lvm_endpoint) print("[LVM] LVM initialized.") opea_microservices["opea_service@lvm_tgi"].start() From e3f29c3f16924a38864d3d7ebd234be5e5bd4060 Mon Sep 17 00:00:00 2001 From: Jincheng Miao Date: Fri, 9 Aug 2024 10:34:06 +0800 Subject: [PATCH 04/15] normalize embedding and reranking endpoint docker image name (#427) Signed-off-by: Jincheng Miao --- comps/embeddings/langchain-mosec/README.md | 4 ++-- comps/reranks/langchain-mosec/README.md | 4 ++-- tests/test_embeddings_langchain-mosec.sh | 4 ++-- tests/test_reranks_langchain-mosec.sh | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/comps/embeddings/langchain-mosec/README.md b/comps/embeddings/langchain-mosec/README.md index 788e88dd4..624fcf6a2 100644 --- a/comps/embeddings/langchain-mosec/README.md +++ b/comps/embeddings/langchain-mosec/README.md @@ -1,7 +1,7 @@ # build Mosec endpoint docker image ``` -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t langchain-mosec:latest -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . +docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/embedding-langchain-mosec-endpoint:latest -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . 
``` # build embedding microservice docker image @@ -13,7 +13,7 @@ docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_p # launch Mosec endpoint docker container ``` -docker run -d --name="embedding-langchain-mosec-endpoint" -p 6001:8000 langchain-mosec:latest +docker run -d --name="embedding-langchain-mosec-endpoint" -p 6001:8000 opea/embedding-langchain-mosec-endpoint:latest ``` # launch embedding microservice docker container diff --git a/comps/reranks/langchain-mosec/README.md b/comps/reranks/langchain-mosec/README.md index d67cf78b0..59592a4ba 100644 --- a/comps/reranks/langchain-mosec/README.md +++ b/comps/reranks/langchain-mosec/README.md @@ -1,7 +1,7 @@ # build reranking Mosec endpoint docker image ``` -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t reranking-langchain-mosec:latest -f comps/reranks/langchain-mosec/mosec-docker/Dockerfile . +docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/reranking-langchain-mosec-endpoint:latest -f comps/reranks/langchain-mosec/mosec-docker/Dockerfile . ``` # build reranking microservice docker image @@ -13,7 +13,7 @@ docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_p # launch Mosec endpoint docker container ``` -docker run -d --name="reranking-langchain-mosec-endpoint" -p 6001:8000 reranking-langchain-mosec:latest +docker run -d --name="reranking-langchain-mosec-endpoint" -p 6001:8000 opea/reranking-langchain-mosec-endpoint:latest ``` # launch embedding microservice docker container diff --git a/tests/test_embeddings_langchain-mosec.sh b/tests/test_embeddings_langchain-mosec.sh index 1381a6dcb..198906346 100644 --- a/tests/test_embeddings_langchain-mosec.sh +++ b/tests/test_embeddings_langchain-mosec.sh @@ -10,7 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_mosec_docker_images() { cd $WORKPATH echo $(pwd) - docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t langchain-mosec:comps -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . + docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/embedding-langchain-mosec-endpoint:comps -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . 
} function build_docker_images() { @@ -23,7 +23,7 @@ function start_service() { mosec_endpoint=5001 model="BAAI/bge-large-en-v1.5" unset http_proxy - docker run -d --name="test-comps-embedding-langchain-mosec-endpoint" -p $mosec_endpoint:8000 langchain-mosec:comps + docker run -d --name="test-comps-embedding-langchain-mosec-endpoint" -p $mosec_endpoint:8000 opea/embedding-langchain-mosec-endpoint:comps export MOSEC_EMBEDDING_ENDPOINT="http://${ip_address}:${mosec_endpoint}" mosec_service_port=5002 docker run -d --name="test-comps-embedding-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${mosec_service_port}:6000 --ipc=host -e MOSEC_EMBEDDING_ENDPOINT=$MOSEC_EMBEDDING_ENDPOINT opea/embedding-langchain-mosec:comps diff --git a/tests/test_reranks_langchain-mosec.sh b/tests/test_reranks_langchain-mosec.sh index 899db5122..8d25d5fed 100644 --- a/tests/test_reranks_langchain-mosec.sh +++ b/tests/test_reranks_langchain-mosec.sh @@ -10,7 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_mosec_docker_images() { cd $WORKPATH echo $(pwd) - docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t reranking-langchain-mosec:comps -f comps/reranks/langchain-mosec/mosec-docker/Dockerfile . + docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/reranking-langchain-mosec-endpoint:comps -f comps/reranks/langchain-mosec/mosec-docker/Dockerfile . } function build_docker_images() { @@ -23,7 +23,7 @@ function start_service() { mosec_endpoint=5006 model="BAAI/bge-reranker-large" unset http_proxy - docker run -d --name="test-comps-reranking-langchain-mosec-endpoint" -p $mosec_endpoint:8000 reranking-langchain-mosec:comps + docker run -d --name="test-comps-reranking-langchain-mosec-endpoint" -p $mosec_endpoint:8000 opea/reranking-langchain-mosec-endpoint:comps export MOSEC_RERANKING_ENDPOINT="http://${ip_address}:${mosec_endpoint}" mosec_service_port=5007 docker run -d --name="test-comps-reranking-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${mosec_service_port}:8000 --ipc=host -e MOSEC_RERANKING_ENDPOINT=$MOSEC_RERANKING_ENDPOINT opea/reranking-langchain-mosec:comps From cafcf1bf7fd3c9c8ee529cb2772612b5dc3a16dc Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Fri, 9 Aug 2024 13:06:47 +0800 Subject: [PATCH 05/15] add secrets for test (#436) Signed-off-by: chensuyue --- .github/workflows/microservice-test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/microservice-test.yml b/.github/workflows/microservice-test.yml index d8000b93d..e40745f0b 100644 --- a/.github/workflows/microservice-test.yml +++ b/.github/workflows/microservice-test.yml @@ -43,6 +43,7 @@ jobs: HF_TOKEN: ${{ secrets.HF_TOKEN }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }} + PINECONE_KEY: ${{ secrets.PINECONE_KEY }} service_path: ${{ matrix.service }} hardware: ${{ matrix.hardware }} run: | From 8f0f2b040fb5868f147dc6a5ebde668d218ec7a5 Mon Sep 17 00:00:00 2001 From: "Tian, Feng" Date: Fri, 9 Aug 2024 13:25:24 +0800 Subject: [PATCH 06/15] Update dataprep default mosec embedding model in config.py (#435) Signed-off-by: Tian, Feng --- comps/dataprep/milvus/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comps/dataprep/milvus/config.py b/comps/dataprep/milvus/config.py index 06aa60975..0f8c57139 100644 --- a/comps/dataprep/milvus/config.py +++ b/comps/dataprep/milvus/config.py 
@@ -12,7 +12,7 @@ MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag_milvus") -MOSEC_EMBEDDING_MODEL = "/root/bce-embedding-base_v1" +MOSEC_EMBEDDING_MODEL = os.environ.get("MOSEC_EMBEDDING_MODEL", "/root/bce-embedding-base_v1") MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "") os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT os.environ["OPENAI_API_KEY"] = "Dummy key" From 0614fc2bbf1fbf4367e257decd8dea5bcc0ef026 Mon Sep 17 00:00:00 2001 From: XinyaoWa Date: Sat, 10 Aug 2024 12:58:48 +0800 Subject: [PATCH 07/15] Vllm and vllm-ray bug fix (add opea for vllm, update setuptools version) (#437) * add opea/ for vllm and vllm-ray docker Signed-off-by: Xinyao Wang * modify setuptools version Signed-off-by: Xinyao Wang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix ut Signed-off-by: Xinyao Wang * refine readme Signed-off-by: Xinyao Wang --------- Signed-off-by: Xinyao Wang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- comps/llms/README.md | 2 +- comps/llms/text-generation/vllm-ray/build_docker_vllmray.sh | 2 +- comps/llms/text-generation/vllm-ray/docker_compose_llm.yaml | 2 +- comps/llms/text-generation/vllm-ray/launch_vllmray.sh | 2 +- comps/llms/text-generation/vllm-ray/requirements.txt | 2 +- comps/llms/text-generation/vllm/README.md | 6 ++++++ comps/llms/text-generation/vllm/build_docker_vllm.sh | 4 ++-- comps/llms/text-generation/vllm/docker/Dockerfile.hpu | 2 +- comps/llms/text-generation/vllm/docker_compose_llm.yaml | 2 +- comps/llms/text-generation/vllm/launch_vllm_service.sh | 4 ++-- tests/test_llms_text-generation_vllm-ray.sh | 4 ++-- tests/test_llms_text-generation_vllm.sh | 4 ++-- 12 files changed, 21 insertions(+), 15 deletions(-) diff --git a/comps/llms/README.md b/comps/llms/README.md index 15c7c366c..584f2ba12 100644 --- a/comps/llms/README.md +++ b/comps/llms/README.md @@ -32,7 +32,7 @@ docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/h ```bash export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -docker run -it --name vllm_service -p 8008:80 -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -v ./data:/data vllm:cpu /bin/bash -c "cd / && export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --model ${your_hf_llm_model} --port 80" +docker run -it --name vllm_service -p 8008:80 -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -v ./data:/data opea/vllm:cpu /bin/bash -c "cd / && export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --model ${your_hf_llm_model} --port 80" ``` ## 1.2.3 Start Ray Service diff --git a/comps/llms/text-generation/vllm-ray/build_docker_vllmray.sh b/comps/llms/text-generation/vllm-ray/build_docker_vllmray.sh index 9e9fe3b71..8c4c13d3b 100755 --- a/comps/llms/text-generation/vllm-ray/build_docker_vllmray.sh +++ b/comps/llms/text-generation/vllm-ray/build_docker_vllmray.sh @@ -5,7 +5,7 @@ cd ../../../../ docker build \ -f comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray \ - -t vllm_ray:habana \ + -t opea/vllm_ray:habana \ --network=host \ --build-arg http_proxy=${http_proxy} \ --build-arg https_proxy=${https_proxy} \ diff --git a/comps/llms/text-generation/vllm-ray/docker_compose_llm.yaml b/comps/llms/text-generation/vllm-ray/docker_compose_llm.yaml index a3ae3ec04..76d3423f1 100644 --- a/comps/llms/text-generation/vllm-ray/docker_compose_llm.yaml +++ 
b/comps/llms/text-generation/vllm-ray/docker_compose_llm.yaml @@ -5,7 +5,7 @@ version: "3.8" services: vllm-ray-service: - image: vllm_ray:habana + image: opea/vllm_ray:habana container_name: vllm-ray-gaudi-server ports: - "8006:8000" diff --git a/comps/llms/text-generation/vllm-ray/launch_vllmray.sh b/comps/llms/text-generation/vllm-ray/launch_vllmray.sh index fcff33265..895e6a066 100755 --- a/comps/llms/text-generation/vllm-ray/launch_vllmray.sh +++ b/comps/llms/text-generation/vllm-ray/launch_vllmray.sh @@ -39,5 +39,5 @@ docker run -d --rm \ -e HTTPS_PROXY=$https_proxy \ -e HTTP_PROXY=$https_proxy \ -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ - vllm_ray:habana \ + opea/vllm_ray:habana \ /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $model_name --tensor_parallel_size $parallel_number --enforce_eager $enforce_eager" diff --git a/comps/llms/text-generation/vllm-ray/requirements.txt b/comps/llms/text-generation/vllm-ray/requirements.txt index 2e8b8c578..083a2910b 100644 --- a/comps/llms/text-generation/vllm-ray/requirements.txt +++ b/comps/llms/text-generation/vllm-ray/requirements.txt @@ -11,7 +11,7 @@ opentelemetry-exporter-otlp opentelemetry-sdk prometheus-fastapi-instrumentator ray[serve]>=2.10 -setuptools==69.5.1 +setuptools shortuuid transformers uvicorn diff --git a/comps/llms/text-generation/vllm/README.md b/comps/llms/text-generation/vllm/README.md index 1445d1bd1..3f0184ed9 100644 --- a/comps/llms/text-generation/vllm/README.md +++ b/comps/llms/text-generation/vllm/README.md @@ -50,6 +50,12 @@ bash ./build_docker_vllm.sh hpu Set `hw_mode` to `hpu`. +Note: If you want to enable tensor parallel, please set `setuptools==69.5.1` in Dockerfile.hpu before build docker with following command. + +``` +sed -i "s/RUN pip install setuptools/RUN pip install setuptools==69.5.1/g" docker/Dockerfile.hpu +``` + #### Launch vLLM service on single node For small model, we can just use single node. diff --git a/comps/llms/text-generation/vllm/build_docker_vllm.sh b/comps/llms/text-generation/vllm/build_docker_vllm.sh index 3680f076c..c1037a5c7 100644 --- a/comps/llms/text-generation/vllm/build_docker_vllm.sh +++ b/comps/llms/text-generation/vllm/build_docker_vllm.sh @@ -30,9 +30,9 @@ fi # Build the docker image for vLLM based on the hardware mode if [ "$hw_mode" = "hpu" ]; then - docker build -f docker/Dockerfile.hpu -t vllm:hpu --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy + docker build -f docker/Dockerfile.hpu -t opea/vllm:hpu --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy else git clone https://github.com/vllm-project/vllm.git cd ./vllm/ - docker build -f Dockerfile.cpu -t vllm:cpu --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy + docker build -f Dockerfile.cpu -t opea/vllm:cpu --shm-size=128g . 
--build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy fi diff --git a/comps/llms/text-generation/vllm/docker/Dockerfile.hpu b/comps/llms/text-generation/vllm/docker/Dockerfile.hpu index c7093d4c0..af5d70852 100644 --- a/comps/llms/text-generation/vllm/docker/Dockerfile.hpu +++ b/comps/llms/text-generation/vllm/docker/Dockerfile.hpu @@ -9,7 +9,7 @@ RUN pip install --upgrade-strategy eager optimum[habana] RUN pip install -v git+https://github.com/HabanaAI/vllm-fork.git@cf6952d -RUN pip install setuptools==69.5.1 +RUN pip install setuptools RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \ service ssh restart diff --git a/comps/llms/text-generation/vllm/docker_compose_llm.yaml b/comps/llms/text-generation/vllm/docker_compose_llm.yaml index 818fdf54a..205c9293a 100644 --- a/comps/llms/text-generation/vllm/docker_compose_llm.yaml +++ b/comps/llms/text-generation/vllm/docker_compose_llm.yaml @@ -5,7 +5,7 @@ version: "3.8" services: vllm-service: - image: vllm:hpu + image: opea/vllm:hpu container_name: vllm-gaudi-server ports: - "8008:80" diff --git a/comps/llms/text-generation/vllm/launch_vllm_service.sh b/comps/llms/text-generation/vllm/launch_vllm_service.sh index 0b225023c..0c7ed90de 100644 --- a/comps/llms/text-generation/vllm/launch_vllm_service.sh +++ b/comps/llms/text-generation/vllm/launch_vllm_service.sh @@ -38,7 +38,7 @@ volume=$PWD/data # Build the Docker run command based on hardware mode if [ "$hw_mode" = "hpu" ]; then - docker run -d --rm --runtime=habana --name="vllm-service" -p $port_number:80 -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} vllm:hpu /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $model_name --tensor-parallel-size $parallel_number --host 0.0.0.0 --port 80 --block-size $block_size --max-num-seqs $max_num_seqs --max-seq_len-to-capture $max_seq_len_to_capture " + docker run -d --rm --runtime=habana --name="vllm-service" -p $port_number:80 -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} opea/vllm:hpu /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $model_name --tensor-parallel-size $parallel_number --host 0.0.0.0 --port 80 --block-size $block_size --max-num-seqs $max_num_seqs --max-seq_len-to-capture $max_seq_len_to_capture " else - docker run -d --rm --name="vllm-service" -p $port_number:80 --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e VLLM_CPU_KVCACHE_SPACE=40 vllm:cpu --model $model_name --host 0.0.0.0 --port 80 + docker run -d --rm --name="vllm-service" -p $port_number:80 --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e VLLM_CPU_KVCACHE_SPACE=40 opea/vllm:cpu --model $model_name --host 0.0.0.0 --port 80 fi diff --git a/tests/test_llms_text-generation_vllm-ray.sh b/tests/test_llms_text-generation_vllm-ray.sh index 8ecb487e9..7ab235a93 100644 --- a/tests/test_llms_text-generation_vllm-ray.sh +++ b/tests/test_llms_text-generation_vllm-ray.sh @@ -12,7 +12,7 @@ function 
build_docker_images() { cd $WORKPATH docker build \ -f comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray \ - -t vllm_ray:habana --network=host . + -t opea/vllm_ray:habana --network=host . ## Build OPEA microservice docker cd $WORKPATH @@ -34,7 +34,7 @@ function start_service() { --ipc=host \ -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ -p $port_number:8000 \ - vllm_ray:habana \ + opea/vllm_ray:habana \ /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $LLM_MODEL --tensor_parallel_size 2 --enforce_eager False" export vLLM_RAY_ENDPOINT="http://${ip_address}:${port_number}" diff --git a/tests/test_llms_text-generation_vllm.sh b/tests/test_llms_text-generation_vllm.sh index c5e7faa4b..48bee9ae8 100644 --- a/tests/test_llms_text-generation_vllm.sh +++ b/tests/test_llms_text-generation_vllm.sh @@ -12,7 +12,7 @@ function build_docker_images() { cd $WORKPATH/comps/llms/text-generation/vllm docker build \ -f docker/Dockerfile.hpu \ - -t vllm:hpu \ + -t opea/vllm:hpu \ --shm-size=128g . ## Build OPEA microservice docker @@ -35,7 +35,7 @@ function start_service() { --cap-add=sys_nice \ --ipc=host \ -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ - vllm:hpu \ + opea/vllm:hpu \ /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048" export vLLM_ENDPOINT="http://${ip_address}:${port_number}" From 2098b913af374f34aee9ac6a4ee7327c321a41ef Mon Sep 17 00:00:00 2001 From: Yogesh Pandey Date: Sat, 10 Aug 2024 20:05:21 +0530 Subject: [PATCH 08/15] Added unified ports for Chat History Microservice. (#449) * Added unified ports Signed-off-by: Yogesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added unified ports for compose Signed-off-by: Yogesh --------- Signed-off-by: Yogesh Co-authored-by: Yogesh Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- comps/chathistory/mongo/README.md | 6 +++--- comps/chathistory/mongo/chathistory_mongo.py | 14 ++++++-------- .../docker/docker-compose-chathistory-mongo.yaml | 2 -- tests/test_chathistory_mongo.sh | 2 +- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/comps/chathistory/mongo/README.md b/comps/chathistory/mongo/README.md index 4a132e9c8..2eaa62e55 100644 --- a/comps/chathistory/mongo/README.md +++ b/comps/chathistory/mongo/README.md @@ -60,7 +60,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://${host_ip}:6013/v1/chathistory/get \ + http://${host_ip}:6012/v1/chathistory/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -71,7 +71,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://${host_ip}:6013/v1/chathistory/get \ + http://${host_ip}:6012/v1/chathistory/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -97,7 +97,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://${host_ip}:6014/v1/chathistory/delete \ + http://${host_ip}:6012/v1/chathistory/delete \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ diff --git a/comps/chathistory/mongo/chathistory_mongo.py b/comps/chathistory/mongo/chathistory_mongo.py index 5b65d1d8e..1993503da 100644 --- a/comps/chathistory/mongo/chathistory_mongo.py +++ b/comps/chathistory/mongo/chathistory_mongo.py @@ -35,7 +35,7 @@ def get_first_string(value): 
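As a side note on the unified chat-history port shown in the README changes above, the consolidated endpoints can be exercised with a short client sketch. This is illustrative only and not part of the patch: it assumes the chathistory-mongo service runs at `localhost:6012`; the `/v1/chathistory/get` path and the `user`/`id` fields follow the curl examples in that README, and the `id` value is just the placeholder used there.

```python
import requests

# Assumption: chathistory-mongo microservice exposed locally on the unified port 6012.
BASE = "http://localhost:6012/v1/chathistory"

# Fetch all chat sessions for a user, then fetch one conversation by its id
# (the id value is copied from the README example and is only a placeholder).
all_chats = requests.post(f"{BASE}/get", json={"user": "test"})
print(all_chats.status_code, all_chats.text)

one_chat = requests.post(f"{BASE}/get", json={"user": "test", "id": "668620173180b6441e1177c8"})
print(one_chat.status_code, one_chat.text)
```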
@register_microservice( - name="opea_service@chathistory_mongo_create", + name="opea_service@chathistory_mongo", endpoint="/v1/chathistory/create", host="0.0.0.0", input_datatype=ChatMessage, @@ -70,11 +70,11 @@ async def create_documents(document: ChatMessage): @register_microservice( - name="opea_service@chathistory_mongo_get", + name="opea_service@chathistory_mongo", endpoint="/v1/chathistory/get", host="0.0.0.0", input_datatype=ChatId, - port=6013, + port=6012, ) async def get_documents(document: ChatId): """Retrieves documents from the document store based on the provided ChatId. @@ -100,11 +100,11 @@ async def get_documents(document: ChatId): @register_microservice( - name="opea_service@chathistory_mongo_delete", + name="opea_service@chathistory_mongo", endpoint="/v1/chathistory/delete", host="0.0.0.0", input_datatype=ChatId, - port=6014, + port=6012, ) async def delete_documents(document: ChatId): """Deletes a document from the document store based on the provided ChatId. @@ -130,6 +130,4 @@ async def delete_documents(document: ChatId): if __name__ == "__main__": - opea_microservices["opea_service@chathistory_mongo_get"].start() - opea_microservices["opea_service@chathistory_mongo_create"].start() - opea_microservices["opea_service@chathistory_mongo_delete"].start() + opea_microservices["opea_service@chathistory_mongo"].start() diff --git a/comps/chathistory/mongo/docker/docker-compose-chathistory-mongo.yaml b/comps/chathistory/mongo/docker/docker-compose-chathistory-mongo.yaml index 97e17e077..e272d4f91 100644 --- a/comps/chathistory/mongo/docker/docker-compose-chathistory-mongo.yaml +++ b/comps/chathistory/mongo/docker/docker-compose-chathistory-mongo.yaml @@ -19,8 +19,6 @@ services: container_name: chathistory-mongo-server ports: - "6012:6012" - - "6013:6013" - - "6014:6014" ipc: host environment: http_proxy: ${http_proxy} diff --git a/tests/test_chathistory_mongo.sh b/tests/test_chathistory_mongo.sh index 1e60a59c9..005a1a6ef 100755 --- a/tests/test_chathistory_mongo.sh +++ b/tests/test_chathistory_mongo.sh @@ -22,7 +22,7 @@ function build_docker_images() { function start_service() { - docker run -d --name="test-comps-chathistory-mongo-server" -p 6013:6013 -p 6012:6012 -p 6014:6014 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/chathistory-mongo-server:comps + docker run -d --name="test-comps-chathistory-mongo-server" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/chathistory-mongo-server:comps sleep 10s } From ed776acc4f585054994676df70dd124412adc185 Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Sat, 10 Aug 2024 23:51:57 +0800 Subject: [PATCH 09/15] Support multiple image sources for LVM microservice (#451) Signed-off-by: lvliang-intel --- comps/cores/mega/gateway.py | 25 ++++++++++++++++++++++--- comps/lvms/lvm_tgi.py | 2 +- requirements.txt | 1 + 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index 324f7081e..8ad31c841 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -2,10 +2,13 @@ # SPDX-License-Identifier: Apache-2.0 import base64 +import os +from io import BytesIO import requests from fastapi import Request from fastapi.responses import StreamingResponse +from 
PIL import Image from ..proto.api_protocol import ( AudioChatCompletionRequest, @@ -74,6 +77,7 @@ def list_parameter(self): pass def _handle_message(self, messages): + images = [] if isinstance(messages, str): prompt = messages else: @@ -104,7 +108,6 @@ def _handle_message(self, messages): raise ValueError(f"Unknown role: {msg_role}") if system_prompt: prompt = system_prompt + "\n" - images = [] for role, message in messages_dict.items(): if isinstance(message, tuple): text, image_list = message @@ -113,8 +116,24 @@ def _handle_message(self, messages): else: prompt += role + ":" for img in image_list: - response = requests.get(img) - images.append(base64.b64encode(response.content).decode("utf-8")) + # URL + if img.startswith("http://") or img.startswith("https://"): + response = requests.get(img) + image = Image.open(BytesIO(response.content)).convert("RGBA") + image_bytes = BytesIO() + image.save(image_bytes, format="PNG") + img_b64_str = base64.b64encode(image_bytes.getvalue()).decode() + # Local Path + elif os.path.exists(img): + image = Image.open(img).convert("RGBA") + image_bytes = BytesIO() + image.save(image_bytes, format="PNG") + img_b64_str = base64.b64encode(image_bytes.getvalue()).decode() + # Bytes + else: + img_b64_str = img + + images.append(img_b64_str) else: if message: prompt += role + ": " + message + "\n" diff --git a/comps/lvms/lvm_tgi.py b/comps/lvms/lvm_tgi.py index b7383fa0c..b2eddf9f1 100644 --- a/comps/lvms/lvm_tgi.py +++ b/comps/lvms/lvm_tgi.py @@ -48,7 +48,7 @@ async def lvm(request: LVMDoc): async def stream_generator(): chat_response = "" text_generation = await lvm_client.text_generation( - prompt=prompt, + prompt=image_prompt, stream=streaming, max_new_tokens=max_new_tokens, repetition_penalty=repetition_penalty, diff --git a/requirements.txt b/requirements.txt index 53bfbf8d4..ef12b2fc4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ httpx opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk +Pillow prometheus-fastapi-instrumentator pyyaml requests From ac4a77798ef2f6d093550a234671511e067625f0 Mon Sep 17 00:00:00 2001 From: lkk <33276950+lkk12014402@users.noreply.github.com> Date: Sun, 11 Aug 2024 21:41:06 +0800 Subject: [PATCH 10/15] align vllm-ray response format to tgi response format (#452) * align vllm-ray response format to tgi response format * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: changwangss Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- comps/llms/text-generation/vllm-ray/llm.py | 26 ++++------------------ 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/comps/llms/text-generation/vllm-ray/llm.py b/comps/llms/text-generation/vllm-ray/llm.py index dc0c4b669..6d8abd028 100644 --- a/comps/llms/text-generation/vllm-ray/llm.py +++ b/comps/llms/text-generation/vllm-ray/llm.py @@ -21,18 +21,6 @@ from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice -@traceable(run_type="tool") -def post_process_text(text: str): - if text == " ": - return "data: @#$\n\n" - if text == "\n": - return "data:
\n\n" - if text.isspace(): - return None - new_text = text.replace(" ", "@#$") - return f"data: {new_text}\n\n" - - @register_microservice( name="opea_service@llm_vllm_ray", service_type=ServiceType.LLM, @@ -56,19 +44,13 @@ def llm_generate(input: LLMParamsDoc): if input.streaming: - async def stream_generator(): + def stream_generator(): chat_response = "" - async for text in llm.astream(input.query): + for text in llm.stream(input.query): text = text.content chat_response += text - processed_text = post_process_text(text) - if text and processed_text: - if "" in text: - res = text.split("")[0] - if res != "": - yield res - break - yield processed_text + chunk_repr = repr(text.encode("utf-8")) + yield f"data: {chunk_repr}\n\n" print(f"[llm - chat_stream] stream response: {chat_response}") yield "data: [DONE]\n\n" From 1eaf6b7afd7a7f8c7aee67236c55a5fd8090e0a0 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Mon, 12 Aug 2024 09:31:44 +0800 Subject: [PATCH 11/15] Support launch as Non-Root user in all published container images. (#406) Signed-off-by: zepan --- comps/asr/Dockerfile | 13 +++++++++---- comps/asr/whisper/Dockerfile | 17 ++++++++++++----- comps/asr/whisper/Dockerfile_hpu | 14 ++++++++++---- .../langchain-mosec/mosec-docker/Dockerfile | 14 +++++++++----- .../text-generation/vllm-xft/docker/Dockerfile | 1 - .../text-generation/vllm/docker/Dockerfile.hpu | 11 ++++++----- comps/llms/utils/lm-eval/Dockerfile.cpu | 6 ++++-- comps/lvms/Dockerfile | 13 ++++++++----- comps/lvms/llava/Dockerfile | 13 ++++++++----- comps/lvms/llava/Dockerfile_hpu | 14 +++++++++----- .../langchain-mosec/mosec-docker/Dockerfile | 12 ++++++++---- comps/tts/Dockerfile | 13 ++++++++----- comps/tts/speecht5/Dockerfile | 13 +++++++++---- comps/tts/speecht5/Dockerfile_hpu | 14 +++++++++----- .../langchain/chroma/docker/Dockerfile | 6 ++++-- tests/test_asr_whisper.sh | 14 ++++++++------ tests/test_embeddings_langchain-mosec.sh | 10 +++++++++- tests/test_lvms_llava.sh | 13 ++++++++----- tests/test_reranks_langchain-mosec.sh | 2 +- tests/test_tts_speecht5.sh | 12 +++++++----- tests/test_web_retrievers_langchain_chroma.sh | 3 ++- 21 files changed, 148 insertions(+), 80 deletions(-) diff --git a/comps/asr/Dockerfile b/comps/asr/Dockerfile index cc2740b7d..2e8731e85 100644 --- a/comps/asr/Dockerfile +++ b/comps/asr/Dockerfile @@ -3,15 +3,20 @@ FROM python:3.11-slim +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +USER user + ENV LANG=C.UTF-8 -COPY comps /home/comps +COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/asr/requirements.txt + pip install --no-cache-dir -r /home/user/comps/asr/requirements.txt -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/asr +WORKDIR /home/user/comps/asr ENTRYPOINT ["python", "asr.py"] \ No newline at end of file diff --git a/comps/asr/whisper/Dockerfile b/comps/asr/whisper/Dockerfile index c3e2a0025..f0e09e5fb 100644 --- a/comps/asr/whisper/Dockerfile +++ b/comps/asr/whisper/Dockerfile @@ -3,21 +3,28 @@ FROM python:3.11-slim + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + # Set environment variables ENV LANG=en_US.UTF-8 -ENV PYTHONPATH=/home/user # Install system dependencies RUN apt-get update \ && apt-get install -y ffmpeg -COPY comps /home/comps +COPY --chown=user:user comps /home/user/comps + +USER user RUN pip install --no-cache-dir --upgrade pip && \ - pip install 
--no-cache-dir -r /home/comps/asr/requirements.txt + pip install --no-cache-dir -r /home/user/comps/asr/requirements.txt && \ + pip list -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/asr/whisper +WORKDIR /home/user/comps/asr/whisper ENTRYPOINT ["python", "whisper_server.py", "--device", "cpu"] \ No newline at end of file diff --git a/comps/asr/whisper/Dockerfile_hpu b/comps/asr/whisper/Dockerfile_hpu index 128b8d5cc..85f467606 100644 --- a/comps/asr/whisper/Dockerfile_hpu +++ b/comps/asr/whisper/Dockerfile_hpu @@ -4,6 +4,10 @@ # HABANA environment FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest AS hpu +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana @@ -12,15 +16,17 @@ ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana RUN apt-get update \ && apt-get install -y ffmpeg -COPY comps /home/comps +COPY --chown=user:user comps /home/user/comps + +USER user # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/asr/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/asr/requirements.txt && \ pip install optimum[habana] -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/asr/whisper +WORKDIR /home/user/comps/asr/whisper ENTRYPOINT ["python", "whisper_server.py", "--device", "hpu"] \ No newline at end of file diff --git a/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile b/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile index eec3020a4..a8241e04e 100644 --- a/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile +++ b/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile @@ -2,22 +2,26 @@ # SPDX-License-Identifier: Apache-2.0 From ubuntu:22.04 +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ ARG DEBIAN_FRONTEND=noninteractive ENV GLIBC_TUNABLES glibc.cpu.x86_shstk=permissive +RUN apt update && apt install -y python3 python3-pip -COPY comps /root/comps +USER user +COPY comps /home/user/comps -RUN apt update && apt install -y python3 python3-pip RUN pip3 install torch==2.2.2 torchvision --index-url https://download.pytorch.org/whl/cpu RUN pip3 install intel-extension-for-pytorch==2.2.0 RUN pip3 install transformers RUN pip3 install llmspec mosec -RUN cd /root/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-large-zh-v1.5 --local-dir /root/bge-large-zh-v1.5 +RUN cd /home/user/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-large-zh-v1.5 --local-dir /home/user/bge-large-zh-v1.5 -ENV EMB_MODEL="/root/bge-large-zh-v1.5/" +ENV EMB_MODEL="/home/user/bge-large-zh-v1.5/" -WORKDIR /root/comps/embeddings/langchain-mosec/mosec-docker +WORKDIR /home/user/comps/embeddings/langchain-mosec/mosec-docker CMD ["python3", "server-ipex.py"] diff --git a/comps/llms/text-generation/vllm-xft/docker/Dockerfile b/comps/llms/text-generation/vllm-xft/docker/Dockerfile index db682e04f..95cd596d7 100644 --- a/comps/llms/text-generation/vllm-xft/docker/Dockerfile +++ b/comps/llms/text-generation/vllm-xft/docker/Dockerfile @@ -95,4 +95,3 @@ RUN chmod +x /root/comps/llms/text-generation/vllm-xft/run.sh WORKDIR /root/comps/llms/text-generation/vllm-xft/ ENTRYPOINT 
["/root/comps/llms/text-generation/vllm-xft/run.sh"] - diff --git a/comps/llms/text-generation/vllm/docker/Dockerfile.hpu b/comps/llms/text-generation/vllm/docker/Dockerfile.hpu index af5d70852..730fe37e7 100644 --- a/comps/llms/text-generation/vllm/docker/Dockerfile.hpu +++ b/comps/llms/text-generation/vllm/docker/Dockerfile.hpu @@ -1,8 +1,12 @@ # FROM vault.habana.ai/gaudi-docker/1.15.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest FROM vault.habana.ai/gaudi-docker/1.16.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ ENV LANG=en_US.UTF-8 - +RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \ + service ssh restart +USER user WORKDIR /root RUN pip install --upgrade-strategy eager optimum[habana] @@ -11,9 +15,6 @@ RUN pip install -v git+https://github.com/HabanaAI/vllm-fork.git@cf6952d RUN pip install setuptools -RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \ - service ssh restart - ENV no_proxy=localhost,127.0.0.1 ENV PT_HPU_LAZY_ACC_PAR_MODE=0 diff --git a/comps/llms/utils/lm-eval/Dockerfile.cpu b/comps/llms/utils/lm-eval/Dockerfile.cpu index 933a523a5..ceb98887d 100644 --- a/comps/llms/utils/lm-eval/Dockerfile.cpu +++ b/comps/llms/utils/lm-eval/Dockerfile.cpu @@ -1,6 +1,8 @@ ARG UBUNTU_VER=22.04 FROM ubuntu:${UBUNTU_VER} as devel - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ ARG REPO_COMPS=https://github.com/opea-project/GenAIComps.git ARG BRANCH=main ENV LANG=C.UTF-8 @@ -16,7 +18,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ git \ vim \ wget - +USER user RUN git clone --single-branch --branch=${BRANCH} ${REPO_COMPS} /home/user/GenAIComps/ && \ cd /home/user/GenAIComps/ && python3 setup.py install && \ pip install --no-cache-dir -r /home/user/GenAIComps/comps/llms/utils/lm-eval/requirements.txt diff --git a/comps/lvms/Dockerfile b/comps/lvms/Dockerfile index 73be60ba6..734d2cdb6 100644 --- a/comps/lvms/Dockerfile +++ b/comps/lvms/Dockerfile @@ -2,17 +2,20 @@ # SPDX-License-Identifier: Apache-2.0 FROM python:3.11-slim - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +USER user # Set environment variables ENV LANG=en_US.UTF-8 -COPY comps /home/comps +COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/lvms/requirements.txt + pip install --no-cache-dir -r /home/user/comps/lvms/requirements.txt -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/lvms +WORKDIR /home/user/comps/lvms ENTRYPOINT ["python", "lvm.py"] \ No newline at end of file diff --git a/comps/lvms/llava/Dockerfile b/comps/lvms/llava/Dockerfile index efd2b1d45..07d5cc41d 100644 --- a/comps/lvms/llava/Dockerfile +++ b/comps/lvms/llava/Dockerfile @@ -2,18 +2,21 @@ # SPDX-License-Identifier: Apache-2.0 FROM python:3.11-slim - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +USER user # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana -COPY comps /home/comps +COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/lvms/requirements.txt + pip install --no-cache-dir -r 
/home/user/comps/lvms/requirements.txt -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/lvms/llava +WORKDIR /home/user/comps/lvms/llava ENTRYPOINT ["python", "llava_server.py", "--device", "cpu"] \ No newline at end of file diff --git a/comps/lvms/llava/Dockerfile_hpu b/comps/lvms/llava/Dockerfile_hpu index bb2bf0676..272fad826 100644 --- a/comps/lvms/llava/Dockerfile_hpu +++ b/comps/lvms/llava/Dockerfile_hpu @@ -3,21 +3,25 @@ # HABANA environment FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest AS hpu -RUN rm -rf /etc/ssh/ssh_host* +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +RUN rm -rf /etc/ssh/ssh_host* +USER user # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana -COPY comps /home/comps +COPY comps /home/user/comps # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/lvms/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/lvms/requirements.txt && \ pip install optimum[habana] -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/lvms/llava +WORKDIR /home/user/comps/lvms/llava ENTRYPOINT ["python", "llava_server.py"] \ No newline at end of file diff --git a/comps/reranks/langchain-mosec/mosec-docker/Dockerfile b/comps/reranks/langchain-mosec/mosec-docker/Dockerfile index 0c634fb90..8fca32833 100644 --- a/comps/reranks/langchain-mosec/mosec-docker/Dockerfile +++ b/comps/reranks/langchain-mosec/mosec-docker/Dockerfile @@ -2,22 +2,26 @@ # SPDX-License-Identifier: Apache-2.0 From ubuntu:22.04 +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ ARG DEBIAN_FRONTEND=noninteractive ENV GLIBC_TUNABLES glibc.cpu.x86_shstk=permissive -COPY comps /root/comps +COPY comps /home/user/comps RUN apt update && apt install -y python3 python3-pip +USER user RUN pip3 install torch==2.2.2 torchvision --trusted-host download.pytorch.org --index-url https://download.pytorch.org/whl/cpu RUN pip3 install intel-extension-for-pytorch==2.2.0 RUN pip3 install transformers sentence-transformers RUN pip3 install llmspec mosec -RUN cd /root/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-reranker-large --local-dir /root/bge-reranker-large +RUN cd /home/user/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-reranker-large --local-dir /home/user/bge-reranker-large -ENV EMB_MODEL="/root/bge-reranker-large/" +ENV EMB_MODEL="/home/user/bge-reranker-large/" -WORKDIR /root/comps/reranks/langchain-mosec/mosec-docker +WORKDIR /home/user/comps/reranks/langchain-mosec/mosec-docker CMD ["python3", "server-ipex.py"] diff --git a/comps/tts/Dockerfile b/comps/tts/Dockerfile index 73272567d..ae00a24e3 100644 --- a/comps/tts/Dockerfile +++ b/comps/tts/Dockerfile @@ -2,16 +2,19 @@ # SPDX-License-Identifier: Apache-2.0 FROM python:3.11-slim - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +USER user ENV LANG=C.UTF-8 -COPY comps /home/comps +COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/tts/requirements.txt + pip install --no-cache-dir -r /home/user/comps/tts/requirements.txt -ENV PYTHONPATH=$PYTHONPATH:/home +ENV 
PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/tts +WORKDIR /home/user/comps/tts ENTRYPOINT ["python", "tts.py"] \ No newline at end of file diff --git a/comps/tts/speecht5/Dockerfile b/comps/tts/speecht5/Dockerfile index e4afd07db..5ddd43e2c 100644 --- a/comps/tts/speecht5/Dockerfile +++ b/comps/tts/speecht5/Dockerfile @@ -2,6 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 FROM python:3.11-slim +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ # Set environment variables ENV LANG=en_US.UTF-8 @@ -12,13 +15,15 @@ RUN apt-get update \ && apt-get install -y ffmpeg \ && apt-get install -y curl -COPY comps /home/comps +COPY --chown=user:user comps /home/user/comps + +USER user RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/tts/requirements.txt + pip install --no-cache-dir -r /home/user/comps/tts/requirements.txt -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/tts/speecht5 +WORKDIR /home/user/comps/tts/speecht5 ENTRYPOINT ["python", "speecht5_server.py", "--device", "cpu"] \ No newline at end of file diff --git a/comps/tts/speecht5/Dockerfile_hpu b/comps/tts/speecht5/Dockerfile_hpu index 8f889b86a..330ae276e 100644 --- a/comps/tts/speecht5/Dockerfile_hpu +++ b/comps/tts/speecht5/Dockerfile_hpu @@ -3,7 +3,9 @@ # HABANA environment FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest AS hpu - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ RUN rm -rf /etc/ssh/ssh_host* # Set environment variables @@ -15,15 +17,17 @@ RUN apt-get update \ && apt-get install -y ffmpeg \ && apt-get install -y curl -COPY comps /home/comps +COPY --chown=user:user comps /home/user/comps + +USER user # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/tts/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/tts/requirements.txt && \ pip install optimum[habana] -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/tts/speecht5 +WORKDIR /home/user/comps/tts/speecht5 ENTRYPOINT ["python", "speecht5_server.py", "--device", "hpu"] \ No newline at end of file diff --git a/comps/web_retrievers/langchain/chroma/docker/Dockerfile b/comps/web_retrievers/langchain/chroma/docker/Dockerfile index a6c3d80d5..c391fefe2 100644 --- a/comps/web_retrievers/langchain/chroma/docker/Dockerfile +++ b/comps/web_retrievers/langchain/chroma/docker/Dockerfile @@ -2,14 +2,16 @@ # SPDX-License-Identifier: Apache-2.0 FROM langchain/langchain:latest - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ ARG ARCH="cpu" # Set this to "cpu" or "gpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ libjemalloc-dev \ vim - +USER user COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ diff --git a/tests/test_asr_whisper.sh b/tests/test_asr_whisper.sh index 5e6e4a8c8..1cfc4a093 100644 --- a/tests/test_asr_whisper.sh +++ b/tests/test_asr_whisper.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,23 +10,25 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t 
opea/whisper:latest -f comps/asr/whisper/Dockerfile . - docker build -t opea/asr:latest -f comps/asr/Dockerfile . + docker build -t opea/whisper:comps -f comps/asr/whisper/Dockerfile . + docker build -t opea/asr:comps -f comps/asr/Dockerfile . } function start_service() { unset http_proxy - docker run -d --name="test-comps-asr-whisper" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 7066:7066 --ipc=host opea/whisper:latest - docker run -d --name="test-comps-asr" -e ASR_ENDPOINT=http://$ip_address:7066 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9099:9099 --ipc=host opea/asr:latest + docker run -d --name="test-comps-asr-whisper" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 7066:7066 --ipc=host opea/whisper:comps + docker run -d --name="test-comps-asr" -e ASR_ENDPOINT=http://$ip_address:7066 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9089:9099 --ipc=host opea/asr:comps sleep 3m } function validate_microservice() { - result=$(http_proxy="" curl http://localhost:9099/v1/audio/transcriptions -XPOST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' -H 'Content-Type: application/json') + result=$(http_proxy="" curl http://localhost:9089/v1/audio/transcriptions -XPOST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' -H 'Content-Type: application/json') if [[ $result == *"you"* ]]; then echo "Result correct." else echo "Result wrong." + docker logs test-comps-asr-whisper + docker logs test-comps-asr exit 1 fi diff --git a/tests/test_embeddings_langchain-mosec.sh b/tests/test_embeddings_langchain-mosec.sh index 198906346..a2f9aeb2a 100644 --- a/tests/test_embeddings_langchain-mosec.sh +++ b/tests/test_embeddings_langchain-mosec.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -36,6 +36,14 @@ function validate_microservice() { -X POST \ -d '{"text":"What is Deep Learning?"}' \ -H 'Content-Type: application/json' + if [ $? -eq 0 ]; then + echo "curl command executed successfully" + else + echo "curl command failed" + docker logs test-comps-embedding-langchain-mosec-endpoint + docker logs test-comps-embedding-langchain-mosec-server + exit 1 + fi } function stop_docker() { diff --git a/tests/test_lvms_llava.sh b/tests/test_lvms_llava.sh index da7c740a9..d9d4258e7 100644 --- a/tests/test_lvms_llava.sh +++ b/tests/test_lvms_llava.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,23 +10,26 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/llava:latest -f comps/lvms/llava/Dockerfile . - docker build --no-cache -t opea/lvm:latest -f comps/lvms/Dockerfile . + docker build -t opea/llava:comps -f comps/lvms/llava/Dockerfile . + docker build --no-cache -t opea/lvm:comps -f comps/lvms/Dockerfile . 
} function start_service() { unset http_proxy - docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 8399:8399 --ipc=host opea/llava:latest - docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:8399 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9399:9399 --ipc=host opea/lvm:latest + docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 8399:8399 --ipc=host opea/llava:comps + docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:8399 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9399:9399 --ipc=host opea/lvm:comps sleep 8m } function validate_microservice() { + result=$(http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') if [[ $result == *"yellow"* ]]; then echo "Result correct." else echo "Result wrong." + docker logs test-comps-lvm-llava + docker logs test-comps-lvm exit 1 fi diff --git a/tests/test_reranks_langchain-mosec.sh b/tests/test_reranks_langchain-mosec.sh index 8d25d5fed..42f100156 100644 --- a/tests/test_reranks_langchain-mosec.sh +++ b/tests/test_reranks_langchain-mosec.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') diff --git a/tests/test_tts_speecht5.sh b/tests/test_tts_speecht5.sh index d9426bdff..1982271fa 100644 --- a/tests/test_tts_speecht5.sh +++ b/tests/test_tts_speecht5.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,14 +10,14 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/speecht5:latest -f comps/tts/speecht5/Dockerfile . - docker build -t opea/tts:latest -f comps/tts/Dockerfile . + docker build -t opea/speecht5:comps -f comps/tts/speecht5/Dockerfile . + docker build -t opea/tts:comps -f comps/tts/Dockerfile . } function start_service() { unset http_proxy - docker run -d --name="test-comps-tts-speecht5" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 7055:7055 --ipc=host opea/speecht5:latest - docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:7055 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9088:9088 --ipc=host opea/tts:latest + docker run -d --name="test-comps-tts-speecht5" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 7055:7055 --ipc=host opea/speecht5:comps + docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:7055 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9088:9088 --ipc=host opea/tts:comps sleep 3m } @@ -27,6 +27,8 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong." 
+ docker logs test-comps-tts-speecht5 + docker logs test-comps-tts exit 1 fi diff --git a/tests/test_web_retrievers_langchain_chroma.sh b/tests/test_web_retrievers_langchain_chroma.sh index d1e2c3ed5..132e7233d 100644 --- a/tests/test_web_retrievers_langchain_chroma.sh +++ b/tests/test_web_retrievers_langchain_chroma.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -36,6 +36,7 @@ function validate_microservice() { -d "{\"text\":\"What is OPEA?\",\"embedding\":${test_embedding}}" \ -H 'Content-Type: application/json' docker logs test-comps-web-retriever-tei-endpoint + docker logs test-comps-web-retriever-chroma-server } function stop_docker() { From ec4143e74f83a3662c972a303191e7d67dfafbf5 Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Mon, 12 Aug 2024 10:25:43 +0800 Subject: [PATCH 12/15] Add E2E example test (#453) Signed-off-by: chensuyue --- ...t-test-matrix.yml => _get-test-matrix.yml} | 0 ...d-on-manual.yml => manual-image-build.yml} | 0 .../{code-scan.yml => mix-code-scan.yml} | 0 ...vice-test.yml => mix-megaservice-test.yml} | 2 +- .../{trellix.yml => mix-trellix.yml} | 0 ...test.yaml => pr-dockerfile-path-scan.yaml} | 0 .github/workflows/pr-examples-test.yml | 72 +++++++++++++++++ ...vice-test.yml => pr-microservice-test.yml} | 10 ++- ...build-on-push.yml => push-image-build.yml} | 2 +- .github/workflows/reuse-image-build.yml | 32 -------- ...-schedule.yml => schedule-image-build.yml} | 0 .../scripts/docker_images_build_push.sh | 77 ------------------ tests/{test_asr_whisper.sh => test_asr.sh} | 0 tests/{test_tts_speecht5.sh => test_tts.sh} | 0 tests/test_workflow_chatqna.py | 80 ------------------- 15 files changed, 80 insertions(+), 195 deletions(-) rename .github/workflows/{reuse-get-test-matrix.yml => _get-test-matrix.yml} (100%) rename .github/workflows/{image-build-on-manual.yml => manual-image-build.yml} (100%) rename .github/workflows/{code-scan.yml => mix-code-scan.yml} (100%) rename .github/workflows/{megaservice-test.yml => mix-megaservice-test.yml} (98%) rename .github/workflows/{trellix.yml => mix-trellix.yml} (100%) rename .github/workflows/{test.yaml => pr-dockerfile-path-scan.yaml} (100%) create mode 100644 .github/workflows/pr-examples-test.yml rename .github/workflows/{microservice-test.yml => pr-microservice-test.yml} (89%) rename .github/workflows/{image-build-on-push.yml => push-image-build.yml} (98%) delete mode 100644 .github/workflows/reuse-image-build.yml rename .github/workflows/{image-build-on-schedule.yml => schedule-image-build.yml} (100%) delete mode 100644 .github/workflows/scripts/docker_images_build_push.sh rename tests/{test_asr_whisper.sh => test_asr.sh} (100%) rename tests/{test_tts_speecht5.sh => test_tts.sh} (100%) delete mode 100644 tests/test_workflow_chatqna.py diff --git a/.github/workflows/reuse-get-test-matrix.yml b/.github/workflows/_get-test-matrix.yml similarity index 100% rename from .github/workflows/reuse-get-test-matrix.yml rename to .github/workflows/_get-test-matrix.yml diff --git a/.github/workflows/image-build-on-manual.yml b/.github/workflows/manual-image-build.yml similarity index 100% rename from .github/workflows/image-build-on-manual.yml rename to .github/workflows/manual-image-build.yml diff --git a/.github/workflows/code-scan.yml b/.github/workflows/mix-code-scan.yml similarity index 100% rename from .github/workflows/code-scan.yml rename to .github/workflows/mix-code-scan.yml 
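Note that the component tests reworked in the previous patch converge on one validation shape: images are tagged `:comps` instead of `:latest`, `set -xe` becomes `set -x` so a failed check no longer aborts before diagnostics are collected, and the relevant `docker logs` are dumped before exiting non-zero. A minimal sketch of that shape is below; the service name, port, payload, and expected substring are placeholders, not real component values.

```bash
#!/bin/bash
# Sketch only: "example" stands in for a real component; adjust names, ports, payloads.
set -x   # trace commands; -e is deliberately dropped so the log-collection branch is reached

function validate_microservice() {
    result=$(http_proxy="" curl http://localhost:9000/v1/example -XPOST \
        -d '{"text":"hello"}' -H 'Content-Type: application/json')
    if [[ $result == *"expected"* ]]; then
        echo "Result correct."
    else
        echo "Result wrong."
        docker logs test-comps-example-server   # dump container logs before failing
        exit 1
    fi
}
```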
diff --git a/.github/workflows/megaservice-test.yml b/.github/workflows/mix-megaservice-test.yml similarity index 98% rename from .github/workflows/megaservice-test.yml rename to .github/workflows/mix-megaservice-test.yml index 7e55e2bf2..83c826cd7 100644 --- a/.github/workflows/megaservice-test.yml +++ b/.github/workflows/mix-megaservice-test.yml @@ -10,7 +10,7 @@ on: branches: [main] types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped paths: - - .github/workflows/megaservice-test.yml + - .github/workflows/mix-megaservice-test.yml - comps/cores/** - requirements.txt - setup.py diff --git a/.github/workflows/trellix.yml b/.github/workflows/mix-trellix.yml similarity index 100% rename from .github/workflows/trellix.yml rename to .github/workflows/mix-trellix.yml diff --git a/.github/workflows/test.yaml b/.github/workflows/pr-dockerfile-path-scan.yaml similarity index 100% rename from .github/workflows/test.yaml rename to .github/workflows/pr-dockerfile-path-scan.yaml diff --git a/.github/workflows/pr-examples-test.yml b/.github/workflows/pr-examples-test.yml new file mode 100644 index 000000000..92354c5c2 --- /dev/null +++ b/.github/workflows/pr-examples-test.yml @@ -0,0 +1,72 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Example-test + +on: + pull_request_target: + branches: [main] + types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped + paths: + - .github/workflows/pr-examples-test.yml + - comps/cores/** + - comps/embeddings/langchain/** + - comps/retrievers/langchain/redis/** + - comps/reranks/tei/** + - comps/llms/text-generation/tgi/** + - comps/dataprep/redis/langchain/** + - requirements.txt + +# If there is a new commit, the previous jobs will be canceled +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + example-test: + runs-on: gaudi-01-3 + steps: + - name: Clean Up Working Directory + run: | + sudo rm -rf ${{github.workspace}}/* || true + echo y | docker system prune + docker rmi $(docker images --filter reference="*/*:comps" -q) || true + + - name: Checkout out Repo + uses: actions/checkout@v4 + with: + ref: "refs/pull/${{ github.event.number }}/merge" + + - name: Run ChatQnA + env: + HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + run: | + git clone https://github.com/opea-project/GenAIExamples.git + cd ${{ github.workspace }}/GenAIExamples/ChatQnA/docker/gaudi + sed -i "s#:latest#:comps#g" compose.yaml + cat compose.yaml + + cd ${{ github.workspace }}/GenAIExamples/ChatQnA/tests + GenAIComps_dir=${{github.workspace}} + sed -i '/GenAIComps.git/d' test_chatqna_on_gaudi.sh + sed -i "s#cd GenAIComps#cd ${GenAIComps_dir}#g" test_chatqna_on_gaudi.sh + sed -i "s#docker build -t#docker build --no-cache -q -t#g" test_chatqna_on_gaudi.sh + sed -i "s#:latest#:comps#g" test_chatqna_on_gaudi.sh + cat test_chatqna_on_gaudi.sh + + echo "Run test..." 
+ timeout 50m bash test_chatqna_on_gaudi.sh + + - name: Clean up container + if: cancelled() || failure() + run: | + cd ${{ github.workspace }}/GenAIExamples/ChatQnA/docker/gaudi + docker compose stop && docker compose rm -f + docker system prune -f + + - name: Publish pipeline artifact + if: ${{ !cancelled() }} + uses: actions/upload-artifact@v4 + with: + name: "Examples-Test-Logs" + path: ${{ github.workspace }}/GenAIExamples/ChatQnA/tests/*.log diff --git a/.github/workflows/microservice-test.yml b/.github/workflows/pr-microservice-test.yml similarity index 89% rename from .github/workflows/microservice-test.yml rename to .github/workflows/pr-microservice-test.yml index e40745f0b..9abdce833 100644 --- a/.github/workflows/microservice-test.yml +++ b/.github/workflows/pr-microservice-test.yml @@ -12,7 +12,7 @@ on: - tests/** - "!**.md" - "!**.txt" - - .github/workflows/microservice-test.yml + - .github/workflows/pr-microservice-test.yml # If there is a new commit, the previous jobs will be canceled concurrency: @@ -21,7 +21,7 @@ concurrency: jobs: job1: - uses: ./.github/workflows/reuse-get-test-matrix.yml + uses: ./.github/workflows/_get-test-matrix.yml Microservice-test: needs: job1 @@ -31,7 +31,9 @@ jobs: continue-on-error: true steps: - name: Clean Up Working Directory - run: sudo rm -rf ${{github.workspace}}/* + run: | + sudo rm -rf ${{github.workspace}}/* + docker system prune -f - name: Checkout out Repo uses: actions/checkout@v4 @@ -57,7 +59,7 @@ jobs: run: | cid=$(docker ps -aq --filter "name=test-comps-*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - echo y | docker system prune --all + docker system prune -f - name: Publish pipeline artifact if: ${{ !cancelled() }} diff --git a/.github/workflows/image-build-on-push.yml b/.github/workflows/push-image-build.yml similarity index 98% rename from .github/workflows/image-build-on-push.yml rename to .github/workflows/push-image-build.yml index a72d13a61..5472111dd 100644 --- a/.github/workflows/image-build-on-push.yml +++ b/.github/workflows/push-image-build.yml @@ -10,7 +10,7 @@ on: - comps/** - "!**.md" - "!**.txt" - - .github/workflows/image-build-on-push.yml + - .github/workflows/push-image-build.yml concurrency: group: ${{ github.workflow }}-${{ github.ref }}-on-push diff --git a/.github/workflows/reuse-image-build.yml b/.github/workflows/reuse-image-build.yml deleted file mode 100644 index e2ed6883b..000000000 --- a/.github/workflows/reuse-image-build.yml +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -name: Image Build -permissions: read-all -on: - workflow_call: - inputs: - micro_service: - required: true - type: string - -jobs: - micro-image-build: - continue-on-error: true - strategy: - matrix: - node: [docker-build-xeon, docker-build-gaudi] - runs-on: ${{ matrix.node }} - steps: - - name: Checkout out Repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Building MicroService Docker Image - id: build-microservice-image - env: - micro_service: ${{ inputs.micro_service }} - hardware: ${{ matrix.node }} - run: | - bash .github/workflows/scripts/docker_images_build_push.sh ${micro_service} ${hardware} diff --git a/.github/workflows/image-build-on-schedule.yml b/.github/workflows/schedule-image-build.yml similarity index 100% rename from .github/workflows/image-build-on-schedule.yml rename to .github/workflows/schedule-image-build.yml diff --git a/.github/workflows/scripts/docker_images_build_push.sh 
b/.github/workflows/scripts/docker_images_build_push.sh deleted file mode 100644 index eaf4d78f3..000000000 --- a/.github/workflows/scripts/docker_images_build_push.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -xe - -WORKSPACE=$PWD -IMAGE_REPO=${IMAGE_REPO:-$OPEA_IMAGE_REPO} -IMAGE_TAG=${IMAGE_TAG:-latest} - -function docker_build() { - # docker_build - IMAGE_NAME=$1 - micro_service=$2 - dockerfile_path=${WORKSPACE}/comps/${micro_service} - if [[ "$IMAGE_NAME" == *"gaudi" ]]; then - dockerfile_name="Dockerfile_hpu" - else - dockerfile_name="Dockerfile" - fi - if [ -f "$dockerfile_path/$dockerfile_name" ]; then - DOCKERFILE_PATH="$dockerfile_path/$dockerfile_name" - elif [ -f "$dockerfile_path/docker/$dockerfile_name" ]; then - DOCKERFILE_PATH="$dockerfile_path/docker/$dockerfile_name" - else - echo "Dockerfile not found" - exit 1 - fi - echo "Building ${IMAGE_REPO}${IMAGE_NAME}:$IMAGE_TAG using Dockerfile $DOCKERFILE_PATH" - - docker build --no-cache -t ${IMAGE_REPO}${IMAGE_NAME}:$IMAGE_TAG -f $DOCKERFILE_PATH . - docker push ${IMAGE_REPO}${IMAGE_NAME}:$IMAGE_TAG - docker rmi ${IMAGE_REPO}${IMAGE_NAME}:$IMAGE_TAG -} - -micro_service=$1 -hardware=$(echo $2 | cut -d- -f3) -case ${micro_service} in - "asr"|"tts") - IMAGE_NAME="opea/${micro_service}" - ;; - "embeddings/langchain") - IMAGE_NAME="opea/embedding-tei" - ;; - "retrievers/langchain/redis") - IMAGE_NAME="opea/retriever-redis" - ;; - "reranks/tei") - IMAGE_NAME="opea/reranking-tei" - ;; - "llms/text-generation/tgi") - IMAGE_NAME="opea/llm-tgi" - ;; - "dataprep/redis/langchain") - IMAGE_NAME="opea/dataprep-redis" - ;; - "llms/summarization/tgi") - IMAGE_NAME="opea/llm-docsum-tgi" - ;; - "llms/faq-generation/tgi") - IMAGE_NAME="opea/llm-faqgen-tgi" - ;; - "web_retrievers/langchain/chroma") - IMAGE_NAME="opea/web-retriever-chroma" - ;; - "tts/speecht5") - if [ "${hardware}" == "gaudi" ]; then IMAGE_NAME="opea/speecht5-gaudi"; else IMAGE_NAME="opea/speecht5"; fi - ;; - "asr/whisper") - if [ "${hardware}" == "gaudi" ]; then IMAGE_NAME="opea/whisper-gaudi"; else IMAGE_NAME="opea/whisper"; fi - ;; - *) - echo "Not supported yet" - exit 0 - ;; -esac -docker_build "${IMAGE_NAME}" "${micro_service}" diff --git a/tests/test_asr_whisper.sh b/tests/test_asr.sh similarity index 100% rename from tests/test_asr_whisper.sh rename to tests/test_asr.sh diff --git a/tests/test_tts_speecht5.sh b/tests/test_tts.sh similarity index 100% rename from tests/test_tts_speecht5.sh rename to tests/test_tts.sh diff --git a/tests/test_workflow_chatqna.py b/tests/test_workflow_chatqna.py deleted file mode 100644 index a2ea0f2d0..000000000 --- a/tests/test_workflow_chatqna.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# - -import asyncio -import os - -from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType - -MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0") -MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 8888) -EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0") -EMBEDDING_SERVICE_PORT = os.getenv("EMBEDDING_SERVICE_PORT", 6000) -RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0") -RETRIEVER_SERVICE_PORT = os.getenv("RETRIEVER_SERVICE_PORT", 7000) -RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0") -RERANK_SERVICE_PORT = 
os.getenv("RERANK_SERVICE_PORT", 8000) -LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") -LLM_SERVICE_PORT = os.getenv("LLM_SERVICE_PORT", 9000) - - -class ChatQnAService: - def __init__(self, host="0.0.0.0", port=8000): - self.host = host - self.port = port - self.megaservice = ServiceOrchestrator() - - def add_remote_service(self): - embedding = MicroService( - name="embedding", - host=EMBEDDING_SERVICE_HOST_IP, - port=EMBEDDING_SERVICE_PORT, - endpoint="/v1/embeddings", - use_remote_service=True, - service_type=ServiceType.EMBEDDING, - ) - retriever = MicroService( - name="retriever", - host=RETRIEVER_SERVICE_HOST_IP, - port=RETRIEVER_SERVICE_PORT, - endpoint="/v1/retrieval", - use_remote_service=True, - service_type=ServiceType.RETRIEVER, - ) - rerank = MicroService( - name="rerank", - host=RERANK_SERVICE_HOST_IP, - port=RERANK_SERVICE_PORT, - endpoint="/v1/reranking", - use_remote_service=True, - service_type=ServiceType.RERANK, - ) - llm = MicroService( - name="llm", - host=LLM_SERVICE_HOST_IP, - port=LLM_SERVICE_PORT, - endpoint="/v1/chat/completions", - use_remote_service=True, - service_type=ServiceType.LLM, - ) - self.megaservice.add(embedding).add(retriever).add(rerank).add(llm) - self.megaservice.flow_to(embedding, retriever) - self.megaservice.flow_to(retriever, rerank) - self.megaservice.flow_to(rerank, llm) - self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port) - - async def schedule(self): - result_dict, runtime_graph = await self.megaservice.schedule( - initial_inputs={"text": "What is the revenue of Nike in 2023?"} - ) - print(result_dict) - - -if __name__ == "__main__": - chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT) - chatqna.add_remote_service() - asyncio.run(chatqna.schedule()) From ed99d47d7f6834800e2e106243885e3832cdc622 Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Mon, 12 Aug 2024 10:33:23 +0800 Subject: [PATCH 13/15] build new images for llms (#450) Signed-off-by: chensuyue --- .github/workflows/docker/compose/llms-compose.yaml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docker/compose/llms-compose.yaml b/.github/workflows/docker/compose/llms-compose.yaml index a1bc921bb..e722682e8 100644 --- a/.github/workflows/docker/compose/llms-compose.yaml +++ b/.github/workflows/docker/compose/llms-compose.yaml @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # this file should be run in the root of the repo -# images used by GenAIExamples: llm-tgi,llm-ollama,llm-docsum-tgi,llm-faqgen-tgi,llm-vllm,llm-vllm-ray +# images used by GenAIExamples: llm-tgi,llm-ollama,llm-docsum-tgi,llm-faqgen-tgi,llm-vllm,llm-vllm-hpu,llm-vllm-ray,llm-vllm-ray-hpu services: llm-tgi: build: @@ -24,7 +24,15 @@ services: build: dockerfile: comps/llms/text-generation/vllm/docker/Dockerfile.microservice image: ${REGISTRY}opea/llm-vllm:${TAG:-latest} + llm-vllm-hpu: + build: + dockerfile: comps/llms/text-generation/vllm/docker/Dockerfile.hpu + image: ${REGISTRY}opea/llm-vllm-hpu:${TAG:-latest} llm-vllm-ray: build: dockerfile: comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice image: ${REGISTRY}opea/llm-vllm-ray:${TAG:-latest} + llm-vllm-ray-hpu: + build: + dockerfile: comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray + image: ${REGISTRY}opea/llm-vllm-ray-hpu:${TAG:-latest} From 3ffcff41ea02f47e8403cb95d9b0a4f138d278cd Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Mon, 12 Aug 2024 10:46:43 +0800 Subject: [PATCH 14/15] Update 
Microservice CI trigger path (#458) Signed-off-by: chensuyue --- .github/workflows/_get-test-matrix.yml | 2 +- .github/workflows/pr-microservice-test.yml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/_get-test-matrix.yml b/.github/workflows/_get-test-matrix.yml index 6860f8e9b..09be978cb 100644 --- a/.github/workflows/_get-test-matrix.yml +++ b/.github/workflows/_get-test-matrix.yml @@ -45,7 +45,7 @@ jobs: merged_commit=$(git log -1 --format='%H') changed_files="$(git diff --name-only ${base_commit} ${merged_commit} | \ - grep 'comps/' | grep -vE '*.md|*.txt|comps/cores')" || true + grep 'comps/' | grep -vE '*.md|comps/cores')" || true services=$(printf '%s\n' "${changed_files[@]}" | cut -d'/' -f2 | grep -vE '*.py' | sort -u) || true run_matrix="{\"include\":[" for service in ${services}; do diff --git a/.github/workflows/pr-microservice-test.yml b/.github/workflows/pr-microservice-test.yml index 9abdce833..a9a6cb26c 100644 --- a/.github/workflows/pr-microservice-test.yml +++ b/.github/workflows/pr-microservice-test.yml @@ -11,7 +11,6 @@ on: - comps/** - tests/** - "!**.md" - - "!**.txt" - .github/workflows/pr-microservice-test.yml # If there is a new commit, the previous jobs will be canceled From 27a01ee3b08561bce744f4605a0fbf54c47b10a8 Mon Sep 17 00:00:00 2001 From: "Hoong Tee, Yeoh" Date: Mon, 12 Aug 2024 10:47:15 +0800 Subject: [PATCH 15/15] prompt_registry: Unifying API endpoint port (#443) * prompt_registry: Unifying API endpoint port Unifying all port for API endpoints for prompt registry services with recent register_microservice wrapper function update. Signed-off-by: Yeoh, Hoong Tee * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Yeoh, Hoong Tee Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- comps/prompt_registry/mongo/README.md | 10 +++++----- .../docker-compose-prompt-registry-mongo.yaml | 3 +-- comps/prompt_registry/mongo/prompt.py | 14 ++++++-------- tests/test_prompt_registry_mongo.sh | 2 +- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/comps/prompt_registry/mongo/README.md b/comps/prompt_registry/mongo/README.md index 799fec7ca..0cbfd6f99 100644 --- a/comps/prompt_registry/mongo/README.md +++ b/comps/prompt_registry/mongo/README.md @@ -41,7 +41,7 @@ docker run -d -p 27017:27017 --name=mongo mongo:latest 2. 
Run prompt_registry service ```bash -docker run -d --name="promptregistry-mongo-server" -p 6012:6012 -p 6013:6013 -p 6014:6014 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:latest +docker run -d --name="promptregistry-mongo-server" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:latest ``` ## Invoke Microservice @@ -64,7 +64,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://{host_ip}:6013/v1/prompt/get \ + http://{host_ip}:6012/v1/prompt/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -73,7 +73,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://{host_ip}:6013/v1/prompt/get \ + http://{host_ip}:6012/v1/prompt/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -84,7 +84,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://{host_ip}:6013/v1/prompt/get \ + http://{host_ip}:6012/v1/prompt/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -95,7 +95,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://{host_ip}:6014/v1/prompt/delete \ + http://{host_ip}:6012/v1/prompt/delete \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ diff --git a/comps/prompt_registry/mongo/docker/docker-compose-prompt-registry-mongo.yaml b/comps/prompt_registry/mongo/docker/docker-compose-prompt-registry-mongo.yaml index 23db61c89..f6cb68831 100644 --- a/comps/prompt_registry/mongo/docker/docker-compose-prompt-registry-mongo.yaml +++ b/comps/prompt_registry/mongo/docker/docker-compose-prompt-registry-mongo.yaml @@ -19,8 +19,6 @@ services: container_name: promptregistry-mongo-server ports: - "6012:6012" - - "6013:6013" - - "6014:6014" ipc: host environment: http_proxy: ${http_proxy} @@ -28,6 +26,7 @@ services: no_proxy: ${no_proxy} MONGO_HOST: ${MONGO_HOST} MONGO_PORT: ${MONGO_PORT} + DB_NAME: ${DB_NAME} COLLECTION_NAME: ${COLLECTION_NAME} restart: unless-stopped diff --git a/comps/prompt_registry/mongo/prompt.py b/comps/prompt_registry/mongo/prompt.py index 4a3f52bc1..e8d7d285e 100644 --- a/comps/prompt_registry/mongo/prompt.py +++ b/comps/prompt_registry/mongo/prompt.py @@ -34,7 +34,7 @@ class PromptId(BaseModel): @register_microservice( - name="opea_service@prompt_mongo_create", + name="opea_service@prompt_mongo", endpoint="/v1/prompt/create", host="0.0.0.0", input_datatype=PromptCreate, @@ -62,11 +62,11 @@ async def create_prompt(prompt: PromptCreate): @register_microservice( - name="opea_service@prompt_mongo_get", + name="opea_service@prompt_mongo", endpoint="/v1/prompt/get", host="0.0.0.0", input_datatype=PromptId, - port=6013, + port=6012, ) async def get_prompt(prompt: PromptId): """Retrieves prompt from prompt store based on provided PromptId or user. @@ -95,11 +95,11 @@ async def get_prompt(prompt: PromptId): @register_microservice( - name="opea_service@prompt_mongo_delete", + name="opea_service@prompt_mongo", endpoint="/v1/prompt/delete", host="0.0.0.0", input_datatype=PromptId, - port=6014, + port=6012, ) async def delete_prompt(prompt: PromptId): """Delete a prompt from prompt store by given PromptId. 
@@ -125,6 +125,4 @@ async def delete_prompt(prompt: PromptId): if __name__ == "__main__": - opea_microservices["opea_service@prompt_mongo_get"].start() - opea_microservices["opea_service@prompt_mongo_create"].start() - opea_microservices["opea_service@prompt_mongo_delete"].start() + opea_microservices["opea_service@prompt_mongo"].start() diff --git a/tests/test_prompt_registry_mongo.sh b/tests/test_prompt_registry_mongo.sh index bdf5d907c..e91bf225c 100644 --- a/tests/test_prompt_registry_mongo.sh +++ b/tests/test_prompt_registry_mongo.sh @@ -22,7 +22,7 @@ function build_docker_images() { function start_service() { - docker run -d --name="test-comps-promptregistry-mongo-server" -p 6012:6012 -p 6013:6013 -p 6014:6014 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:latest + docker run -d --name="test-comps-promptregistry-mongo-server" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:latest sleep 10s }
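With this last patch applied, the create, get, and delete routes are all registered under the single `opea_service@prompt_mongo` service and served from port 6012, so a client only needs one endpoint port. A rough smoke test against a running `promptregistry-mongo-server` container might look like the sketch below; the host value is a placeholder, and the JSON field names (`user`, `prompt_id`) are assumptions based on the `PromptId` model referenced in `prompt.py`, so check the model definition before relying on them.

```bash
# Sketch only: host and payload fields are assumptions, not verified against the models.
host_ip=localhost

# Fetch prompts for a user -- same 6012 port as create/delete after this patch.
curl -X POST http://${host_ip}:6012/v1/prompt/get \
  -H 'Content-Type: application/json' \
  -d '{"user": "test"}'

# Delete a specific prompt by id (the id would come from the get call above).
curl -X POST http://${host_ip}:6012/v1/prompt/delete \
  -H 'Content-Type: application/json' \
  -d '{"user": "test", "prompt_id": "<prompt-id-from-get>"}'
```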