diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 82c5005cd..000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,53 +0,0 @@ -# Contributing - -## License - -Generative AI Examples is licensed under the terms in [LICENSE](/LICENSE). -By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. - -## Create Pull Request - -If you have improvements to Generative AI Examples, send your pull requests for -[review](https://github.com/opea-project/GenAIExamples/pulls). -If you are new to GitHub, view the pull request [How To](https://help.github.com/articles/using-pull-requests/). - -### Step-by-Step Guidelines - -- Star this repository using the button `Star` in the top right corner. -- Fork this Repository using the button `Fork` in the top right corner. -- Clone your forked repository to your pc. - `git clone "url to your repo"` -- Create a new branch for your modifications. - `git checkout -b new-branch` -- Add your files with `git add -A`, commit with `git commit -s -m "This is my commit message"` and push `git push origin new-branch`. -- Create a [pull request](https://github.com/opea-project/GenAIExamples/pulls). - -## Pull Request Template - -See [PR template](/.github/pull_request_template.md) - -## Pull Request Acceptance Criteria - -- At least two approvals from reviewers - -- All detected status checks pass - -- All conversations solved - -- Third-party dependency license compatible - -## Pull Request Status Checks Overview - -Generative AI Examples use [Actions](https://github.com/opea-project/GenAIExamples/actions) for CI test. 
-| Test Name | Test Scope | Test Pass Criteria | -|-------------------------------|-----------------------------------------------|---------------------------| -| Security Scan | Dependabot/Bandit | PASS | -| Format Scan | pre-commit.ci | PASS | -| Examples Test | Cases under Examples/tests folder | PASS | -| DCO | Use `git commit -s` to sign off | PASS | - -> Notes: [Developer Certificate of Origin (DCO)](https://en.wikipedia.org/wiki/Developer_Certificate_of_Origin), you must agree to the terms of Developer Certificate of Origin by signing off each of your commits with `-s`, e.g. `git commit -s -m 'This is my commit message'`. - -## Support - -Submit your questions, feature requests, and bug reports to the [GitHub issues](https://github.com/opea-project/GenAIExamples/issues) page. diff --git a/LEGAL_INFORMATION.md b/LEGAL_INFORMATION.md index fcd51a7a5..7c71af2b9 100644 --- a/LEGAL_INFORMATION.md +++ b/LEGAL_INFORMATION.md @@ -5,7 +5,7 @@ ## License -Generative AI Examples is licensed under [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). +Generative AI Components is licensed under [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). This software includes components that have separate copyright notices and licensing terms. Your use of the source code for these components is subject to the terms and conditions of the following licenses. @@ -15,13 +15,13 @@ See the accompanying [license](/LICENSE) file for full license text and copyrigh ## Citation -If you use Generative AI Examples in your research, use the following BibTeX entry. +If you use Generative AI Components in your research, use the following BibTeX entry. 
``` -@misc{Generative AI Examples, +@misc{Generative AI Components, author = {Liang Lv, Haihao Shen}, - title = {Generative AI Examples}, - howpublished = {\url{https://github.com/opea-project/GenAIExamples}}, + title = {Generative AI Components}, + howpublished = {\url{https://github.com/opea-project/GenAIComps}}, year = {2024} } ``` diff --git a/README.md b/README.md index 392ac04cc..51090259a 100644 --- a/README.md +++ b/README.md @@ -53,17 +53,14 @@ The initially supported `Microservices` are described in the below table. More ` Description - Embedding - LangChain - BAAI/bge-large-en-v1.5 + Embedding + LangChain + BAAI/bge-large-en-v1.5 TEI-Gaudi Gaudi2 Embedding on Gaudi2 - Embedding - LangChain - BAAI/bge-base-en-v1.5 TEI Xeon Embedding on Xeon CPU @@ -77,58 +74,95 @@ The initially supported `Microservices` are described in the below table. More ` Retriever on Xeon CPU - Reranking - LangChain - BAAI/bge-reranker-large + Reranking + LangChain + BAAI/bge-reranker-large TEI-Gaudi Gaudi2 Reranking on Gaudi2 - Reranking - LangChain BBAAI/bge-reranker-base TEI Xeon Reranking on Xeon CPU - LLM - LangChain - Intel/neural-chat-7b-v3-3 + ASR + NA + openai/whisper-small + NA + Gaudi2 + Audio-Speech-Recognition on Gaudi2 + + + Xeon + Audio-Speech-Recognition on Xeon CPU + + + TTS + NA + microsoft/speecht5_tts + NA + Gaudi2 + Text-To-Speech on Gaudi2 + + + Xeon + Text-To-Speech on Xeon CPU + + + Dataprep + Qdrant + sentence-transformers/all-MiniLM-L6-v2 + NA + Gaudi2 + Dataprep on Gaudi2 + + + Xeon + Dataprep on Xeon CPU + + + Redis + BAAI/bge-base-en-v1.5 + Gaudi2 + Dataprep on Gaudi2 + + + Xeon + Dataprep on Xeon CPU + + + LLM + LangChain + Intel/neural-chat-7b-v3-3 TGI Gaudi Gaudi2 LLM on Gaudi2 - LLM - LangChain - Intel/neural-chat-7b-v3-3 TGI Xeon LLM on Xeon CPU - LLM - LangChain - Intel/neural-chat-7b-v3-3 - vLLM + Intel/neural-chat-7b-v3-3 + Ray Serve + Gaudi2 + LLM on Gaudi2 + + Xeon LLM on Xeon CPU - LLM - LangChain - Intel/neural-chat-7b-v3-3 - Ray Serve + 
Intel/neural-chat-7b-v3-3 + vLLM Gaudi2 LLM on Gaudi2 - LLM - LangChain - Intel/neural-chat-7b-v3-3 - Ray Serve Xeon LLM on Xeon CPU @@ -190,7 +224,7 @@ class ExampleService: host=EMBEDDING_SERVICE_HOST_IP, port=EMBEDDING_SERVICE_PORT, endpoint="/v1/embeddings", - use_remote_service=True,S + use_remote_service=True, service_type=ServiceType.EMBEDDING, ) llm = MicroService( @@ -221,6 +255,7 @@ self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port ## Additional Content -- [Contribution](/CONTRIBUTING.md) +- [Code of Conduct](https://github.com/opea-project/docs/tree/main/community/CODE_OF_CONDUCT.md) +- [Contribution](https://github.com/opea-project/docs/tree/main/community/CONTRIBUTING.md) +- [Security Policy](https://github.com/opea-project/docs/tree/main/community/SECURITY.md) - [Legal Information](/LEGAL_INFORMATION.md) -- [Security Policy](/SECURITY.md) diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index b830f7403..000000000 --- a/SECURITY.md +++ /dev/null @@ -1,9 +0,0 @@ -## Reporting a Vulnerability - -Please report any security vulnerabilities in this project utilizing the guidelines [here](https://www.linuxfoundation.org/security). - -## Script Usage Notice - -SCRIPT USAGE NOTICE: By downloading and using any script file included with the associated software package (such as files with .bat, .cmd, or .JS extensions, Dockerfiles, or any other type of file that, when executed, automatically downloads and/or installs files onto your system) -(the “Script File”), it is your obligation to review the Script File to understand what files (e.g., other software, AI models, AI Datasets) the Script File will download to your system (“Downloaded Files”). -Furthermore, by downloading and using the Downloaded Files, even if they are installed through a silent install, you agree to any and all terms and conditions associated with such files, including but not limited to, license terms, notices, or disclaimers. 
diff --git a/comps/embeddings/README.md b/comps/embeddings/README.md index bcfdfffe3..169b9831c 100644 --- a/comps/embeddings/README.md +++ b/comps/embeddings/README.md @@ -27,7 +27,10 @@ For both of the implementations, you need to install requirements first. ## 1.1 Install Requirements ```bash +# run with langchain pip install -r langchain/requirements.txt +# run with llama_index +pip install -r llama_index/requirements.txt ``` ## 1.2 Start Embedding Service @@ -57,8 +60,12 @@ curl localhost:$your_port/embed \ Start the embedding service with the TEI_EMBEDDING_ENDPOINT. ```bash +# run with langchain cd langchain +# run with llama_index +cd llama_index export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport" +export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5" export LANGCHAIN_TRACING_V2=true export LANGCHAIN_API_KEY=${your_langchain_api_key} export LANGCHAIN_PROJECT="opea/gen-ai-comps:embeddings" @@ -68,7 +75,10 @@ python embedding_tei_gaudi.py ### Start Embedding Service with Local Model ```bash +# run with langchain cd langchain +# run with llama_index +cd llama_index python local_embedding.py ``` @@ -98,19 +108,29 @@ Export the `TEI_EMBEDDING_ENDPOINT` for later usage: ```bash export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport" +export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5" ``` ## 2.2 Build Docker Image +### Build Langchain Docker (Option a) + ```bash cd ../../ docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile . ``` +### Build LlamaIndex Docker (Option b) + +```bash +cd ../../ +docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile . 
+``` + ## 2.3 Run Docker with CLI ```bash -docker run -d --name="embedding-tei-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei:latest +docker run -d --name="embedding-tei-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_EMBEDDING_MODEL_NAME=$TEI_EMBEDDING_MODEL_NAME opea/embedding-tei:latest ``` ## 2.4 Run Docker with Docker Compose diff --git a/comps/embeddings/llama_index/__init__.py b/comps/embeddings/llama_index/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/comps/embeddings/llama_index/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/embeddings/llama_index/docker/Dockerfile b/comps/embeddings/llama_index/docker/Dockerfile new file mode 100644 index 000000000..6d0bb57e1 --- /dev/null +++ b/comps/embeddings/llama_index/docker/Dockerfile @@ -0,0 +1,30 @@ + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM ubuntu:22.04 + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + vim \ + python3 \ + python3-pip + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +USER user + +COPY comps /home/user/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/embeddings/llama_index/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/embeddings/llama_index + +ENTRYPOINT ["python3", "embedding_tei_gaudi.py"] + diff --git a/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml b/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml new file mode 100644 index 000000000..90f1e52b9 --- /dev/null +++ 
b/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + embedding: + image: opea/embedding-tei:latest + container_name: embedding-tei-server + ports: + - "6000:6000" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + TEI_EMBEDDING_MODEL_NAME: ${TEI_EMBEDDING_MODEL_NAME} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/embeddings/llama_index/embedding_tei_gaudi.py b/comps/embeddings/llama_index/embedding_tei_gaudi.py new file mode 100644 index 000000000..020f5e697 --- /dev/null +++ b/comps/embeddings/llama_index/embedding_tei_gaudi.py @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from langsmith import traceable +from llama_index.embeddings.text_embeddings_inference import TextEmbeddingsInference + +from comps import EmbedDoc768, ServiceType, TextDoc, opea_microservices, register_microservice + + +@register_microservice( + name="opea_service@embedding_tgi_gaudi", + service_type=ServiceType.EMBEDDING, + endpoint="/v1/embeddings", + host="0.0.0.0", + port=6000, + input_datatype=TextDoc, + output_datatype=EmbedDoc768, +) +@traceable(run_type="embedding") +def embedding(input: TextDoc) -> EmbedDoc768: + """Embed input.text with the module-level TEI client and return an EmbedDoc768 holding the first 768 values.""" + embed_vector = embeddings.get_query_embedding(input.text) + embed_vector = embed_vector[:768] # Keep only the first 768 elements + res = EmbedDoc768(text=input.text, embedding=embed_vector) + return res + + +if __name__ == "__main__": + tei_embedding_model_name = os.getenv("TEI_EMBEDDING_MODEL_NAME", "BAAI/bge-large-en-v1.5") + tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT", "http://localhost:8090") + embeddings = TextEmbeddingsInference(model_name=tei_embedding_model_name, 
base_url=tei_embedding_endpoint) + print("TEI Gaudi Embedding initialized.") + opea_microservices["opea_service@embedding_tgi_gaudi"].start() diff --git a/comps/embeddings/llama_index/local_embedding.py b/comps/embeddings/llama_index/local_embedding.py new file mode 100644 index 000000000..84a61806e --- /dev/null +++ b/comps/embeddings/llama_index/local_embedding.py @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from langsmith import traceable +from llama_index.embeddings.huggingface import HuggingFaceEmbedding + +from comps import EmbedDoc1024, ServiceType, TextDoc, opea_microservices, register_microservice + + +@register_microservice( + name="opea_service@local_embedding", + service_type=ServiceType.EMBEDDING, + endpoint="/v1/embeddings", + host="0.0.0.0", + port=6000, + input_datatype=TextDoc, + output_datatype=EmbedDoc1024, +) +@traceable(run_type="embedding") +def embedding(input: TextDoc) -> EmbedDoc1024: + embed_vector = embeddings.get_text_embedding(input.text) + res = EmbedDoc1024(text=input.text, embedding=embed_vector) + return res + + +if __name__ == "__main__": + embeddings = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5") + opea_microservices["opea_service@local_embedding"].start() diff --git a/comps/embeddings/llama_index/requirements.txt b/comps/embeddings/llama_index/requirements.txt new file mode 100644 index 000000000..5af75eeb1 --- /dev/null +++ b/comps/embeddings/llama_index/requirements.txt @@ -0,0 +1,10 @@ +docarray[full] +fastapi +huggingface_hub +langsmith +llama-index-embeddings-huggingface +llama-index-embeddings-text-embeddings-inference +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +shortuuid diff --git a/comps/llms/text-generation/vllm/README.md b/comps/llms/text-generation/vllm/README.md index af5343da3..338631552 100644 --- a/comps/llms/text-generation/vllm/README.md +++ b/comps/llms/text-generation/vllm/README.md @@ -1,10 +1,10 @@ # vLLM Endpoint Serve 
-[vLLM](https://github.com/vllm-project/vllm) is a fast and easy-to-use library for LLM inference and serving, it delivers state-of-the-art serving throughput with a set of advanced features such as PagedAttention, Continuous batching and etc.. Besides GPUs, vLLM already supported [Intel CPUs](https://www.intel.com/content/www/us/en/products/overview.html), Gaudi accelerators support will be added soon. This guide provides an example on how to launch vLLM serving endpoint on CPU. +[vLLM](https://github.com/vllm-project/vllm) is a fast and easy-to-use library for LLM inference and serving. It delivers state-of-the-art serving throughput with a set of advanced features such as PagedAttention and continuous batching. Besides GPUs, vLLM also supports [Intel CPUs](https://www.intel.com/content/www/us/en/products/overview.html) and [Gaudi accelerators](https://habana.ai/products). This guide provides an example of how to launch a vLLM serving endpoint on CPU and Gaudi accelerators. ## Getting Started -### Launch vLLM CPU Service +### Launch vLLM Service #### Launch a local server instance: @@ -12,6 +12,8 @@ bash ./serving/vllm/launch_vllm_service.sh ``` +The `./serving/vllm/launch_vllm_service.sh` script takes `hw_mode` as its optional third positional argument (after `port_number` and `model_name`) to specify the hardware mode of the service; the default is `cpu`, and the other supported value is `hpu`. + For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token. 
@@ -33,16 +35,17 @@ curl http://127.0.0.1:8080/v1/completions \ }' ``` -#### Customize vLLM CPU Service +#### Customize vLLM Service -The `./serving/vllm/launch_vllm_service.sh` script accepts two parameters: +The `./serving/vllm/launch_vllm_service.sh` script accepts three parameters: - port_number: The port number assigned to the vLLM CPU endpoint, with the default being 8080. -- model_name: The model name utilized for LLM, with the default set to "mistralai/Mistral-7B-v0.1". +- model_name: The model name utilized for LLM, with the default set to "Intel/neural-chat-7b-v3-3". +- hw_mode: The hardware mode utilized for LLM, with the default set to "cpu"; the other supported value is "hpu". -You have the flexibility to customize two parameters according to your specific needs. Additionally, you can set the vLLM CPU endpoint by exporting the environment variable `vLLM_LLM_ENDPOINT`: +You have the flexibility to customize these three parameters according to your specific needs. Additionally, you can set the vLLM endpoint by exporting the environment variable `vLLM_LLM_ENDPOINT`: ```bash export vLLM_LLM_ENDPOINT="http://xxx.xxx.xxx.xxx:8080" -export LLM_MODEL= # example: export LLM_MODEL="mistralai/Mistral-7B-v0.1" +export LLM_MODEL= # example: export LLM_MODEL="Intel/neural-chat-7b-v3-3" ``` diff --git a/comps/llms/text-generation/vllm/build_docker.sh b/comps/llms/text-generation/vllm/build_docker.sh new file mode 100644 index 000000000..3680f076c --- /dev/null +++ b/comps/llms/text-generation/vllm/build_docker.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Set default values +default_hw_mode="cpu" + +# Assign arguments to variable +hw_mode=${1:-$default_hw_mode} + +# Reject calls that pass more than the single optional argument +if [ "$#" -gt 1 ]; then + echo "Usage: $0 [hw_mode]" + echo "Please customize the arguments you want to use. + - hw_mode: The hardware mode for the vLLM endpoint, with the default being 'cpu', and the optional selection can be 'cpu' or 'hpu'." + exit 1 +fi + +# Build the docker image for vLLM based on the hardware mode +if [ "$hw_mode" = "hpu" ]; then + docker build -f docker/Dockerfile.hpu -t vllm:hpu --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy +else + git clone https://github.com/vllm-project/vllm.git + cd ./vllm/ || exit 1 + docker build -f Dockerfile.cpu -t vllm:cpu --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy +fi diff --git a/comps/llms/text-generation/vllm/build_docker_cpu.sh b/comps/llms/text-generation/vllm/build_docker_cpu.sh deleted file mode 100644 index 487c4221b..000000000 --- a/comps/llms/text-generation/vllm/build_docker_cpu.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -git clone https://github.com/vllm-project/vllm.git -cd ./vllm/ -docker build -f Dockerfile.cpu -t vllm:cpu --shm-size=128g . 
--build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy diff --git a/comps/llms/text-generation/vllm/docker/Dockerfile.hpu b/comps/llms/text-generation/vllm/docker/Dockerfile.hpu new file mode 100644 index 000000000..430cf4641 --- /dev/null +++ b/comps/llms/text-generation/vllm/docker/Dockerfile.hpu @@ -0,0 +1,20 @@ +FROM vault.habana.ai/gaudi-docker/1.16.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest + +ENV LANG=en_US.UTF-8 + +WORKDIR /root + +RUN pip install --upgrade-strategy eager optimum[habana] + +RUN pip install -v git+https://github.com/HabanaAI/vllm-fork.git@ae3d6121 + +RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \ + service ssh restart + +ENV no_proxy=localhost,127.0.0.1 + +ENV PT_HPU_LAZY_ACC_PAR_MODE=0 + +ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true + +CMD ["/bin/bash"] \ No newline at end of file diff --git a/comps/llms/text-generation/vllm/launch_vllm_service.sh b/comps/llms/text-generation/vllm/launch_vllm_service.sh index c6fc04210..7e32c8775 100644 --- a/comps/llms/text-generation/vllm/launch_vllm_service.sh +++ b/comps/llms/text-generation/vllm/launch_vllm_service.sh @@ -6,20 +6,29 @@ # Set default values default_port=8080 -default_model="mistralai/Mistral-7B-v0.1" +default_hw_mode="cpu" +default_model="Intel/neural-chat-7b-v3-3" # Assign arguments to variables port_number=${1:-$default_port} model_name=${2:-$default_model} +hw_mode=${3:-$default_hw_mode} # Check if all required arguments are provided -if [ "$#" -lt 0 ] || [ "$#" -gt 2 ]; then - echo "Usage: $0 [port_number] [model_name]" +if [ "$#" -lt 0 ] || [ "$#" -gt 3 ]; then + echo "Usage: $0 [port_number] [model_name] [hw_mode]" + echo "port_number: The port number assigned to the vLLM CPU endpoint, with the default being 8080." + echo "model_name: The model name utilized for LLM, with the default set to 'Intel/neural-chat-7b-v3-3'." 
+ echo "hw_mode: The hardware mode utilized for LLM, with the default set to 'cpu', and the optional selection can be 'hpu'" exit 1 fi # Set the volume variable volume=$PWD/data -# Build the Docker run command based on the number of cards -docker run -it --rm --name="ChatQnA_server" -p $port_number:$port_number --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} vllm:cpu /bin/bash -c "cd / && export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --model $model_name --host 0.0.0.0 --port $port_number" +# Build the Docker run command based on hardware mode +if [ "$hw_mode" = "hpu" ]; then + docker run -it --runtime=habana --rm --name="ChatQnA_server" -p $port_number:$port_number -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$http_proxy -e HF_TOKEN=${HF_TOKEN} vllm:hpu /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $model_name --host 0.0.0.0 --port $port_number" +else + docker run -it --rm --name="ChatQnA_server" -p $port_number:$port_number --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$http_proxy -e HF_TOKEN=${HF_TOKEN} vllm:cpu /bin/bash -c "cd / && export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $model_name --host 0.0.0.0 --port $port_number" +fi diff --git a/tests/test_embeddings_llama_index.sh b/tests/test_embeddings_llama_index.sh new file mode 100644 index 000000000..006a2c259 --- /dev/null +++ b/tests/test_embeddings_llama_index.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd 
$WORKPATH + echo $(pwd) + docker build --no-cache -t opea/embedding-tei:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile . +} + +function start_service() { + tei_endpoint=5001 + model="BAAI/bge-large-en-v1.5" + revision="refs/pr/5" + docker run -d --name="test-comps-embedding-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" + tei_service_port=5010 + docker run -d --name="test-comps-embedding-tei-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${tei_service_port}:6000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei:comps + sleep 3m +} + +function validate_microservice() { + tei_service_port=5010 + URL="http://${ip_address}:$tei_service_port/v1/embeddings" + docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"text":"What is Deep Learning?"}' -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ embedding - llama_index ] HTTP status is 200. Checking content..." + local CONTENT=$(curl -s -X POST -d '{"text":"What is Deep Learning?"}' -H 'Content-Type: application/json' "$URL" | tee -a ${LOG_PATH}/embedding.log) + # Search the response body itself for the echoed text followed by an embedding array + if echo "$CONTENT" | grep -q '"text":"What is Deep Learning?","embedding":\['; then + echo "[ embedding - llama_index ] Content is as expected." + else + echo "[ embedding - llama_index ] Content does not match the expected result: $CONTENT" + docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log + exit 1 + fi + else + echo "[ embedding - llama_index ] HTTP status is not 200. 
Received status was $HTTP_STATUS" + docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log + exit 1 + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-embedding-*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main