From f37ce2cafee23145a849463c9e1b2c38217e8018 Mon Sep 17 00:00:00 2001
From: Letong Han <106566639+letonghan@users.noreply.github.com>
Date: Wed, 12 Jun 2024 20:53:52 +0800
Subject: [PATCH] Support Embedding Microservice with Llama Index (#150)

* fix stream=false doesn't work issue

Signed-off-by: letonghan

* support embedding comp with llama_index

Signed-off-by: letonghan

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add More Contents to the Table of MicroService (#141)

* Add More Contents to the Table MicroService

Signed-off-by: zehao-intel

* reorder

Signed-off-by: zehao-intel

* Update README.md

* refine structure

Signed-off-by: zehao-intel

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix model

Signed-off-by: zehao-intel

* refine table

Signed-off-by: zehao-intel

* put llm to the ground

Signed-off-by: zehao-intel

---------

Signed-off-by: zehao-intel
Co-authored-by: Sihan Chen <39623753+Spycsh@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

* Use common security content for OPEA projects (#151)

* add python coverage

Signed-off-by: chensuyue

* docs update

Signed-off-by: chensuyue

* Revert "add python coverage"

This reverts commit 69615b16c8e7483f9fea742d1d3fa0707075a394.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: chensuyue
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

* Enable vLLM Gaudi support for LLM service based on official habana vllm release (#137)

Signed-off-by: tianyil1
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

* support embedding comp with llama_index

Signed-off-by: letonghan

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add test script for embedding llama_index

Signed-off-by: letonghan

* remove conflict requirements

Signed-off-by: letonghan

* update test script

Signed-off-by: letonghan

* update

Signed-off-by: letonghan

* update

Signed-off-by: letonghan

* update

Signed-off-by: letonghan

* fix ut issue

Signed-off-by: letonghan

---------

Signed-off-by: letonghan
Signed-off-by: zehao-intel
Signed-off-by: chensuyue
Signed-off-by: tianyil1
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: zehao-intel
Co-authored-by: Sihan Chen <39623753+Spycsh@users.noreply.github.com>
Co-authored-by: chen, suyue
Co-authored-by: Tianyi Liu
---
 comps/embeddings/README.md                    | 22 +++++-
 comps/embeddings/llama_index/__init__.py      |  2 +
 .../embeddings/llama_index/docker/Dockerfile  | 30 ++++++++
 .../docker/docker_compose_embedding.yaml      | 23 ++++++
 .../llama_index/embedding_tei_gaudi.py        | 34 +++++++++
 .../embeddings/llama_index/local_embedding.py | 28 ++++++++
 comps/embeddings/llama_index/requirements.txt |  9 +++
 tests/test_embeddings_llama_index.sh          | 70 +++++++++++++++++++
 8 files changed, 217 insertions(+), 1 deletion(-)
 create mode 100644 comps/embeddings/llama_index/__init__.py
 create mode 100644 comps/embeddings/llama_index/docker/Dockerfile
 create mode 100644 comps/embeddings/llama_index/docker/docker_compose_embedding.yaml
 create mode 100644 comps/embeddings/llama_index/embedding_tei_gaudi.py
 create mode 100644 comps/embeddings/llama_index/local_embedding.py
 create mode 100644 comps/embeddings/llama_index/requirements.txt
 create mode 100644 tests/test_embeddings_llama_index.sh
diff --git a/comps/embeddings/README.md b/comps/embeddings/README.md
index bcfdfffe3..169b9831c 100644
--- a/comps/embeddings/README.md
+++ b/comps/embeddings/README.md
@@ -27,7 +27,10 @@ For both of the implementations, you need to install requirements first.
 ## 1.1 Install Requirements
 
 ```bash
+# run with langchain
 pip install -r langchain/requirements.txt
+# run with llama_index
+pip install -r llama_index/requirements.txt
 ```
 
 ## 1.2 Start Embedding Service
@@ -57,8 +60,12 @@ curl localhost:$your_port/embed \
 Start the embedding service with the TEI_EMBEDDING_ENDPOINT.
 
 ```bash
+# run with langchain
 cd langchain
+# run with llama_index
+cd llama_index
 export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport"
+export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
 export LANGCHAIN_TRACING_V2=true
 export LANGCHAIN_API_KEY=${your_langchain_api_key}
 export LANGCHAIN_PROJECT="opea/gen-ai-comps:embeddings"
@@ -68,7 +75,10 @@ python embedding_tei_gaudi.py
 ### Start Embedding Service with Local Model
 
 ```bash
+# run with langchain
 cd langchain
+# run with llama_index
+cd llama_index
 python local_embedding.py
 ```
 
@@ -98,19 +108,29 @@ Export the `TEI_EMBEDDING_ENDPOINT` for later usage:
 
 ```bash
 export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport"
+export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
 ```
 
 ## 2.2 Build Docker Image
 
+### Build Langchain Docker (Option a)
+
 ```bash
 cd ../../
 docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile .
 ```
 
+### Build LlamaIndex Docker (Option b)
+
+```bash
+cd ../../
+docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile .
+```
+
 ## 2.3 Run Docker with CLI
 
 ```bash
-docker run -d --name="embedding-tei-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei:latest
+docker run -d --name="embedding-tei-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_EMBEDDING_MODEL_NAME=$TEI_EMBEDDING_MODEL_NAME opea/embedding-tei:latest
 ```
 
 ## 2.4 Run Docker with Docker Compose
diff --git a/comps/embeddings/llama_index/__init__.py b/comps/embeddings/llama_index/__init__.py
new file mode 100644
index 000000000..916f3a44b
--- /dev/null
+++ b/comps/embeddings/llama_index/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/embeddings/llama_index/docker/Dockerfile b/comps/embeddings/llama_index/docker/Dockerfile
new file mode 100644
index 000000000..6d0bb57e1
--- /dev/null
+++ b/comps/embeddings/llama_index/docker/Dockerfile
@@ -0,0 +1,30 @@
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM ubuntu:22.04
+
+RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
+    libgl1-mesa-glx \
+    libjemalloc-dev \
+    vim \
+    python3 \
+    python3-pip
+
+RUN useradd -m -s /bin/bash user && \
+    mkdir -p /home/user && \
+    chown -R user /home/user/
+
+USER user
+
+COPY comps /home/user/comps
+
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /home/user/comps/embeddings/llama_index/requirements.txt
+
+ENV PYTHONPATH=$PYTHONPATH:/home/user
+
+WORKDIR /home/user/comps/embeddings/llama_index
+
+ENTRYPOINT ["python3", "embedding_tei_gaudi.py"]
+
diff --git a/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml b/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml
new file mode 100644
index 000000000..90f1e52b9
--- /dev/null
+++ b/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+version: "3.8"
+
+services:
+  embedding:
+    image: opea/embedding-tei:latest
+    container_name: embedding-tei-server
+    ports:
+      - "6000:6000"
+    ipc: host
+    environment:
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      TEI_EMBEDDING_MODEL_NAME: ${TEI_EMBEDDING_MODEL_NAME}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+    restart: unless-stopped
+
+networks:
+  default:
+    driver: bridge
diff --git a/comps/embeddings/llama_index/embedding_tei_gaudi.py b/comps/embeddings/llama_index/embedding_tei_gaudi.py
new file mode 100644
index 000000000..020f5e697
--- /dev/null
+++ b/comps/embeddings/llama_index/embedding_tei_gaudi.py
@@ -0,0 +1,34 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+from langsmith import traceable
+from llama_index.embeddings.text_embeddings_inference import TextEmbeddingsInference
+
+from comps import EmbedDoc768, ServiceType, TextDoc, opea_microservices, register_microservice
+
+
+@register_microservice(
+    name="opea_service@embedding_tgi_gaudi",
+    service_type=ServiceType.EMBEDDING,
+    endpoint="/v1/embeddings",
+    host="0.0.0.0",
+    port=6000,
+    input_datatype=TextDoc,
+    output_datatype=EmbedDoc768,
+)
+@traceable(run_type="embedding")
+def embedding(input: TextDoc) -> EmbedDoc768:
+    embed_vector = embeddings._get_query_embedding(input.text)
+    embed_vector = embed_vector[:768]  # Keep only the first 768 elements
+    res = EmbedDoc768(text=input.text, embedding=embed_vector)
+    return res
+
+
+if __name__ == "__main__":
+    tei_embedding_model_name = os.getenv("TEI_EMBEDDING_MODEL_NAME", "BAAI/bge-large-en-v1.5")
+    tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT", "http://localhost:8090")
+    embeddings = TextEmbeddingsInference(model_name=tei_embedding_model_name, base_url=tei_embedding_endpoint)
+    print("TEI Gaudi Embedding initialized.")
+    opea_microservices["opea_service@embedding_tgi_gaudi"].start()
diff --git a/comps/embeddings/llama_index/local_embedding.py b/comps/embeddings/llama_index/local_embedding.py
new file mode 100644
index 000000000..84a61806e
--- /dev/null
+++ b/comps/embeddings/llama_index/local_embedding.py
@@ -0,0 +1,28 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from langsmith import traceable
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+
+from comps import EmbedDoc1024, ServiceType, TextDoc, opea_microservices, register_microservice
+
+
+@register_microservice(
+    name="opea_service@local_embedding",
+    service_type=ServiceType.EMBEDDING,
+    endpoint="/v1/embeddings",
+    host="0.0.0.0",
+    port=6000,
+    input_datatype=TextDoc,
+    output_datatype=EmbedDoc1024,
+)
+@traceable(run_type="embedding")
+def embedding(input: TextDoc) -> EmbedDoc1024:
+    embed_vector = embeddings.get_text_embedding(input.text)
+    res = EmbedDoc1024(text=input.text, embedding=embed_vector)
+    return res
+
+
+if __name__ == "__main__":
+    embeddings = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
+    opea_microservices["opea_service@local_embedding"].start()
diff --git a/comps/embeddings/llama_index/requirements.txt b/comps/embeddings/llama_index/requirements.txt
new file mode 100644
index 000000000..5af75eeb1
--- /dev/null
+++ b/comps/embeddings/llama_index/requirements.txt
@@ -0,0 +1,9 @@
+docarray[full]
+fastapi
+huggingface_hub
+langsmith
+llama-index-embeddings-text-embeddings-inference
+opentelemetry-api
+opentelemetry-exporter-otlp
+opentelemetry-sdk
+shortuuid
diff --git a/tests/test_embeddings_llama_index.sh b/tests/test_embeddings_llama_index.sh
new file mode 100644
index 000000000..006a2c259
--- /dev/null
+++ b/tests/test_embeddings_llama_index.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -xe
+
+WORKPATH=$(dirname "$PWD")
+LOG_PATH="$WORKPATH/tests"
+ip_address=$(hostname -I | awk '{print $1}')
+
+function build_docker_images() {
+    cd $WORKPATH
+    echo $(pwd)
+    docker build --no-cache -t opea/embedding-tei:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile .
+}
+
+function start_service() {
+    tei_endpoint=5001
+    model="BAAI/bge-large-en-v1.5"
+    revision="refs/pr/5"
+    docker run -d --name="test-comps-embedding-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision
+    export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}"
+    tei_service_port=5010
+    docker run -d --name="test-comps-embedding-tei-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${tei_service_port}:6000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei:comps
+    sleep 3m
+}
+
+function validate_microservice() {
+    tei_service_port=5010
+    URL="http://${ip_address}:$tei_service_port/v1/embeddings"
+    docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log
+    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"text":"What is Deep Learning?"}' -H 'Content-Type: application/json' "$URL")
+    if [ "$HTTP_STATUS" -eq 200 ]; then
+        echo "[ embedding - llama_index ] HTTP status is 200. Checking content..."
+        local CONTENT=$(curl -s -X POST -d '{"text":"What is Deep Learning?"}' -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/embedding.log)
+
+        if echo "$CONTENT" | grep -q '"text":"What is Deep Learning?","embedding":\['; then
+            echo "[ embedding - llama_index ] Content is as expected."
+        else
+            echo "[ embedding - llama_index ] Content does not match the expected result: $CONTENT"
+            docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log
+            exit 1
+        fi
+    else
+        echo "[ embedding - llama_index ] HTTP status is not 200. Received status was $HTTP_STATUS"
+        docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cid=$(docker ps -aq --filter "name=test-comps-embedding-*")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+}
+
+function main() {
+
+    stop_docker
+
+    build_docker_images
+    start_service
+
+    validate_microservice
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main
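
For a quick manual check of the embedding microservice outside the CI script above, a request can be sent directly to the running `embedding-tei-server` container. This is a minimal sketch, assuming the default `6000:6000` port mapping from `docker_compose_embedding.yaml`; the payload mirrors the one used in `validate_microservice`:

```bash
# Hedged example: assumes embedding-tei-server is already running and published on port 6000.
curl http://localhost:6000/v1/embeddings \
  -X POST \
  -d '{"text":"What is Deep Learning?"}' \
  -H 'Content-Type: application/json'
# The response should echo the input "text" together with an "embedding" list:
# 768 floats from embedding_tei_gaudi.py (truncated) or 1024 floats from local_embedding.py.
```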