From f37ce2cafee23145a849463c9e1b2c38217e8018 Mon Sep 17 00:00:00 2001
From: Letong Han <106566639+letonghan@users.noreply.github.com>
Date: Wed, 12 Jun 2024 20:53:52 +0800
Subject: [PATCH] Support Embedding Microservice with Llama Index (#150)

* fix stream=false doesn't work issue

Signed-off-by: letonghan

* support embedding comp with llama_index

Signed-off-by: letonghan

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add More Contents to the Table of MicroService (#141)

* Add More Contents to the Table MicroService

Signed-off-by: zehao-intel

* reorder

Signed-off-by: zehao-intel

* Update README.md

* refine structure

Signed-off-by: zehao-intel

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix model

Signed-off-by: zehao-intel

* refine table

Signed-off-by: zehao-intel

* put llm to the ground

Signed-off-by: zehao-intel

---------

Signed-off-by: zehao-intel
Co-authored-by: Sihan Chen <39623753+Spycsh@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

* Use common security content for OPEA projects (#151)

* add python coverage

Signed-off-by: chensuyue

* docs update

Signed-off-by: chensuyue

* Revert "add python coverage"

This reverts commit 69615b16c8e7483f9fea742d1d3fa0707075a394.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: chensuyue
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

* Enable vLLM Gaudi support for LLM service based on official habana vllm release (#137)

Signed-off-by: tianyil1
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

* support embedding comp with llama_index

Signed-off-by: letonghan

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add test script for embedding llama_index

Signed-off-by: letonghan

* remove conflict requirements

Signed-off-by: letonghan

* update test script

Signed-off-by: letonghan

* update

Signed-off-by: letonghan

* update

Signed-off-by: letonghan

* update

Signed-off-by: letonghan

* fix ut issue

Signed-off-by: letonghan

---------

Signed-off-by: letonghan
Signed-off-by: zehao-intel
Signed-off-by: chensuyue
Signed-off-by: tianyil1
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: zehao-intel
Co-authored-by: Sihan Chen <39623753+Spycsh@users.noreply.github.com>
Co-authored-by: chen, suyue
Co-authored-by: Tianyi Liu
---
 comps/embeddings/README.md                    | 22 +++++-
 comps/embeddings/llama_index/__init__.py      |  2 +
 .../embeddings/llama_index/docker/Dockerfile  | 30 ++++++++
 .../docker/docker_compose_embedding.yaml      | 23 ++++++
 .../llama_index/embedding_tei_gaudi.py        | 34 +++++++++
 .../embeddings/llama_index/local_embedding.py | 28 ++++++++
 comps/embeddings/llama_index/requirements.txt |  9 +++
 tests/test_embeddings_llama_index.sh          | 70 +++++++++++++++++++
 8 files changed, 217 insertions(+), 1 deletion(-)
 create mode 100644 comps/embeddings/llama_index/__init__.py
 create mode 100644 comps/embeddings/llama_index/docker/Dockerfile
 create mode 100644 comps/embeddings/llama_index/docker/docker_compose_embedding.yaml
 create mode 100644 comps/embeddings/llama_index/embedding_tei_gaudi.py
 create mode 100644 comps/embeddings/llama_index/local_embedding.py
 create mode 100644 comps/embeddings/llama_index/requirements.txt
 create mode 100644 tests/test_embeddings_llama_index.sh
diff --git a/comps/embeddings/README.md b/comps/embeddings/README.md
index bcfdfffe3..169b9831c 100644
--- a/comps/embeddings/README.md
+++ b/comps/embeddings/README.md
@@ -27,7 +27,10 @@ For both of the implementations, you need to install requirements first.
 ## 1.1 Install Requirements
 
 ```bash
+# run with langchain
 pip install -r langchain/requirements.txt
+# run with llama_index
+pip install -r llama_index/requirements.txt
 ```
 
 ## 1.2 Start Embedding Service
@@ -57,8 +60,12 @@ curl localhost:$your_port/embed \
 Start the embedding service with the TEI_EMBEDDING_ENDPOINT.
 
 ```bash
+# run with langchain
 cd langchain
+# run with llama_index
+cd llama_index
 export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport"
+export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
 export LANGCHAIN_TRACING_V2=true
 export LANGCHAIN_API_KEY=${your_langchain_api_key}
 export LANGCHAIN_PROJECT="opea/gen-ai-comps:embeddings"
@@ -68,7 +75,10 @@ python embedding_tei_gaudi.py
 ### Start Embedding Service with Local Model
 
 ```bash
+# run with langchain
 cd langchain
+# run with llama_index
+cd llama_index
 python local_embedding.py
 ```
 
@@ -98,19 +108,29 @@ Export the `TEI_EMBEDDING_ENDPOINT` for later usage:
 
 ```bash
 export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport"
+export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
 ```
 
 ## 2.2 Build Docker Image
 
+### Build Langchain Docker (Option a)
+
 ```bash
 cd ../../
 docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile .
 ```
 
+### Build LlamaIndex Docker (Option b)
+
+```bash
+cd ../../
+docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile .
+```
+
 ## 2.3 Run Docker with CLI
 
 ```bash
-docker run -d --name="embedding-tei-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei:latest
+docker run -d --name="embedding-tei-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_EMBEDDING_MODEL_NAME=$TEI_EMBEDDING_MODEL_NAME opea/embedding-tei:latest
 ```
 
 ## 2.4 Run Docker with Docker Compose
diff --git a/comps/embeddings/llama_index/__init__.py b/comps/embeddings/llama_index/__init__.py
new file mode 100644
index 000000000..916f3a44b
--- /dev/null
+++ b/comps/embeddings/llama_index/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/embeddings/llama_index/docker/Dockerfile b/comps/embeddings/llama_index/docker/Dockerfile
new file mode 100644
index 000000000..6d0bb57e1
--- /dev/null
+++ b/comps/embeddings/llama_index/docker/Dockerfile
@@ -0,0 +1,30 @@
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM ubuntu:22.04
+
+RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
+    libgl1-mesa-glx \
+    libjemalloc-dev \
+    vim \
+    python3 \
+    python3-pip
+
+RUN useradd -m -s /bin/bash user && \
+    mkdir -p /home/user && \
+    chown -R user /home/user/
+
+USER user
+
+COPY comps /home/user/comps
+
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /home/user/comps/embeddings/llama_index/requirements.txt
+
+ENV PYTHONPATH=$PYTHONPATH:/home/user
+
+WORKDIR /home/user/comps/embeddings/llama_index
+
+ENTRYPOINT ["python3", "embedding_tei_gaudi.py"]
+
diff --git a/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml b/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml
new file mode 100644
index 000000000..90f1e52b9
--- /dev/null
+++ b/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+version: "3.8"
+
+services:
+  embedding:
+    image: opea/embedding-tei:latest
+    container_name: embedding-tei-server
+    ports:
+      - "6000:6000"
+    ipc: host
+    environment:
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      TEI_EMBEDDING_MODEL_NAME: ${TEI_EMBEDDING_MODEL_NAME}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+    restart: unless-stopped
+
+networks:
+  default:
+    driver: bridge
diff --git a/comps/embeddings/llama_index/embedding_tei_gaudi.py b/comps/embeddings/llama_index/embedding_tei_gaudi.py
new file mode 100644
index 000000000..020f5e697
--- /dev/null
+++ b/comps/embeddings/llama_index/embedding_tei_gaudi.py
@@ -0,0 +1,34 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+from langsmith import traceable
+from llama_index.embeddings.text_embeddings_inference import TextEmbeddingsInference
+
+from comps import EmbedDoc768, ServiceType, TextDoc, opea_microservices, register_microservice
+
+
+@register_microservice(
+    name="opea_service@embedding_tgi_gaudi",
+    service_type=ServiceType.EMBEDDING,
+    endpoint="/v1/embeddings",
+    host="0.0.0.0",
+    port=6000,
+    input_datatype=TextDoc,
+    output_datatype=EmbedDoc768,
+)
+@traceable(run_type="embedding")
+def embedding(input: TextDoc) -> EmbedDoc768:
+    embed_vector = embeddings._get_query_embedding(input.text)
+    embed_vector = embed_vector[:768]  # Keep only the first 768 elements
+    res = EmbedDoc768(text=input.text, embedding=embed_vector)
+    return res
+
+
+if __name__ == "__main__":
+    tei_embedding_model_name = os.getenv("TEI_EMBEDDING_MODEL_NAME", "BAAI/bge-large-en-v1.5")
+    tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT", "http://localhost:8090")
+    embeddings = TextEmbeddingsInference(model_name=tei_embedding_model_name, base_url=tei_embedding_endpoint)
+    print("TEI Gaudi Embedding initialized.")
+    opea_microservices["opea_service@embedding_tgi_gaudi"].start()
diff --git a/comps/embeddings/llama_index/local_embedding.py b/comps/embeddings/llama_index/local_embedding.py
new file mode 100644
index 000000000..84a61806e
--- /dev/null
+++ b/comps/embeddings/llama_index/local_embedding.py
@@ -0,0 +1,28 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from langsmith import traceable
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+
+from comps import EmbedDoc1024, ServiceType, TextDoc, opea_microservices, register_microservice
+
+
+@register_microservice(
+    name="opea_service@local_embedding",
+    service_type=ServiceType.EMBEDDING,
+    endpoint="/v1/embeddings",
+    host="0.0.0.0",
+    port=6000,
+    input_datatype=TextDoc,
+    output_datatype=EmbedDoc1024,
+)
+@traceable(run_type="embedding")
+def embedding(input: TextDoc) -> EmbedDoc1024:
+    embed_vector = embeddings.get_text_embedding(input.text)
+    res = EmbedDoc1024(text=input.text, embedding=embed_vector)
+    return res
+
+
+if __name__ == "__main__":
+    embeddings = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
+    opea_microservices["opea_service@local_embedding"].start()
diff --git a/comps/embeddings/llama_index/requirements.txt b/comps/embeddings/llama_index/requirements.txt
new file mode 100644
index 000000000..5af75eeb1
--- /dev/null
+++ b/comps/embeddings/llama_index/requirements.txt
@@ -0,0 +1,9 @@
+docarray[full]
+fastapi
+huggingface_hub
+langsmith
+llama-index-embeddings-text-embeddings-inference
+opentelemetry-api
+opentelemetry-exporter-otlp
+opentelemetry-sdk
+shortuuid
diff --git a/tests/test_embeddings_llama_index.sh b/tests/test_embeddings_llama_index.sh
new file mode 100644
index 000000000..006a2c259
--- /dev/null
+++ b/tests/test_embeddings_llama_index.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -xe
+
+WORKPATH=$(dirname "$PWD")
+LOG_PATH="$WORKPATH/tests"
+ip_address=$(hostname -I | awk '{print $1}')
+
+function build_docker_images() {
+    cd $WORKPATH
+    echo $(pwd)
+    docker build --no-cache -t opea/embedding-tei:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile .
+}
+
+function start_service() {
+    tei_endpoint=5001
+    model="BAAI/bge-large-en-v1.5"
+    revision="refs/pr/5"
+    docker run -d --name="test-comps-embedding-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision
+    export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}"
+    tei_service_port=5010
+    docker run -d --name="test-comps-embedding-tei-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${tei_service_port}:6000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei:comps
+    sleep 3m
+}
+
+function validate_microservice() {
+    tei_service_port=5010
+    URL="http://${ip_address}:$tei_service_port/v1/embeddings"
+    docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log
+    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"text":"What is Deep Learning?"}' -H 'Content-Type: application/json' "$URL")
+    if [ "$HTTP_STATUS" -eq 200 ]; then
+        echo "[ embedding - llama_index ] HTTP status is 200. Checking content..."
+        local CONTENT=$(curl -s -X POST -d '{"text":"What is Deep Learning?"}' -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/embedding.log)
+
+        if echo "$CONTENT" | grep -q '"text":"What is Deep Learning?","embedding":\['; then
+            echo "[ embedding - llama_index ] Content is as expected."
+        else
+            echo "[ embedding - llama_index ] Content does not match the expected result: $CONTENT"
+            docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log
+            exit 1
+        fi
+    else
+        echo "[ embedding - llama_index ] HTTP status is not 200. Received status was $HTTP_STATUS"
+        docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cid=$(docker ps -aq --filter "name=test-comps-embedding-*")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+}
+
+function main() {
+
+    stop_docker
+
+    build_docker_images
+    start_service
+
+    validate_microservice
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main
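
For a quick manual check of the embedding microservice outside the CI script above, a request can be sent directly to the running `embedding-tei-server` container. This is a minimal sketch, assuming the default `6000:6000` port mapping from `docker_compose_embedding.yaml`; the payload mirrors the one used in `validate_microservice`:

```bash
# Hedged example: assumes embedding-tei-server is already running and published on port 6000.
curl http://localhost:6000/v1/embeddings \
  -X POST \
  -d '{"text":"What is Deep Learning?"}' \
  -H 'Content-Type: application/json'
# The response should echo the input "text" together with an "embedding" list:
# 768 floats from embedding_tei_gaudi.py (truncated) or 1024 floats from local_embedding.py.
```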