From c2fac5d0c8248c111871ed56c88aec8fe97ac385 Mon Sep 17 00:00:00 2001
From: XinyuYe-Intel
Date: Fri, 14 Jun 2024 20:35:00 +0800
Subject: [PATCH] Rebased the changes Support rerank and retrieval of RAG OPT (#164)

* supported bce model for rerank.

Signed-off-by: Xinyu Ye

* change folder

Signed-off-by: Xinyu Ye

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* change path in test file.

Signed-off-by: Xinyu Ye

---------

Signed-off-by: Xinyu Ye
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 comps/reranks/README.md                        |   4 +-
 .../docker/docker_compose_reranking.yaml       |   2 +-
 comps/reranks/langchain/reranking_tei_xeon.py  |  15 ++-
 comps/retrievers/langchain/README.md           |   0
 comps/retrievers/langchain/redis/README.md     | 119 ++++++++++++++++++
 .../{ => redis}/data/nke-10k-2023.pdf          | Bin
 .../langchain/redis/docker/Dockerfile          |   2 +-
 .../{ => langchain/redis}/requirements.txt     |   0
 8 files changed, 133 insertions(+), 9 deletions(-)
 delete mode 100644 comps/retrievers/langchain/README.md
 create mode 100644 comps/retrievers/langchain/redis/README.md
 rename comps/retrievers/langchain/{ => redis}/data/nke-10k-2023.pdf (100%)
 rename comps/retrievers/{ => langchain/redis}/requirements.txt (100%)

diff --git a/comps/reranks/README.md b/comps/reranks/README.md
index f8f12251ee..ac3ab3f78b 100644
--- a/comps/reranks/README.md
+++ b/comps/reranks/README.md
@@ -19,10 +19,10 @@ export HF_TOKEN=${your_hf_api_token}
 export LANGCHAIN_TRACING_V2=true
 export LANGCHAIN_API_KEY=${your_langchain_api_key}
 export LANGCHAIN_PROJECT="opea/reranks"
-model=BAAI/bge-reranker-large
+export RERANK_MODEL_ID="BAAI/bge-reranker-large"
 revision=refs/pr/4
 volume=$PWD/data
-docker run -d -p 6060:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision
+docker run -d -p 6060:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $RERANK_MODEL_ID --revision $revision --hf-api-token $HF_TOKEN
 ```
 
 ## 1.3 Verify the TEI Service
diff --git a/comps/reranks/langchain/docker/docker_compose_reranking.yaml b/comps/reranks/langchain/docker/docker_compose_reranking.yaml
index b0929d594b..4f6cc09e76 100644
--- a/comps/reranks/langchain/docker/docker_compose_reranking.yaml
+++ b/comps/reranks/langchain/docker/docker_compose_reranking.yaml
@@ -12,7 +12,7 @@ services:
     volumes:
       - "./data:/data"
     shm_size: 1g
-    command: --model-id ${RERANK_MODEL_ID}
+    command: --model-id ${RERANK_MODEL_ID} --hf-api-token ${HF_TOKEN}
   reranking:
     image: opea/reranking-tei:latest
     container_name: reranking-tei-xeon-server
diff --git a/comps/reranks/langchain/reranking_tei_xeon.py b/comps/reranks/langchain/reranking_tei_xeon.py
index 394264743f..0bfe88fb46 100644
--- a/comps/reranks/langchain/reranking_tei_xeon.py
+++ b/comps/reranks/langchain/reranking_tei_xeon.py
@@ -3,6 +3,7 @@
 
 import json
 import os
+import re
 import time
 
 import requests
@@ -40,12 +41,16 @@ def reranking(input: SearchedDoc) -> LLMParamsDoc:
     response = requests.post(url, data=json.dumps(data), headers=headers)
     response_data = response.json()
     best_response = max(response_data, key=lambda response: response["score"])
-    template = """Answer the question based only on the following context:
-    {context}
-    Question: {question}
-    """
-    prompt = ChatPromptTemplate.from_template(template)
     doc = input.retrieved_docs[best_response["index"]]
+    if doc.text and len(re.findall("[\u4E00-\u9FFF]", doc.text)) / len(doc.text) >= 0.3:
+        # chinese context
+        template = "仅基于以下背景回答问题:\n{context}\n问题: {question}"
+    else:
+        template = """Answer the question based only on the following context:
+{context}
+Question: {question}
+    """
+    prompt = ChatPromptTemplate.from_template(template)
     final_prompt = prompt.format(context=doc.text, question=input.initial_query)
     statistics_dict["opea_service@reranking_tgi_gaudi"].append_latency(time.time() - start, None)
     return LLMParamsDoc(query=final_prompt.strip())
diff --git a/comps/retrievers/langchain/README.md b/comps/retrievers/langchain/README.md
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/comps/retrievers/langchain/redis/README.md b/comps/retrievers/langchain/redis/README.md
new file mode 100644
index 0000000000..38f64f33eb
--- /dev/null
+++ b/comps/retrievers/langchain/redis/README.md
@@ -0,0 +1,119 @@
+# Retriever Microservice
+
+This retriever microservice is a highly efficient search service designed for handling and retrieving embedding vectors. It operates by receiving an embedding vector as input and conducting a similarity search against vectors stored in a VectorDB database. Users must specify the VectorDB's URL and the index name, and the service searches within that index to find documents with the highest similarity to the input vector.
+
+The service primarily utilizes similarity measures in vector space to rapidly retrieve contextually similar documents. The vector-based retrieval approach is particularly suited for handling large datasets, offering fast and accurate search results that significantly enhance the efficiency and quality of information retrieval.
+
+Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial.
+
+# 🚀1. Start Microservice with Python (Option 1)
+
+To start the retriever microservice, you must first install the required Python packages.
+
+## 1.1 Install Requirements
+
+```bash
+pip install -r requirements.txt
+```
+
+## 1.2 Start TEI Service
+
+```bash
+export LANGCHAIN_TRACING_V2=true
+export LANGCHAIN_API_KEY=${your_langchain_api_key}
+export LANGCHAIN_PROJECT="opea/retriever"
+model=BAAI/bge-base-en-v1.5
+revision=refs/pr/4
+volume=$PWD/data
+docker run -d -p 6060:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision
+```
+
+## 1.3 Verify the TEI Service
+
+```bash
+curl 127.0.0.1:6060/embed \
+    -X POST \
+    -d '{"inputs":"What is Deep Learning?"}' \
+    -H 'Content-Type: application/json'
+```
+
+## 1.4 Setup VectorDB Service
+
+You need to set up your own VectorDB service (Redis in this example) and ingest your knowledge documents into the vector database.
+
+For Redis, you can start a Docker container using the following command.
+Remember to ingest data into it manually.
+
+```bash
+docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-stack:7.2.0-v9
+```
+
+## 1.5 Start Retriever Service
+
+```bash
+export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060"
+python langchain/retriever_redis.py
+```
+
+# 🚀2. Start Microservice with Docker (Option 2)
+
+## 2.1 Setup Environment Variables
+
+```bash
+export RETRIEVE_MODEL_ID="BAAI/bge-base-en-v1.5"
+export REDIS_URL="redis://${your_ip}:6379"
+export INDEX_NAME=${your_index_name}
+export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060"
+export LANGCHAIN_TRACING_V2=true
+export LANGCHAIN_API_KEY=${your_langchain_api_key}
+export LANGCHAIN_PROJECT="opea/retrievers"
+```
+
+## 2.2 Build Docker Image
+
+```bash
+cd ../../
+docker build -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/redis/docker/Dockerfile .
+```
+
+To start a Docker container, you have two options:
+
+- A. Run Docker with CLI
+- B. Run Docker with Docker Compose
+
+You can choose one as needed.
+
+## 2.3 Run Docker with CLI (Option A)
+
+```bash
+docker run -d --name="retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/retriever-redis:latest
+```
+
+## 2.4 Run Docker with Docker Compose (Option B)
+
+```bash
+cd langchain/docker
+docker compose -f docker_compose_retriever.yaml up -d
+```
+
+# 🚀3. Consume Retriever Service
+
+## 3.1 Check Service Status
+
+```bash
+curl http://localhost:7000/v1/health_check \
+  -X GET \
+  -H 'Content-Type: application/json'
+```
+
+## 3.2 Consume Retriever Service
+
+To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python.
+
+```bash
+your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
+curl http://${your_ip}:7000/v1/retrieval \
+  -X POST \
+  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
+  -H 'Content-Type: application/json'
+```
diff --git a/comps/retrievers/langchain/data/nke-10k-2023.pdf b/comps/retrievers/langchain/redis/data/nke-10k-2023.pdf
similarity index 100%
rename from comps/retrievers/langchain/data/nke-10k-2023.pdf
rename to comps/retrievers/langchain/redis/data/nke-10k-2023.pdf
diff --git a/comps/retrievers/langchain/redis/docker/Dockerfile b/comps/retrievers/langchain/redis/docker/Dockerfile
index 6d6c29d4e5..990ca4ebcb 100644
--- a/comps/retrievers/langchain/redis/docker/Dockerfile
+++ b/comps/retrievers/langchain/redis/docker/Dockerfile
@@ -20,7 +20,7 @@ RUN chmod +x /home/user/comps/retrievers/langchain/redis/run.sh
 
 USER user
 
 RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir -r /home/user/comps/retrievers/requirements.txt
+    pip install --no-cache-dir -r /home/user/comps/retrievers/langchain/redis/requirements.txt
 
 ENV PYTHONPATH=$PYTHONPATH:/home/user
diff --git a/comps/retrievers/requirements.txt b/comps/retrievers/langchain/redis/requirements.txt
similarity index 100%
rename from comps/retrievers/requirements.txt
rename to comps/retrievers/langchain/redis/requirements.txt
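
For readers who want to try the prompt-selection heuristic from `reranking_tei_xeon.py` outside the microservice, the sketch below reproduces its logic in isolation. The `build_prompt` helper and the example strings are illustrative only and are not part of the patch; the 0.3 threshold and the `[\u4E00-\u9FFF]` character range are taken directly from the diff above.

```python
# Standalone sketch (not part of the patch): reproduces the CJK-ratio check that
# reranking_tei_xeon.py uses to pick between the Chinese and English prompt templates.
import re


def build_prompt(context: str, question: str) -> str:
    """Return a prompt, using the Chinese template when >= 30% of the context is CJK."""
    cjk_count = len(re.findall("[\u4E00-\u9FFF]", context))
    if context and cjk_count / len(context) >= 0.3:
        template = "仅基于以下背景回答问题:\n{context}\n问题: {question}"
    else:
        template = "Answer the question based only on the following context:\n{context}\nQuestion: {question}"
    return template.format(context=context, question=question)


if __name__ == "__main__":
    # English-dominant context -> English template
    print(build_prompt("Deep learning is a subset of machine learning.", "What is Deep Learning?"))
    # Chinese-dominant context -> Chinese template
    print(build_prompt("深度学习是机器学习的一个分支。", "什么是深度学习?"))
```

Counting code points in the CJK Unified Ideographs block is a lightweight proxy for language detection: it adds no extra dependency, but it treats any kanji-heavy text as Chinese, so the threshold may need tuning for mixed-language corpora.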