diff --git a/.github/workflows/_comps-workflow.yml b/.github/workflows/_comps-workflow.yml index 81813d92d3..5426ed41fb 100644 --- a/.github/workflows/_comps-workflow.yml +++ b/.github/workflows/_comps-workflow.yml @@ -6,9 +6,6 @@ permissions: read-all on: workflow_call: inputs: - node: - required: true - type: string service: required: true type: string @@ -36,7 +33,7 @@ jobs: # Image Build #################################################################################################### build-images: - runs-on: "docker-build-${{ inputs.node }}" + runs-on: "docker-build-gaudi" continue-on-error: true outputs: file_exists: ${{ steps.get-yaml-path.outputs.file_exists }} @@ -84,10 +81,9 @@ jobs: #################################################################################################### test-service-compose: needs: [build-images] - if: ${{ fromJSON(inputs.test) && needs.build-images.outputs.file_exists == 'true' }} + if: ${{ fromJSON(inputs.test) }} uses: ./.github/workflows/_run-docker-compose.yml with: tag: ${{ inputs.tag }} service: ${{ inputs.service }} - hardware: ${{ inputs.node }} secrets: inherit diff --git a/.github/workflows/_run-docker-compose.yml b/.github/workflows/_run-docker-compose.yml index 5f7ac72709..5b89f67b28 100644 --- a/.github/workflows/_run-docker-compose.yml +++ b/.github/workflows/_run-docker-compose.yml @@ -20,10 +20,6 @@ on: description: Example to test required: true type: string - hardware: - description: Hardware to run the test on - required: true - type: string jobs: get-test-case: runs-on: ubuntu-latest @@ -55,8 +51,9 @@ jobs: set -x service_l=$(echo ${{ inputs.service }} | tr '[:upper:]' '[:lower:]') cd ${{ github.workspace }}/tests - test_cases=$(find . -type f -name "test_${service_l}*.sh" -print | cut -d/ -f2 | jq -R '.' | jq -sc '.') + test_cases=$(find . -type f -name "test_${service_l}*.sh" -print | jq -R '.' | jq -sc '.') echo "test_cases=$test_cases" >> $GITHUB_OUTPUT + # TODO: get hardware for test matrix run-test: needs: [get-test-case] @@ -64,7 +61,7 @@ jobs: matrix: test_case: ${{ fromJSON(needs.get-test-case.outputs.test_cases) }} fail-fast: false - runs-on: ${{ inputs.hardware }} + runs-on: gaudi continue-on-error: true steps: - name: Clean up Working Directory @@ -88,11 +85,10 @@ jobs: GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} PINECONE_KEY: ${{ secrets.PINECONE_KEY }} service: ${{ inputs.service }} - hardware: ${{ inputs.hardware }} test_case: ${{ matrix.test_case }} run: | cd ${{ github.workspace }}/tests - service=$(echo "${test_case}" | sed 's/test_\(.*\)\.sh/\1/') + service=$(echo "${test_case}" | cut -d'_' -f2- |cut -d'.' 
-f1) echo "service=${service}" >> $GITHUB_ENV if [ -f ${test_case} ]; then timeout 30m bash ${test_case}; else echo "Test script {${test_case}} not found, skip test!"; fi diff --git a/.github/workflows/docker/compose/dataprep-compose-cd.yaml b/.github/workflows/docker/compose/dataprep-compose-cd.yaml index 7bf0087324..6622a29218 100644 --- a/.github/workflows/docker/compose/dataprep-compose-cd.yaml +++ b/.github/workflows/docker/compose/dataprep-compose-cd.yaml @@ -19,11 +19,15 @@ services: build: dockerfile: comps/dataprep/pinecone/langchain/Dockerfile image: ${REGISTRY:-opea}/dataprep-pinecone:${TAG:-latest} - dataprep-redis: #need to rename + dataprep-multimodal-redis: build: dockerfile: comps/dataprep/multimodal/redis/langchain/Dockerfile - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} - dataprep-vdmsxx: #need to rename + image: ${REGISTRY:-opea}/dataprep-multimodal-redis:${TAG:-latest} + dataprep-vdms: build: dockerfile: comps/dataprep/vdms/langchain/Dockerfile image: ${REGISTRY:-opea}/dataprep-vdms:${TAG:-latest} + dataprep-neo4j: + build: + dockerfile: comps/dataprep/neo4j/langchain/Dockerfile + image: ${REGISTRY:-opea}/dataprep-neo4j:${TAG:-latest} diff --git a/.github/workflows/docker/compose/dataprep-compose.yaml b/.github/workflows/docker/compose/dataprep-compose.yaml index bb5f082185..3c03461030 100644 --- a/.github/workflows/docker/compose/dataprep-compose.yaml +++ b/.github/workflows/docker/compose/dataprep-compose.yaml @@ -17,7 +17,7 @@ services: build: dockerfile: comps/dataprep/redis/langchain_ray/Dockerfile image: ${REGISTRY:-opea}/dataprep-on-ray-redis:${TAG:-latest} - dataprep-vdms: + dataprep-multimodal-vdms: build: dockerfile: comps/dataprep/vdms/multimodal_langchain/Dockerfile - image: ${REGISTRY:-opea}/dataprep-vdms:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep-multimodal-vdms:${TAG:-latest} diff --git a/.github/workflows/docker/compose/embeddings-compose-cd.yaml b/.github/workflows/docker/compose/embeddings-compose-cd.yaml index 399683a61b..d9d0403dd8 100644 --- a/.github/workflows/docker/compose/embeddings-compose-cd.yaml +++ b/.github/workflows/docker/compose/embeddings-compose-cd.yaml @@ -22,7 +22,7 @@ services: build: dockerfile: comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu image: ${REGISTRY:-opea}/bridgetower-embedder-gaudi:${TAG:-latest} - embedding-multimodalxx: #need to rename + embedding-multimodal: build: dockerfile: comps/embeddings/multimodal/multimodal_langchain/Dockerfile image: ${REGISTRY:-opea}/embedding-multimodal:${TAG:-latest} diff --git a/.github/workflows/docker/compose/embeddings-compose.yaml b/.github/workflows/docker/compose/embeddings-compose.yaml index a97c8460fb..5f701b4c62 100644 --- a/.github/workflows/docker/compose/embeddings-compose.yaml +++ b/.github/workflows/docker/compose/embeddings-compose.yaml @@ -8,7 +8,7 @@ services: build: dockerfile: comps/embeddings/tei/langchain/Dockerfile image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest} - embedding-multimodal: + embedding-multimodal-clip: build: dockerfile: comps/embeddings/multimodal_clip/Dockerfile - image: ${REGISTRY:-opea}/embedding-multimodal:${TAG:-latest} + image: ${REGISTRY:-opea}/embedding-multimodal-clip:${TAG:-latest} diff --git a/.github/workflows/docker/compose/llms-compose-cd.yaml b/.github/workflows/docker/compose/llms-compose-cd.yaml index aa34c2d770..cbf463bd10 100644 --- a/.github/workflows/docker/compose/llms-compose-cd.yaml +++ b/.github/workflows/docker/compose/llms-compose-cd.yaml @@ -11,10 +11,10 @@ services: context: 
vllm-openvino dockerfile: Dockerfile.openvino image: ${REGISTRY:-opea}/vllm-openvino:${TAG:-latest} - llm-nativexx: #need to rename + llm-eval: build: dockerfile: comps/llms/utils/lm-eval/Dockerfile - image: ${REGISTRY:-opea}/llm-native:${TAG:-latest} + image: ${REGISTRY:-opea}/llm-eval:${TAG:-latest} llm-vllm-llamaindex: build: dockerfile: comps/llms/text-generation/vllm/llama_index/Dockerfile diff --git a/.github/workflows/docker/compose/reranks-compose.yaml b/.github/workflows/docker/compose/reranks-compose.yaml index b11fb8a4ab..1a160d3b63 100644 --- a/.github/workflows/docker/compose/reranks-compose.yaml +++ b/.github/workflows/docker/compose/reranks-compose.yaml @@ -8,7 +8,7 @@ services: build: dockerfile: comps/reranks/tei/Dockerfile image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest} - reranking-videoragqna: + reranking-videoqna: build: - dockerfile: comps/reranks/video-rag-qna/Dockerfile - image: ${REGISTRY:-opea}/reranking-videoragqna:${TAG:-latest} + dockerfile: comps/reranks/videoqna/Dockerfile + image: ${REGISTRY:-opea}/reranking-videoqna:${TAG:-latest} diff --git a/.github/workflows/docker/compose/retrievers-compose-cd.yaml b/.github/workflows/docker/compose/retrievers-compose-cd.yaml index f9230412d7..67b44fd0f7 100644 --- a/.github/workflows/docker/compose/retrievers-compose-cd.yaml +++ b/.github/workflows/docker/compose/retrievers-compose-cd.yaml @@ -27,3 +27,7 @@ services: build: dockerfile: comps/retrievers/multimodal/redis/langchain/Dockerfile image: ${REGISTRY:-opea}/multimodal-retriever-redis:${TAG:-latest} + retriever-neo4j: + build: + dockerfile: comps/retrievers/neo4j/langchain/Dockerfile + image: ${REGISTRY:-opea}/retriever-neo4j:${TAG:-latest} diff --git a/.github/workflows/manual-comps-test.yml b/.github/workflows/manual-comps-test.yml index bde3bf9fad..a56b297de3 100644 --- a/.github/workflows/manual-comps-test.yml +++ b/.github/workflows/manual-comps-test.yml @@ -56,7 +56,6 @@ jobs: with: service: ${{ matrix.service }} tag: ${{ inputs.tag }} - node: gaudi mode: ${{ inputs.mode }} test: ${{ inputs.test }} secrets: inherit diff --git a/comps/__init__.py b/comps/__init__.py index 1ad435bf26..712ce3f495 100644 --- a/comps/__init__.py +++ b/comps/__init__.py @@ -46,9 +46,9 @@ AudioQnAGateway, RetrievalToolGateway, FaqGenGateway, - VideoRAGQnAGateway, + VideoQnAGateway, VisualQnAGateway, - MultimodalRAGWithVideosGateway, + MultimodalQnAGateway, ) # Telemetry diff --git a/comps/cores/mega/constants.py b/comps/cores/mega/constants.py index a23fdaf558..34808c4351 100644 --- a/comps/cores/mega/constants.py +++ b/comps/cores/mega/constants.py @@ -38,12 +38,12 @@ class MegaServiceEndpoint(Enum): CHAT_QNA = "/v1/chatqna" AUDIO_QNA = "/v1/audioqna" VISUAL_QNA = "/v1/visualqna" - VIDEO_RAG_QNA = "/v1/videoragqna" + VIDEO_RAG_QNA = "/v1/videoqna" CODE_GEN = "/v1/codegen" CODE_TRANS = "/v1/codetrans" DOC_SUMMARY = "/v1/docsum" SEARCH_QNA = "/v1/searchqna" - MULTIMODAL_RAG_WITH_VIDEOS = "/v1/mmragvideoqna" + MULTIMODAL_QNA = "/v1/multimodalqna" TRANSLATION = "/v1/translation" RETRIEVALTOOL = "/v1/retrievaltool" FAQ_GEN = "/v1/faqgen" diff --git a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index b14ef60757..2636a0f834 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -548,7 +548,7 @@ async def handle_request(self, request: Request): return ChatCompletionResponse(model="visualqna", choices=choices, usage=usage) -class VideoRAGQnAGateway(Gateway): +class VideoQnAGateway(Gateway): def __init__(self, megaservice, host="0.0.0.0", 
port=8888): super().__init__( megaservice, @@ -594,7 +594,7 @@ async def handle_request(self, request: Request): finish_reason="stop", ) ) - return ChatCompletionResponse(model="videoragqna", choices=choices, usage=usage) + return ChatCompletionResponse(model="videoqna", choices=choices, usage=usage) class RetrievalToolGateway(Gateway): @@ -634,14 +634,14 @@ def parser_input(data, TypeClass, key): return response -class MultimodalRAGWithVideosGateway(Gateway): +class MultimodalQnAGateway(Gateway): def __init__(self, multimodal_rag_megaservice, lvm_megaservice, host="0.0.0.0", port=9999): self.lvm_megaservice = lvm_megaservice super().__init__( multimodal_rag_megaservice, host, port, - str(MegaServiceEndpoint.MULTIMODAL_RAG_WITH_VIDEOS), + str(MegaServiceEndpoint.MULTIMODAL_QNA), ChatCompletionRequest, ChatCompletionResponse, ) @@ -737,7 +737,7 @@ async def handle_request(self, request: Request): data = await request.json() stream_opt = bool(data.get("stream", False)) if stream_opt: - print("[ MultimodalRAGWithVideosGateway ] stream=True not used, this has not support streaming yet!") + print("[ MultimodalQnAGateway ] stream=True not used, this has not support streaming yet!") stream_opt = False chat_request = ChatCompletionRequest.model_validate(data) # Multimodal RAG QnA With Videos has not yet accepts image as input during QnA. @@ -803,4 +803,4 @@ async def handle_request(self, request: Request): metadata=metadata, ) ) - return ChatCompletionResponse(model="multimodalragwithvideos", choices=choices, usage=usage) + return ChatCompletionResponse(model="multimodalqna", choices=choices, usage=usage) diff --git a/comps/dataprep/multimodal/redis/langchain/README.md b/comps/dataprep/multimodal/redis/langchain/README.md index 9a1729ca11..65e4f5d45f 100644 --- a/comps/dataprep/multimodal/redis/langchain/README.md +++ b/comps/dataprep/multimodal/redis/langchain/README.md @@ -83,13 +83,13 @@ export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} ```bash cd ../../../../ -docker build -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimodal/redis/langchain/Dockerfile . +docker build -t opea/dataprep-multimodal-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimodal/redis/langchain/Dockerfile . ``` ### 2.5 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e LVM_ENDPOINT=$LVM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep-redis:latest +docker run -d --name="dataprep-multimodal-redis" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e LVM_ENDPOINT=$LVM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep-multimodal-redis:latest ``` ### 2.6 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) @@ -102,7 +102,7 @@ docker compose -f docker-compose-dataprep-redis.yaml up -d ## 🚀3. Status Microservice ```bash -docker container logs -f dataprep-redis-server +docker container logs -f dataprep-multimodal-redis ``` ## 🚀4. 
Consume Microservice diff --git a/comps/dataprep/multimodal/redis/langchain/docker-compose-dataprep-redis.yaml b/comps/dataprep/multimodal/redis/langchain/docker-compose-dataprep-redis.yaml index d98ddbd878..e3dc78a97b 100644 --- a/comps/dataprep/multimodal/redis/langchain/docker-compose-dataprep-redis.yaml +++ b/comps/dataprep/multimodal/redis/langchain/docker-compose-dataprep-redis.yaml @@ -9,9 +9,9 @@ services: ports: - "6379:6379" - "8001:8001" - dataprep-redis: - image: opea/dataprep-redis:latest - container_name: dataprep-redis-server + dataprep-multimodal-redis: + image: opea/dataprep-multimodal-redis:latest + container_name: dataprep-multimodal-redis ports: - "6007:6007" ipc: host diff --git a/comps/dataprep/neo4j/langchain/Dockerfile b/comps/dataprep/neo4j/langchain/Dockerfile new file mode 100644 index 0000000000..5c1884359b --- /dev/null +++ b/comps/dataprep/neo4j/langchain/Dockerfile @@ -0,0 +1,38 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +ENV LANG=C.UTF-8 + +ARG ARCH="cpu" + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + default-jre \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +USER user + +COPY comps /home/user/comps + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ + pip install --no-cache-dir -r /home/user/comps/dataprep/neo4j/langchain/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +USER root + +RUN mkdir -p /home/user/comps/dataprep/qdrant/langchain/uploaded_files && chown -R user /home/user/comps/dataprep/neo4j/langchain/uploaded_files + +USER user + +WORKDIR /home/user/comps/dataprep/neo4j/langchain + +ENTRYPOINT ["python", "prepare_doc_neo4j.py"] diff --git a/comps/dataprep/neo4j/langchain/README.md b/comps/dataprep/neo4j/langchain/README.md new file mode 100644 index 0000000000..31f92548b4 --- /dev/null +++ b/comps/dataprep/neo4j/langchain/README.md @@ -0,0 +1,116 @@ +# Dataprep Microservice with Neo4J + +## 🚀Start Microservice with Python + +### Install Requirements + +```bash +pip install -r requirements.txt +apt-get install libtesseract-dev -y +apt-get install poppler-utils -y +``` + +### Start Neo4J Server + +To launch Neo4j locally, first ensure you have docker installed. Then, you can launch the database with the following docker command. + +```bash +docker run \ + -p 7474:7474 -p 7687:7687 \ + -v $PWD/data:/data -v $PWD/plugins:/plugins \ + --name neo4j-apoc \ + -d \ + -e NEO4J_AUTH=neo4j/password \ + -e NEO4J_PLUGINS=\[\"apoc\"\] \ + neo4j:latest +``` + +### Setup Environment Variables + +```bash +export no_proxy=${your_no_proxy} +export http_proxy=${your_http_proxy} +export https_proxy=${your_http_proxy} +export NEO4J_URI=${your_neo4j_url} +export NEO4J_USERNAME=${your_neo4j_username} +export NEO4J_PASSWORD=${your_neo4j_password} +export PYTHONPATH=${path_to_comps} +``` + +### Start Document Preparation Microservice for Neo4J with Python Script + +Start document preparation microservice for Neo4J with below command. 
+ +```bash +python prepare_doc_neo4j.py +``` + +## 🚀Start Microservice with Docker + +### Build Docker Image + +```bash +cd ../../../../ +docker build -t opea/dataprep-neo4j:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/neo4j/langchain/Dockerfile . +``` + +### Run Docker with CLI + +```bash +docker run -d --name="dataprep-neo4j-server" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-neo4j:latest +``` + +### Setup Environment Variables + +```bash +export no_proxy=${your_no_proxy} +export http_proxy=${your_http_proxy} +export https_proxy=${your_http_proxy} +export NEO4J_URI=${your_neo4j_url} +export NEO4J_USERNAME=${your_neo4j_username} +export NEO4J_PASSWORD=${your_neo4j_password} +``` + +### Run Docker with Docker Compose + +```bash +cd comps/dataprep/neo4j/langchain +docker compose -f docker-compose-dataprep-neo4j.yaml up -d +``` + +## Invoke Microservice + +Once document preparation microservice for Neo4J is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. + +```bash +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./file1.txt" \ + http://localhost:6007/v1/dataprep +``` + +You can specify chunk_size and chunk_size by the following commands. + +```bash +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./file1.txt" \ + -F "chunk_size=1500" \ + -F "chunk_overlap=100" \ + http://localhost:6007/v1/dataprep +``` + +We support table extraction from pdf documents. You can specify process_table and table_strategy by the following commands. "table_strategy" refers to the strategies to understand tables for table retrieval. As the setting progresses from "fast" to "hq" to "llm," the focus shifts towards deeper table understanding at the expense of processing speed. The default strategy is "fast". + +Note: If you specify "table_strategy=llm", You should first start TGI Service, please refer to 1.2.1, 1.3.1 in https://github.com/opea-project/GenAIComps/tree/main/comps/llms/README.md, and then `export TGI_LLM_ENDPOINT="http://${your_ip}:8008"`. + +For ensure the quality and comprehensiveness of the extracted entities, we recommend to use `gpt-4o` as the default model for parsing the document. To enable the openai service, please `export OPENAI_KEY=xxxx` before using this services. 
+ +```bash +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./your_file.pdf" \ + -F "process_table=true" \ + -F "table_strategy=hq" \ + http://localhost:6007/v1/dataprep +``` diff --git a/comps/dataprep/neo4j/langchain/__init__.py b/comps/dataprep/neo4j/langchain/__init__.py new file mode 100644 index 0000000000..916f3a44b2 --- /dev/null +++ b/comps/dataprep/neo4j/langchain/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/dataprep/neo4j/langchain/config.py b/comps/dataprep/neo4j/langchain/config.py new file mode 100644 index 0000000000..bb21d57e3d --- /dev/null +++ b/comps/dataprep/neo4j/langchain/config.py @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +# Neo4J configuration +NEO4J_URL = os.getenv("NEO4J_URI", "bolt://localhost:7687") +NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") +NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "test") + +# LLM/Embedding endpoints +TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") +TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") +OPENAI_KEY = os.getenv("OPENAI_API_KEY") diff --git a/comps/dataprep/neo4j/langchain/docker-compose-dataprep-neo4j.yaml b/comps/dataprep/neo4j/langchain/docker-compose-dataprep-neo4j.yaml new file mode 100644 index 0000000000..d7d210adf3 --- /dev/null +++ b/comps/dataprep/neo4j/langchain/docker-compose-dataprep-neo4j.yaml @@ -0,0 +1,48 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3" +services: + neo4j-vector-db: + image: neo4j/neo4j + container_name: neo4j-graph-db + ports: + - "6337:6337" + - "6338:6338" + tgi_gaudi_service: + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + container_name: tgi-service + ports: + - "8088:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + command: --model-id ${LLM_MODEL_ID} --auto-truncate --max-input-tokens 1024 --max-total-tokens 2048 + dataprep-neo4j: + image: opea/gen-ai-comps:dataprep-neo4j-xeon-server + container_name: dataprep-neo4j-server + depends_on: + - neo4j-vector-db + - tgi_gaudi_service + ports: + - "6007:6007" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + NEO4J_URL: ${NEO4J_URL} + NEO4J_USERNAME: ${NEO4J_USERNAME} + NEO4J_PASSWORD: ${NEO4J_PASSWORD} + TGI_LLM_ENDPOINT: ${TEI_ENDPOINT} + OPENAI_KEY: ${OPENAI_API_KEY} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/dataprep/neo4j/langchain/prepare_doc_neo4j.py b/comps/dataprep/neo4j/langchain/prepare_doc_neo4j.py new file mode 100644 index 0000000000..39d88d0558 --- /dev/null +++ b/comps/dataprep/neo4j/langchain/prepare_doc_neo4j.py @@ -0,0 +1,191 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +from typing import List, Optional, Union + +import openai +from config import NEO4J_PASSWORD, NEO4J_URL, NEO4J_USERNAME, OPENAI_KEY, TGI_LLM_ENDPOINT +from fastapi import File, Form, HTTPException, UploadFile +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.graphs import Neo4jGraph +from langchain_community.graphs.graph_document import GraphDocument +from langchain_community.llms import 
HuggingFaceEndpoint +from langchain_core.documents import Document +from langchain_experimental.graph_transformers import LLMGraphTransformer +from langchain_openai import ChatOpenAI +from langchain_text_splitters import HTMLHeaderTextSplitter + +from comps import CustomLogger, DocPath, opea_microservices, register_microservice +from comps.dataprep.utils import ( + document_loader, + encode_filename, + get_separators, + get_tables_result, + parse_html, + save_content_to_local_disk, +) + +logger = CustomLogger("prepare_doc_neo4j") +logflag = os.getenv("LOGFLAG", False) + +upload_folder = "./uploaded_files/" + + +def ingest_data_to_neo4j(doc_path: DocPath): + """Ingest document to Neo4J.""" + path = doc_path.path + if logflag: + logger.info(f"Parsing document {path}.") + + if path.endswith(".html"): + headers_to_split_on = [ + ("h1", "Header 1"), + ("h2", "Header 2"), + ("h3", "Header 3"), + ] + text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) + else: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + separators=get_separators(), + ) + + content = document_loader(path) + + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(path) + + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + + if doc_path.process_table and path.endswith(".pdf"): + table_chunks = get_tables_result(path, doc_path.table_strategy) + chunks = chunks + table_chunks + if logflag: + logger.info("Done preprocessing. Created ", len(chunks), " chunks of the original file.") + + if OPENAI_KEY: + logger.info("OpenAI API Key is set. Verifying its validity...") + openai.api_key = OPENAI_KEY + + try: + response = openai.Engine.list() + logger.info("OpenAI API Key is valid.") + llm = ChatOpenAI(temperature=0, model_name="gpt-4o") + except openai.error.AuthenticationError: + logger.info("OpenAI API Key is invalid.") + except Exception as e: + logger.info(f"An error occurred while verifying the API Key: {e}") + else: + llm = HuggingFaceEndpoint( + endpoint_url=TGI_LLM_ENDPOINT, + max_new_tokens=512, + top_k=40, + top_p=0.9, + temperature=0.8, + timeout=600, + ) + + llm_transformer = LLMGraphTransformer( + llm=llm, node_properties=["description"], relationship_properties=["description"] + ) + + doc_list = [Document(page_content=text) for text in chunks] + graph_doc = llm_transformer.convert_to_graph_documents(doc_list) + + graph = Neo4jGraph(url=NEO4J_URL, username=NEO4J_USERNAME, password=NEO4J_PASSWORD) + + graph.add_graph_documents(graph_doc, baseEntityLabel=True, include_source=True) + + if logflag: + logger.info("The graph is built.") + + return True + + +@register_microservice( + name="opea_service@prepare_doc_neo4j", + endpoint="/v1/dataprep", + host="0.0.0.0", + port=6007, + input_datatype=DocPath, + output_datatype=None, +) +async def ingest_documents( + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), +): + if logflag: + logger.info(f"files:{files}") + logger.info(f"link_list:{link_list}") + + if files: + if not isinstance(files, list): + files = [files] + uploaded_files = [] + for file in files: + encode_file = encode_filename(file.filename) + save_path = upload_folder + encode_file + await 
save_content_to_local_disk(save_path, file) + ingest_data_to_neo4j( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + uploaded_files.append(save_path) + if logflag: + logger.info(f"Successfully saved file {save_path}") + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + if link_list: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + for link in link_list: + encoded_link = encode_filename(link) + save_path = upload_folder + encoded_link + ".txt" + content = parse_html([link])[0][0] + try: + await save_content_to_local_disk(save_path, content) + ingest_data_to_neo4j( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + except json.JSONDecodeError: + raise HTTPException(status_code=500, detail="Fail to ingest data into qdrant.") + + if logflag: + logger.info(f"Successfully saved link {link}") + + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + + +if __name__ == "__main__": + opea_microservices["opea_service@prepare_doc_neo4j"].start() diff --git a/comps/dataprep/neo4j/langchain/requirements.txt b/comps/dataprep/neo4j/langchain/requirements.txt new file mode 100644 index 0000000000..b8326a623f --- /dev/null +++ b/comps/dataprep/neo4j/langchain/requirements.txt @@ -0,0 +1,31 @@ +beautifulsoup4 +cairosvg +docarray[full] +docx2txt +easyocr +fastapi +huggingface_hub +langchain +langchain-community +langchain-experimental +langchain-openai +langchain-text-splitters +langchain_huggingface +markdown +neo4j +numpy +openai +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +pandas +Pillow +prometheus-fastapi-instrumentator +pymupdf +python-docx +python-pptx +sentence_transformers +shortuuid +unstructured[all-docs]==0.15.7 +uvicorn + diff --git a/comps/dataprep/vdms/README.md b/comps/dataprep/vdms/README.md index 7c4d8e86f8..132a8816b3 100644 --- a/comps/dataprep/vdms/README.md +++ b/comps/dataprep/vdms/README.md @@ -6,9 +6,9 @@ For dataprep microservice, we currently provide one framework: `Langchain`. We organized the folders in the same way, so you can use either framework for dataprep microservice with the following constructions. -# 🚀1. Start Microservice with Python (Option 1) +## 🚀1. Start Microservice with Python (Option 1) -## 1.1 Install Requirements +### 1.1 Install Requirements Install Single-process version (for 1-10 files processing) @@ -25,11 +25,11 @@ pip install -r requirements.txt cd langchain_ray; pip install -r requirements_ray.txt ``` --> -## 1.2 Start VDMS Server +### 1.2 Start VDMS Server -Please refer to this [readme](../../vectorstores/vdms/README.md). +Refer to this [readme](../../vectorstores/vdms/README.md). 
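+For convenience, a minimal way to launch the VDMS server locally is shown below; it mirrors the `intellabs/vdms:latest` command used in the multimodal_langchain README elsewhere in this repository (port 55555 is the default used there).
+
+```bash
+# Start a local VDMS vector database for development/testing.
+docker run -d --name="vdms-vector-db" -p 55555:55555 intellabs/vdms:latest
+```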
-## 1.3 Setup Environment Variables +### 1.3 Setup Environment Variables ```bash export http_proxy=${your_http_proxy} @@ -40,7 +40,7 @@ export COLLECTION_NAME=${your_collection_name} export PYTHONPATH=${path_to_comps} ``` -## 1.4 Start Document Preparation Microservice for VDMS with Python Script +### 1.4 Start Document Preparation Microservice for VDMS with Python Script Start document preparation microservice for VDMS with below command. @@ -56,13 +56,13 @@ python prepare_doc_vdms.py python prepare_doc_redis_on_ray.py ``` --> -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) -## 2.1 Start VDMS Server +### 2.1 Start VDMS Server -Please refer to this [readme](../../vectorstores/vdms/README.md). +Refer to this [readme](../../vectorstores/vdms/README.md). -## 2.2 Setup Environment Variables +### 2.2 Setup Environment Variables ```bash export http_proxy=${your_http_proxy} @@ -76,16 +76,16 @@ export DISTANCE_STRATEGY="L2" export PYTHONPATH=${path_to_comps} ``` -## 2.3 Build Docker Image +### 2.3 Build Docker Image - Build docker image with langchain -Start single-process version (for 1-10 files processing) + Start single-process version (for 1-10 files processing) -```bash -cd ../../../ -docker build -t opea/dataprep-vdms:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/vdms/langchain/Dockerfile . -``` + ```bash + cd ../../../ + docker build -t opea/dataprep-vdms:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/vdms/langchain/Dockerfile . + ``` -## 2.4 Run Docker with CLI +### 2.4 Run Docker with CLI Start single-process version (for 1-10 files processing) @@ -113,13 +113,13 @@ docker run -d --name="dataprep-vdms-server" -p 6007:6007 --runtime=runc --ipc=ho -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-vdms:latest ``` --> -# 🚀3. Status Microservice +## 🚀3. Status Microservice ```bash docker container logs -f dataprep-vdms-server ``` -# 🚀4. Consume Microservice +## 🚀4. Consume Microservice Once document preparation microservice for VDMS is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. @@ -127,61 +127,61 @@ Make sure the file path after `files=@` is correct. - Single file upload -```bash -curl -X POST \ - -H "Content-Type: multipart/form-data" \ - -F "files=@./file1.txt" \ - http://localhost:6007/v1/dataprep -``` + ```bash + curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./file1.txt" \ + http://localhost:6007/v1/dataprep + ``` -You can specify chunk_size and chunk_size by the following commands. + You can specify `chunk_size` and `chunk_overlap` by the following commands. 
-```bash -curl -X POST \ - -H "Content-Type: multipart/form-data" \ - -F "files=@./LLAMA2_page6.pdf" \ - -F "chunk_size=1500" \ - -F "chunk_overlap=100" \ - http://localhost:6007/v1/dataprep -``` + ```bash + curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./LLAMA2_page6.pdf" \ + -F "chunk_size=1500" \ + -F "chunk_overlap=100" \ + http://localhost:6007/v1/dataprep + ``` - Multiple file upload -```bash -curl -X POST \ - -H "Content-Type: multipart/form-data" \ - -F "files=@./file1.txt" \ - -F "files=@./file2.txt" \ - -F "files=@./file3.txt" \ - http://localhost:6007/v1/dataprep -``` - -- Links upload (not supported for llama_index now) - -```bash -curl -X POST \ - -F 'link_list=["https://www.ces.tech/"]' \ - http://localhost:6007/v1/dataprep -``` - -or - -```python -import requests -import json - -proxies = {"http": ""} -url = "http://localhost:6007/v1/dataprep" -urls = [ - "https://towardsdatascience.com/no-gpu-no-party-fine-tune-bert-for-sentiment-analysis-with-vertex-ai-custom-jobs-d8fc410e908b?source=rss----7f60cf5620c9---4" -] -payload = {"link_list": json.dumps(urls)} - -try: - resp = requests.post(url=url, data=payload, proxies=proxies) - print(resp.text) - resp.raise_for_status() # Raise an exception for unsuccessful HTTP status codes - print("Request successful!") -except requests.exceptions.RequestException as e: - print("An error occurred:", e) -``` + ```bash + curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./file1.txt" \ + -F "files=@./file2.txt" \ + -F "files=@./file3.txt" \ + http://localhost:6007/v1/dataprep + ``` + +- Links upload (not supported for `llama_index` now) + + ```bash + curl -X POST \ + -F 'link_list=["https://www.ces.tech/"]' \ + http://localhost:6007/v1/dataprep + ``` + + or + + ```python + import requests + import json + + proxies = {"http": ""} + url = "http://localhost:6007/v1/dataprep" + urls = [ + "https://towardsdatascience.com/no-gpu-no-party-fine-tune-bert-for-sentiment-analysis-with-vertex-ai-custom-jobs-d8fc410e908b?source=rss----7f60cf5620c9---4" + ] + payload = {"link_list": json.dumps(urls)} + + try: + resp = requests.post(url=url, data=payload, proxies=proxies) + print(resp.text) + resp.raise_for_status() # Raise an exception for unsuccessful HTTP status codes + print("Request successful!") + except requests.exceptions.RequestException as e: + print("An error occurred:", e) + ``` diff --git a/comps/dataprep/vdms/multimodal_langchain/README.md b/comps/dataprep/vdms/multimodal_langchain/README.md index 0b5b721fa9..2d86c28b13 100644 --- a/comps/dataprep/vdms/multimodal_langchain/README.md +++ b/comps/dataprep/vdms/multimodal_langchain/README.md @@ -2,25 +2,25 @@ For dataprep microservice, we currently provide one framework: `Langchain`. -# 🚀1. Start Microservice with Python (Option 1) +## 🚀1. 
Start Microservice with Python (Option 1) -## 1.1 Install Requirements +### 1.1 Install Requirements - option 1: Install Single-process version (for 1-10 files processing) -```bash -apt-get update -apt-get install -y default-jre tesseract-ocr libtesseract-dev poppler-utils -pip install -r requirements.txt -``` + ```bash + apt-get update + apt-get install -y default-jre tesseract-ocr libtesseract-dev poppler-utils + pip install -r requirements.txt + ``` -## 1.2 Start VDMS Server +### 1.2 Start VDMS Server ```bash docker run -d --name="vdms-vector-db" -p 55555:55555 intellabs/vdms:latest ``` -## 1.3 Setup Environment Variables +### 1.3 Setup Environment Variables ```bash export http_proxy=${your_http_proxy} @@ -33,7 +33,7 @@ export your_hf_api_token="{your_hf_token}" export PYTHONPATH=${path_to_comps} ``` -## 1.4 Start Data Preparation Microservice for VDMS with Python Script +### 1.4 Start Data Preparation Microservice for VDMS with Python Script Start document preparation microservice for VDMS with below command. @@ -41,15 +41,15 @@ Start document preparation microservice for VDMS with below command. python ingest_videos.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) -## 2.1 Start VDMS Server +### 2.1 Start VDMS Server ```bash docker run -d --name="vdms-vector-db" -p 55555:55555 intellabs/vdms:latest ``` -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables ```bash export http_proxy=${your_http_proxy} @@ -61,29 +61,29 @@ export INDEX_NAME="rag-vdms" export your_hf_api_token="{your_hf_token}" ``` -## 2.3 Build Docker Image +### 2.3 Build Docker Image - Build docker image -```bash -cd ../../../ - docker build -t opea/dataprep-vdms:latest --network host --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/vdms/multimodal_langchain/Dockerfile . + ```bash + cd ../../../ + docker build -t opea/dataprep-vdms:latest --network host --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/vdms/multimodal_langchain/Dockerfile . -``` + ``` -## 2.4 Run Docker Compose +### 2.4 Run Docker Compose ```bash docker compose -f comps/dataprep/vdms/multimodal_langchain/docker-compose-dataprep-vdms.yaml up -d ``` -# 🚀3. Status Microservice +## 🚀3. Status Microservice ```bash docker container logs -f dataprep-vdms-server ``` -# 🚀4. Consume Microservice +## 🚀4. Consume Microservice Once data preparation microservice for VDMS is started, user can use below command to invoke the microservice to convert the videos to embedding and save to the database. @@ -91,34 +91,34 @@ Make sure the file path after `files=@` is correct. 
- Single file upload -```bash -curl -X POST \ - -H "Content-Type: multipart/form-data" \ - -F "files=@./file1.mp4" \ - http://localhost:6007/v1/dataprep -``` + ```bash + curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./file1.mp4" \ + http://localhost:6007/v1/dataprep + ``` - Multiple file upload -```bash -curl -X POST \ - -H "Content-Type: multipart/form-data" \ - -F "files=@./file1.mp4" \ - -F "files=@./file2.mp4" \ - -F "files=@./file3.mp4" \ - http://localhost:6007/v1/dataprep -``` + ```bash + curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./file1.mp4" \ + -F "files=@./file2.mp4" \ + -F "files=@./file3.mp4" \ + http://localhost:6007/v1/dataprep + ``` - List of uploaded files -```bash -curl -X GET http://localhost:6007/v1/dataprep/get_videos -``` + ```bash + curl -X GET http://localhost:6007/v1/dataprep/get_videos + ``` - Download uploaded files -Please use the file name from the list + Use the file name from the list -```bash -curl -X GET http://localhost:6007/v1/dataprep/get_file/${filename} -``` + ```bash + curl -X GET http://localhost:6007/v1/dataprep/get_file/${filename} + ``` diff --git a/comps/embeddings/mosec/langchain/Dockerfile b/comps/embeddings/mosec/langchain/Dockerfile index 57d21cbd23..6868fd57df 100644 --- a/comps/embeddings/mosec/langchain/Dockerfile +++ b/comps/embeddings/mosec/langchain/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ diff --git a/comps/embeddings/multimodal/README.md b/comps/embeddings/multimodal/README.md index 97c6288d30..c75a60f12a 100644 --- a/comps/embeddings/multimodal/README.md +++ b/comps/embeddings/multimodal/README.md @@ -52,7 +52,7 @@ Currently, we employ [**BridgeTower**](https://huggingface.co/BridgeTower/bridge ```bash cd ../../.. -docker build -t opea/bridgetower-embedder:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu . +docker build -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu . cd comps/embeddings/multimodal/bridgetower/ docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d ``` @@ -61,7 +61,7 @@ docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d ```bash cd ../../.. -docker build -t opea/bridgetower-embedder:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile . +docker build -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile . cd comps/embeddings/multimodal/bridgetower/ docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d ``` @@ -116,7 +116,7 @@ Currently, we employ [**BridgeTower**](https://huggingface.co/BridgeTower/bridge ```bash cd ../../.. 
-docker build -t opea/bridgetower-embedder:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu . +docker build -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu . cd comps/embeddings/multimodal/bridgetower/ docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d ``` @@ -125,7 +125,7 @@ docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d ```bash cd ../../.. -docker build -t opea/bridgetower-embedder:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile . +docker build -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile . cd comps/embeddings/multimodal/bridgetower/ docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d ``` diff --git a/comps/embeddings/multimodal/bridgetower/docker_compose_bridgetower_embedding_endpoint.yaml b/comps/embeddings/multimodal/bridgetower/docker_compose_bridgetower_embedding_endpoint.yaml index 9767490d0a..1e42e6bffa 100644 --- a/comps/embeddings/multimodal/bridgetower/docker_compose_bridgetower_embedding_endpoint.yaml +++ b/comps/embeddings/multimodal/bridgetower/docker_compose_bridgetower_embedding_endpoint.yaml @@ -2,9 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 services: - bridgetower: - image: opea/bridgetower-embedder:latest - container_name: bridgetower-embedding-server + embedding-multimodal-bridgetower: + image: opea/embedding-multimodal-bridgetower:latest + container_name: embedding-multimodal-bridgetower ports: - ${EMBEDDER_PORT}:${EMBEDDER_PORT} ipc: host diff --git a/comps/embeddings/multimodal/multimodal_langchain/Dockerfile b/comps/embeddings/multimodal/multimodal_langchain/Dockerfile index 923495add2..be0cad9441 100644 --- a/comps/embeddings/multimodal/multimodal_langchain/Dockerfile +++ b/comps/embeddings/multimodal/multimodal_langchain/Dockerfile @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ diff --git a/comps/embeddings/multimodal/multimodal_langchain/docker_compose_multimodal_embedding.yaml b/comps/embeddings/multimodal/multimodal_langchain/docker_compose_multimodal_embedding.yaml index 314233f931..74927b25e8 100644 --- a/comps/embeddings/multimodal/multimodal_langchain/docker_compose_multimodal_embedding.yaml +++ b/comps/embeddings/multimodal/multimodal_langchain/docker_compose_multimodal_embedding.yaml @@ -2,9 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 services: - embedding: + embedding-multimodal: image: opea/embedding-multimodal:latest - container_name: embedding-multimodal-server + container_name: embedding-multimodal ports: - ${MM_EMBEDDING_PORT_MICROSERVICE}:${MM_EMBEDDING_PORT_MICROSERVICE} ipc: host diff --git a/comps/embeddings/multimodal_clip/Dockerfile b/comps/embeddings/multimodal_clip/Dockerfile index aefba8aadf..6f45a81b26 100644 --- a/comps/embeddings/multimodal_clip/Dockerfile +++ 
b/comps/embeddings/multimodal_clip/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim ARG ARCH="cpu" diff --git a/comps/embeddings/tei/langchain/Dockerfile b/comps/embeddings/tei/langchain/Dockerfile index b4a63d28e9..7b30734ad4 100644 --- a/comps/embeddings/tei/langchain/Dockerfile +++ b/comps/embeddings/tei/langchain/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim ARG ARCH="cpu" diff --git a/comps/guardrails/llama_guard/langchain/Dockerfile b/comps/guardrails/llama_guard/langchain/Dockerfile index 15027b3ee6..63988cc4e5 100644 --- a/comps/guardrails/llama_guard/langchain/Dockerfile +++ b/comps/guardrails/llama_guard/langchain/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim ENV LANG=C.UTF-8 diff --git a/comps/guardrails/toxicity_detection/Dockerfile b/comps/guardrails/toxicity_detection/Dockerfile index 98dc0c0534..897e62a23c 100644 --- a/comps/guardrails/toxicity_detection/Dockerfile +++ b/comps/guardrails/toxicity_detection/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim ENV LANG=C.UTF-8 diff --git a/comps/intent_detection/langchain/Dockerfile b/comps/intent_detection/langchain/Dockerfile index 297b1b88f4..b19f371328 100644 --- a/comps/intent_detection/langchain/Dockerfile +++ b/comps/intent_detection/langchain/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ diff --git a/comps/knowledgegraphs/langchain/Dockerfile b/comps/knowledgegraphs/langchain/Dockerfile index 649ff023a3..9a6a85d002 100644 --- a/comps/knowledgegraphs/langchain/Dockerfile +++ b/comps/knowledgegraphs/langchain/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim ARG ARCH="cpu" # Set this to "cpu" or "gpu" diff --git a/comps/llms/faq-generation/tgi/langchain/Dockerfile b/comps/llms/faq-generation/tgi/langchain/Dockerfile index 990f410409..2cf7fa80cb 100644 --- a/comps/llms/faq-generation/tgi/langchain/Dockerfile +++ b/comps/llms/faq-generation/tgi/langchain/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ diff --git a/comps/llms/summarization/tgi/langchain/Dockerfile b/comps/llms/summarization/tgi/langchain/Dockerfile index 363df42e8e..9691a5784d 100644 --- a/comps/llms/summarization/tgi/langchain/Dockerfile +++ b/comps/llms/summarization/tgi/langchain/Dockerfile @@ -1,7 +1,9 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim + +ARG ARCH="cpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ @@ -16,6 +18,7 @@ USER user COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ + if [ ${ARCH} = "cpu" ]; 
then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ pip install --no-cache-dir -r /home/user/comps/llms/summarization/tgi/langchain/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user diff --git a/comps/llms/text-generation/README.md b/comps/llms/text-generation/README.md index 18897572ad..9c4af98c1f 100644 --- a/comps/llms/text-generation/README.md +++ b/comps/llms/text-generation/README.md @@ -6,108 +6,149 @@ A prerequisite for using this microservice is that users must have a LLM text ge Overall, this microservice offers a streamlined way to integrate large language model inference into applications, requiring minimal setup from the user beyond initiating a TGI/vLLM/Ray service and configuring the necessary environment variables. This allows for the seamless processing of queries and documents to generate intelligent, context-aware responses. -## 🚀1. Start Microservice with Python (Option 1) +## Validated LLM Models -To start the LLM microservice, you need to install python packages first. +| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | Ray | +| --------------------------- | --------- | -------- | ---------- | --- | +| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | ✓ | +| [Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | ✓ | +| [Llama-2-70b-chat-hf] | ✓ | - | ✓ | x | +| [Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | ✓ | +| [Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | x | +| [Phi-3] | x | Limit 4K | Limit 4K | ✓ | -### 1.1 Install Requirements +## Clone OPEA GenAIComps + +Clone this repository at your desired location and set an environment variable for easy setup and usage throughout the instructions. ```bash -pip install -r requirements.txt +git clone https://github.com/opea-project/GenAIComps.git + +export OPEA_GENAICOMPS_ROOT=$(pwd)/GenAIComps ``` -### 1.2 Start LLM Service +## 🚀1. Start Microservice with Python (Option 1) -#### 1.2.1 Start TGI Service +To start the LLM microservice, you need to install python packages first. + +### 1.1 Install Requirements ```bash -export HF_TOKEN=${your_hf_api_token} -docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model} -``` +pip install opea-comps +pip install -r ${OPEA_GENAICOMPS_ROOT}/comps/llms/requirements.txt -#### 1.2.2 Start vLLM Service +# Install requirements of your choice of microservice in the text-generation folder (tgi, vllm, vllm-ray, etc.) +export MICROSERVICE_DIR=your_chosen_microservice -```bash -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -docker run -it --name vllm_service -p 8008:80 -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -v ./data:/data opea/vllm:cpu /bin/bash -c "cd / && export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --model ${your_hf_llm_model} --port 80" +pip install -r ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/${MICROSERVICE_DIR}/requirements.txt ``` -### 1.2.3 Start Ray Service +Set an environment variable `your_ip` to the IP address of the machine where you would like to consume the microservice. 
```bash -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export TRUST_REMOTE_CODE=True -docker run -it --runtime=habana --name ray_serve_service -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -p 8008:80 -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e TRUST_REMOTE_CODE=$TRUST_REMOTE_CODE opea/llm-ray:latest /bin/bash -c "ray start --head && python api_server_openai.py --port_number 80 --model_id_or_path ${your_hf_llm_model} --chat_processor ${your_hf_chatprocessor}" +# For example, this command would set the IP address of your currently logged-in machine. +export your_ip=$(hostname -I | awk '{print $1}') ``` -### 1.3 Verify the LLM Service +### 1.2 Start LLM Service with Python Script -#### 1.3.1 Verify the TGI Service +#### 1.2.1 Start the TGI Service ```bash -curl http://${your_ip}:8008/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ - -H 'Content-Type: application/json' +export TGI_LLM_ENDPOINT="http://${your_ip}:8008" +python ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/tgi/llm.py +python ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/tgi/llm.py ``` -#### 1.3.2 Verify the vLLM Service +#### 1.2.2 Start the vLLM Service ```bash -curl http://${your_ip}:8008/v1/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": ${your_hf_llm_model}, - "prompt": "What is Deep Learning?", - "max_tokens": 32, - "temperature": 0 - }' +export vLLM_LLM_ENDPOINT="http://${your_ip}:8008" +python ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/llm.py +python ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/llm.py ``` -#### 1.3.3 Verify the Ray Service +#### 1.2.3 Start the Ray Service ```bash -curl http://${your_ip}:8008/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": ${your_hf_llm_model}, - "messages": [ - {"role": "assistant", "content": "You are a helpful assistant."}, - {"role": "user", "content": "What is Deep Learning?"}, - ], - "max_tokens": 32, - "stream": True - }' +export RAY_Serve_ENDPOINT="http://${your_ip}:8008" +python ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/ray_serve/llm.py +python ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/ray_serve/llm.py ``` -### 1.4 Start LLM Service with Python Script +## 🚀2. Start Microservice with Docker (Option 2) + +You can use either a published docker image or build your own docker image with the respective microservice Dockerfile of your choice. You must create a user account with [HuggingFace] and obtain permission to use the restricted LLM models by adhering to the guidelines provided on the respective model's webpage. 
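+If the model you choose is gated, make sure the machine (or container) can authenticate with your HuggingFace token. A minimal sketch, assuming you have already generated an access token in your HuggingFace account settings:
+
+```bash
+# Placeholder value; substitute the token generated in your HuggingFace settings.
+export HF_TOKEN=${your_hf_api_token}
+
+# Optionally cache the token locally (huggingface-cli ships with huggingface_hub).
+huggingface-cli login --token $HF_TOKEN
+```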
-#### 1.4.1 Start the TGI Service +### 2.1 Start LLM Service with published image + +#### 2.1.1 Start TGI Service ```bash -export TGI_LLM_ENDPOINT="http://${your_ip}:8008" -python text-generation/tgi/llm.py +export HF_LLM_MODEL=${your_hf_llm_model} +export HF_TOKEN=${your_hf_api_token} + +docker run \ + -p 8008:80 \ + -e HF_TOKEN=${HF_TOKEN} \ + -v ./data:/data \ + --name tgi_service \ + --shm-size 1g \ + ghcr.io/huggingface/text-generation-inference:1.4 \ + --model-id ${HF_LLM_MODEL} ``` -#### 1.4.2 Start the vLLM Service +#### 2.1.2 Start vLLM Service ```bash -export vLLM_LLM_ENDPOINT="http://${your_ip}:8008" -python text-generation/vllm/llm.py +# Use the script to build the docker image as opea/vllm:cpu +bash ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/build_docker_vllm.sh cpu + +export HF_LLM_MODEL=${your_hf_llm_model} +export HF_TOKEN=${your_hf_api_token} + +docker run -it \ + --name vllm_service \ + -p 8008:80 \ + -e HF_TOKEN=${HF_TOKEN} \ + -e VLLM_CPU_KVCACHE_SPACE=40 \ + -v ./data:/data \ + opea/vllm:cpu \ + --model ${HF_LLM_MODEL} + --port 80 ``` -#### 1.4.3 Start the Ray Service +#### 2.1.3 Start Ray Service ```bash -export RAY_Serve_ENDPOINT="http://${your_ip}:8008" -python text-generation/ray_serve/llm.py +export HF_LLM_MODEL=${your_hf_llm_model} +export HF_CHAT_PROCESSOR=${your_hf_chatprocessor} +export HF_TOKEN=${your_hf_api_token} +export TRUST_REMOTE_CODE=True + +docker run -it \ + --runtime=habana \ + --name ray_serve_service \ + -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ + --cap-add=sys_nice \ + --ipc=host \ + -p 8008:80 \ + -e HF_TOKEN=$HF_TOKEN \ + -e TRUST_REMOTE_CODE=$TRUST_REMOTE_CODE \ + opea/llm-ray:latest \ + /bin/bash -c " \ + ray start --head && \ + python api_server_openai.py \ + --port_number 80 \ + --model_id_or_path ${HF_LLM_MODEL} \ + --chat_processor ${HF_CHAT_PROCESSOR}" ``` -## 🚀2. Start Microservice with Docker (Option 2) +### 2.2 Start LLM Service with image built from source If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a TGI/vLLM service with docker. -### 2.1 Setup Environment Variables +#### 2.2.1 Setup Environment Variables In order to start TGI and LLM services, you need to setup the following environment variables first. @@ -120,7 +161,7 @@ export LLM_MODEL_ID=${your_hf_llm_model} In order to start vLLM and LLM services, you need to setup the following environment variables first. ```bash -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export vLLM_LLM_ENDPOINT="http://${your_ip}:8008" export LLM_MODEL_ID=${your_hf_llm_model} ``` @@ -128,7 +169,7 @@ export LLM_MODEL_ID=${your_hf_llm_model} In order to start Ray serve and LLM services, you need to setup the following environment variables first. ```bash -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export RAY_Serve_ENDPOINT="http://${your_ip}:8008" export LLM_MODEL=${your_hf_llm_model} export CHAT_PROCESSOR="ChatModelLlama" @@ -139,8 +180,13 @@ export CHAT_PROCESSOR="ChatModelLlama" #### 2.2.1 TGI ```bash -cd ../../../ -docker build -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile . +cd ${OPEA_GENAICOMPS_ROOT} + +docker build \ + -t opea/llm-tgi:latest \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy \ + -f comps/llms/text-generation/tgi/Dockerfile . 
``` #### 2.2.2 vLLM @@ -148,15 +194,19 @@ docker build -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build Build vllm docker. ```bash -cd text-generation/vllm/langchain/dependency -bash build_docker_vllm.sh +bash ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm.sh ``` Build microservice docker. ```bash -cd ../../../ -docker build -t opea/llm-vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/langchain/Dockerfile . +cd ${OPEA_GENAICOMPS_ROOT} + +docker build \ + -t opea/llm-vllm:latest \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy \ + -f comps/llms/text-generation/vllm/langchain/Dockerfile . ``` #### 2.2.3 Ray Serve @@ -164,15 +214,19 @@ docker build -t opea/llm-vllm:latest --build-arg https_proxy=$https_proxy --buil Build Ray Serve docker. ```bash -cd text-generation/vllm/ray/dependency -bash build_docker_vllmray.sh +bash ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/ray/dependency/build_docker_vllmray.sh ``` Build microservice docker. ```bash -cd ../../../ -docker build -t opea/llm-ray:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/ray/Dockerfile . +cd ${OPEA_GENAICOMPS_ROOT} + +docker build \ + -t opea/llm-ray:latest \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy \ + -f comps/llms/text-generation/vllm/ray/Dockerfile . ``` To start a docker container, you have two options: @@ -187,7 +241,15 @@ You can choose one as needed. #### 2.3.1 TGI ```bash -docker run -d --name="llm-tgi-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HF_TOKEN=$HF_TOKEN opea/llm-tgi:latest +docker run -d \ + --name="llm-tgi-server" \ + -p 9000:9000 \ + --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \ + -e HF_TOKEN=$HF_TOKEN \ + opea/llm-tgi:latest ``` #### 2.3.2 vLLM @@ -195,13 +257,23 @@ docker run -d --name="llm-tgi-server" -p 9000:9000 --ipc=host -e http_proxy=$htt Start vllm endpoint. ```bash -bash launch_vllm_service.sh +bash ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service.sh ``` Start vllm microservice. ```bash -docker run --name="llm-vllm-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=${no_proxy} -e vLLM_LLM_ENDPOINT=$vLLM_LLM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e LLM_MODEL_ID=$LLM_MODEL_ID opea/llm-vllm:latest +docker run \ + --name="llm-vllm-server" \ + -p 9000:9000 \ + --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy=${no_proxy} \ + -e vLLM_LLM_ENDPOINT=$vLLM_LLM_ENDPOINT \ + -e HF_TOKEN=$HF_TOKEN \ + -e LLM_MODEL_ID=$LLM_MODEL_ID \ + opea/llm-vllm:latest ``` #### 2.3.3 Ray Serve @@ -209,13 +281,22 @@ docker run --name="llm-vllm-server" -p 9000:9000 --ipc=host -e http_proxy=$http_ Start Ray Serve endpoint. ```bash -bash launch_ray_service.sh +bash ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/ray/dependency/launch_vllmray.sh ``` Start Ray Serve microservice. 
```bash -docker run -d --name="llm-ray-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e RAY_Serve_ENDPOINT=$RAY_Serve_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e LLM_MODEL=$LLM_MODEL opea/llm-ray:latest +docker run -d \ + --name="llm-ray-server" \ + -p 9000:9000 \ + --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e RAY_Serve_ENDPOINT=$RAY_Serve_ENDPOINT \ + -e HF_TOKEN=$HF_TOKEN \ + -e LLM_MODEL=$LLM_MODEL \ + opea/llm-ray:latest ``` ### 2.4 Run Docker with Docker Compose (Option B) @@ -223,21 +304,21 @@ docker run -d --name="llm-ray-server" -p 9000:9000 --ipc=host -e http_proxy=$htt #### 2.4.1 TGI ```bash -cd text-generation/tgi +cd ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/tgi docker compose -f docker_compose_llm.yaml up -d ``` #### 2.4.2 vLLM ```bash -cd text-generation/vllm/langchain +cd ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/langchain docker compose -f docker_compose_llm.yaml up -d ``` #### 2.4.3 Ray Serve ```bash -cd text-genetation/vllm/ray +cd ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/ray docker compose -f docker_compose_llm.yaml up -d ``` @@ -251,7 +332,47 @@ curl http://${your_ip}:9000/v1/health_check\ -H 'Content-Type: application/json' ``` -### 3.2 Consume LLM Service +### 3.2 Verify the LLM Service + +#### 3.2.1 Verify the TGI Service + +```bash +curl http://${your_ip}:8008/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' +``` + +#### 3.2.2 Verify the vLLM Service + +```bash +curl http://${your_ip}:8008/v1/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": ${your_hf_llm_model}, + "prompt": "What is Deep Learning?", + "max_tokens": 32, + "temperature": 0 + }' +``` + +#### 3.2.3 Verify the Ray Service + +```bash +curl http://${your_ip}:8008/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": ${your_hf_llm_model}, + "messages": [ + {"role": "assistant", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is Deep Learning?"}, + ], + "max_tokens": 32, + "stream": True + }' +``` + +### 3.3 Consume LLM Service You can set the following model parameters according to your actual needs, such as `max_new_tokens`, `streaming`. @@ -261,23 +382,42 @@ The `streaming` parameter determines the format of the data returned by the API. # non-streaming mode curl http://${your_ip}:9000/v1/chat/completions \ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \ - -H 'Content-Type: application/json' + -H 'Content-Type: application/json' \ + -d '{ + "query":"What is Deep Learning?", + "max_new_tokens":17, + "top_k":10, + "top_p":0.95, + "typical_p":0.95, + "temperature":0.01, + "repetition_penalty":1.03, + "streaming":false + }' + # streaming mode curl http://${your_ip}:9000/v1/chat/completions \ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ - -H 'Content-Type: application/json' + -H 'Content-Type: application/json' \ + -d '{ + "query":"What is Deep Learning?", + "max_new_tokens":17, + "top_k":10, + "top_p":0.95, + "typical_p":0.95, + "temperature":0.01, + "repetition_penalty":1.03, + "streaming":true + }' + ``` -### 4. 
Validated Model + -| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | Ray | -| ------------------------- | --------- | -------- | ---------- | --- | -| Intel/neural-chat-7b-v3-3 | ✓ | ✓ | ✓ | ✓ | -| Llama-2-7b-chat-hf | ✓ | ✓ | ✓ | ✓ | -| Llama-2-70b-chat-hf | ✓ | - | ✓ | x | -| Meta-Llama-3-8B-Instruct | ✓ | ✓ | ✓ | ✓ | -| Meta-Llama-3-70B-Instruct | ✓ | - | ✓ | x | -| Phi-3 | x | Limit 4K | Limit 4K | ✓ | +[Intel/neural-chat-7b-v3-3]: https://huggingface.co/Intel/neural-chat-7b-v3-3 +[Llama-2-7b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf +[Llama-2-70b-chat-hf]: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf +[Meta-Llama-3-8B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct +[Meta-Llama-3-70B-Instruct]: https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct +[Phi-3]: https://huggingface.co/collections/microsoft/phi-3-6626e15e9585a200d2d761e3 +[HuggingFace]: https://huggingface.co/ diff --git a/comps/llms/text-generation/ollama/langchain/Dockerfile b/comps/llms/text-generation/ollama/langchain/Dockerfile index 13fe6a39e8..6ac66d056a 100644 --- a/comps/llms/text-generation/ollama/langchain/Dockerfile +++ b/comps/llms/text-generation/ollama/langchain/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ curl \ diff --git a/comps/llms/text-generation/tgi/Dockerfile b/comps/llms/text-generation/tgi/Dockerfile index 6797f86032..831d830147 100644 --- a/comps/llms/text-generation/tgi/Dockerfile +++ b/comps/llms/text-generation/tgi/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ diff --git a/comps/llms/text-generation/vllm/langchain/Dockerfile b/comps/llms/text-generation/vllm/langchain/Dockerfile index 2c3d187c38..22d288225d 100644 --- a/comps/llms/text-generation/vllm/langchain/Dockerfile +++ b/comps/llms/text-generation/vllm/langchain/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim ARG ARCH="cpu" # Set this to "cpu" or "gpu" diff --git a/comps/llms/text-generation/vllm/langchain/README.md b/comps/llms/text-generation/vllm/langchain/README.md index 4bd51c8121..6db0065357 100644 --- a/comps/llms/text-generation/vllm/langchain/README.md +++ b/comps/llms/text-generation/vllm/langchain/README.md @@ -165,7 +165,7 @@ curl http://${your_ip}:8008/v1/completions \ ## 🚀3. Set up LLM microservice -Then we warp the VLLM service into LLM microcervice. +Then we warp the VLLM service into LLM microservice. 
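For orientation, the wrapping boils down to forwarding each request to vLLM's OpenAI-compatible API. A minimal sketch of that call path, assuming a vLLM server on port 8008 and the `langchain_community` package (the full wrapper, with streaming, prompt templates, and OPEA service registration, is the `comps/llms/text-generation/vllm/langchain/llm.py` updated later in this diff):

```python
# Sketch of the call path the LLM microservice wraps around vLLM.
# Assumes a vLLM server with an OpenAI-compatible API at http://localhost:8008
# and that langchain_community is installed; not a drop-in replacement for llm.py.
import os

from langchain_community.llms import VLLMOpenAI

llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008")
model_name = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")

llm = VLLMOpenAI(
    openai_api_key="EMPTY",                # vLLM does not check the key
    openai_api_base=llm_endpoint + "/v1",  # OpenAI-compatible route exposed by vLLM
    model_name=model_name,
    max_tokens=17,
    temperature=0.01,
    top_p=0.95,
)

# Single, non-streaming completion; the real microservice adds streaming,
# RAG prompt templates, and the FastAPI/OPEA registration on top of this.
print(llm.invoke("What is Deep Learning?"))
```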
### Build docker @@ -179,11 +179,48 @@ bash build_docker_microservice.sh bash launch_microservice.sh ``` -### Query the microservice +### Consume the microservice + +#### Check microservice status ```bash +curl http://${your_ip}:9000/v1/health_check\ + -X GET \ + -H 'Content-Type: application/json' + +# Output +# {"Service Title":"opea_service@llm_vllm/MicroService","Service Description":"OPEA Microservice Infrastructure"} +``` + +#### Consume vLLM Service + +User can set the following model parameters according to needs: + +- max_new_tokens: Total output token +- streaming(true/false): return text response in streaming mode or non-streaming mode + +```bash +# 1. Non-streaming mode curl http://${your_ip}:9000/v1/chat/completions \ -X POST \ -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_p":0.95,"temperature":0.01,"streaming":false}' \ -H 'Content-Type: application/json' + +# 2. Streaming mode +curl http://${your_ip}:9000/v1/chat/completions \ + -X POST \ + -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -H 'Content-Type: application/json' + +# 3. Custom chat template with streaming mode +curl http://${your_ip}:9000/v1/chat/completions \ + -X POST \ + -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true, "chat_template":"### You are a helpful, respectful and honest assistant to help the user with questions.\n### Context: {context}\n### Question: {question}\n### Answer:"}' \ + -H 'Content-Type: application/json' + +4. # Chat with SearchedDoc (Retrieval context) +curl http://${your_ip}:9000/v1/chat/completions \ + -X POST \ + -d '{"initial_query":"What is Deep Learning?","retrieved_docs":[{"text":"Deep Learning is a ..."},{"text":"Deep Learning is b ..."}]}' \ + -H 'Content-Type: application/json' ``` diff --git a/comps/llms/text-generation/vllm/langchain/llm.py b/comps/llms/text-generation/vllm/langchain/llm.py index c730dd66b0..9c6f1a0470 100644 --- a/comps/llms/text-generation/vllm/langchain/llm.py +++ b/comps/llms/text-generation/vllm/langchain/llm.py @@ -2,23 +2,31 @@ # SPDX-License-Identifier: Apache-2.0 import os +from typing import Union from fastapi.responses import StreamingResponse from langchain_community.llms import VLLMOpenAI +from langchain_core.prompts import PromptTemplate +from template import ChatTemplate from comps import ( CustomLogger, GeneratedDoc, LLMParamsDoc, + SearchedDoc, ServiceType, opea_microservices, opea_telemetry, register_microservice, ) +from comps.cores.proto.api_protocol import ChatCompletionRequest logger = CustomLogger("llm_vllm") logflag = os.getenv("LOGFLAG", False) +llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008") +model_name = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct") + @opea_telemetry def post_process_text(text: str): @@ -39,39 +47,120 @@ def post_process_text(text: str): host="0.0.0.0", port=9000, ) -def llm_generate(input: LLMParamsDoc): +def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]): if logflag: logger.info(input) - llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008") - model_name = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct") - llm = VLLMOpenAI( - openai_api_key="EMPTY", - openai_api_base=llm_endpoint + "/v1", - max_tokens=input.max_new_tokens, - model_name=model_name, - top_p=input.top_p, - 
temperature=input.temperature, - streaming=input.streaming, - ) - - if input.streaming: - - def stream_generator(): - chat_response = "" - for text in llm.stream(input.query): - chat_response += text - chunk_repr = repr(text.encode("utf-8")) - yield f"data: {chunk_repr}\n\n" + + prompt_template = None + + if not isinstance(input, SearchedDoc) and input.chat_template: + prompt_template = PromptTemplate.from_template(input.chat_template) + input_variables = prompt_template.input_variables + + if isinstance(input, SearchedDoc): + if logflag: + logger.info("[ SearchedDoc ] input from retriever microservice") + + prompt = input.initial_query + + if input.retrieved_docs: + docs = [doc.text for doc in input.retrieved_docs] if logflag: - logger.info(f"[llm - chat_stream] stream response: {chat_response}") - yield "data: [DONE]\n\n" + logger.info(f"[ SearchedDoc ] combined retrieved docs: {docs}") + + prompt = ChatTemplate.generate_rag_prompt(input.initial_query, docs) + + # use default llm parameter for inference + new_input = LLMParamsDoc(query=prompt) - return StreamingResponse(stream_generator(), media_type="text/event-stream") - else: - response = llm.invoke(input.query) if logflag: - logger.info(response) - return GeneratedDoc(text=response, prompt=input.query) + logger.info(f"[ SearchedDoc ] final input: {new_input}") + + llm = VLLMOpenAI( + openai_api_key="EMPTY", + openai_api_base=llm_endpoint + "/v1", + max_tokens=new_input.max_new_tokens, + model_name=model_name, + top_p=new_input.top_p, + temperature=new_input.temperature, + streaming=new_input.streaming, + ) + + if new_input.streaming: + + def stream_generator(): + chat_response = "" + for text in llm.stream(new_input.query): + chat_response += text + chunk_repr = repr(text.encode("utf-8")) + if logflag: + logger.info(f"[ SearchedDoc ] chunk: {chunk_repr}") + yield f"data: {chunk_repr}\n\n" + if logflag: + logger.info(f"[ SearchedDoc ] stream response: {chat_response}") + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + + else: + response = llm.invoke(new_input.query) + if logflag: + logger.info(response) + + return GeneratedDoc(text=response, prompt=new_input.query) + + elif isinstance(input, LLMParamsDoc): + if logflag: + logger.info("[ LLMParamsDoc ] input from rerank microservice") + + prompt = input.query + + if prompt_template: + if sorted(input_variables) == ["context", "question"]: + prompt = prompt_template.format(question=input.query, context="\n".join(input.documents)) + elif input_variables == ["question"]: + prompt = prompt_template.format(question=input.query) + else: + logger.info( + f"[ LLMParamsDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']" + ) + else: + if input.documents: + # use rag default template + prompt = ChatTemplate.generate_rag_prompt(input.query, input.documents) + + llm = VLLMOpenAI( + openai_api_key="EMPTY", + openai_api_base=llm_endpoint + "/v1", + max_tokens=input.max_new_tokens, + model_name=model_name, + top_p=input.top_p, + temperature=input.temperature, + streaming=input.streaming, + ) + + if input.streaming: + + def stream_generator(): + chat_response = "" + for text in llm.stream(input.query): + chat_response += text + chunk_repr = repr(text.encode("utf-8")) + if logflag: + logger.info(f"[ LLMParamsDoc ] chunk: {chunk_repr}") + yield f"data: {chunk_repr}\n\n" + if logflag: + logger.info(f"[ LLMParamsDoc ] stream response: {chat_response}") + yield "data: [DONE]\n\n" + + return 
StreamingResponse(stream_generator(), media_type="text/event-stream") + + else: + response = llm.invoke(input.query) + if logflag: + logger.info(response) + + return GeneratedDoc(text=response, prompt=input.query) if __name__ == "__main__": diff --git a/comps/llms/text-generation/vllm/langchain/template.py b/comps/llms/text-generation/vllm/langchain/template.py new file mode 100644 index 0000000000..447efcc673 --- /dev/null +++ b/comps/llms/text-generation/vllm/langchain/template.py @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import re + + +class ChatTemplate: + @staticmethod + def generate_rag_prompt(question, documents): + context_str = "\n".join(documents) + if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3: + # chinese context + template = """ +### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。 +### 搜索结果:{context} +### 问题:{question} +### 回答: +""" + else: + template = """ +### You are a helpful, respectful and honest assistant to help the user with questions. \ +Please refer to the search results obtained from the local knowledge base. \ +But be careful to not incorporate the information that you think is not relevant to the question. \ +If you don't know the answer to a question, please don't share false information. \n +### Search results: {context} \n +### Question: {question} \n +### Answer: +""" + return template.format(context=context_str, question=question) diff --git a/comps/llms/text-generation/vllm/llama_index/llm.py b/comps/llms/text-generation/vllm/llama_index/llm.py index db4652b96d..b66348bf36 100644 --- a/comps/llms/text-generation/vllm/llama_index/llm.py +++ b/comps/llms/text-generation/vllm/llama_index/llm.py @@ -57,13 +57,11 @@ def llm_generate(input: LLMParamsDoc): if input.streaming: def stream_generator(): - chat_response = "" for text in llm.stream_complete(input.query): - chat_response += text - chunk_repr = repr(text.encode("utf-8")) - yield f"data: {chunk_repr}\n\n" + output = text.text + yield f"data: {output}\n\n" if logflag: - logger.info(f"[llm - chat_stream] stream response: {chat_response}") + logger.info(f"[llm - chat_stream] stream response: {output}") yield "data: [DONE]\n\n" return StreamingResponse(stream_generator(), media_type="text/event-stream") diff --git a/comps/llms/text-generation/vllm/ray/Dockerfile b/comps/llms/text-generation/vllm/ray/Dockerfile index 40d7473798..148a4b01d0 100644 --- a/comps/llms/text-generation/vllm/ray/Dockerfile +++ b/comps/llms/text-generation/vllm/ray/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ diff --git a/comps/lvms/llava/README.md b/comps/lvms/llava/README.md index 59545d6869..adef9ef2e8 100644 --- a/comps/lvms/llava/README.md +++ b/comps/lvms/llava/README.md @@ -57,21 +57,21 @@ python check_lvm.py ```bash cd ../../../ -docker build -t opea/llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/dependency/Dockerfile . +docker build -t opea/lvm-llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/dependency/Dockerfile . 
``` - Gaudi2 HPU ```bash cd ../../../ -docker build -t opea/llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/dependency/Dockerfile.intel_hpu . +docker build -t opea/lvm-llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/dependency/Dockerfile.intel_hpu . ``` #### 2.1.2 LVM Service Image ```bash cd ../../../ -docker build -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/Dockerfile . +docker build -t opea/lvm-llava-svc:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/Dockerfile . ``` ### 2.2 Start LLaVA and LVM Service @@ -81,13 +81,13 @@ docker build -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg - Xeon ```bash -docker run -p 8399:8399 -e http_proxy=$http_proxy --ipc=host -e https_proxy=$https_proxy opea/llava:latest +docker run -p 8399:8399 -e http_proxy=$http_proxy --ipc=host -e https_proxy=$https_proxy opea/lvm-llava:latest ``` - Gaudi2 HPU ```bash -docker run -p 8399:8399 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/llava:latest +docker run -p 8399:8399 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/lvm-llava:latest ``` #### 2.2.2 Start LVM service @@ -95,7 +95,7 @@ docker run -p 8399:8399 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_M ```bash ip_address=$(hostname -I | awk '{print $1}') -docker run -p 9399:9399 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LVM_ENDPOINT=http://$ip_address:8399 opea/lvm:latest +docker run -p 9399:9399 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LVM_ENDPOINT=http://$ip_address:8399 opea/lvm-llava-svc:latest ``` #### 2.2.3 Test diff --git a/comps/lvms/llava/lvm.py b/comps/lvms/llava/lvm.py index 78e0d345cb..26dac1dda9 100644 --- a/comps/lvms/llava/lvm.py +++ b/comps/lvms/llava/lvm.py @@ -89,6 +89,7 @@ async def lvm(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> Union[TextDoc return_metadata["video_id"] = retrieved_metadata["video_id"] return_metadata["source_video"] = retrieved_metadata["source_video"] return_metadata["time_of_frame_ms"] = retrieved_metadata["time_of_frame_ms"] + return_metadata["transcript_for_inference"] = retrieved_metadata["transcript_for_inference"] return MetadataTextDoc(text=result, metadata=return_metadata) else: return TextDoc(text=result) diff --git a/comps/lvms/tgi-llava/lvm_tgi.py b/comps/lvms/tgi-llava/lvm_tgi.py index 875df14279..b4367c181c 100644 --- a/comps/lvms/tgi-llava/lvm_tgi.py +++ b/comps/lvms/tgi-llava/lvm_tgi.py @@ -134,6 +134,7 @@ async def stream_generator(): return_metadata["video_id"] = retrieved_metadata["video_id"] return_metadata["source_video"] = retrieved_metadata["source_video"] return_metadata["time_of_frame_ms"] = retrieved_metadata["time_of_frame_ms"] + return_metadata["transcript_for_inference"] = retrieved_metadata["transcript_for_inference"] return MetadataTextDoc(text=generated_str, metadata=return_metadata) else: return TextDoc(text=generated_str) diff --git a/comps/ragas/tgi/langchain/Dockerfile b/comps/ragas/tgi/langchain/Dockerfile index 3328e8ed11..adec50e58d 100644 --- a/comps/ragas/tgi/langchain/Dockerfile +++ 
b/comps/ragas/tgi/langchain/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ diff --git a/comps/reranks/mosec/langchain/Dockerfile b/comps/reranks/mosec/langchain/Dockerfile index 23faf56c68..509c0238b0 100644 --- a/comps/reranks/mosec/langchain/Dockerfile +++ b/comps/reranks/mosec/langchain/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ diff --git a/comps/reranks/mosec/langchain/dependency/Dockerfile b/comps/reranks/mosec/langchain/dependency/Dockerfile index 25dbeafece..dd54c1ff63 100644 --- a/comps/reranks/mosec/langchain/dependency/Dockerfile +++ b/comps/reranks/mosec/langchain/dependency/Dockerfile @@ -15,7 +15,7 @@ RUN apt update && apt install -y python3 python3-pip RUN pip3 install torch==2.2.2 torchvision --trusted-host download.pytorch.org --index-url https://download.pytorch.org/whl/cpu RUN pip3 install intel-extension-for-pytorch==2.2.0 -RUN pip3 install transformers sentence-transformers +RUN pip3 install transformers sentence-transformers==3.0.1 RUN pip3 install llmspec mosec RUN cd /home/user/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-reranker-base --local-dir /home/user/bge-reranker-large diff --git a/comps/reranks/video-rag-qna/Dockerfile b/comps/reranks/videoqna/Dockerfile similarity index 86% rename from comps/reranks/video-rag-qna/Dockerfile rename to comps/reranks/videoqna/Dockerfile index 617f47b6af..17a4a1c921 100644 --- a/comps/reranks/video-rag-qna/Dockerfile +++ b/comps/reranks/videoqna/Dockerfile @@ -15,10 +15,10 @@ USER user COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/user/comps/reranks/video-rag-qna/requirements.txt + pip install --no-cache-dir -r /home/user/comps/reranks/videoqna/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/reranks/video-rag-qna +WORKDIR /home/user/comps/reranks/videoqna ENTRYPOINT ["python", "local_reranking.py"] \ No newline at end of file diff --git a/comps/reranks/video-rag-qna/README.md b/comps/reranks/videoqna/README.md similarity index 57% rename from comps/reranks/video-rag-qna/README.md rename to comps/reranks/videoqna/README.md index bdb4fc47db..3ec4810be4 100644 --- a/comps/reranks/video-rag-qna/README.md +++ b/comps/reranks/videoqna/README.md @@ -1,8 +1,8 @@ # Rerank Microservice -This is a Docker-based microservice that do result rerank for VideoRAGQnA use case. Local rerank is used rather than rerank model. +This is a Docker-based microservice that do result rerank for VideoQnA use case. Local rerank is used rather than rerank model. -For the `VideoRAGQnA` usecase, during the data preparation phase, frames are extracted from videos and stored in a vector database. To identify the most relevant video, we count the occurrences of each video source among the retrieved data with rerank function `get_top_doc`. This sorts the video as a descending list of names, ranked by their degree of match with the query. Then we could send the `top_n` videos to the downstream LVM. 
+For the `VideoQnA` usecase, during the data preparation phase, frames are extracted from videos and stored in a vector database. To identify the most relevant video, we count the occurrences of each video source among the retrieved data with rerank function `get_top_doc`. This sorts the video as a descending list of names, ranked by their degree of match with the query. Then we could send the `top_n` videos to the downstream LVM. ## 🚀1. Start Microservice with Docker @@ -10,22 +10,22 @@ For the `VideoRAGQnA` usecase, during the data preparation phase, frames are ext ```bash cd GenAIComps -docker build --no-cache -t opea/reranking-videoragqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/video-rag-qna/Dockerfile . +docker build --no-cache -t opea/reranking-videoqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/videoqna/Dockerfile . ``` ### 1.2 Start Rerank Service ```bash -docker compose -f comps/reranks/video-rag-qna/docker/docker_compose_reranking.yaml up -d +docker compose -f comps/reranks/videoqna/docker_compose_reranking.yaml up -d # wait until ready -until docker logs reranking-videoragqna-server 2>&1 | grep -q "Uvicorn running on"; do +until docker logs reranking-videoqna-server 2>&1 | grep -q "Uvicorn running on"; do sleep 2 done ``` Available configuration by environment variable: -- CHUNK_DURATION: target chunk duration, should be aligned with VideoRAGQnA dataprep. Default 10s. +- CHUNK_DURATION: target chunk duration, should be aligned with VideoQnA dataprep. Default 10s. ## ✅ 2. Test @@ -57,6 +57,6 @@ The result should be: ```bash # remove the container -cid=$(docker ps -aq --filter "name=reranking-videoragqna-server") +cid=$(docker ps -aq --filter "name=reranking-videoqna-server") if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi ``` diff --git a/comps/reranks/video-rag-qna/docker_compose_reranking.yaml b/comps/reranks/videoqna/docker_compose_reranking.yaml similarity index 81% rename from comps/reranks/video-rag-qna/docker_compose_reranking.yaml rename to comps/reranks/videoqna/docker_compose_reranking.yaml index d819f331a1..4b39fd115a 100644 --- a/comps/reranks/video-rag-qna/docker_compose_reranking.yaml +++ b/comps/reranks/videoqna/docker_compose_reranking.yaml @@ -3,8 +3,8 @@ services: reranking: - image: opea/reranking-videoragqna:latest - container_name: reranking-videoragqna-server + image: opea/reranking-videoqna:latest + container_name: reranking-videoqna-server ports: - "8000:8000" ipc: host diff --git a/comps/reranks/video-rag-qna/local_reranking.py b/comps/reranks/videoqna/local_reranking.py similarity index 92% rename from comps/reranks/video-rag-qna/local_reranking.py rename to comps/reranks/videoqna/local_reranking.py index 9ba8aeb6f5..ac234499b3 100644 --- a/comps/reranks/video-rag-qna/local_reranking.py +++ b/comps/reranks/videoqna/local_reranking.py @@ -75,7 +75,7 @@ def format_video_name(video_name): @register_microservice( - name="opea_service@reranking_visual_rag", + name="opea_service@reranking_videoqna", service_type=ServiceType.RERANK, endpoint="/v1/reranking", host="0.0.0.0", @@ -83,7 +83,7 @@ def format_video_name(video_name): input_datatype=SearchedMultimodalDoc, output_datatype=LVMVideoDoc, ) -@register_statistics(names=["opea_service@reranking_visual_rag"]) +@register_statistics(names=["opea_service@reranking_videoqna"]) def reranking(input: SearchedMultimodalDoc) -> LVMVideoDoc: start = time.time() try: @@ -110,10 +110,10 @@ def reranking(input: SearchedMultimodalDoc) -> LVMVideoDoc: # Handle any other exceptions with a generic server error response raise HTTPException(status_code=500, detail="An unexpected error occurred.") - statistics_dict["opea_service@reranking_visual_rag"].append_latency(time.time() - start, None) + statistics_dict["opea_service@reranking_videoqna"].append_latency(time.time() - start, None) return result if __name__ == "__main__": - opea_microservices["opea_service@reranking_visual_rag"].start() + opea_microservices["opea_service@reranking_videoqna"].start() diff --git a/comps/reranks/video-rag-qna/requirements.txt b/comps/reranks/videoqna/requirements.txt similarity index 100% rename from comps/reranks/video-rag-qna/requirements.txt rename to comps/reranks/videoqna/requirements.txt diff --git a/comps/retrievers/multimodal/redis/langchain/Dockerfile b/comps/retrievers/multimodal/redis/langchain/Dockerfile index 9423561e16..fa25b2efec 100644 --- a/comps/retrievers/multimodal/redis/langchain/Dockerfile +++ b/comps/retrievers/multimodal/redis/langchain/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim ARG ARCH="cpu" diff --git a/comps/retrievers/multimodal/redis/langchain/README.md b/comps/retrievers/multimodal/redis/langchain/README.md index bdd1fb2ad5..c67dc9e775 100644 --- a/comps/retrievers/multimodal/redis/langchain/README.md +++ b/comps/retrievers/multimodal/redis/langchain/README.md @@ -51,7 +51,7 @@ export INDEX_NAME=${your_index_name} ```bash cd ../../../../../ -docker build -t opea/multimodal-retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/multimodal/redis/langchain/Dockerfile . 
+docker build -t opea/retriever-multimodal-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/multimodal/redis/langchain/Dockerfile . ``` To start a docker container, you have two options: @@ -64,7 +64,7 @@ You can choose one as needed. ### 2.3 Run Docker with CLI (Option A) ```bash -docker run -d --name="multimodal-retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/multimodal-retriever-redis:latest +docker run -d --name="retriever-multimodal-redis" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/retriever-multimodal-redis:latest ``` ### 2.4 Run Docker with Docker Compose (Option B) diff --git a/comps/retrievers/multimodal/redis/langchain/docker_compose_retriever.yaml b/comps/retrievers/multimodal/redis/langchain/docker_compose_retriever.yaml index efba29a4e1..6c4f76ad0e 100644 --- a/comps/retrievers/multimodal/redis/langchain/docker_compose_retriever.yaml +++ b/comps/retrievers/multimodal/redis/langchain/docker_compose_retriever.yaml @@ -4,9 +4,9 @@ version: "1.0" services: - retriever: - image: opea/multimodal-retriever-redis:latest - container_name: multimodal-retriever-redis-server + retriever-multimodal-redis: + image: opea/retriever-multimodal-redis:latest + container_name: retriever-multimodal-redis ports: - "7000:7000" ipc: host diff --git a/comps/retrievers/neo4j/langchain/Dockerfile b/comps/retrievers/neo4j/langchain/Dockerfile new file mode 100644 index 0000000000..5d8e8d254e --- /dev/null +++ b/comps/retrievers/neo4j/langchain/Dockerfile @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +ENV LANG=C.UTF-8 + +ENV no_proxy=localhost,127.0.0.1 + +ENV HUGGINGFACEHUB_API_TOKEN=dummy + +ARG ARCH="cpu" + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +USER user + +COPY comps /home/user/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu; fi && \ + pip install --no-cache-dir -r /home/user/comps/retrievers/neo4j/langchain/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/retrievers/neo4j/langchain + +ENTRYPOINT ["python", "retriever_neo4j.py"] diff --git a/comps/retrievers/neo4j/langchain/README.md b/comps/retrievers/neo4j/langchain/README.md new file mode 100644 index 0000000000..731abc20f5 --- /dev/null +++ b/comps/retrievers/neo4j/langchain/README.md @@ -0,0 +1,112 @@ +# Retriever Microservice with Neo4J + +## 🚀Start Microservice with Python + +### Install Requirements + +```bash +pip install -r requirements.txt +``` + +### Start Neo4J Server + +To launch Neo4j locally, first ensure you have docker installed. Then, you can launch the database with the following docker command. 
+ +```bash +docker run \ + -p 7474:7474 -p 7687:7687 \ + -v $PWD/data:/data -v $PWD/plugins:/plugins \ + --name neo4j-apoc \ + -d \ + -e NEO4J_AUTH=neo4j/password \ + -e NEO4J_PLUGINS=\[\"apoc\"\] \ + neo4j:latest +``` + +### Setup Environment Variables + +```bash +export no_proxy=${your_no_proxy} +export http_proxy=${your_http_proxy} +export https_proxy=${your_http_proxy} +export NEO4J_URI=${your_neo4j_url} +export NEO4J_USERNAME=${your_neo4j_username} +export NEO4J_PASSWORD=${your_neo4j_password} +``` + +### Start Retriever Service + +```bash +python retriever_neo4j.py +``` + +## 🚀Start Microservice with Docker + +### Build Docker Image + +```bash +cd ../../ +docker build -t opea/retriever-neo4j:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/neo4j/langchain/Dockerfile . +``` + +### Run Docker with CLI + +```bash +docker run -d --name="retriever-neo4j-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URI=${your_neo4j_host_ip} opea/retriever-neo4j:latest +``` + +## 🚀3. Consume Retriever Service + +### 3.1 Check Service Status + +```bash +curl http://${your_ip}:7000/v1/health_check \ + -X GET \ + -H 'Content-Type: application/json' +``` + +### 3.2 Consume Embedding Service + +To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. + +```bash +export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") +curl http://${your_ip}:7000/v1/retrieval \ + -X POST \ + -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \ + -H 'Content-Type: application/json' +``` + +You can set the parameters for the retriever. + +```bash +export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") +curl http://localhost:7000/v1/retrieval \ + -X POST \ + -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity\", \"k\":4}" \ + -H 'Content-Type: application/json' +``` + +```bash +export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") +curl http://localhost:7000/v1/retrieval \ + -X POST \ + -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity_distance_threshold\", \"k\":4, \"distance_threshold\":1.0}" \ + -H 'Content-Type: application/json' +``` + +```bash +export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") +curl http://localhost:7000/v1/retrieval \ + -X POST \ + -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity_score_threshold\", \"k\":4, \"score_threshold\":0.2}" \ + -H 'Content-Type: application/json' +``` + +```bash +export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") +curl http://localhost:7000/v1/retrieval \ + -X POST \ + -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"mmr\", \"k\":4, \"fetch_k\":20, \"lambda_mult\":0.5}" \ + -H 'Content-Type: application/json' +``` diff --git a/comps/retrievers/neo4j/langchain/__init__.py b/comps/retrievers/neo4j/langchain/__init__.py new file mode 100644 index 0000000000..916f3a44b2 --- /dev/null +++ 
b/comps/retrievers/neo4j/langchain/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/retrievers/neo4j/langchain/config.py b/comps/retrievers/neo4j/langchain/config.py new file mode 100644 index 0000000000..39adf6d89d --- /dev/null +++ b/comps/retrievers/neo4j/langchain/config.py @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +# Neo4J configuration +NEO4J_URL = os.getenv("NEO4J_URI", "bolt://localhost:7687") +NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") +NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "test") + +# Embedding model +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + +# Embedding endpoints +EMBED_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") diff --git a/comps/retrievers/neo4j/langchain/requirements.txt b/comps/retrievers/neo4j/langchain/requirements.txt new file mode 100644 index 0000000000..24f579c6a6 --- /dev/null +++ b/comps/retrievers/neo4j/langchain/requirements.txt @@ -0,0 +1,21 @@ +docarray[full] +fastapi +frontend==0.0.3 +huggingface_hub +langchain==0.2 +langchain-community +neo4j +numpy +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +pandas +Pillow +prometheus-fastapi-instrumentator +pydantic==2.7.3 +pymupdf==1.24.5 +python-docx==0.8.11 +sentence_transformers +shortuuid +tiktoken +uvicorn diff --git a/comps/retrievers/neo4j/langchain/retriever_neo4j.py b/comps/retrievers/neo4j/langchain/retriever_neo4j.py new file mode 100644 index 0000000000..47ce4a5442 --- /dev/null +++ b/comps/retrievers/neo4j/langchain/retriever_neo4j.py @@ -0,0 +1,117 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import os +import time +from typing import Union + +from config import EMBED_ENDPOINT, EMBED_MODEL, NEO4J_PASSWORD, NEO4J_URL, NEO4J_USERNAME +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.vectorstores import Neo4jVector + +from comps import ( + CustomLogger, + EmbedDoc, + SearchedDoc, + ServiceType, + TextDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) +from comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + RetrievalRequest, + RetrievalResponse, + RetrievalResponseData, +) + +logger = CustomLogger("retriever_neo4j") +logflag = os.getenv("LOGFLAG", False) + + +@register_microservice( + name="opea_service@retriever_neo4j", + service_type=ServiceType.RETRIEVER, + endpoint="/v1/retrieval", + host="0.0.0.0", + port=7000, +) +@register_statistics(names=["opea_service@retriever_neo4j"]) +def retrieve( + input: Union[EmbedDoc, RetrievalRequest, ChatCompletionRequest] +) -> Union[SearchedDoc, RetrievalResponse, ChatCompletionRequest]: + if logflag: + logger.info(input) + start = time.time() + + if isinstance(input, EmbedDoc): + query = input.text + else: + # for RetrievalRequest, ChatCompletionRequest + query = input.input + + if input.search_type == "similarity": + search_res = vector_db.similarity_search_by_vector(embedding=input.embedding, query=input.text, k=input.k) + elif input.search_type == "similarity_distance_threshold": + if input.distance_threshold is None: + raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") + search_res = vector_db.similarity_search_by_vector( + embedding=input.embedding, query=input.text, k=input.k, 
distance_threshold=input.distance_threshold + ) + elif input.search_type == "similarity_score_threshold": + docs_and_similarities = vector_db.similarity_search_with_relevance_scores( + query=input.text, k=input.k, score_threshold=input.score_threshold + ) + search_res = [doc for doc, _ in docs_and_similarities] + elif input.search_type == "mmr": + search_res = vector_db.max_marginal_relevance_search( + query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult + ) + else: + raise ValueError(f"{input.search_type} not valid") + + # return different response format + retrieved_docs = [] + if isinstance(input, EmbedDoc): + for r in search_res: + retrieved_docs.append(TextDoc(text=r.page_content)) + result = SearchedDoc(retrieved_docs=retrieved_docs, initial_query=input.text) + else: + for r in search_res: + retrieved_docs.append(RetrievalResponseData(text=r.page_content, metadata=r.metadata)) + if isinstance(input, RetrievalRequest): + result = RetrievalResponse(retrieved_docs=retrieved_docs) + elif isinstance(input, ChatCompletionRequest): + input.retrieved_docs = retrieved_docs + input.documents = [doc.text for doc in retrieved_docs] + result = input + + statistics_dict["opea_service@retriever_neo4j"].append_latency(time.time() - start, None) + if logflag: + logger.info(result) + return result + + +if __name__ == "__main__": + + if EMBED_ENDPOINT: + # create embeddings using TEI endpoint service + hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN") + embeddings = HuggingFaceHubEmbeddings(model=EMBED_ENDPOINT, huggingfacehub_api_token=hf_token) + else: + # create embeddings using local embedding model + embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + + vector_db = Neo4jVector.from_existing_graph( + embedding=embeddings, + url=NEO4J_URL, + username=NEO4J_USERNAME, + password=NEO4J_PASSWORD, + node_label="__Entity__", + text_node_properties=["id", "description"], + embedding_node_property="embedding", + ) + opea_microservices["opea_service@retriever_neo4j"].start() diff --git a/comps/retrievers/pathway/langchain/Dockerfile b/comps/retrievers/pathway/langchain/Dockerfile index d14d078611..c8d3b72c4c 100644 --- a/comps/retrievers/pathway/langchain/Dockerfile +++ b/comps/retrievers/pathway/langchain/Dockerfile @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim ARG ARCH="cpu" diff --git a/comps/retrievers/pgvector/langchain/Dockerfile b/comps/retrievers/pgvector/langchain/Dockerfile index c5d460346f..484db5af7b 100644 --- a/comps/retrievers/pgvector/langchain/Dockerfile +++ b/comps/retrievers/pgvector/langchain/Dockerfile @@ -1,7 +1,9 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim + +ARG ARCH="cpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ @@ -16,6 +18,7 @@ COPY comps /home/user/comps USER user RUN pip install --no-cache-dir --upgrade pip && \ + if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ pip install --no-cache-dir -r /home/user/comps/retrievers/pgvector/langchain/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user diff --git a/comps/retrievers/pinecone/langchain/Dockerfile b/comps/retrievers/pinecone/langchain/Dockerfile index 5dc83e72c9..2d488d3edc 100644 --- a/comps/retrievers/pinecone/langchain/Dockerfile +++ 
b/comps/retrievers/pinecone/langchain/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim ARG ARCH="cpu" diff --git a/comps/retrievers/redis/langchain/Dockerfile b/comps/retrievers/redis/langchain/Dockerfile index 852323d032..6b240a989d 100644 --- a/comps/retrievers/redis/langchain/Dockerfile +++ b/comps/retrievers/redis/langchain/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim ARG ARCH="cpu" diff --git a/comps/retrievers/vdms/langchain/Dockerfile b/comps/retrievers/vdms/langchain/Dockerfile index a44ea04759..baf1ea7315 100644 --- a/comps/retrievers/vdms/langchain/Dockerfile +++ b/comps/retrievers/vdms/langchain/Dockerfile @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim ARG ARCH="cpu" diff --git a/comps/vectorstores/pathway/Dockerfile b/comps/vectorstores/pathway/Dockerfile index 31cd06a824..3b775e31eb 100644 --- a/comps/vectorstores/pathway/Dockerfile +++ b/comps/vectorstores/pathway/Dockerfile @@ -5,6 +5,7 @@ FROM pathwaycom/pathway:0.13.2-slim ENV DOCKER_BUILDKIT=1 ENV PYTHONUNBUFFERED=1 +ARG ARCH="cpu" RUN apt-get update && apt-get install -y \ poppler-utils \ @@ -16,7 +17,8 @@ WORKDIR /app COPY requirements.txt /app/ -RUN pip install --no-cache-dir -r requirements.txt +RUN if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ + pip install --no-cache-dir -r requirements.txt COPY vectorstore_pathway.py /app/ diff --git a/comps/vectorstores/pathway/requirements.txt b/comps/vectorstores/pathway/requirements.txt index 3d88eddf6c..df17635f4a 100644 --- a/comps/vectorstores/pathway/requirements.txt +++ b/comps/vectorstores/pathway/requirements.txt @@ -1,4 +1,4 @@ langchain_openai pathway[xpack-llm] >= 0.14.1 -sentence_transformers +sentence-transformers unstructured[all-docs] >= 0.10.28,<0.15 diff --git a/comps/web_retrievers/chroma/langchain/Dockerfile b/comps/web_retrievers/chroma/langchain/Dockerfile index 149e4d3622..614dd6c9b0 100644 --- a/comps/web_retrievers/chroma/langchain/Dockerfile +++ b/comps/web_retrievers/chroma/langchain/Dockerfile @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -FROM langchain/langchain:latest +FROM python:3.11-slim ARG ARCH="cpu" # Set this to "cpu" or "gpu" diff --git a/tests/cores/mega/test_multimodalrag_with_videos_gateway.py b/tests/cores/mega/test_multimodalqna_gateway.py similarity index 93% rename from tests/cores/mega/test_multimodalrag_with_videos_gateway.py rename to tests/cores/mega/test_multimodalqna_gateway.py index 4fc72a6eb1..c05bf57bdd 100644 --- a/tests/cores/mega/test_multimodalrag_with_videos_gateway.py +++ b/tests/cores/mega/test_multimodalqna_gateway.py @@ -14,7 +14,7 @@ LVMDoc, LVMSearchedMultimodalDoc, MultimodalDoc, - MultimodalRAGWithVideosGateway, + MultimodalQnAGateway, SearchedMultimodalDoc, ServiceOrchestrator, TextDoc, @@ -93,9 +93,7 @@ def setUpClass(cls): cls.follow_up_query_service_builder = ServiceOrchestrator() cls.follow_up_query_service_builder.add(cls.lvm) - cls.gateway = MultimodalRAGWithVideosGateway( - cls.service_builder, cls.follow_up_query_service_builder, port=9898 - ) + cls.gateway = MultimodalQnAGateway(cls.service_builder, cls.follow_up_query_service_builder, port=9898) 
@classmethod def tearDownClass(cls): @@ -115,13 +113,13 @@ async def test_follow_up_query_service_builder_schedule(self): # print(result_dict) self.assertEqual(result_dict[self.lvm.name]["text"], "\nUSER: chao, \nASSISTANT:") - def test_multimodal_rag_with_videos_gateway(self): + def test_MultimodalQnAGateway_gateway(self): json_data = {"messages": "hello, "} - response = requests.post("http://0.0.0.0:9898/v1/mmragvideoqna", json=json_data) + response = requests.post("http://0.0.0.0:9898/v1/multimodalqna", json=json_data) response = response.json() self.assertEqual(response["choices"][-1]["message"]["content"], "hello, opea project!") - def test_follow_up_mm_rag_with_videos_gateway(self): + def test_follow_up_MultimodalQnAGateway_gateway(self): json_data = { "messages": [ { @@ -139,7 +137,7 @@ def test_follow_up_mm_rag_with_videos_gateway(self): ], "max_tokens": 300, } - response = requests.post("http://0.0.0.0:9898/v1/mmragvideoqna", json=json_data) + response = requests.post("http://0.0.0.0:9898/v1/multimodalqna", json=json_data) response = response.json() self.assertEqual( response["choices"][-1]["message"]["content"], diff --git a/tests/cores/mega/test_service_orchestrator_with_videoragqnagateway.py b/tests/cores/mega/test_service_orchestrator_with_videoqnagateway.py similarity index 90% rename from tests/cores/mega/test_service_orchestrator_with_videoragqnagateway.py rename to tests/cores/mega/test_service_orchestrator_with_videoqnagateway.py index a9bdcdb332..4905120fbb 100644 --- a/tests/cores/mega/test_service_orchestrator_with_videoragqnagateway.py +++ b/tests/cores/mega/test_service_orchestrator_with_videoqnagateway.py @@ -6,14 +6,7 @@ from fastapi.responses import StreamingResponse -from comps import ( - ServiceOrchestrator, - ServiceType, - TextDoc, - VideoRAGQnAGateway, - opea_microservices, - register_microservice, -) +from comps import ServiceOrchestrator, ServiceType, TextDoc, VideoQnAGateway, opea_microservices, register_microservice from comps.cores.proto.docarray import LLMParams @@ -51,7 +44,7 @@ def setUp(self): self.service_builder.add(opea_microservices["s1"]).add(opea_microservices["s2"]) self.service_builder.flow_to(self.s1, self.s2) - self.gateway = VideoRAGQnAGateway(self.service_builder, port=9898) + self.gateway = VideoQnAGateway(self.service_builder, port=9898) def tearDown(self): self.s1.stop() diff --git a/tests/dataprep/test_dataprep_multimodal_redis_langchain.sh b/tests/dataprep/test_dataprep_multimodal_redis_langchain.sh index 43b9e89129..a7461a8abb 100644 --- a/tests/dataprep/test_dataprep_multimodal_redis_langchain.sh +++ b/tests/dataprep/test_dataprep_multimodal_redis_langchain.sh @@ -18,39 +18,39 @@ video_fn="${video_name}.mp4" function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/dataprep-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimodal/redis/langchain/Dockerfile . + docker build --no-cache -t opea/dataprep-multimodal-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimodal/redis/langchain/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/dataprep-redis built fail" + echo "opea/dataprep-multimodal-redis built fail" exit 1 else - echo "opea/dataprep-redis built successful" + echo "opea/dataprep-multimodal-redis built successful" fi } function build_lvm_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/llava:comps -f comps/lvms/llava/dependency/Dockerfile . 
+ docker build --no-cache -t opea/lvm-llava:comps -f comps/lvms/llava/dependency/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/llava built fail" + echo "opea/lvm-llava built fail" exit 1 else - echo "opea/llava built successful" + echo "opea/lvm-llava built successful" fi - docker build --no-cache -t opea/lvm:comps -f comps/lvms/llava/Dockerfile . + docker build --no-cache -t opea/lvm-llava-svc:comps -f comps/lvms/llava/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/lvm built fail" + echo "opea/lvm-llava-svc built fail" exit 1 else - echo "opea/lvm built successful" + echo "opea/lvm-llava-svc built successful" fi } function start_lvm_service() { unset http_proxy - docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5029:8399 --ipc=host opea/llava:comps - docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:5029 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${LVM_PORT}:9399 --ipc=host opea/lvm:comps + docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5029:8399 --ipc=host opea/lvm-llava:comps + docker run -d --name="test-comps-lvm-llava-svc" -e LVM_ENDPOINT=http://$ip_address:5029 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${LVM_PORT}:9399 --ipc=host opea/lvm-llava-svc:comps sleep 5m } @@ -68,13 +68,13 @@ function start_service() { # start redis echo "Starting Redis server" REDIS_PORT=6380 - docker run -d --name="test-comps-dataprep-redis-multimodal-langchain" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $REDIS_PORT:6379 -p 8002:8001 --ipc=host redis/redis-stack:7.2.0-v9 + docker run -d --name="test-redis" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $REDIS_PORT:6379 -p 8002:8001 --ipc=host redis/redis-stack:7.2.0-v9 # start dataprep microservice echo "Starting dataprep microservice" dataprep_service_port=5013 REDIS_URL="redis://${ip_address}:${REDIS_PORT}" - docker run -d --name="test-comps-dataprep-redis-multimodal-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e LVM_ENDPOINT=$LVM_ENDPOINT -p ${dataprep_service_port}:6007 --runtime=runc --ipc=host opea/dataprep-redis:comps + docker run -d --name="test-comps-dataprep-multimodal-redis" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e LVM_ENDPOINT=$LVM_ENDPOINT -p ${dataprep_service_port}:6007 --runtime=runc --ipc=host opea/dataprep-multimodal-redis:comps sleep 1m } @@ -133,14 +133,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." 
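The hunks on either side of this point repeat one validation pattern: post a file to the dataprep endpoint, require HTTP 200, require the marker string in the body, and dump the container logs on failure. A compact Python sketch of that pattern, with the endpoint path and multipart field name left as placeholders since they are not shown in these hunks (assumes the `requests` package is installed):

```python
# Hedged sketch of the validate_microservice pattern used in this test script.
# The endpoint path and the multipart field name are placeholders (not shown in
# these hunks); the host port 5013, the container name, and the expected marker
# string are taken from the script above.
import subprocess

import requests

DATAPREP_URL = "http://localhost:5013/<dataprep-ingest-path>"  # placeholder path
CONTAINER = "test-comps-dataprep-multimodal-redis"


def validate_upload(path: str) -> None:
    with open(path, "rb") as f:
        resp = requests.post(DATAPREP_URL, files={"files": f}, timeout=300)
    if resp.status_code != 200 or "Data preparation succeeded" not in resp.text:
        # Mirror the shell script: surface the container logs before failing.
        logs = subprocess.run(["docker", "logs", CONTAINER], capture_output=True, text=True)
        print(logs.stdout, logs.stderr)
        raise AssertionError(f"dataprep validation failed: {resp.status_code} {resp.text!r}")
    print("Content is as expected.")


# Example usage (hypothetical local file, e.g. the ${video_fn} the script downloads):
# validate_upload("your_video.mp4")
```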
@@ -157,14 +157,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -181,14 +181,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -206,14 +206,14 @@ function validate_microservice() { if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_file.log + docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *${video_name}* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_file.log + docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -230,7 +230,7 @@ function validate_microservice() { # check response status if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_del.log + docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." @@ -238,7 +238,7 @@ function validate_microservice() { # check response body if [[ "$RESPONSE_BODY" != *'{"status":true}'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_del.log + docker logs test-comps-dataprep-multimodal-redis >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." 
@@ -246,10 +246,10 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-dataprep-redis-multimodal-langchain*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - cid=$(docker ps -aq --filter "name=test-comps-lvm*") + cid=$(docker ps -aq --filter "name=test-*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + # cid=$(docker ps -aq --filter "name=test-comps-lvm*") + # if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/embeddings/test_embeddings_multimodal.sh b/tests/embeddings/test_embeddings_multimodal.sh index 30907c61d6..bd2ca93b70 100644 --- a/tests/embeddings/test_embeddings_multimodal.sh +++ b/tests/embeddings/test_embeddings_multimodal.sh @@ -16,13 +16,13 @@ unset http_proxy function build_mmei_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/bridgetower-embedder:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile . + docker build --no-cache -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/bridgetower-embedder built fail" + echo "opea/embedding-multimodal-bridgetower built fail" exit 1 else - echo "opea/bridgetower-embedder built successful" + echo "opea/embedding-multimodal-bridgetower built successful" fi } @@ -63,8 +63,8 @@ function validate_microservice_text_embedding() { echo "Result correct." else echo "Result wrong. Received was $result" - docker logs bridgetower-embedding-server - docker logs embedding-multimodal-server + docker logs embedding-multimodal-bridgetower + docker logs embedding-multimodal exit 1 fi } @@ -79,8 +79,8 @@ function validate_microservice_image_text_pair_embedding() { echo "Result correct." else echo "Result wrong. Received was $result" - docker logs bridgetower-embedding-server - docker logs embedding-multimodal-server + docker logs embedding-multimodal-bridgetower + docker logs embedding-multimodal exit 1 fi } @@ -91,7 +91,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=bridgetower-embedding-server" --filter "name=embedding-multimodal-server") + cid=$(docker ps -aq --filter "name=embedding-multimodal-bridgetower" --filter "name=embedding-multimodal") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/embeddings/test_embeddings_multimodal_on_intel_hpu.sh b/tests/embeddings/test_embeddings_multimodal_on_intel_hpu.sh index b9e48c3ff1..fe3cb8e2e6 100644 --- a/tests/embeddings/test_embeddings_multimodal_on_intel_hpu.sh +++ b/tests/embeddings/test_embeddings_multimodal_on_intel_hpu.sh @@ -16,13 +16,13 @@ unset http_proxy function build_mmei_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/bridgetower-embedder:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu . 
+ docker build --no-cache -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu . if [ $? -ne 0 ]; then - echo "opea/bridgetower-embedder built fail" + echo "opea/embedding-multimodal-bridgetower built fail" exit 1 else - echo "opea/bridgetower-embedder built successful" + echo "opea/embedding-multimodal-bridgetower built successful" fi } @@ -64,8 +64,8 @@ function validate_microservice_text_embedding() { echo "Result correct." else echo "Result wrong. Received was $result" - docker logs bridgetower-embedding-server - docker logs embedding-multimodal-server + docker logs embedding-multimodal-bridgetower + docker logs embedding-multimodal exit 1 fi } @@ -80,8 +80,8 @@ function validate_microservice_image_text_pair_embedding() { echo "Result correct." else echo "Result wrong. Received was $result" - docker logs bridgetower-embedding-server - docker logs embedding-multimodal-server + docker logs embedding-multimodal-bridgetower + docker logs embedding-multimodal exit 1 fi } @@ -92,7 +92,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=bridgetower-embedding-server" --filter "name=embedding-multimodal-server") + cid=$(docker ps -aq --filter "name=embedding-multimodal-bridgetower" --filter "name=embedding-multimodal") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/embeddings/test_embeddings_tei_langchain.sh b/tests/embeddings/test_embeddings_tei_langchain.sh index cd957a2b8d..a7b9e19274 100644 --- a/tests/embeddings/test_embeddings_tei_langchain.sh +++ b/tests/embeddings/test_embeddings_tei_langchain.sh @@ -47,7 +47,7 @@ function validate_microservice() { } function validate_microservice_with_openai() { - tei_service_port=5001 + tei_service_port=5002 python3 ${WORKPATH}/tests/utils/validate_svc_with_openai.py $ip_address $tei_service_port "embedding" if [ $? -ne 0 ]; then docker logs test-comps-embedding-tei-endpoint diff --git a/tests/llms/test_llms_text-generation_tgi.sh b/tests/llms/test_llms_text-generation_tgi.sh index 293bbb292b..db01b60e2f 100644 --- a/tests/llms/test_llms_text-generation_tgi.sh +++ b/tests/llms/test_llms_text-generation_tgi.sh @@ -21,10 +21,10 @@ function build_docker_images() { function start_service() { tgi_endpoint_port=5004 - export your_hf_llm_model=$1 + export hf_llm_model=$1 # Remember to set HF_TOKEN before invoking this test! 
export HF_TOKEN=${HF_TOKEN} - docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g -e HF_TOKEN=${HF_TOKEN} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} --max-input-tokens 1024 --max-total-tokens 2048 + docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ~/.cache/huggingface/hub:/data --shm-size 1g -e HF_TOKEN=${HF_TOKEN} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${hf_llm_model} --max-input-tokens 1024 --max-total-tokens 2048 export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}" llm_port=5005 @@ -34,9 +34,9 @@ function start_service() { # check whether tgi is fully ready n=0 until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do - docker logs test-comps-llm-tgi-endpoint >> ${LOG_PATH}/llm-tgi.log + docker logs test-comps-llm-tgi-endpoint >> ${LOG_PATH}/${hf_llm_model}-llm-tgi.log n=$((n+1)) - if grep -q Connected ${LOG_PATH}/llm-tgi.log; then + if grep -q Connected ${LOG_PATH}/${hf_llm_model}-llm-tgi.log; then break fi sleep 5s diff --git a/tests/lvms/test_lvms_llava.sh b/tests/lvms/test_lvms_llava.sh index f0acb3ae93..18db0c40e5 100644 --- a/tests/lvms/test_lvms_llava.sh +++ b/tests/lvms/test_lvms_llava.sh @@ -11,27 +11,27 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/llava:comps -f comps/lvms/llava/dependency/Dockerfile . + docker build --no-cache -t opea/lvm-llava:comps -f comps/lvms/llava/dependency/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/llava built fail" + echo "opea/lvm-llava built fail" exit 1 else - echo "opea/llava built successful" + echo "opea/lvm-llava built successful" fi - docker build --no-cache -t opea/lvm:comps -f comps/lvms/llava/Dockerfile . + docker build --no-cache -t opea/lvm-llava-svc:comps -f comps/lvms/llava/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/lvm built fail" + echo "opea/lvm-llava-svc built fail" exit 1 else - echo "opea/lvm built successful" + echo "opea/lvm-llava-svc built successful" fi } function start_service() { unset http_proxy lvm_port=5051 - docker run -d --name="test-comps-lvm-llava-dependency" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5028:8399 --ipc=host opea/llava:comps - docker run -d --name="test-comps-lvm-llava-server" -e LVM_ENDPOINT=http://$ip_address:5028 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $lvm_port:9399 --ipc=host opea/lvm:comps + docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5028:8399 --ipc=host opea/lvm-llava:comps + docker run -d --name="test-comps-lvm-llava-svc" -e LVM_ENDPOINT=http://$ip_address:5028 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $lvm_port:9399 --ipc=host opea/lvm-llava-svc:comps sleep 8m } @@ -43,8 +43,8 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong." - docker logs test-comps-lvm-llava-dependency >> ${LOG_PATH}/llava-dependency.log - docker logs test-comps-lvm-llava-server >> ${LOG_PATH}/llava-server.log + docker logs test-comps-lvm-llava >> ${LOG_PATH}/llava-dependency.log + docker logs test-comps-lvm-llava-svc >> ${LOG_PATH}/llava-server.log exit 1 fi @@ -53,8 +53,8 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong." 
- docker logs test-comps-lvm-llava-dependency >> ${LOG_PATH}/llava-dependency.log - docker logs test-comps-lvm-llava-server >> ${LOG_PATH}/llava-server.log + docker logs test-comps-lvm-llava >> ${LOG_PATH}/llava-dependency.log + docker logs test-comps-lvm-llava-svc >> ${LOG_PATH}/llava-server.log exit 1 fi @@ -63,15 +63,15 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong." - docker logs test-comps-lvm-llava-dependency >> ${LOG_PATH}/llava-dependency.log - docker logs test-comps-lvm-llava-server >> ${LOG_PATH}/llava-server.log + docker logs test-comps-lvm-llava >> ${LOG_PATH}/llava-dependency.log + docker logs test-comps-lvm-llava-svc >> ${LOG_PATH}/llava-server.log exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-lvm-llava*") + cid=$(docker ps -aq --filter "name=test-comps-*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/reranks/test_reranks_video-rag-qna.sh b/tests/reranks/test_reranks_videoqna.sh old mode 100644 new mode 100755 similarity index 86% rename from tests/reranks/test_reranks_video-rag-qna.sh rename to tests/reranks/test_reranks_videoqna.sh index 631124c16c..9d6cdf0510 --- a/tests/reranks/test_reranks_video-rag-qna.sh +++ b/tests/reranks/test_reranks_videoqna.sh @@ -9,11 +9,11 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH - docker build --no-cache -t opea/reranking-videoragqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/video-rag-qna/Dockerfile . + docker build --no-cache -t opea/reranking-videoqna:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/videoqna/Dockerfile . } function start_service() { - docker run -d --name "test-comps-reranking-videoragqna-server" \ + docker run -d --name "test-comps-reranking-videoqna-server" \ -p 5037:8000 \ --ipc=host \ -e no_proxy=${no_proxy} \ @@ -21,10 +21,10 @@ function start_service() { -e https_proxy=${https_proxy} \ -e CHUNK_DURATION=${CHUNK_DURATION} \ -e FILE_SERVER_ENDPOINT=${FILE_SERVER_ENDPOINT} \ - opea/reranking-videoragqna:latest + opea/reranking-videoqna:comps - until docker logs test-comps-reranking-videoragqna-server 2>&1 | grep -q "Uvicorn running on"; do + until docker logs test-comps-reranking-videoqna-server 2>&1 | grep -q "Uvicorn running on"; do sleep 2 done } diff --git a/tests/retrievers/test_retrievers_multimodal_redis_langchain.sh b/tests/retrievers/test_retrievers_multimodal_redis_langchain.sh index 4fbc3585eb..873516ddc5 100644 --- a/tests/retrievers/test_retrievers_multimodal_redis_langchain.sh +++ b/tests/retrievers/test_retrievers_multimodal_redis_langchain.sh @@ -10,12 +10,12 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH - docker build --no-cache -t opea/multimodal-retriever-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/multimodal/redis/langchain/Dockerfile . + docker build --no-cache -t opea/retriever-multimodal-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/multimodal/redis/langchain/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/multimodal-retriever-redis built fail" + echo "opea/retriever-multimodal-redis built fail" exit 1 else - echo "opea/multimodal-retriever-redis built successful" + echo "opea/retriever-multimodal-redis built successful" fi } @@ -29,7 +29,7 @@ function start_service() { export INDEX_NAME="rag-redis" retriever_port=5434 unset http_proxy - docker run -d --name="test-comps-multimodal-retriever-redis-server" -p ${retriever_port}:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/multimodal-retriever-redis:comps + docker run -d --name="test-comps-retriever-multimodal-redis" -p ${retriever_port}:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/retriever-multimodal-redis:comps sleep 5m } @@ -50,18 +50,18 @@ function validate_microservice() { echo "[ retriever ] Content is as expected." else echo "[ retriever ] Content does not match the expected result: $CONTENT" - docker logs test-comps-multimodal-retriever-redis-server >> ${LOG_PATH}/retriever.log + docker logs test-comps-retriever-multimodal-redis >> ${LOG_PATH}/retriever.log exit 1 fi else echo "[ retriever ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-multimodal-retriever-redis-server >> ${LOG_PATH}/retriever.log + docker logs test-comps-retriever-multimodal-redis >> ${LOG_PATH}/retriever.log exit 1 fi } function stop_docker() { - cid_retrievers=$(docker ps -aq --filter "name=test-comps-multimodal-retriever*") + cid_retrievers=$(docker ps -aq --filter "name=test-comps-*") if [[ ! -z "$cid_retrievers" ]]; then docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s fi diff --git a/tests/retrievers/test_retrievers_neo4j_langchain.sh b/tests/retrievers/test_retrievers_neo4j_langchain.sh new file mode 100644 index 0000000000..9855fe75ff --- /dev/null +++ b/tests/retrievers/test_retrievers_neo4j_langchain.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + docker run -d -p 7474:7474 -p 7687:7687 -v ./data:/data -v ./plugins:/plugins --name test-comps-neo4j-apoc1 -e NEO4J_AUTH=neo4j/password -e NEO4J_PLUGINS=\[\"apoc\"\] neo4j:latest + sleep 30s + + docker build --no-cache -t opea/retriever-neo4j:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/neo4j/langchain/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/retriever-neo4j built fail" + exit 1 + else + echo "opea/retriever-neo4j built successful" + fi +} + +function start_service() { + # tei endpoint + tei_endpoint=5434 + model="BAAI/bge-base-en-v1.5" + docker run -d --name="test-comps-retriever-neo4j-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model + sleep 30s + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" + + # Neo4J retriever + export NEO4J_URI="bolt://${ip_address}:7687" + export NEO4J_USERNAME="neo4j" + export NEO4J_PASSWORD="password" + retriever_port=5435 + # unset http_proxy + export no_proxy="localhost,127.0.0.1,"${ip_address} + docker run -d --name="test-comps-retriever-neo4j-server" -p ${retriever_port}:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URI="bolt://${ip_address}:7687" -e NEO4J_USERNAME="neo4j" -e NEO4J_PASSWORD="password" opea/retriever-neo4j:comps + + sleep 1m +} + +function validate_microservice() { + retriever_port=5435 + export PATH="${HOME}/miniforge3/bin:$PATH" + source activate + URL="http://${ip_address}:$retriever_port/v1/retrieval" + + test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ retriever ] HTTP status is 200. Checking content..." + local CONTENT=$(curl -s -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/retriever.log) + + if echo "$CONTENT" | grep -q "retrieved_docs"; then + echo "[ retriever ] Content is as expected." + else + echo "[ retriever ] Content does not match the expected result: $CONTENT" + docker logs test-comps-retriever-neo4j-server >> ${LOG_PATH}/retriever.log + docker logs test-comps-retriever-neo4j-tei-endpoint >> ${LOG_PATH}/tei.log + exit 1 + fi + else + echo "[ retriever ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-retriever-neo4j-server >> ${LOG_PATH}/retriever.log + docker logs test-comps-retriever-neo4j-tei-endpoint >> ${LOG_PATH}/tei.log + exit 1 + fi +} + +function stop_docker() { + cid_retrievers=$(docker ps -aq --filter "name=test-comps-retriever-neo4j*") + if [[ ! -z "$cid_retrievers" ]]; then + docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s + fi + cid_db=$(docker ps -aq --filter "name=test-comps-neo4j-apoc1") + if [[ ! 
-z "$cid_db" ]]; then + docker stop $cid_db && docker rm $cid_db && sleep 1s + fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/retrievers/test_retrievers_pinecone_langchain.sh b/tests/retrievers/test_retrievers_pinecone_langchain.sh index 4077f4f390..643523b7d7 100644 --- a/tests/retrievers/test_retrievers_pinecone_langchain.sh +++ b/tests/retrievers/test_retrievers_pinecone_langchain.sh @@ -33,7 +33,7 @@ function start_service() { export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN retriever_port=5054 unset http_proxy - docker run -d --name="test-comps-retriever-pinecone-server" -p ${retriever_port}:7000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME -e INDEX_NAME=$PINECONE_INDEX_NAME opea/retriever-pinecone:comps + docker run -d --name="test-comps-retriever-pinecone-server" -p ${retriever_port}:7000 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME -e INDEX_NAME=$PINECONE_INDEX_NAME opea/retriever-pinecone:comps sleep 2m } diff --git a/tests/retrievers/test_retrievers_redis_langchain.sh b/tests/retrievers/test_retrievers_redis_langchain.sh index 773be81e64..dd34a2a0f2 100644 --- a/tests/retrievers/test_retrievers_redis_langchain.sh +++ b/tests/retrievers/test_retrievers_redis_langchain.sh @@ -34,9 +34,10 @@ function start_service() { # redis retriever export REDIS_URL="redis://${ip_address}:5010" export INDEX_NAME="rag-redis" + export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN retriever_port=5435 # unset http_proxy - docker run -d --name="test-comps-retriever-redis-server" -p ${retriever_port}:7000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/retriever-redis:comps + docker run -d --name="test-comps-retriever-redis-server" -p ${retriever_port}:7000 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/retriever-redis:comps sleep 3m }