From ac30d1af2b4008ca39be0f4a63c07450db562d71 Mon Sep 17 00:00:00 2001
From: Tiep Le
Date: Thu, 29 Aug 2024 22:00:32 +0000
Subject: [PATCH 01/37] updates

Signed-off-by: Tiep Le
---
 .../multimodal_langchain/redis/README.md      | 120 ++++++++++++++++++
 .../multimodal_langchain/redis/__init__.py    |   2 +
 .../redis/docker/Dockerfile                   |  29 +++++
 .../docker/docker_compose_retriever.yaml      |  33 +++++
 .../redis/multimodal_config.py                |  80 ++++++++++++
 .../redis/redis_schema.yml                    |  20 +++
 .../redis/requirements.txt                    |  14 ++
 .../redis/retriever_redis.py                  | 100 +++++++++++++++
 8 files changed, 398 insertions(+)
 create mode 100644 comps/retrievers/multimodal_langchain/redis/README.md
 create mode 100644 comps/retrievers/multimodal_langchain/redis/__init__.py
 create mode 100644 comps/retrievers/multimodal_langchain/redis/docker/Dockerfile
 create mode 100644 comps/retrievers/multimodal_langchain/redis/docker/docker_compose_retriever.yaml
 create mode 100644 comps/retrievers/multimodal_langchain/redis/multimodal_config.py
 create mode 100644 comps/retrievers/multimodal_langchain/redis/redis_schema.yml
 create mode 100644 comps/retrievers/multimodal_langchain/redis/requirements.txt
 create mode 100644 comps/retrievers/multimodal_langchain/redis/retriever_redis.py

diff --git a/comps/retrievers/multimodal_langchain/redis/README.md b/comps/retrievers/multimodal_langchain/redis/README.md
new file mode 100644
index 0000000000..a8d8e2fb0c
--- /dev/null
+++ b/comps/retrievers/multimodal_langchain/redis/README.md
@@ -0,0 +1,120 @@
+# Retriever Microservice
+
+This retriever microservice is a highly efficient search service designed for handling and retrieving embedding vectors from multimodal data. It operates by receiving an embedding vector as input and conducting a similarity search against vectors stored in a VectorDB database. Users must specify the VectorDB's URL and the index name, and the service searches within that index to find documents with the highest similarity to the input vector.
+
+The service primarily utilizes similarity measures in vector space to rapidly retrieve contextually similar documents. The vector-based retrieval approach is particularly suited for handling large datasets, offering fast and accurate search results that significantly enhance the efficiency and quality of information retrieval.
+
+Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial.
+
+## 🚀1. Start Microservice with Python (Option 1)
+
+To start the retriever microservice, you must first install the required python packages.
+
+### 1.1 Install Requirements
+
+```bash
+pip install -r requirements.txt
+```
+### 1.2 Setup VectorDB Service
+
+You need to setup your own VectorDB service (Redis in this example), and ingest your knowledge documents into the vector database.
+
+As for Redis, you could start a docker container using the following command.
+Remember to ingest data into it manually.
+
+```bash
+docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-stack:7.2.0-v9
+```
+### 1.3 Ingest images or video
+
+Upload a video or images using the dataprep microservice, instructions can be found [here](https://github.com/opea-project/GenAIComps/tree/main/comps/dataprep/redis/multimodal_langchain/README.md).
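+
+Before starting the retriever, you can optionally verify that ingestion created and populated the index. This is a quick sanity check; it assumes the `redis-vector-db` container started above and uses `${your_index_name}` as a placeholder for the index name chosen at ingestion time:
+
+```bash
+# List all RediSearch indices, then inspect the one created by dataprep
+docker exec redis-vector-db redis-cli FT._LIST
+docker exec redis-vector-db redis-cli FT.INFO ${your_index_name}
+```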
+
+### 1.4 Start Retriever Service
+
+```bash
+python retriever_redis.py
+```
+
+## 🚀2. Start Microservice with Docker (Option 2)
+
+### 2.1 Setup Environment Variables
+
+```bash
+export REDIS_URL="redis://${your_ip}:6379"
+export INDEX_NAME=${your_index_name}
+```
+
+### 2.2 Build Docker Image
+
+```bash
+cd ../../
+docker build -t opea/multimodal-retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain_multimodal/redis/docker/Dockerfile .
+```
+
+To start a docker container, you have two options:
+
+- A. Run Docker with CLI
+- B. Run Docker with Docker Compose
+
+You can choose one as needed.
+
+### 2.3 Run Docker with CLI (Option A)
+
+```bash
+docker run -d --name="retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/multimodal-retriever-redis:latest
+```
+
+### 2.4 Run Docker with Docker Compose (Option B)
+
+```bash
+cd langchain_multimodal/docker
+docker compose -f docker_compose_retriever.yaml up -d
+```
+
+## 🚀3. Consume Retriever Service
+
+### 3.1 Consume Retriever Service
+
+To consume the Retriever Microservice, you can generate a mock embedding vector of length 512 with Python.
+
+```bash
+your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
+curl http://${your_ip}:7000/v1/retrieval \
+  -X POST \
+  -d {\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
+  -H 'Content-Type: application/json'
+```
+
+You can set the parameters for the retriever.
+
+```bash
+your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
+curl http://localhost:7000/v1/retrieval \
+  -X POST \
+  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity\", \"k\":4}" \
+  -H 'Content-Type: application/json'
+```
+
+```bash
+your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
+curl http://localhost:7000/v1/retrieval \
+  -X POST \
+  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity_distance_threshold\", \"k\":4, \"distance_threshold\":1.0}" \
+  -H 'Content-Type: application/json'
+```
+
+```bash
+your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
+curl http://localhost:7000/v1/retrieval \
+  -X POST \
+  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity_score_threshold\", \"k\":4, \"score_threshold\":0.2}" \
+  -H 'Content-Type: application/json'
+```
+
+```bash
+your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
+curl http://localhost:7000/v1/retrieval \
+  -X POST \
+  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"mmr\", \"k\":4, \"fetch_k\":20, \"lambda_mult\":0.5}" \
+  -H 'Content-Type: application/json'
+```
diff --git a/comps/retrievers/multimodal_langchain/redis/__init__.py b/comps/retrievers/multimodal_langchain/redis/__init__.py
new file mode 100644
index 0000000000..916f3a44b2
--- /dev/null
+++ b/comps/retrievers/multimodal_langchain/redis/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Intel Corporation
+# 
SPDX-License-Identifier: Apache-2.0 diff --git a/comps/retrievers/multimodal_langchain/redis/docker/Dockerfile b/comps/retrievers/multimodal_langchain/redis/docker/Dockerfile new file mode 100644 index 0000000000..ed1258c579 --- /dev/null +++ b/comps/retrievers/multimodal_langchain/redis/docker/Dockerfile @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM langchain/langchain:latest + +ARG ARCH="cpu" + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + vim + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY comps /home/user/comps + +USER user + +RUN pip install --no-cache-dir --upgrade pip && \ + if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ + pip install --no-cache-dir -r /home/user/comps/retrievers/langchain_multimodal/redis/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/retrievers/multimodal_langchain/redis + +ENTRYPOINT ["python", "retriever_redis.py"] diff --git a/comps/retrievers/multimodal_langchain/redis/docker/docker_compose_retriever.yaml b/comps/retrievers/multimodal_langchain/redis/docker/docker_compose_retriever.yaml new file mode 100644 index 0000000000..4129a4c782 --- /dev/null +++ b/comps/retrievers/multimodal_langchain/redis/docker/docker_compose_retriever.yaml @@ -0,0 +1,33 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + tei_xeon_service: + image: retriever-redis:cpu-1.2 + container_name: tei-xeon-server + ports: + - "6060:80" + volumes: + - "./data:/data" + shm_size: 1g + command: --model-id "BridgeTower/bridgetower-large-itm-mlm-itc" + retriever: + image: opea/retriever-redis:latest + container_name: retriever-redis-server + ports: + - "7000:7000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/retrievers/multimodal_langchain/redis/multimodal_config.py b/comps/retrievers/multimodal_langchain/redis/multimodal_config.py new file mode 100644 index 0000000000..2ae183bfb8 --- /dev/null +++ b/comps/retrievers/multimodal_langchain/redis/multimodal_config.py @@ -0,0 +1,80 @@ +import os + + +def get_boolean_env_var(var_name, default_value=False): + """Retrieve the boolean value of an environment variable. + Args: + var_name (str): The name of the environment variable to retrieve. + default_value (bool): The default value to return if the variable + is not found. + Returns: + bool: The value of the environment variable, interpreted as a boolean. 
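+    Example:
+        "true", "1", "t", "y", "yes" (any casing) -> True
+        "false", "0", "f", "n", "no" (any casing) -> False
+        any other or unset value -> default_value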
+ """ + true_values = {"true", "1", "t", "y", "yes"} + false_values = {"false", "0", "f", "n", "no"} + + # Retrieve the environment variable's value + value = os.getenv(var_name, "").lower() + + # Decide the boolean value based on the content of the string + if value in true_values: + return True + elif value in false_values: + return False + else: + return default_value + + +# Check for openai API key +#if "OPENAI_API_KEY" not in os.environ: +# raise Exception("Must provide an OPENAI_API_KEY as an env var.") + + +# Whether or not to enable langchain debugging +DEBUG = get_boolean_env_var("DEBUG", False) +# Set DEBUG env var to "true" if you wish to enable LC debugging module +if DEBUG: + import langchain + + langchain.debug = True + + +# Embedding model +EMBED_MODEL = os.getenv("EMBED_MODEL", "BridgeTower/bridgetower-large-itm-mlm-itc") + +# Redis Connection Information +REDIS_HOST = os.getenv("REDIS_HOST", "localhost") +REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) + + +def format_redis_conn_from_env(): + redis_url = os.getenv("REDIS_URL", None) + if redis_url: + return redis_url + else: + using_ssl = get_boolean_env_var("REDIS_SSL", False) + start = "rediss://" if using_ssl else "redis://" + + # if using RBAC + password = os.getenv("REDIS_PASSWORD", None) + username = os.getenv("REDIS_USERNAME", "default") + if password is not None: + start += f"{username}:{password}@" + + return start + f"{REDIS_HOST}:{REDIS_PORT}" + + +REDIS_URL = format_redis_conn_from_env() + +# Vector Index Configuration +INDEX_NAME = os.getenv("INDEX_NAME", "test-index") + +current_file_path = os.path.abspath(__file__) +parent_dir = os.path.dirname(current_file_path) +REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "redis_schema.yml") +schema_path = os.path.join(parent_dir, REDIS_SCHEMA) +INDEX_SCHEMA = schema_path +TGI_ENDPOINT = os.getenv("TGI_ENDPOINT", "http://localhost:8080") +TGI_ENDPOINT_NO_RAG = os.getenv("TGI_ENDPOINT_NO_RAG", "http://localhost:8081") +NUM_RETRIEVED_RESULTS = int(os.getenv("NUM_RETRIEVED_RESULTS", 1)) +MULTIMODAL_CHAT_MODEL = os.getenv("MULTIMODAL_CHAT_MODEL", "llava-hf/llava-1.5-13b-hf") diff --git a/comps/retrievers/multimodal_langchain/redis/redis_schema.yml b/comps/retrievers/multimodal_langchain/redis/redis_schema.yml new file mode 100644 index 0000000000..f0d1bd5674 --- /dev/null +++ b/comps/retrievers/multimodal_langchain/redis/redis_schema.yml @@ -0,0 +1,20 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +text: +- name: content +- name: b64_img_str +- name: video_id +- name: source_video +- name: embedding_type +- name: title +- name: description +- name: transcript_for_inference +numeric: +- name: time_of_frame_ms +vector: +- name: content_vector + algorithm: HNSW + datatype: FLOAT32 + dims: 512 + distance_metric: COSINE diff --git a/comps/retrievers/multimodal_langchain/redis/requirements.txt b/comps/retrievers/multimodal_langchain/redis/requirements.txt new file mode 100644 index 0000000000..3720190d3e --- /dev/null +++ b/comps/retrievers/multimodal_langchain/redis/requirements.txt @@ -0,0 +1,14 @@ +docarray[full] +easyocr +fastapi +langchain_community +langsmith +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator +pymupdf +redis +sentence_transformers +shortuuid +uvicorn diff --git a/comps/retrievers/multimodal_langchain/redis/retriever_redis.py b/comps/retrievers/multimodal_langchain/redis/retriever_redis.py new file mode 100644 index 0000000000..bb90623421 --- /dev/null +++ 
b/comps/retrievers/multimodal_langchain/redis/retriever_redis.py @@ -0,0 +1,100 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time +from typing import Union + +from comps import BridgeTowerEmbedding +from langchain_community.vectorstores import Redis +from langsmith import traceable +from multimodal_config import INDEX_NAME, REDIS_URL, REDIS_SCHEMA + +from comps import ( + EmbedMultimodalDoc, + SearchedMultimodalDoc, + ServiceType, + TextDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) +from comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + RetrievalRequest, + RetrievalResponse, + RetrievalResponseData, +) + +@register_microservice( + name="opea_service@retriever_redis", + service_type=ServiceType.RETRIEVER, + endpoint="/v1/retrieval", + host="0.0.0.0", + port=7000, +) +@traceable(run_type="retriever") +@register_statistics(names=["opea_service@retriever_redis"]) +def retrieve( + input: Union[EmbedMultimodalDoc, RetrievalRequest, ChatCompletionRequest] +) -> Union[SearchedMultimodalDoc, RetrievalResponse, ChatCompletionRequest]: + + start = time.time() + # check if the Redis index has data + if vector_db.client.keys() == []: + search_res = [] + else: + if isinstance(input, EmbedMultimodalDoc): + query = input.text + else: + # for RetrievalRequest, ChatCompletionRequest + query = input.input + # if the Redis index has data, perform the search + if input.search_type == "similarity": + search_res = vector_db.similarity_search_by_vector(embedding=input.embedding, k=input.k) + elif input.search_type == "similarity_distance_threshold": + if input.distance_threshold is None: + raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") + search_res = vector_db.similarity_search_by_vector( + embedding=input.embedding, k=input.k, distance_threshold=input.distance_threshold + ) + elif input.search_type == "similarity_score_threshold": + docs_and_similarities = vector_db.similarity_search_with_relevance_scores( + query=input.text, k=input.k, score_threshold=input.score_threshold + ) + search_res = [doc for doc, _ in docs_and_similarities] + elif input.search_type == "mmr": + search_res = vector_db.max_marginal_relevance_search( + query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult + ) + else: + raise ValueError(f"{input.search_type} not valid") + + # return different response format + retrieved_docs = [] + if isinstance(input, EmbedMultimodalDoc): + metadata_list = [] + for r in search_res: + metadata_list.append(r.metadata) + retrieved_docs.append(TextDoc(text=r.page_content)) + result = SearchedMultimodalDoc(retrieved_docs=retrieved_docs, initial_query=input.text, metadata=metadata_list) + else: + for r in search_res: + retrieved_docs.append(RetrievalResponseData(text=r.page_content, metadata=r.metadata)) + if isinstance(input, RetrievalRequest): + result = RetrievalResponse(retrieved_docs=retrieved_docs) + elif isinstance(input, ChatCompletionRequest): + input.retrieved_docs = retrieved_docs + input.documents = [doc.text for doc in retrieved_docs] + result = input + + statistics_dict["opea_service@retriever_redis"].append_latency(time.time() - start, None) + return result + + +if __name__ == "__main__": + + embeddings = BridgeTowerEmbedding() + vector_db = Redis.from_existing_index(embedding=embeddings, schema=REDIS_SCHEMA, index_name=INDEX_NAME, redis_url=REDIS_URL) + 
opea_microservices["opea_service@retriever_redis"].start() From 77632f2686fbdebe723edc80b3ee6faba7532f30 Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Fri, 30 Aug 2024 19:04:08 +0000 Subject: [PATCH 02/37] cosmetic Signed-off-by: siddhivelankar23 --- comps/retrievers/multimodal_langchain/redis/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comps/retrievers/multimodal_langchain/redis/README.md b/comps/retrievers/multimodal_langchain/redis/README.md index a8d8e2fb0c..31bdaff384 100644 --- a/comps/retrievers/multimodal_langchain/redis/README.md +++ b/comps/retrievers/multimodal_langchain/redis/README.md @@ -8,7 +8,7 @@ Overall, this microservice provides robust backend support for applications requ ## 🚀1. Start Microservice with Python (Option 1) -To start the retriever microservice, you must first install the required python packages. +To start the retriever microservice, you must first install the required python packages. ### 1.1 Install Requirements From 4c3ea33bfa9848107282561522a1b97fb61e048f Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Tue, 3 Sep 2024 00:17:07 +0000 Subject: [PATCH 03/37] update redis schema Signed-off-by: siddhivelankar23 --- .../redis/redis_schema.yml | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/comps/retrievers/multimodal_langchain/redis/redis_schema.yml b/comps/retrievers/multimodal_langchain/redis/redis_schema.yml index f0d1bd5674..32f4a79ae4 100644 --- a/comps/retrievers/multimodal_langchain/redis/redis_schema.yml +++ b/comps/retrievers/multimodal_langchain/redis/redis_schema.yml @@ -2,19 +2,18 @@ # SPDX-License-Identifier: Apache-2.0 text: -- name: content -- name: b64_img_str -- name: video_id -- name: source_video -- name: embedding_type -- name: title -- name: description -- name: transcript_for_inference + - name: content + - name: b64_img_str + - name: video_id + - name: source_video + - name: embedding_type + - name: title + - name: transcript_for_inference numeric: -- name: time_of_frame_ms + - name: time_of_frame_ms vector: -- name: content_vector - algorithm: HNSW - datatype: FLOAT32 - dims: 512 - distance_metric: COSINE + - name: content_vector + algorithm: HNSW + datatype: FLOAT32 + dims: 512 + distance_metric: COSINE From 168803315e27f58767ba3b7d5e3ab7aeaf2527d1 Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Tue, 3 Sep 2024 00:30:48 +0000 Subject: [PATCH 04/37] update multimodal config and docker compose retriever Signed-off-by: siddhivelankar23 --- .../redis/docker/docker_compose_retriever.yaml | 12 +----------- .../multimodal_langchain/redis/multimodal_config.py | 12 ++++++------ 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/comps/retrievers/multimodal_langchain/redis/docker/docker_compose_retriever.yaml b/comps/retrievers/multimodal_langchain/redis/docker/docker_compose_retriever.yaml index 4129a4c782..044e723ac0 100644 --- a/comps/retrievers/multimodal_langchain/redis/docker/docker_compose_retriever.yaml +++ b/comps/retrievers/multimodal_langchain/redis/docker/docker_compose_retriever.yaml @@ -1,18 +1,9 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -version: "3.8" +version: "1.0" services: - tei_xeon_service: - image: retriever-redis:cpu-1.2 - container_name: tei-xeon-server - ports: - - "6060:80" - volumes: - - "./data:/data" - shm_size: 1g - command: --model-id "BridgeTower/bridgetower-large-itm-mlm-itc" retriever: image: opea/retriever-redis:latest container_name: retriever-redis-server @@ -25,7 
+16,6 @@ services: https_proxy: ${https_proxy} REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} restart: unless-stopped networks: diff --git a/comps/retrievers/multimodal_langchain/redis/multimodal_config.py b/comps/retrievers/multimodal_langchain/redis/multimodal_config.py index 2ae183bfb8..f92d5755d7 100644 --- a/comps/retrievers/multimodal_langchain/redis/multimodal_config.py +++ b/comps/retrievers/multimodal_langchain/redis/multimodal_config.py @@ -1,5 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + import os +current_file_path = os.path.abspath(__file__) +parent_dir = os.path.dirname(current_file_path) def get_boolean_env_var(var_name, default_value=False): """Retrieve the boolean value of an environment variable. @@ -69,12 +74,7 @@ def format_redis_conn_from_env(): # Vector Index Configuration INDEX_NAME = os.getenv("INDEX_NAME", "test-index") -current_file_path = os.path.abspath(__file__) -parent_dir = os.path.dirname(current_file_path) REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "redis_schema.yml") schema_path = os.path.join(parent_dir, REDIS_SCHEMA) INDEX_SCHEMA = schema_path -TGI_ENDPOINT = os.getenv("TGI_ENDPOINT", "http://localhost:8080") -TGI_ENDPOINT_NO_RAG = os.getenv("TGI_ENDPOINT_NO_RAG", "http://localhost:8081") -NUM_RETRIEVED_RESULTS = int(os.getenv("NUM_RETRIEVED_RESULTS", 1)) -MULTIMODAL_CHAT_MODEL = os.getenv("MULTIMODAL_CHAT_MODEL", "llava-hf/llava-1.5-13b-hf") +NUM_RETRIEVED_RESULTS = int(os.getenv("NUM_RETRIEVED_RESULTS", 1)) \ No newline at end of file From bc1699f2b0efad7348249139af563ff54728bf19 Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Tue, 3 Sep 2024 00:34:02 +0000 Subject: [PATCH 05/37] update requirements Signed-off-by: siddhivelankar23 --- comps/retrievers/multimodal_langchain/redis/requirements.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/comps/retrievers/multimodal_langchain/redis/requirements.txt b/comps/retrievers/multimodal_langchain/redis/requirements.txt index 3720190d3e..e6ceddd4ef 100644 --- a/comps/retrievers/multimodal_langchain/redis/requirements.txt +++ b/comps/retrievers/multimodal_langchain/redis/requirements.txt @@ -1,14 +1,11 @@ docarray[full] -easyocr fastapi langchain_community -langsmith opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk prometheus-fastapi-instrumentator -pymupdf redis -sentence_transformers shortuuid uvicorn +transformers \ No newline at end of file From 5ecbfdb11c56bee8b3fcca90a6404762df8a3fe1 Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Tue, 3 Sep 2024 00:47:09 +0000 Subject: [PATCH 06/37] update retriever redis Signed-off-by: siddhivelankar23 --- .../redis/retriever_redis.py | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/comps/retrievers/multimodal_langchain/redis/retriever_redis.py b/comps/retrievers/multimodal_langchain/redis/retriever_redis.py index bb90623421..50d567f821 100644 --- a/comps/retrievers/multimodal_langchain/redis/retriever_redis.py +++ b/comps/retrievers/multimodal_langchain/redis/retriever_redis.py @@ -1,13 +1,11 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import os import time from typing import Union -from comps import BridgeTowerEmbedding +from comps.embeddings.multimodal_embeddings.bridgetower import BridgeTowerEmbedding from langchain_community.vectorstores import Redis -from langsmith import traceable from multimodal_config import INDEX_NAME, REDIS_URL, REDIS_SCHEMA from 
comps import ( @@ -28,14 +26,14 @@ ) @register_microservice( - name="opea_service@retriever_redis", + name="opea_service@multimodal_retriever_redis", service_type=ServiceType.RETRIEVER, - endpoint="/v1/retrieval", + endpoint="/v1/multimodal_retrieval", host="0.0.0.0", port=7000, ) -@traceable(run_type="retriever") -@register_statistics(names=["opea_service@retriever_redis"]) + +@register_statistics(names=["opea_service@multimodal_retriever_redis"]) def retrieve( input: Union[EmbedMultimodalDoc, RetrievalRequest, ChatCompletionRequest] ) -> Union[SearchedMultimodalDoc, RetrievalResponse, ChatCompletionRequest]: @@ -45,11 +43,6 @@ def retrieve( if vector_db.client.keys() == []: search_res = [] else: - if isinstance(input, EmbedMultimodalDoc): - query = input.text - else: - # for RetrievalRequest, ChatCompletionRequest - query = input.input # if the Redis index has data, perform the search if input.search_type == "similarity": search_res = vector_db.similarity_search_by_vector(embedding=input.embedding, k=input.k) @@ -89,7 +82,7 @@ def retrieve( input.documents = [doc.text for doc in retrieved_docs] result = input - statistics_dict["opea_service@retriever_redis"].append_latency(time.time() - start, None) + statistics_dict["opea_service@multimodal_retriever_redis"].append_latency(time.time() - start, None) return result @@ -97,4 +90,4 @@ def retrieve( embeddings = BridgeTowerEmbedding() vector_db = Redis.from_existing_index(embedding=embeddings, schema=REDIS_SCHEMA, index_name=INDEX_NAME, redis_url=REDIS_URL) - opea_microservices["opea_service@retriever_redis"].start() + opea_microservices["opea_service@multimodal_retriever_redis"].start() From bc2329071c454db261821634ce0f85f9f3e99f0c Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Tue, 3 Sep 2024 02:17:28 +0000 Subject: [PATCH 07/37] multimodal retriever implementation Signed-off-by: siddhivelankar23 --- .../multimodal_langchain/redis/README.md | 21 ++++++++++--------- .../redis/docker/Dockerfile | 2 +- .../docker/docker_compose_retriever.yaml | 4 ++-- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/comps/retrievers/multimodal_langchain/redis/README.md b/comps/retrievers/multimodal_langchain/redis/README.md index 31bdaff384..1b7fa72584 100644 --- a/comps/retrievers/multimodal_langchain/redis/README.md +++ b/comps/retrievers/multimodal_langchain/redis/README.md @@ -40,6 +40,7 @@ python langchain_multimodal/retriever_redis.py ### 2.1 Setup Environment Variables ```bash +export your_ip=$(hostname -I | awk '{print $1}') export REDIS_URL="redis://${your_ip}:6379" export INDEX_NAME=${your_index_name} ``` @@ -47,8 +48,8 @@ export INDEX_NAME=${your_index_name} ### 2.2 Build Docker Image ```bash -cd ../../ -docker build -t opea/multimodal-retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain_multimodal/redis/docker/Dockerfile . +cd ../../../../ +docker build -t opea/multimodal-retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/multimodal_langchain/redis/docker/Dockerfile . ``` To start a docker container, you have two options: @@ -61,13 +62,13 @@ You can choose one as needed. 
### 2.3 Run Docker with CLI (Option A) ```bash -docker run -d --name="retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/multimodal-retriever-redis:latest +docker run -d --name="multimodal-retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/multimodal-retriever-redis:latest ``` ### 2.4 Run Docker with Docker Compose (Option B) ```bash -cd langchain_multimodal/docker +cd docker docker compose -f docker_compose_retriever.yaml up -d ``` @@ -79,9 +80,9 @@ To consume the Retriever Microservice, you can generate a mock embedding vector ```bash your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)") -curl http://${your_ip}:7000/v1/retrieval \ +curl http://${your_ip}:7000/v1/multimodal_retrieval \ -X POST \ - -d {\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \ + -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \ -H 'Content-Type: application/json' ``` @@ -89,7 +90,7 @@ You can set the parameters for the retriever. ```bash your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)") -curl http://localhost:7000/v1/retrieval \ +curl http://localhost:7000/v1/multimodal_retrieval \ -X POST \ -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity\", \"k\":4}" \ -H 'Content-Type: application/json' @@ -97,7 +98,7 @@ curl http://localhost:7000/v1/retrieval \ ```bash your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)") -curl http://localhost:7000/v1/retrieval \ +curl http://localhost:7000/v1/multimodal_retrieval \ -X POST \ -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity_distance_threshold\", \"k\":4, \"distance_threshold\":1.0}" \ -H 'Content-Type: application/json' @@ -105,7 +106,7 @@ curl http://localhost:7000/v1/retrieval \ ```bash your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)") -curl http://localhost:7000/v1/retrieval \ +curl http://localhost:7000/v1/multimodal_retrieval \ -X POST \ -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity_score_threshold\", \"k\":4, \"score_threshold\":0.2}" \ -H 'Content-Type: application/json' @@ -113,7 +114,7 @@ curl http://localhost:7000/v1/retrieval \ ```bash your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)") -curl http://localhost:7000/v1/retrieval \ +curl http://localhost:7000/v1/multimodal_retrieval \ -X POST \ -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"mmr\", \"k\":4, \"fetch_k\":20, \"lambda_mult\":0.5}" \ -H 'Content-Type: application/json' diff --git a/comps/retrievers/multimodal_langchain/redis/docker/Dockerfile b/comps/retrievers/multimodal_langchain/redis/docker/Dockerfile index ed1258c579..33077c3037 100644 --- a/comps/retrievers/multimodal_langchain/redis/docker/Dockerfile +++ b/comps/retrievers/multimodal_langchain/redis/docker/Dockerfile @@ -20,7 +20,7 @@ USER user RUN pip install --no-cache-dir 
--upgrade pip && \ if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/retrievers/langchain_multimodal/redis/requirements.txt + pip install --no-cache-dir -r /home/user/comps/retrievers/multimodal_langchain/redis/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user diff --git a/comps/retrievers/multimodal_langchain/redis/docker/docker_compose_retriever.yaml b/comps/retrievers/multimodal_langchain/redis/docker/docker_compose_retriever.yaml index 044e723ac0..efba29a4e1 100644 --- a/comps/retrievers/multimodal_langchain/redis/docker/docker_compose_retriever.yaml +++ b/comps/retrievers/multimodal_langchain/redis/docker/docker_compose_retriever.yaml @@ -5,8 +5,8 @@ version: "1.0" services: retriever: - image: opea/retriever-redis:latest - container_name: retriever-redis-server + image: opea/multimodal-retriever-redis:latest + container_name: multimodal-retriever-redis-server ports: - "7000:7000" ipc: host From c56151d32e5143e181132495f1e9773e069accaa Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Tue, 3 Sep 2024 02:30:00 +0000 Subject: [PATCH 08/37] test for multimodal retriever Signed-off-by: siddhivelankar23 --- ...t_retrievers_multimodal_langchain_redis.sh | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 tests/test_retrievers_multimodal_langchain_redis.sh diff --git a/tests/test_retrievers_multimodal_langchain_redis.sh b/tests/test_retrievers_multimodal_langchain_redis.sh new file mode 100644 index 0000000000..3d3c37ac30 --- /dev/null +++ b/tests/test_retrievers_multimodal_langchain_redis.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t opea/multimodal-retriever-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/multimodal_langchain/redis/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/multimodal-retriever-redis built fail" + exit 1 + else + echo "opea/multimodal-retriever-redis built successful" + fi +} + +function start_service() { + # redis + docker run -d --name test-comps-multimodal-retriever-redis-vector-db -p 5010:6379 -p 5011:8001 -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy redis/redis-stack:7.2.0-v9 + sleep 10s + + # redis retriever + export REDIS_URL="redis://${ip_address}:5010" + export INDEX_NAME="rag-redis" + retriever_port=5009 + unset http_proxy + docker run -d --name="test-comps-multimodal-retriever-redis-server" -p ${retriever_port}:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/multimodal-retriever-redis:comps + + sleep 3m +} + +function validate_microservice() { + retriever_port=5009 + export PATH="${HOME}/miniforge3/bin:$PATH" + source activate + URL="http://${ip_address}:$retriever_port/v1/multimodal_retrieval" + test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)") + + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ retriever ] HTTP status is 200. Checking content..." 
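+        # Re-issue the request to capture the response body; a successful
+        # retrieval returns JSON containing a "retrieved_docs" field.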
+ local CONTENT=$(curl -s -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/retriever.log) + + if echo "$CONTENT" | grep -q "retrieved_docs"; then + echo "[ retriever ] Content is as expected." + else + echo "[ retriever ] Content does not match the expected result: $CONTENT" + docker logs test-comps-multimodal-retriever-redis-server >> ${LOG_PATH}/retriever.log + exit 1 + fi + else + echo "[ retriever ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-multimodal-retriever-redis-server >> ${LOG_PATH}/retriever.log + exit 1 + fi +} + +function stop_docker() { + cid_retrievers=$(docker ps -aq --filter "name=test-comps-multimodal-retriever*") + if [[ ! -z "$cid_retrievers" ]]; then + docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s + fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + # echo y | docker system prune + +} + +main From 4c26828fdb8f0168d9f4ef14b3992ce52790c4e7 Mon Sep 17 00:00:00 2001 From: sjagtap1803 Date: Tue, 3 Sep 2024 20:26:02 +0000 Subject: [PATCH 09/37] include prompt preparation for multimodal rag on videos application Signed-off-by: sjagtap1803 --- comps/lvms/lvm.py | 32 +++++++++++++++++++++++++------- comps/lvms/lvm_tgi.py | 42 ++++++++++++++++++++++++++++++++---------- comps/lvms/template.py | 12 ++++++++++++ 3 files changed, 69 insertions(+), 17 deletions(-) create mode 100644 comps/lvms/template.py diff --git a/comps/lvms/lvm.py b/comps/lvms/lvm.py index 4ae900aae2..8802564c73 100644 --- a/comps/lvms/lvm.py +++ b/comps/lvms/lvm.py @@ -5,12 +5,13 @@ import json import os import time - +from typing import Union import requests from comps import ( CustomLogger, LVMDoc, + SearchedMultimodalDoc, ServiceType, TextDoc, opea_microservices, @@ -18,6 +19,7 @@ register_statistics, statistics_dict, ) +from template import ChatTemplate logger = CustomLogger("lvm") logflag = os.getenv("LOGFLAG", False) @@ -29,17 +31,32 @@ endpoint="/v1/lvm", host="0.0.0.0", port=9399, - input_datatype=LVMDoc, - output_datatype=TextDoc, ) @register_statistics(names=["opea_service@lvm"]) -async def lvm(request: LVMDoc): +async def lvm(request: Union[LVMDoc, SearchedMultimodalDoc]) -> TextDoc: if logflag: logger.info(request) start = time.time() - img_b64_str = request.image - prompt = request.prompt - max_new_tokens = request.max_new_tokens + if isinstance(request, SearchedMultimodalDoc): + if logflag: + logger.info("[SearchedMultimodalDoc ] input from retriever microservice") + retrieved_metadatas = request.metadata + if application == "MM_RAG_ON_VIDEOS": + img_b64_str = retrieved_metadatas[0]['b64_img_str'] + initial_query = request.initial_query + prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, retrieved_metadatas) + # use default lvm parameters for inferencing + new_input = LVMDoc(image=img_b64_str, prompt=prompt) + max_new_tokens = new_input.max_new_tokens + if logflag: + logger.info(f"prompt generated for [SearchedMultimodalDoc ] input from retriever microservice: {prompt}") + else: + raise NotImplementedError(f"For application {application}: it has NOT implemented SearchedMultimodalDoc input from retriever microservice!") + + else: + img_b64_str = request.image + prompt = request.prompt + max_new_tokens = request.max_new_tokens inputs = {"img_b64_str": img_b64_str, "prompt": prompt, "max_new_tokens": max_new_tokens} @@ -55,6 +72,7 @@ async def lvm(request: 
LVMDoc): if __name__ == "__main__": lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399") + application = os.getenv("APPLICATION", None) logger.info("[LVM] LVM initialized.") opea_microservices["opea_service@lvm"].start() diff --git a/comps/lvms/lvm_tgi.py b/comps/lvms/lvm_tgi.py index 9492b4eafc..6074d00dba 100644 --- a/comps/lvms/lvm_tgi.py +++ b/comps/lvms/lvm_tgi.py @@ -6,18 +6,21 @@ from fastapi.responses import StreamingResponse from huggingface_hub import AsyncInferenceClient - +from typing import Union from comps import ( CustomLogger, LVMDoc, ServiceType, TextDoc, + SearchedMultimodalDoc, opea_microservices, register_microservice, register_statistics, statistics_dict, ) +from template import ChatTemplate + logger = CustomLogger("lvm_tgi") logflag = os.getenv("LOGFLAG", False) @@ -32,19 +35,37 @@ output_datatype=TextDoc, ) @register_statistics(names=["opea_service@lvm_tgi"]) -async def lvm(request: LVMDoc): +async def lvm(request: Union[LVMDoc, SearchedMultimodalDoc]) -> TextDoc: if logflag: logger.info(request) start = time.time() + if isinstance(request, SearchedMultimodalDoc): + # This is to construct LVMDoc from SearchedMultimodalDoc input from retriever microservice + # for Multimodal RAG on Videos application + if logflag: + logger.info("[SearchedMultimodalDoc ] input from retriever microservice") + retrieved_metadatas = request.metadata + if application == "MM_RAG_ON_VIDEOS": + img_b64_str = retrieved_metadatas[0]['b64_img_str'] + initial_query = request.initial_query + prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, retrieved_metadatas) + # use default lvm parameters for inferencing + new_request = LVMDoc(image=img_b64_str, prompt=prompt) + if logflag: + logger.info(f"prompt generated for [SearchedMultimodalDoc ] input from retriever microservice: {prompt}") + else: + raise NotImplementedError(f"For application {application}: it has NOT implemented SearchedMultimodalDoc input from retriever microservice!") + else: + new_request = request stream_gen_time = [] - img_b64_str = request.image - prompt = request.prompt - max_new_tokens = request.max_new_tokens - streaming = request.streaming - repetition_penalty = request.repetition_penalty - temperature = request.temperature - top_k = request.top_k - top_p = request.top_p + img_b64_str = new_request.image + prompt = new_request.prompt + max_new_tokens = new_request.max_new_tokens + streaming = new_request.streaming + repetition_penalty = new_request.repetition_penalty + temperature = new_request.temperature + top_k = new_request.top_k + top_p = new_request.top_p image = f"data:image/png;base64,{img_b64_str}" image_prompt = f"![]({image})\n{prompt}\nASSISTANT:" @@ -92,6 +113,7 @@ async def stream_generator(): if __name__ == "__main__": lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399") + application = os.getenv("APPLICATION", None) lvm_client = AsyncInferenceClient(lvm_endpoint) logger.info("[LVM] LVM initialized.") opea_microservices["opea_service@lvm_tgi"].start() diff --git a/comps/lvms/template.py b/comps/lvms/template.py new file mode 100644 index 0000000000..60f3831ace --- /dev/null +++ b/comps/lvms/template.py @@ -0,0 +1,12 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +class ChatTemplate: + + @staticmethod + def generate_multimodal_rag_on_videos_prompt(initial_query: str, retrieved_metadatas): + context = retrieved_metadatas[0]["transcript_for_inference"] + template = """USER: +The transcript associated with the image is 
'{context}'. {question} +ASSISTANT:""" + return template.format(context=context, question=initial_query) \ No newline at end of file From fefcf72459eda792f0614508d5d7fd23a3999817 Mon Sep 17 00:00:00 2001 From: sjagtap1803 Date: Tue, 3 Sep 2024 20:52:39 +0000 Subject: [PATCH 10/37] fix template Signed-off-by: sjagtap1803 --- comps/lvms/lvm.py | 1 - comps/lvms/template.py | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/comps/lvms/lvm.py b/comps/lvms/lvm.py index 8802564c73..a788fdf36e 100644 --- a/comps/lvms/lvm.py +++ b/comps/lvms/lvm.py @@ -59,7 +59,6 @@ async def lvm(request: Union[LVMDoc, SearchedMultimodalDoc]) -> TextDoc: max_new_tokens = request.max_new_tokens inputs = {"img_b64_str": img_b64_str, "prompt": prompt, "max_new_tokens": max_new_tokens} - # forward to the LLaVA server response = requests.post(url=f"{lvm_endpoint}/generate", data=json.dumps(inputs), proxies={"http": None}) diff --git a/comps/lvms/template.py b/comps/lvms/template.py index 60f3831ace..615cb2c9a1 100644 --- a/comps/lvms/template.py +++ b/comps/lvms/template.py @@ -6,7 +6,5 @@ class ChatTemplate: @staticmethod def generate_multimodal_rag_on_videos_prompt(initial_query: str, retrieved_metadatas): context = retrieved_metadatas[0]["transcript_for_inference"] - template = """USER: -The transcript associated with the image is '{context}'. {question} -ASSISTANT:""" + template = """The transcript associated with the image is '{context}'. {question}""" return template.format(context=context, question=initial_query) \ No newline at end of file From 42803933a7c58468f39a4b8a116dee8060026e54 Mon Sep 17 00:00:00 2001 From: sjagtap1803 Date: Tue, 3 Sep 2024 20:56:15 +0000 Subject: [PATCH 11/37] add test for llava for mm_rag_on_videos Signed-off-by: sjagtap1803 --- tests/test_lvms_llava_mm_rag_on_videos.sh | 69 +++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 tests/test_lvms_llava_mm_rag_on_videos.sh diff --git a/tests/test_lvms_llava_mm_rag_on_videos.sh b/tests/test_lvms_llava_mm_rag_on_videos.sh new file mode 100644 index 0000000000..c6283a5254 --- /dev/null +++ b/tests/test_lvms_llava_mm_rag_on_videos.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + docker build --no-cache -t opea/llava:comps -f comps/lvms/llava/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/llava built fail" + exit 1 + else + echo "opea/llava built successful" + fi + docker build --no-cache -t opea/lvm:comps -f comps/lvms/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/lvm built fail" + exit 1 + else + echo "opea/lvm built successful" + fi +} + +function start_service() { + unset http_proxy + docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5029:8399 --ipc=host opea/llava:comps + docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:5029 -e APPLICATION="MM_RAG_ON_VIDEOS" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5028:9399 --ipc=host opea/lvm:comps + sleep 8m +} + +function validate_microservice() { + + result=$(http_proxy="" curl http://localhost:5028/v1/lvm -XPOST -d '{"retrieved_docs": [], "initial_query": "What is this?", "top_n": 1, "metadata": [{"b64_img_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "transcript_for_inference": "yellow image"}]}' -H 'Content-Type: application/json') + if [[ $result == *"yellow"* ]]; then + echo "Result correct." + else + echo "Result wrong." + docker logs test-comps-lvm-llava + docker logs test-comps-lvm + exit 1 + fi + +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-lvm*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + # echo y | docker system prune + +} + +main From 3c4b5052ca78d0b25f58faef0437b45a826fd9f5 Mon Sep 17 00:00:00 2001 From: sjagtap1803 Date: Tue, 3 Sep 2024 21:18:55 +0000 Subject: [PATCH 12/37] update test Signed-off-by: sjagtap1803 --- tests/test_dataprep_redis_multimodal_langchain.sh | 2 +- tests/test_lvms_llava_mm_rag_on_videos.sh | 2 +- tests/test_retrievers_multimodal_langchain_redis.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_dataprep_redis_multimodal_langchain.sh b/tests/test_dataprep_redis_multimodal_langchain.sh index e5a75f8604..17d6985a17 100644 --- a/tests/test_dataprep_redis_multimodal_langchain.sh +++ b/tests/test_dataprep_redis_multimodal_langchain.sh @@ -271,7 +271,7 @@ function main() { validate_microservice delete_data stop_docker - # echo y | docker system prune + echo y | docker system prune } diff --git a/tests/test_lvms_llava_mm_rag_on_videos.sh b/tests/test_lvms_llava_mm_rag_on_videos.sh index c6283a5254..5d3fdc2113 100644 --- a/tests/test_lvms_llava_mm_rag_on_videos.sh +++ b/tests/test_lvms_llava_mm_rag_on_videos.sh @@ -62,7 +62,7 @@ function main() { validate_microservice stop_docker - # echo y | docker system prune + echo y | docker system prune } diff --git a/tests/test_retrievers_multimodal_langchain_redis.sh b/tests/test_retrievers_multimodal_langchain_redis.sh index 3d3c37ac30..504ec085c0 100644 --- a/tests/test_retrievers_multimodal_langchain_redis.sh +++ b/tests/test_retrievers_multimodal_langchain_redis.sh @@ -77,7 +77,7 @@ function main() { validate_microservice stop_docker - # echo y | docker system prune + echo y | docker system prune } From 5aa4c021e4af1dcabed96a1bd9e288a3945cabcc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Sep 2024 21:31:49 +0000 Subject: [PATCH 13/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/lvms/lvm.py | 13 ++++++++---- comps/lvms/lvm_tgi.py | 20 +++++++++++-------- comps/lvms/template.py | 3 ++- .../multimodal_langchain/redis/README.md | 6 ++++-- .../redis/multimodal_config.py | 6 ++++-- 
.../redis/requirements.txt | 2 +- .../redis/retriever_redis.py | 10 ++++++---- 7 files changed, 38 insertions(+), 22 deletions(-) diff --git a/comps/lvms/lvm.py b/comps/lvms/lvm.py index a788fdf36e..41ee79e25f 100644 --- a/comps/lvms/lvm.py +++ b/comps/lvms/lvm.py @@ -6,7 +6,9 @@ import os import time from typing import Union + import requests +from template import ChatTemplate from comps import ( CustomLogger, @@ -19,7 +21,6 @@ register_statistics, statistics_dict, ) -from template import ChatTemplate logger = CustomLogger("lvm") logflag = os.getenv("LOGFLAG", False) @@ -42,16 +43,20 @@ async def lvm(request: Union[LVMDoc, SearchedMultimodalDoc]) -> TextDoc: logger.info("[SearchedMultimodalDoc ] input from retriever microservice") retrieved_metadatas = request.metadata if application == "MM_RAG_ON_VIDEOS": - img_b64_str = retrieved_metadatas[0]['b64_img_str'] + img_b64_str = retrieved_metadatas[0]["b64_img_str"] initial_query = request.initial_query prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, retrieved_metadatas) # use default lvm parameters for inferencing new_input = LVMDoc(image=img_b64_str, prompt=prompt) max_new_tokens = new_input.max_new_tokens if logflag: - logger.info(f"prompt generated for [SearchedMultimodalDoc ] input from retriever microservice: {prompt}") + logger.info( + f"prompt generated for [SearchedMultimodalDoc ] input from retriever microservice: {prompt}" + ) else: - raise NotImplementedError(f"For application {application}: it has NOT implemented SearchedMultimodalDoc input from retriever microservice!") + raise NotImplementedError( + f"For application {application}: it has NOT implemented SearchedMultimodalDoc input from retriever microservice!" + ) else: img_b64_str = request.image diff --git a/comps/lvms/lvm_tgi.py b/comps/lvms/lvm_tgi.py index 6074d00dba..e8ea86cf43 100644 --- a/comps/lvms/lvm_tgi.py +++ b/comps/lvms/lvm_tgi.py @@ -3,24 +3,24 @@ import os import time +from typing import Union from fastapi.responses import StreamingResponse from huggingface_hub import AsyncInferenceClient -from typing import Union +from template import ChatTemplate + from comps import ( CustomLogger, LVMDoc, + SearchedMultimodalDoc, ServiceType, TextDoc, - SearchedMultimodalDoc, opea_microservices, register_microservice, register_statistics, statistics_dict, ) -from template import ChatTemplate - logger = CustomLogger("lvm_tgi") logflag = os.getenv("LOGFLAG", False) @@ -41,20 +41,24 @@ async def lvm(request: Union[LVMDoc, SearchedMultimodalDoc]) -> TextDoc: start = time.time() if isinstance(request, SearchedMultimodalDoc): # This is to construct LVMDoc from SearchedMultimodalDoc input from retriever microservice - # for Multimodal RAG on Videos application + # for Multimodal RAG on Videos application if logflag: logger.info("[SearchedMultimodalDoc ] input from retriever microservice") retrieved_metadatas = request.metadata if application == "MM_RAG_ON_VIDEOS": - img_b64_str = retrieved_metadatas[0]['b64_img_str'] + img_b64_str = retrieved_metadatas[0]["b64_img_str"] initial_query = request.initial_query prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, retrieved_metadatas) # use default lvm parameters for inferencing new_request = LVMDoc(image=img_b64_str, prompt=prompt) if logflag: - logger.info(f"prompt generated for [SearchedMultimodalDoc ] input from retriever microservice: {prompt}") + logger.info( + f"prompt generated for [SearchedMultimodalDoc ] input from retriever microservice: {prompt}" + ) else: - raise 
NotImplementedError(f"For application {application}: it has NOT implemented SearchedMultimodalDoc input from retriever microservice!") + raise NotImplementedError( + f"For application {application}: it has NOT implemented SearchedMultimodalDoc input from retriever microservice!" + ) else: new_request = request stream_gen_time = [] diff --git a/comps/lvms/template.py b/comps/lvms/template.py index 615cb2c9a1..91648edabf 100644 --- a/comps/lvms/template.py +++ b/comps/lvms/template.py @@ -1,10 +1,11 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 + class ChatTemplate: @staticmethod def generate_multimodal_rag_on_videos_prompt(initial_query: str, retrieved_metadatas): context = retrieved_metadatas[0]["transcript_for_inference"] template = """The transcript associated with the image is '{context}'. {question}""" - return template.format(context=context, question=initial_query) \ No newline at end of file + return template.format(context=context, question=initial_query) diff --git a/comps/retrievers/multimodal_langchain/redis/README.md b/comps/retrievers/multimodal_langchain/redis/README.md index 1b7fa72584..2ba6d3296f 100644 --- a/comps/retrievers/multimodal_langchain/redis/README.md +++ b/comps/retrievers/multimodal_langchain/redis/README.md @@ -8,13 +8,14 @@ Overall, this microservice provides robust backend support for applications requ ## 🚀1. Start Microservice with Python (Option 1) -To start the retriever microservice, you must first install the required python packages. +To start the retriever microservice, you must first install the required python packages. ### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` + ### 1.2 Setup VectorDB Service You need to setup your own VectorDB service (Redis in this example), and ingest your knowledge documents into the vector database. @@ -25,9 +26,10 @@ Remember to ingest data into it manually. ```bash docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-stack:7.2.0-v9 ``` + ### 1.3 Ingest images or video -Upload a video or images using the dataprep microservice, instructions can be found [here](https://github.com/opea-project/GenAIComps/tree/main/comps/dataprep/redis/multimodal_langchain/README.md). +Upload a video or images using the dataprep microservice, instructions can be found [here](https://github.com/opea-project/GenAIComps/tree/main/comps/dataprep/redis/multimodal_langchain/README.md). ### 1.4 Start Retriever Service diff --git a/comps/retrievers/multimodal_langchain/redis/multimodal_config.py b/comps/retrievers/multimodal_langchain/redis/multimodal_config.py index f92d5755d7..f9345c1dee 100644 --- a/comps/retrievers/multimodal_langchain/redis/multimodal_config.py +++ b/comps/retrievers/multimodal_langchain/redis/multimodal_config.py @@ -6,8 +6,10 @@ current_file_path = os.path.abspath(__file__) parent_dir = os.path.dirname(current_file_path) + def get_boolean_env_var(var_name, default_value=False): """Retrieve the boolean value of an environment variable. + Args: var_name (str): The name of the environment variable to retrieve. 
default_value (bool): The default value to return if the variable @@ -31,7 +33,7 @@ def get_boolean_env_var(var_name, default_value=False): # Check for openai API key -#if "OPENAI_API_KEY" not in os.environ: +# if "OPENAI_API_KEY" not in os.environ: # raise Exception("Must provide an OPENAI_API_KEY as an env var.") @@ -77,4 +79,4 @@ def format_redis_conn_from_env(): REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "redis_schema.yml") schema_path = os.path.join(parent_dir, REDIS_SCHEMA) INDEX_SCHEMA = schema_path -NUM_RETRIEVED_RESULTS = int(os.getenv("NUM_RETRIEVED_RESULTS", 1)) \ No newline at end of file +NUM_RETRIEVED_RESULTS = int(os.getenv("NUM_RETRIEVED_RESULTS", 1)) diff --git a/comps/retrievers/multimodal_langchain/redis/requirements.txt b/comps/retrievers/multimodal_langchain/redis/requirements.txt index e6ceddd4ef..b7cef28669 100644 --- a/comps/retrievers/multimodal_langchain/redis/requirements.txt +++ b/comps/retrievers/multimodal_langchain/redis/requirements.txt @@ -7,5 +7,5 @@ opentelemetry-sdk prometheus-fastapi-instrumentator redis shortuuid +transformers uvicorn -transformers \ No newline at end of file diff --git a/comps/retrievers/multimodal_langchain/redis/retriever_redis.py b/comps/retrievers/multimodal_langchain/redis/retriever_redis.py index 50d567f821..98d5bb7d57 100644 --- a/comps/retrievers/multimodal_langchain/redis/retriever_redis.py +++ b/comps/retrievers/multimodal_langchain/redis/retriever_redis.py @@ -4,9 +4,8 @@ import time from typing import Union -from comps.embeddings.multimodal_embeddings.bridgetower import BridgeTowerEmbedding from langchain_community.vectorstores import Redis -from multimodal_config import INDEX_NAME, REDIS_URL, REDIS_SCHEMA +from multimodal_config import INDEX_NAME, REDIS_SCHEMA, REDIS_URL from comps import ( EmbedMultimodalDoc, @@ -24,6 +23,8 @@ RetrievalResponse, RetrievalResponseData, ) +from comps.embeddings.multimodal_embeddings.bridgetower import BridgeTowerEmbedding + @register_microservice( name="opea_service@multimodal_retriever_redis", @@ -32,7 +33,6 @@ host="0.0.0.0", port=7000, ) - @register_statistics(names=["opea_service@multimodal_retriever_redis"]) def retrieve( input: Union[EmbedMultimodalDoc, RetrievalRequest, ChatCompletionRequest] @@ -89,5 +89,7 @@ def retrieve( if __name__ == "__main__": embeddings = BridgeTowerEmbedding() - vector_db = Redis.from_existing_index(embedding=embeddings, schema=REDIS_SCHEMA, index_name=INDEX_NAME, redis_url=REDIS_URL) + vector_db = Redis.from_existing_index( + embedding=embeddings, schema=REDIS_SCHEMA, index_name=INDEX_NAME, redis_url=REDIS_URL + ) opea_microservices["opea_service@multimodal_retriever_redis"].start() From 542e6cdbd21cb7e31ffedebae00ec09fa770efed Mon Sep 17 00:00:00 2001 From: sjagtap1803 Date: Tue, 3 Sep 2024 23:51:57 +0000 Subject: [PATCH 14/37] first update on gateaway Signed-off-by: sjagtap1803 --- comps/__init__.py | 1 + comps/cores/mega/constants.py | 1 + comps/cores/mega/gateway.py | 3 +++ 3 files changed, 5 insertions(+) diff --git a/comps/__init__.py b/comps/__init__.py index c58ae42fe1..597b279353 100644 --- a/comps/__init__.py +++ b/comps/__init__.py @@ -45,6 +45,7 @@ RetrievalToolGateway, FaqGenGateway, VisualQnAGateway, + MultimodalRAGQnAWithVideosGateway, ) # Telemetry diff --git a/comps/cores/mega/constants.py b/comps/cores/mega/constants.py index 10863c1495..aae54fd179 100644 --- a/comps/cores/mega/constants.py +++ b/comps/cores/mega/constants.py @@ -42,6 +42,7 @@ class MegaServiceEndpoint(Enum): CODE_TRANS = "/v1/codetrans" DOC_SUMMARY = "/v1/docsum" 
SEARCH_QNA = "/v1/searchqna" + MULTIMODAL_RAG_QNA_WITH_VIDEOS = "/v1/mmragvideoqna" TRANSLATION = "/v1/translation" RETRIEVALTOOL = "/v1/retrievaltool" FAQ_GEN = "/v1/faqgen" diff --git a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index cc8eaf5d22..a92c8aa8e6 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -568,3 +568,6 @@ def parser_input(data, TypeClass, key): response = result_dict[last_node] print("response is ", response) return response + +class MultimodalRAGQnAWithVideosGateway(Gateway): + \ No newline at end of file From 01b213847da0bd1e95a865e9b1fc5f7c5a58c2c7 Mon Sep 17 00:00:00 2001 From: sjagtap1803 Date: Wed, 4 Sep 2024 05:24:03 +0000 Subject: [PATCH 15/37] fix index not found Signed-off-by: sjagtap1803 --- .../retrievers/multimodal_langchain/redis/retriever_redis.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/comps/retrievers/multimodal_langchain/redis/retriever_redis.py b/comps/retrievers/multimodal_langchain/redis/retriever_redis.py index 98d5bb7d57..e4d9174aed 100644 --- a/comps/retrievers/multimodal_langchain/redis/retriever_redis.py +++ b/comps/retrievers/multimodal_langchain/redis/retriever_redis.py @@ -89,7 +89,5 @@ def retrieve( if __name__ == "__main__": embeddings = BridgeTowerEmbedding() - vector_db = Redis.from_existing_index( - embedding=embeddings, schema=REDIS_SCHEMA, index_name=INDEX_NAME, redis_url=REDIS_URL - ) + vector_db = Redis(embedding=embeddings, index_name=INDEX_NAME, redis_url=REDIS_URL) opea_microservices["opea_service@multimodal_retriever_redis"].start() From 4d353e80cb7438352577df2bd3e488d952ca4532 Mon Sep 17 00:00:00 2001 From: sjagtap1803 Date: Wed, 4 Sep 2024 09:35:15 +0000 Subject: [PATCH 16/37] add LVMSearchedMultimodalDoc Signed-off-by: sjagtap1803 --- comps/__init__.py | 1 + comps/cores/proto/docarray.py | 16 ++++++ comps/lvms/lvm.py | 41 ++++++++------ comps/lvms/lvm_tgi.py | 46 ++++----------- comps/lvms/requirements.txt | 1 + comps/lvms/template.py | 5 +- tests/test_lvms_llava.sh | 22 +++++++- tests/test_lvms_llava_mm_rag_on_videos.sh | 69 ----------------------- 8 files changed, 74 insertions(+), 127 deletions(-) delete mode 100644 tests/test_lvms_llava_mm_rag_on_videos.sh diff --git a/comps/__init__.py b/comps/__init__.py index c58ae42fe1..bc7c8fe805 100644 --- a/comps/__init__.py +++ b/comps/__init__.py @@ -13,6 +13,7 @@ LLMParamsDoc, SearchedDoc, SearchedMultimodalDoc, + LVMSearchedMultimodalDoc, RerankedDoc, TextDoc, RAGASParams, diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py index 587278ec13..9e5d4b1042 100644 --- a/comps/cores/proto/docarray.py +++ b/comps/cores/proto/docarray.py @@ -106,6 +106,22 @@ class Config: class SearchedMultimodalDoc(SearchedDoc): metadata: List[Dict[str, Any]] +class LVMSearchedMultimodalDoc(SearchedMultimodalDoc): + max_new_tokens: conint(ge=0, le=1024) = 512 + top_k: int = 10 + top_p: float = 0.95 + typical_p: float = 0.95 + temperature: float = 0.01 + streaming: bool = False + repetition_penalty: float = 1.03 + chat_template: Optional[str] = Field( + default=None, + description=( + "A template to use for this conversion. " + "If this is not passed, the model's default chat template will be " + "used instead. We recommend that the template contains {context} and {question} for multimodal-rag on videos." 
+ ), + ) class GeneratedDoc(BaseDoc): text: str diff --git a/comps/lvms/lvm.py b/comps/lvms/lvm.py index 41ee79e25f..67e7b10486 100644 --- a/comps/lvms/lvm.py +++ b/comps/lvms/lvm.py @@ -9,11 +9,12 @@ import requests from template import ChatTemplate +from langchain_core.prompts import PromptTemplate from comps import ( CustomLogger, LVMDoc, - SearchedMultimodalDoc, + LVMSearchedMultimodalDoc, ServiceType, TextDoc, opea_microservices, @@ -34,35 +35,40 @@ port=9399, ) @register_statistics(names=["opea_service@lvm"]) -async def lvm(request: Union[LVMDoc, SearchedMultimodalDoc]) -> TextDoc: +async def lvm(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> TextDoc: if logflag: logger.info(request) start = time.time() - if isinstance(request, SearchedMultimodalDoc): + if isinstance(request, LVMSearchedMultimodalDoc): if logflag: - logger.info("[SearchedMultimodalDoc ] input from retriever microservice") + logger.info("[LVMSearchedMultimodalDoc ] input from retriever microservice") retrieved_metadatas = request.metadata - if application == "MM_RAG_ON_VIDEOS": - img_b64_str = retrieved_metadatas[0]["b64_img_str"] - initial_query = request.initial_query - prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, retrieved_metadatas) - # use default lvm parameters for inferencing - new_input = LVMDoc(image=img_b64_str, prompt=prompt) - max_new_tokens = new_input.max_new_tokens - if logflag: + img_b64_str = retrieved_metadatas[0]["b64_img_str"] + initial_query = request.initial_query + context = retrieved_metadatas[0]["transcript_for_inference"] + prompt = initial_query + if request.chat_template is None: + prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, context) + else: + prompt_template = PromptTemplate.from_template(request.chat_template) + input_variables = prompt_template.input_variables + if sorted(input_variables) == ["context", "question"]: + prompt = prompt_template.format(question=initial_query, context=context) + else: logger.info( - f"prompt generated for [SearchedMultimodalDoc ] input from retriever microservice: {prompt}" + f"[ LVMSearchedMultimodalDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']" ) - else: - raise NotImplementedError( - f"For application {application}: it has NOT implemented SearchedMultimodalDoc input from retriever microservice!" 
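The chat_template handling added to lvm.py above applies a user-supplied template only when its input variables are exactly `context` and `question`, and otherwise falls back to the raw query. A minimal standalone sketch of that check, assuming langchain-core is installed (the template and query strings below are illustrative placeholders, not values from this patch):

```python
# Illustrative check mirroring the lvm.py logic above; the template and
# query values are made-up examples, not fixtures from this patch.
from langchain_core.prompts import PromptTemplate

chat_template = "The caption of the image is: '{context}'. {question}"
initial_query = "What is this?"
context = "yellow image"

prompt_template = PromptTemplate.from_template(chat_template)
if sorted(prompt_template.input_variables) == ["context", "question"]:
    # Template has exactly the expected variables: format it with query and context.
    prompt = prompt_template.format(question=initial_query, context=context)
else:
    # Unsupported variable set: fall back to the plain query.
    prompt = initial_query
print(prompt)
```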
+ max_new_tokens = request.max_new_tokens + if logflag: + logger.info( + f"prompt generated for [LVMSearchedMultimodalDoc ] input from retriever microservice: {prompt}" ) else: img_b64_str = request.image prompt = request.prompt max_new_tokens = request.max_new_tokens - + inputs = {"img_b64_str": img_b64_str, "prompt": prompt, "max_new_tokens": max_new_tokens} # forward to the LLaVA server response = requests.post(url=f"{lvm_endpoint}/generate", data=json.dumps(inputs), proxies={"http": None}) @@ -76,7 +82,6 @@ async def lvm(request: Union[LVMDoc, SearchedMultimodalDoc]) -> TextDoc: if __name__ == "__main__": lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399") - application = os.getenv("APPLICATION", None) logger.info("[LVM] LVM initialized.") opea_microservices["opea_service@lvm"].start() diff --git a/comps/lvms/lvm_tgi.py b/comps/lvms/lvm_tgi.py index e8ea86cf43..09ff229f61 100644 --- a/comps/lvms/lvm_tgi.py +++ b/comps/lvms/lvm_tgi.py @@ -3,16 +3,13 @@ import os import time -from typing import Union from fastapi.responses import StreamingResponse from huggingface_hub import AsyncInferenceClient -from template import ChatTemplate from comps import ( CustomLogger, LVMDoc, - SearchedMultimodalDoc, ServiceType, TextDoc, opea_microservices, @@ -35,41 +32,19 @@ output_datatype=TextDoc, ) @register_statistics(names=["opea_service@lvm_tgi"]) -async def lvm(request: Union[LVMDoc, SearchedMultimodalDoc]) -> TextDoc: +async def lvm(request: LVMDoc): if logflag: logger.info(request) start = time.time() - if isinstance(request, SearchedMultimodalDoc): - # This is to construct LVMDoc from SearchedMultimodalDoc input from retriever microservice - # for Multimodal RAG on Videos application - if logflag: - logger.info("[SearchedMultimodalDoc ] input from retriever microservice") - retrieved_metadatas = request.metadata - if application == "MM_RAG_ON_VIDEOS": - img_b64_str = retrieved_metadatas[0]["b64_img_str"] - initial_query = request.initial_query - prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, retrieved_metadatas) - # use default lvm parameters for inferencing - new_request = LVMDoc(image=img_b64_str, prompt=prompt) - if logflag: - logger.info( - f"prompt generated for [SearchedMultimodalDoc ] input from retriever microservice: {prompt}" - ) - else: - raise NotImplementedError( - f"For application {application}: it has NOT implemented SearchedMultimodalDoc input from retriever microservice!" 
- ) - else: - new_request = request stream_gen_time = [] - img_b64_str = new_request.image - prompt = new_request.prompt - max_new_tokens = new_request.max_new_tokens - streaming = new_request.streaming - repetition_penalty = new_request.repetition_penalty - temperature = new_request.temperature - top_k = new_request.top_k - top_p = new_request.top_p + img_b64_str = request.image + prompt = request.prompt + max_new_tokens = request.max_new_tokens + streaming = request.streaming + repetition_penalty = request.repetition_penalty + temperature = request.temperature + top_k = request.top_k + top_p = request.top_p image = f"data:image/png;base64,{img_b64_str}" image_prompt = f"![]({image})\n{prompt}\nASSISTANT:" @@ -117,7 +92,6 @@ async def stream_generator(): if __name__ == "__main__": lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399") - application = os.getenv("APPLICATION", None) lvm_client = AsyncInferenceClient(lvm_endpoint) logger.info("[LVM] LVM initialized.") - opea_microservices["opea_service@lvm_tgi"].start() + opea_microservices["opea_service@lvm_tgi"].start() \ No newline at end of file diff --git a/comps/lvms/requirements.txt b/comps/lvms/requirements.txt index 556dfb0c15..e7fc24fc96 100644 --- a/comps/lvms/requirements.txt +++ b/comps/lvms/requirements.txt @@ -11,3 +11,4 @@ pydantic==2.7.2 pydub shortuuid uvicorn +langchain-core diff --git a/comps/lvms/template.py b/comps/lvms/template.py index 91648edabf..71c2b26679 100644 --- a/comps/lvms/template.py +++ b/comps/lvms/template.py @@ -5,7 +5,6 @@ class ChatTemplate: @staticmethod - def generate_multimodal_rag_on_videos_prompt(initial_query: str, retrieved_metadatas): - context = retrieved_metadatas[0]["transcript_for_inference"] + def generate_multimodal_rag_on_videos_prompt(question: str, context: str): template = """The transcript associated with the image is '{context}'. {question}""" - return template.format(context=context, question=initial_query) + return template.format(context=context, question=question) diff --git a/tests/test_lvms_llava.sh b/tests/test_lvms_llava.sh index 08f138e2f6..aeae888663 100644 --- a/tests/test_lvms_llava.sh +++ b/tests/test_lvms_llava.sh @@ -30,7 +30,7 @@ function start_service() { unset http_proxy docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5029:8399 --ipc=host opea/llava:comps docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:5029 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5028:9399 --ipc=host opea/lvm:comps - sleep 8m + sleep 3m } function validate_microservice() { @@ -45,6 +45,26 @@ function validate_microservice() { exit 1 fi + result=$(http_proxy="" curl http://localhost:5028/v1/lvm -XPOST -d '{"retrieved_docs": [], "initial_query": "What is this?", "top_n": 1, "metadata": [{"b64_img_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "transcript_for_inference": "yellow image"}]}' -H 'Content-Type: application/json') + if [[ $result == *"yellow"* ]]; then + echo "Result correct." + else + echo "Result wrong." 
+ docker logs test-comps-lvm-llava + docker logs test-comps-lvm + exit 1 + fi + + result=$(http_proxy="" curl http://localhost:5028/v1/lvm -XPOST -d '{"retrieved_docs": [], "initial_query": "What is this?", "top_n": 1, "metadata": [{"b64_img_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "transcript_for_inference": "yellow image"}], "chat_template":"The caption of the image is: '\''{context}'\''. {question}"}' -H 'Content-Type: application/json') + if [[ $result == *"yellow"* ]]; then + echo "Result correct." + else + echo "Result wrong." + docker logs test-comps-lvm-llava + docker logs test-comps-lvm + exit 1 + fi + } function stop_docker() { diff --git a/tests/test_lvms_llava_mm_rag_on_videos.sh b/tests/test_lvms_llava_mm_rag_on_videos.sh deleted file mode 100644 index 5d3fdc2113..0000000000 --- a/tests/test_lvms_llava_mm_rag_on_videos.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - echo $(pwd) - docker build --no-cache -t opea/llava:comps -f comps/lvms/llava/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/llava built fail" - exit 1 - else - echo "opea/llava built successful" - fi - docker build --no-cache -t opea/lvm:comps -f comps/lvms/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/lvm built fail" - exit 1 - else - echo "opea/lvm built successful" - fi -} - -function start_service() { - unset http_proxy - docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5029:8399 --ipc=host opea/llava:comps - docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:5029 -e APPLICATION="MM_RAG_ON_VIDEOS" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5028:9399 --ipc=host opea/lvm:comps - sleep 8m -} - -function validate_microservice() { - - result=$(http_proxy="" curl http://localhost:5028/v1/lvm -XPOST -d '{"retrieved_docs": [], "initial_query": "What is this?", "top_n": 1, "metadata": [{"b64_img_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "transcript_for_inference": "yellow image"}]}' -H 'Content-Type: application/json') - if [[ $result == *"yellow"* ]]; then - echo "Result correct." - else - echo "Result wrong." - docker logs test-comps-lvm-llava - docker logs test-comps-lvm - exit 1 - fi - -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-lvm*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main From ac57ca083970512b9bcbf0e7700e243337fec625 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Sep 2024 09:36:17 +0000 Subject: [PATCH 17/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/cores/proto/docarray.py | 2 ++ comps/lvms/lvm.py | 8 +++----- comps/lvms/lvm_tgi.py | 2 +- comps/lvms/requirements.txt | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py index 9e5d4b1042..4f915885d6 100644 --- a/comps/cores/proto/docarray.py +++ b/comps/cores/proto/docarray.py @@ -106,6 +106,7 @@ class Config: class SearchedMultimodalDoc(SearchedDoc): metadata: List[Dict[str, Any]] + class LVMSearchedMultimodalDoc(SearchedMultimodalDoc): max_new_tokens: conint(ge=0, le=1024) = 512 top_k: int = 10 @@ -123,6 +124,7 @@ class LVMSearchedMultimodalDoc(SearchedMultimodalDoc): ), ) + class GeneratedDoc(BaseDoc): text: str prompt: str diff --git a/comps/lvms/lvm.py b/comps/lvms/lvm.py index 67e7b10486..d84e366377 100644 --- a/comps/lvms/lvm.py +++ b/comps/lvms/lvm.py @@ -8,8 +8,8 @@ from typing import Union import requests -from template import ChatTemplate from langchain_core.prompts import PromptTemplate +from template import ChatTemplate from comps import ( CustomLogger, @@ -60,15 +60,13 @@ async def lvm(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> TextDoc: ) max_new_tokens = request.max_new_tokens if logflag: - logger.info( - f"prompt generated for [LVMSearchedMultimodalDoc ] input from retriever microservice: {prompt}" - ) + logger.info(f"prompt generated for [LVMSearchedMultimodalDoc ] input from retriever microservice: {prompt}") else: img_b64_str = request.image prompt = request.prompt max_new_tokens = request.max_new_tokens - + inputs = {"img_b64_str": img_b64_str, "prompt": prompt, "max_new_tokens": max_new_tokens} # forward to the LLaVA server response = requests.post(url=f"{lvm_endpoint}/generate", data=json.dumps(inputs), proxies={"http": None}) diff --git a/comps/lvms/lvm_tgi.py b/comps/lvms/lvm_tgi.py index 09ff229f61..9492b4eafc 100644 --- a/comps/lvms/lvm_tgi.py +++ b/comps/lvms/lvm_tgi.py @@ -94,4 +94,4 @@ async def stream_generator(): lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399") lvm_client = AsyncInferenceClient(lvm_endpoint) logger.info("[LVM] LVM initialized.") - opea_microservices["opea_service@lvm_tgi"].start() \ No newline at end of file + opea_microservices["opea_service@lvm_tgi"].start() diff --git a/comps/lvms/requirements.txt b/comps/lvms/requirements.txt index e7fc24fc96..be1c23a6dc 100644 --- a/comps/lvms/requirements.txt +++ b/comps/lvms/requirements.txt @@ -2,6 +2,7 @@ datasets docarray[full] fastapi huggingface_hub +langchain-core opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk @@ -11,4 +12,3 @@ pydantic==2.7.2 pydub shortuuid uvicorn -langchain-core From 2c1f0ba1e95943d51c67725f657553b98c0c77dc Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Wed, 4 Sep 2024 10:27:17 +0000 Subject: [PATCH 18/37] implement gateway for MultimodalRagQnAWithVideos Signed-off-by: siddhivelankar23 --- comps/cores/mega/gateway.py | 51 ++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 
deletion(-) diff --git a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index a92c8aa8e6..d0caae1af0 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -570,4 +570,53 @@ def parser_input(data, TypeClass, key): return response class MultimodalRAGQnAWithVideosGateway(Gateway): - \ No newline at end of file + def __init__(self, megaservice, host="0.0.0.0", port=9999): + super().__init__( + megaservice, host, port, str(MegaServiceEndpoint.MULTIMODAL_RAG_QNA_WITH_VIDEOS), ChatCompletionRequest, ChatCompletionResponse + ) + + async def handle_request(self, request: Request): + data = await request.json() + stream_opt = bool(data.get("stream", False)) + if stream_opt == True: + print( + f"[ MultimodalRAGQnAWithVideosGateway ] stream=True not used, this has not support streaming yet!" + ) + stream_opt = False + chat_request = ChatCompletionRequest.model_validate(data) + # Multimodal RAG QnA With Videos has not yet accepts image as input during QnA. + prompt = self._handle_message(chat_request.messages) + parameters = LLMParams( + max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, + top_k=chat_request.top_k if chat_request.top_k else 10, + top_p=chat_request.top_p if chat_request.top_p else 0.95, + temperature=chat_request.temperature if chat_request.temperature else 0.01, + repetition_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 1.03, + streaming=stream_opt, + chat_template=chat_request.chat_template if chat_request.chat_template else None, + ) + result_dict, runtime_graph = await self.megaservice.schedule( + initial_inputs={"text": prompt}, llm_parameters=parameters + ) + for node, response in result_dict.items(): + # the last microservice in this megaservice is LVM. + # checking if LVM returns StreamingResponse + # Currently, LVM with LLAVA has not yet supported streaming. 
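For orientation, a first-turn request to the new endpoint could be exercised along the lines of the sketch below. The host and port are assumptions (9999 is the constructor default above), and the question and token budget are placeholders:

```python
# Hypothetical smoke test of the /v1/mmragvideoqna endpoint; host and port
# are assumptions, not values fixed by this patch.
import requests

payload = {"messages": "What is happening in this video?", "max_tokens": 300}
response = requests.post("http://localhost:9999/v1/mmragvideoqna", json=payload)
# The gateway wraps the LVM output in a ChatCompletionResponse.
print(response.json()["choices"][-1]["message"]["content"])
```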
+ # @TODO: Will need to test this once LVM with LLAVA supports streaming + if ( isinstance(response, StreamingResponse) + and node == runtime_graph.all_leaves()[-1] + and self.megaservice.services[node].service_type == ServiceType.LVM + ): + return response + last_node = runtime_graph.all_leaves()[-1] + response = result_dict[last_node]["text"] + choices = [] + usage = UsageInfo() + choices.append( + ChatCompletionResponseChoice( + index=0, + message=ChatMessage(role="assistant", content=response), + finish_reason="stop", + ) + ) + return ChatCompletionResponse(model="multimodalragqnawithvideos", choices=choices, usage=usage) \ No newline at end of file From dc631db31bf8a1dc1468bf3cceb79d640fa77033 Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Wed, 4 Sep 2024 23:44:32 +0000 Subject: [PATCH 19/37] remove INDEX_SCHEMA Signed-off-by: siddhivelankar23 --- .../multimodal_langchain/redis/multimodal_config.py | 5 ----- .../retrievers/multimodal_langchain/redis/retriever_redis.py | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/comps/retrievers/multimodal_langchain/redis/multimodal_config.py b/comps/retrievers/multimodal_langchain/redis/multimodal_config.py index f9345c1dee..211851b7e9 100644 --- a/comps/retrievers/multimodal_langchain/redis/multimodal_config.py +++ b/comps/retrievers/multimodal_langchain/redis/multimodal_config.py @@ -75,8 +75,3 @@ def format_redis_conn_from_env(): # Vector Index Configuration INDEX_NAME = os.getenv("INDEX_NAME", "test-index") - -REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "redis_schema.yml") -schema_path = os.path.join(parent_dir, REDIS_SCHEMA) -INDEX_SCHEMA = schema_path -NUM_RETRIEVED_RESULTS = int(os.getenv("NUM_RETRIEVED_RESULTS", 1)) diff --git a/comps/retrievers/multimodal_langchain/redis/retriever_redis.py b/comps/retrievers/multimodal_langchain/redis/retriever_redis.py index e4d9174aed..f998ee28eb 100644 --- a/comps/retrievers/multimodal_langchain/redis/retriever_redis.py +++ b/comps/retrievers/multimodal_langchain/redis/retriever_redis.py @@ -5,7 +5,7 @@ from typing import Union from langchain_community.vectorstores import Redis -from multimodal_config import INDEX_NAME, REDIS_SCHEMA, REDIS_URL +from multimodal_config import INDEX_NAME, REDIS_URL from comps import ( EmbedMultimodalDoc, From 5a3c764910498f41cfc95900d4271d98f7a844e4 Mon Sep 17 00:00:00 2001 From: sjagtap1803 Date: Thu, 5 Sep 2024 06:03:50 +0000 Subject: [PATCH 20/37] update MultimodalRAGQnAWithVideosGateway with 2 megaservices Signed-off-by: sjagtap1803 --- comps/cores/mega/gateway.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index d0caae1af0..5b7a9bd858 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -570,10 +570,11 @@ def parser_input(data, TypeClass, key): return response class MultimodalRAGQnAWithVideosGateway(Gateway): - def __init__(self, megaservice, host="0.0.0.0", port=9999): + def __init__(self, multimodal_rag_megaservice, lvm_megaservice, host="0.0.0.0", port=9999): super().__init__( - megaservice, host, port, str(MegaServiceEndpoint.MULTIMODAL_RAG_QNA_WITH_VIDEOS), ChatCompletionRequest, ChatCompletionResponse + multimodal_rag_megaservice, host, port, str(MegaServiceEndpoint.MULTIMODAL_RAG_QNA_WITH_VIDEOS), ChatCompletionRequest, ChatCompletionResponse ) + self.lvm_megaservice = lvm_megaservice async def handle_request(self, request: Request): data = await request.json() @@ -585,7 +586,16 @@ async def handle_request(self, 
request: Request): stream_opt = False chat_request = ChatCompletionRequest.model_validate(data) # Multimodal RAG QnA With Videos has not yet accepts image as input during QnA. - prompt = self._handle_message(chat_request.messages) + prompt_and_image = self._handle_message(chat_request.messages) + if isinstance(prompt_and_image, tuple): + print(f"This request include image, thus it is a follow-up query. Using lvm megaservice") + prompt, images = prompt_and_image + cur_megaservice = self.lvm_megaservice + else: + print(f"This is the first query, requiring multimodal retrieval. Using multimodal rag megaservice") + prompt = prompt_and_image + cur_megaservice = self.megaservice + parameters = LLMParams( max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, top_k=chat_request.top_k if chat_request.top_k else 10, @@ -595,7 +605,7 @@ async def handle_request(self, request: Request): streaming=stream_opt, chat_template=chat_request.chat_template if chat_request.chat_template else None, ) - result_dict, runtime_graph = await self.megaservice.schedule( + result_dict, runtime_graph = await cur_megaservice.schedule( initial_inputs={"text": prompt}, llm_parameters=parameters ) for node, response in result_dict.items(): From 5842ed1a77f55d9fc6553d47773d7ab4ecdb9391 Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Thu, 5 Sep 2024 06:18:49 +0000 Subject: [PATCH 21/37] revise folder structure to comps/retrievers/langchain/redis_multimodal Signed-off-by: siddhivelankar23 --- .../redis_multimodal}/README.md | 4 ++-- .../redis_multimodal}/__init__.py | 0 .../redis_multimodal}/docker/Dockerfile | 4 ++-- .../docker/docker_compose_retriever.yaml | 0 .../redis_multimodal}/multimodal_config.py | 0 .../redis_multimodal}/requirements.txt | 0 .../redis_multimodal}/retriever_redis.py | 0 .../redis/redis_schema.yml | 19 ------------------- ..._retrievers_langchain_multimodal_redis.sh} | 2 +- 9 files changed, 5 insertions(+), 24 deletions(-) rename comps/retrievers/{multimodal_langchain/redis => langchain/redis_multimodal}/README.md (97%) rename comps/retrievers/{multimodal_langchain/redis => langchain/redis_multimodal}/__init__.py (100%) rename comps/retrievers/{multimodal_langchain/redis => langchain/redis_multimodal}/docker/Dockerfile (79%) rename comps/retrievers/{multimodal_langchain/redis => langchain/redis_multimodal}/docker/docker_compose_retriever.yaml (100%) rename comps/retrievers/{multimodal_langchain/redis => langchain/redis_multimodal}/multimodal_config.py (100%) rename comps/retrievers/{multimodal_langchain/redis => langchain/redis_multimodal}/requirements.txt (100%) rename comps/retrievers/{multimodal_langchain/redis => langchain/redis_multimodal}/retriever_redis.py (100%) delete mode 100644 comps/retrievers/multimodal_langchain/redis/redis_schema.yml rename tests/{test_retrievers_multimodal_langchain_redis.sh => test_retrievers_langchain_multimodal_redis.sh} (97%) diff --git a/comps/retrievers/multimodal_langchain/redis/README.md b/comps/retrievers/langchain/redis_multimodal/README.md similarity index 97% rename from comps/retrievers/multimodal_langchain/redis/README.md rename to comps/retrievers/langchain/redis_multimodal/README.md index 2ba6d3296f..edc2523a31 100644 --- a/comps/retrievers/multimodal_langchain/redis/README.md +++ b/comps/retrievers/langchain/redis_multimodal/README.md @@ -34,7 +34,7 @@ Upload a video or images using the dataprep microservice, instructions can be fo ### 1.4 Start Retriever Service ```bash -python langchain_multimodal/retriever_redis.py +python 
retriever_redis.py ``` ## 🚀2. Start Microservice with Docker (Option 2) @@ -51,7 +51,7 @@ export INDEX_NAME=${your_index_name} ```bash cd ../../../../ -docker build -t opea/multimodal-retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/multimodal_langchain/redis/docker/Dockerfile . +docker build -t opea/multimodal-retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/redis_multimodal/docker/Dockerfile . ``` To start a docker container, you have two options: diff --git a/comps/retrievers/multimodal_langchain/redis/__init__.py b/comps/retrievers/langchain/redis_multimodal/__init__.py similarity index 100% rename from comps/retrievers/multimodal_langchain/redis/__init__.py rename to comps/retrievers/langchain/redis_multimodal/__init__.py diff --git a/comps/retrievers/multimodal_langchain/redis/docker/Dockerfile b/comps/retrievers/langchain/redis_multimodal/docker/Dockerfile similarity index 79% rename from comps/retrievers/multimodal_langchain/redis/docker/Dockerfile rename to comps/retrievers/langchain/redis_multimodal/docker/Dockerfile index 33077c3037..f211005b94 100644 --- a/comps/retrievers/multimodal_langchain/redis/docker/Dockerfile +++ b/comps/retrievers/langchain/redis_multimodal/docker/Dockerfile @@ -20,10 +20,10 @@ USER user RUN pip install --no-cache-dir --upgrade pip && \ if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/retrievers/multimodal_langchain/redis/requirements.txt + pip install --no-cache-dir -r /home/user/comps/retrievers/langchain/redis_multimodal/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/retrievers/multimodal_langchain/redis +WORKDIR /home/user/comps/retrievers/langchain/redis_multimodal ENTRYPOINT ["python", "retriever_redis.py"] diff --git a/comps/retrievers/multimodal_langchain/redis/docker/docker_compose_retriever.yaml b/comps/retrievers/langchain/redis_multimodal/docker/docker_compose_retriever.yaml similarity index 100% rename from comps/retrievers/multimodal_langchain/redis/docker/docker_compose_retriever.yaml rename to comps/retrievers/langchain/redis_multimodal/docker/docker_compose_retriever.yaml diff --git a/comps/retrievers/multimodal_langchain/redis/multimodal_config.py b/comps/retrievers/langchain/redis_multimodal/multimodal_config.py similarity index 100% rename from comps/retrievers/multimodal_langchain/redis/multimodal_config.py rename to comps/retrievers/langchain/redis_multimodal/multimodal_config.py diff --git a/comps/retrievers/multimodal_langchain/redis/requirements.txt b/comps/retrievers/langchain/redis_multimodal/requirements.txt similarity index 100% rename from comps/retrievers/multimodal_langchain/redis/requirements.txt rename to comps/retrievers/langchain/redis_multimodal/requirements.txt diff --git a/comps/retrievers/multimodal_langchain/redis/retriever_redis.py b/comps/retrievers/langchain/redis_multimodal/retriever_redis.py similarity index 100% rename from comps/retrievers/multimodal_langchain/redis/retriever_redis.py rename to comps/retrievers/langchain/redis_multimodal/retriever_redis.py diff --git a/comps/retrievers/multimodal_langchain/redis/redis_schema.yml b/comps/retrievers/multimodal_langchain/redis/redis_schema.yml deleted file mode 100644 index 32f4a79ae4..0000000000 --- a/comps/retrievers/multimodal_langchain/redis/redis_schema.yml +++ 
/dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -text: - - name: content - - name: b64_img_str - - name: video_id - - name: source_video - - name: embedding_type - - name: title - - name: transcript_for_inference -numeric: - - name: time_of_frame_ms -vector: - - name: content_vector - algorithm: HNSW - datatype: FLOAT32 - dims: 512 - distance_metric: COSINE diff --git a/tests/test_retrievers_multimodal_langchain_redis.sh b/tests/test_retrievers_langchain_multimodal_redis.sh similarity index 97% rename from tests/test_retrievers_multimodal_langchain_redis.sh rename to tests/test_retrievers_langchain_multimodal_redis.sh index 504ec085c0..496f5bf199 100644 --- a/tests/test_retrievers_multimodal_langchain_redis.sh +++ b/tests/test_retrievers_langchain_multimodal_redis.sh @@ -10,7 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH - docker build --no-cache -t opea/multimodal-retriever-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/multimodal_langchain/redis/docker/Dockerfile . + docker build --no-cache -t opea/multimodal-retriever-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/redis_multimodal/docker/Dockerfile . if [ $? -ne 0 ]; then echo "opea/multimodal-retriever-redis built fail" exit 1 From afb38401a0f8c0fb99bec1489f9eed904a0c3718 Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Thu, 5 Sep 2024 07:43:10 +0000 Subject: [PATCH 22/37] update test Signed-off-by: siddhivelankar23 --- tests/test_retrievers_langchain_multimodal_redis.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_retrievers_langchain_multimodal_redis.sh b/tests/test_retrievers_langchain_multimodal_redis.sh index 496f5bf199..aa86d4ace0 100644 --- a/tests/test_retrievers_langchain_multimodal_redis.sh +++ b/tests/test_retrievers_langchain_multimodal_redis.sh @@ -31,7 +31,7 @@ function start_service() { unset http_proxy docker run -d --name="test-comps-multimodal-retriever-redis-server" -p ${retriever_port}:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/multimodal-retriever-redis:comps - sleep 3m + sleep 5m } function validate_microservice() { From 426ed04e8a44cc2e713c9e653184349fc39ba8c9 Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Thu, 5 Sep 2024 07:59:47 +0000 Subject: [PATCH 23/37] add unittest for multimodalrag_qna_with_videos_gateway Signed-off-by: siddhivelankar23 --- ...t_multimodalrag_qna_with_videos_gateway.py | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py diff --git a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py new file mode 100644 index 0000000000..e023de09f7 --- /dev/null +++ b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py @@ -0,0 +1,91 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import unittest + +from comps import Gateway, ServiceOrchestrator, TextDoc, opea_microservices, register_microservice +from comps import ( + MultimodalDoc, + EmbedDoc, + EmbedMultimodalDoc, + SearchedMultimodalDoc, + LVMSearchedMultimodalDoc, + MultimodalRAGQnAWithVideosGateway + ) + + +@register_microservice(name="mm_embedding", host="0.0.0.0", 
port=8083, endpoint="/v1/mm_embedding") +async def mm_embedding_add(request: MultimodalDoc) -> EmbedDoc: + req = request.model_dump_json() + req_dict = json.loads(req) + print('req_dict_embedding', req_dict) + text = req_dict["text"] + res = {} + res["text"] = text + res["embedding"] = [0.12, 0.45] + return res + + +@register_microservice(name="mm_retriever", host="0.0.0.0", port=8084, endpoint="/v1/mm_retriever") +async def mm_retriever_add(request: EmbedMultimodalDoc) -> SearchedMultimodalDoc: + req = request.model_dump_json() + req_dict = json.loads(req) + print('req_dict_retriever', req_dict) + text = req_dict["text"] + res = {} + res['retrieved_docs'] = [] + res['initial_query'] = text + res['top_n'] = 1 + res['metadata'] = [{ + "b64_img_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", + "transcript_for_inference": "yellow image" + }] + res['chat_template'] = "The caption of the image is: '{context}'. {question}" + return res + +@register_microservice(name="lvm", host="0.0.0.0", port=8085, endpoint="/v1/lvm") +async def lvm_add(request: LVMSearchedMultimodalDoc) -> TextDoc: + req = request.model_dump_json() + req_dict = json.loads(req) + print('req_dict_lvm', req_dict) + text = req_dict["initial_query"] + text += "opea project!" + res = {} + res['text'] = text + return res + + +class TestServiceOrchestrator(unittest.IsolatedAsyncioTestCase): + def setUp(self): + self.mm_embedding = opea_microservices["mm_embedding"] + self.mm_retriever = opea_microservices["mm_retriever"] + self.lvm = opea_microservices["lvm"] + self.mm_embedding.start() + self.mm_retriever.start() + self.lvm.start() + + self.service_builder = ServiceOrchestrator() + + self.service_builder.add(opea_microservices["mm_embedding"]).add(opea_microservices["mm_retriever"]).add(opea_microservices["lvm"]) + self.service_builder.flow_to(self.mm_embedding, self.mm_retriever) + self.service_builder.flow_to(self.mm_retriever, self.lvm) + + self.follow_up_query_service_builder = ServiceOrchestrator() + self.follow_up_query_service_builder.add(self.lvm) + + self.gateway = MultimodalRAGQnAWithVideosGateway(self.service_builder, self.follow_up_query_service_builder, port=9898) + + def tearDown(self): + self.mm_embedding.stop() + self.mm_retriever.stop() + self.lvm.stop() + self.gateway.stop() + + async def test_schedule(self): + result_dict, _ = await self.service_builder.schedule(initial_inputs={"text": "hello, "}) + self.assertEqual(result_dict[self.lvm.name]["text"], "hello, opea project!") + + +if __name__ == "__main__": + unittest.main() From 030120ed38f7592df968977bf709faad3833ec52 Mon Sep 17 00:00:00 2001 From: Tiep Le Date: Thu, 5 Sep 2024 08:22:37 +0000 Subject: [PATCH 24/37] update test mmrag qna with videos Signed-off-by: Tiep Le --- ...t_multimodalrag_qna_with_videos_gateway.py | 74 +++++++++++-------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py index e023de09f7..0b771b9edb 100644 --- a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py +++ b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py @@ -3,9 +3,10 @@ import json import unittest - -from comps import Gateway, ServiceOrchestrator, TextDoc, opea_microservices, register_microservice +from typing import Union +from comps import ServiceOrchestrator, TextDoc, opea_microservices, register_microservice from comps import ( + LVMDoc, 
MultimodalDoc, EmbedDoc, EmbedMultimodalDoc, @@ -45,11 +46,16 @@ async def mm_retriever_add(request: EmbedMultimodalDoc) -> SearchedMultimodalDoc return res @register_microservice(name="lvm", host="0.0.0.0", port=8085, endpoint="/v1/lvm") -async def lvm_add(request: LVMSearchedMultimodalDoc) -> TextDoc: +async def lvm_add(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> TextDoc: req = request.model_dump_json() req_dict = json.loads(req) print('req_dict_lvm', req_dict) - text = req_dict["initial_query"] + if isinstance(request, LVMSearchedMultimodalDoc): + print("request is the output of multimodal retriever") + text = req_dict["initial_query"] + else: + print("request is from user.") + text = req_dict["prompt"] text += "opea project!" res = {} res['text'] = text @@ -57,35 +63,43 @@ async def lvm_add(request: LVMSearchedMultimodalDoc) -> TextDoc: class TestServiceOrchestrator(unittest.IsolatedAsyncioTestCase): - def setUp(self): - self.mm_embedding = opea_microservices["mm_embedding"] - self.mm_retriever = opea_microservices["mm_retriever"] - self.lvm = opea_microservices["lvm"] - self.mm_embedding.start() - self.mm_retriever.start() - self.lvm.start() - - self.service_builder = ServiceOrchestrator() - - self.service_builder.add(opea_microservices["mm_embedding"]).add(opea_microservices["mm_retriever"]).add(opea_microservices["lvm"]) - self.service_builder.flow_to(self.mm_embedding, self.mm_retriever) - self.service_builder.flow_to(self.mm_retriever, self.lvm) - - self.follow_up_query_service_builder = ServiceOrchestrator() - self.follow_up_query_service_builder.add(self.lvm) - - self.gateway = MultimodalRAGQnAWithVideosGateway(self.service_builder, self.follow_up_query_service_builder, port=9898) - - def tearDown(self): - self.mm_embedding.stop() - self.mm_retriever.stop() - self.lvm.stop() - self.gateway.stop() - - async def test_schedule(self): + @classmethod + def setUpClass(cls): + cls.mm_embedding = opea_microservices["mm_embedding"] + cls.mm_retriever = opea_microservices["mm_retriever"] + cls.lvm = opea_microservices["lvm"] + cls.mm_embedding.start() + cls.mm_retriever.start() + cls.lvm.start() + + cls.service_builder = ServiceOrchestrator() + + cls.service_builder.add(opea_microservices["mm_embedding"]).add(opea_microservices["mm_retriever"]).add(opea_microservices["lvm"]) + cls.service_builder.flow_to(cls.mm_embedding, cls.mm_retriever) + cls.service_builder.flow_to(cls.mm_retriever, cls.lvm) + + cls.follow_up_query_service_builder = ServiceOrchestrator() + cls.follow_up_query_service_builder.add(cls.lvm) + + cls.gateway = MultimodalRAGQnAWithVideosGateway(cls.service_builder, cls.follow_up_query_service_builder, port=9898) + + @classmethod + def tearDownClass(cls): + cls.mm_embedding.stop() + cls.mm_retriever.stop() + cls.lvm.stop() + cls.gateway.stop() + + async def test_service_builder_schedule(self): result_dict, _ = await self.service_builder.schedule(initial_inputs={"text": "hello, "}) self.assertEqual(result_dict[self.lvm.name]["text"], "hello, opea project!") + async def test_follow_up_query_service_builder_schedule(self): + result_dict, _ = await self.follow_up_query_service_builder.schedule(initial_inputs={"prompt": "chao, ", "image" : "some image"}) + print(result_dict) + self.assertEqual(result_dict[self.lvm.name]["text"], "chao, opea project!") + + if __name__ == "__main__": unittest.main() From 1a49d88500b578d28e68e806eda79cd71e5f1fa2 Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Thu, 5 Sep 2024 08:39:32 +0000 Subject: [PATCH 25/37] change port of redis 
to resolve CI test Signed-off-by: siddhivelankar23 --- tests/test_retrievers_langchain_multimodal_redis.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_retrievers_langchain_multimodal_redis.sh b/tests/test_retrievers_langchain_multimodal_redis.sh index aa86d4ace0..8823684ad4 100644 --- a/tests/test_retrievers_langchain_multimodal_redis.sh +++ b/tests/test_retrievers_langchain_multimodal_redis.sh @@ -21,11 +21,11 @@ function build_docker_images() { function start_service() { # redis - docker run -d --name test-comps-multimodal-retriever-redis-vector-db -p 5010:6379 -p 5011:8001 -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy redis/redis-stack:7.2.0-v9 + docker run -d --name test-comps-multimodal-retriever-redis-vector-db -p 5688:6379 -p 5011:8001 -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy redis/redis-stack:7.2.0-v9 sleep 10s # redis retriever - export REDIS_URL="redis://${ip_address}:5010" + export REDIS_URL="redis://${ip_address}:5688" export INDEX_NAME="rag-redis" retriever_port=5009 unset http_proxy From 44194bd8e1a35609b3eb5ecbce54a2bb9c66b954 Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Thu, 5 Sep 2024 08:50:14 +0000 Subject: [PATCH 26/37] update test Signed-off-by: siddhivelankar23 --- tests/test_retrievers_langchain_multimodal_redis.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_retrievers_langchain_multimodal_redis.sh b/tests/test_retrievers_langchain_multimodal_redis.sh index 8823684ad4..f2de9cc9ea 100644 --- a/tests/test_retrievers_langchain_multimodal_redis.sh +++ b/tests/test_retrievers_langchain_multimodal_redis.sh @@ -21,11 +21,11 @@ function build_docker_images() { function start_service() { # redis - docker run -d --name test-comps-multimodal-retriever-redis-vector-db -p 5688:6379 -p 5011:8001 -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy redis/redis-stack:7.2.0-v9 + docker run -d --name test-comps-multimodal-retriever-redis-vector-db -p 5689:6379 -p 5011:8001 -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy redis/redis-stack:7.2.0-v9 sleep 10s # redis retriever - export REDIS_URL="redis://${ip_address}:5688" + export REDIS_URL="redis://${ip_address}:5689" export INDEX_NAME="rag-redis" retriever_port=5009 unset http_proxy From 9c485e966e33504b63541fe49ef445836fb1f9f4 Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Thu, 5 Sep 2024 10:43:47 +0000 Subject: [PATCH 27/37] update lvms test Signed-off-by: siddhivelankar23 --- tests/test_lvms_llava.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_lvms_llava.sh b/tests/test_lvms_llava.sh index aeae888663..0f6ec26023 100644 --- a/tests/test_lvms_llava.sh +++ b/tests/test_lvms_llava.sh @@ -30,7 +30,7 @@ function start_service() { unset http_proxy docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5029:8399 --ipc=host opea/llava:comps docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:5029 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5028:9399 --ipc=host opea/lvm:comps - sleep 3m + sleep 8m } function validate_microservice() { From 1d8598e005caf89563f06b9e2f11e647210ab1d2 Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Thu, 5 Sep 2024 11:06:17 +0000 Subject: [PATCH 28/37] update test Signed-off-by: siddhivelankar23 --- comps/cores/mega/gateway.py | 8 +++++-- ...t_multimodalrag_qna_with_videos_gateway.py | 22 +++++++++++-------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git 
a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index 959a073984..db0174ad93 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -108,6 +108,7 @@ def _handle_message(self, messages): messages_dict[msg_role] = message["content"] else: raise ValueError(f"Unknown role: {msg_role}") + print('::::::::', messages_dict) if system_prompt: prompt = system_prompt + "\n" for role, message in messages_dict.items(): @@ -585,10 +586,11 @@ def parser_input(data, TypeClass, key): class MultimodalRAGQnAWithVideosGateway(Gateway): def __init__(self, multimodal_rag_megaservice, lvm_megaservice, host="0.0.0.0", port=9999): + self.lvm_megaservice = lvm_megaservice super().__init__( multimodal_rag_megaservice, host, port, str(MegaServiceEndpoint.MULTIMODAL_RAG_QNA_WITH_VIDEOS), ChatCompletionRequest, ChatCompletionResponse ) - self.lvm_megaservice = lvm_megaservice + async def handle_request(self, request: Request): data = await request.json() @@ -605,10 +607,12 @@ async def handle_request(self, request: Request): print(f"This request include image, thus it is a follow-up query. Using lvm megaservice") prompt, images = prompt_and_image cur_megaservice = self.lvm_megaservice + initial_inputs={"prompt": prompt, "image": images[0]} else: print(f"This is the first query, requiring multimodal retrieval. Using multimodal rag megaservice") prompt = prompt_and_image cur_megaservice = self.megaservice + initial_inputs={"text": prompt} parameters = LLMParams( max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, @@ -620,7 +624,7 @@ async def handle_request(self, request: Request): chat_template=chat_request.chat_template if chat_request.chat_template else None, ) result_dict, runtime_graph = await cur_megaservice.schedule( - initial_inputs={"text": prompt}, llm_parameters=parameters + initial_inputs=initial_inputs, llm_parameters=parameters ) for node, response in result_dict.items(): # the last microservice in this megaservice is LVM. 
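For reference, the two-orchestrator wiring this gateway now expects can be assembled as in the following sketch. It assumes three already-registered microservices named as in the unit test that follows (`mm_embedding`, `mm_retriever`, `lvm`), so the names are illustrative rather than fixed identifiers:

```python
# Sketch of the two-megaservice wiring; service names follow the unit test
# below and are assumptions, not fixed identifiers.
from comps import MultimodalRAGQnAWithVideosGateway, ServiceOrchestrator, opea_microservices

mm_embedding = opea_microservices["mm_embedding"]
mm_retriever = opea_microservices["mm_retriever"]
lvm = opea_microservices["lvm"]

# First turn: embed the query, retrieve multimodal context, then call the LVM.
rag_megaservice = ServiceOrchestrator()
rag_megaservice.add(mm_embedding).add(mm_retriever).add(lvm)
rag_megaservice.flow_to(mm_embedding, mm_retriever)
rag_megaservice.flow_to(mm_retriever, lvm)

# Follow-up turns already carry an image, so they skip retrieval entirely.
lvm_megaservice = ServiceOrchestrator()
lvm_megaservice.add(lvm)

gateway = MultimodalRAGQnAWithVideosGateway(rag_megaservice, lvm_megaservice, port=9898)
```

First-turn queries traverse the full embedding, retrieval, and LVM pipeline, while follow-up turns that include an image are routed to the LVM-only orchestrator.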
diff --git a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py index 0b771b9edb..eb4109a65d 100644 --- a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py +++ b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py @@ -14,7 +14,7 @@ LVMSearchedMultimodalDoc, MultimodalRAGQnAWithVideosGateway ) - +import requests @register_microservice(name="mm_embedding", host="0.0.0.0", port=8083, endpoint="/v1/mm_embedding") async def mm_embedding_add(request: MultimodalDoc) -> EmbedDoc: @@ -90,16 +90,20 @@ def tearDownClass(cls): cls.lvm.stop() cls.gateway.stop() - async def test_service_builder_schedule(self): - result_dict, _ = await self.service_builder.schedule(initial_inputs={"text": "hello, "}) - self.assertEqual(result_dict[self.lvm.name]["text"], "hello, opea project!") + # async def test_service_builder_schedule(self): + # result_dict, _ = await self.service_builder.schedule(initial_inputs={"text": "hello, "}) + # self.assertEqual(result_dict[self.lvm.name]["text"], "hello, opea project!") - async def test_follow_up_query_service_builder_schedule(self): - result_dict, _ = await self.follow_up_query_service_builder.schedule(initial_inputs={"prompt": "chao, ", "image" : "some image"}) - print(result_dict) - self.assertEqual(result_dict[self.lvm.name]["text"], "chao, opea project!") + # async def test_follow_up_query_service_builder_schedule(self): + # result_dict, _ = await self.follow_up_query_service_builder.schedule(initial_inputs={"prompt": "chao, ", "image" : "some image"}) + # print(result_dict) + # self.assertEqual(result_dict[self.lvm.name]["text"], "chao, opea project!") - + def test_multimodal_rag_qna_with_videos_gateway(self): + json_data = {"messages" : "hello, "} + response = requests.post("http://0.0.0.0:9898/v1/mmragvideoqna", json=json_data) + response = response.json() + self.assertEqual(response['choices'][-1]['message']['content'], "hello, opea project!") if __name__ == "__main__": unittest.main() From b757b39a1d22bf3584e5636c4406c2602a3a90e3 Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Thu, 5 Sep 2024 11:21:02 +0000 Subject: [PATCH 29/37] update test Signed-off-by: siddhivelankar23 --- comps/cores/mega/gateway.py | 2 +- ...t_multimodalrag_qna_with_videos_gateway.py | 34 ++++++++++++++++--- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index db0174ad93..bb7cb51f51 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -613,7 +613,7 @@ async def handle_request(self, request: Request): prompt = prompt_and_image cur_megaservice = self.megaservice initial_inputs={"text": prompt} - + print("HELPPPPPP", prompt) parameters = LLMParams( max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, top_k=chat_request.top_k if chat_request.top_k else 10, diff --git a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py index eb4109a65d..04fbd22743 100644 --- a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py +++ b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py @@ -20,7 +20,6 @@ async def mm_embedding_add(request: MultimodalDoc) -> EmbedDoc: req = request.model_dump_json() req_dict = json.loads(req) - print('req_dict_embedding', req_dict) text = req_dict["text"] res = {} res["text"] = text @@ -32,7 +31,6 @@ async def mm_embedding_add(request: MultimodalDoc) -> 
EmbedDoc: async def mm_retriever_add(request: EmbedMultimodalDoc) -> SearchedMultimodalDoc: req = request.model_dump_json() req_dict = json.loads(req) - print('req_dict_retriever', req_dict) text = req_dict["text"] res = {} res['retrieved_docs'] = [] @@ -49,7 +47,6 @@ async def mm_retriever_add(request: EmbedMultimodalDoc) -> SearchedMultimodalDoc async def lvm_add(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> TextDoc: req = request.model_dump_json() req_dict = json.loads(req) - print('req_dict_lvm', req_dict) if isinstance(request, LVMSearchedMultimodalDoc): print("request is the output of multimodal retriever") text = req_dict["initial_query"] @@ -99,8 +96,35 @@ def tearDownClass(cls): # print(result_dict) # self.assertEqual(result_dict[self.lvm.name]["text"], "chao, opea project!") - def test_multimodal_rag_qna_with_videos_gateway(self): - json_data = {"messages" : "hello, "} + # def test_multimodal_rag_qna_with_videos_gateway(self): + # json_data = {"messages" : "hello, "} + # response = requests.post("http://0.0.0.0:9898/v1/mmragvideoqna", json=json_data) + # response = response.json() + # self.assertEqual(response['choices'][-1]['message']['content'], "hello, opea project!") + + def test_follow_up_qna_with_videos_gateway(self): + json_data = { + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "hello, " + }, + { + "type": "image_url", + "image_url": { + "url": "https://www.ilankelman.org/stopsigns/australia.jpg" + } + } + ] + }, + {'role': 'assistant', 'content': 'opea project! '}, + {'role': 'user', 'content': 'chao, '} + ], + "max_tokens": 300 + } response = requests.post("http://0.0.0.0:9898/v1/mmragvideoqna", json=json_data) response = response.json() self.assertEqual(response['choices'][-1]['message']['content'], "hello, opea project!") From 98afd975659284e0c24b0f30410332c150388c83 Mon Sep 17 00:00:00 2001 From: siddhivelankar23 Date: Thu, 5 Sep 2024 17:53:13 +0000 Subject: [PATCH 30/37] update test for multimodal rag qna with videos gateway Signed-off-by: siddhivelankar23 --- comps/cores/mega/gateway.py | 95 ++++++++++++++++++- ...t_multimodalrag_qna_with_videos_gateway.py | 31 +++--- 2 files changed, 107 insertions(+), 19 deletions(-) diff --git a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index bb7cb51f51..a7e580c05e 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -108,7 +108,7 @@ def _handle_message(self, messages): messages_dict[msg_role] = message["content"] else: raise ValueError(f"Unknown role: {msg_role}") - print('::::::::', messages_dict) + if system_prompt: prompt = system_prompt + "\n" for role, message in messages_dict.items(): @@ -590,8 +590,93 @@ def __init__(self, multimodal_rag_megaservice, lvm_megaservice, host="0.0.0.0", super().__init__( multimodal_rag_megaservice, host, port, str(MegaServiceEndpoint.MULTIMODAL_RAG_QNA_WITH_VIDEOS), ChatCompletionRequest, ChatCompletionResponse ) + # this overrides _handle_message method of Gateway + def _handle_message(self, messages): + images = [] + messages_dicts = [] + if isinstance(messages, str): + prompt = messages + else: + messages_dict = {} + system_prompt = "" + prompt = "" + for message in messages: + msg_role = message["role"] + messages_dict = {} + if msg_role == "system": + system_prompt = message["content"] + elif msg_role == "user": + if type(message["content"]) == list: + text = "" + text_list = [item["text"] for item in message["content"] if item["type"] == "text"] + text += "\n".join(text_list) + image_list = [ + 
item["image_url"]["url"] for item in message["content"] if item["type"] == "image_url" + ] + if image_list: + messages_dict[msg_role] = (text, image_list) + else: + messages_dict[msg_role] = text + else: + messages_dict[msg_role] = message["content"] + messages_dicts.append(messages_dict) + elif msg_role == "assistant": + messages_dict[msg_role] = message["content"] + messages_dicts.append(messages_dict) + else: + raise ValueError(f"Unknown role: {msg_role}") + + if system_prompt: + prompt = system_prompt + "\n" + for messages_dict in messages_dicts: + for i, (role, message) in enumerate(messages_dict.items()): + if isinstance(message, tuple): + text, image_list = message + if i == 0: + # do not add role for the very first message. + # this will be added by llava_server + if text: + prompt += text + "\n" + else: + if text: + prompt += role.upper() + ": " + text + "\n" + else: + prompt += role.upper() + ":" + for img in image_list: + # URL + if img.startswith("http://") or img.startswith("https://"): + response = requests.get(img) + image = Image.open(BytesIO(response.content)).convert("RGBA") + image_bytes = BytesIO() + image.save(image_bytes, format="PNG") + img_b64_str = base64.b64encode(image_bytes.getvalue()).decode() + # Local Path + elif os.path.exists(img): + image = Image.open(img).convert("RGBA") + image_bytes = BytesIO() + image.save(image_bytes, format="PNG") + img_b64_str = base64.b64encode(image_bytes.getvalue()).decode() + # Bytes + else: + img_b64_str = img + + images.append(img_b64_str) + else: + if i == 0: + # do not add role for the very first message. + # this will be added by llava_server + if message: + prompt += role.upper() + ": " + message + "\n" + else: + if message: + prompt += role.upper() + ": " + message + "\n" + else: + prompt += role.upper() + ":" + if images: + return prompt, images + else: + return prompt - async def handle_request(self, request: Request): data = await request.json() stream_opt = bool(data.get("stream", False)) @@ -604,16 +689,16 @@ async def handle_request(self, request: Request): # Multimodal RAG QnA With Videos has not yet accepts image as input during QnA. prompt_and_image = self._handle_message(chat_request.messages) if isinstance(prompt_and_image, tuple): - print(f"This request include image, thus it is a follow-up query. Using lvm megaservice") + # print(f"This request include image, thus it is a follow-up query. Using lvm megaservice") prompt, images = prompt_and_image cur_megaservice = self.lvm_megaservice initial_inputs={"prompt": prompt, "image": images[0]} else: - print(f"This is the first query, requiring multimodal retrieval. Using multimodal rag megaservice") + # print(f"This is the first query, requiring multimodal retrieval. 
Using multimodal rag megaservice") prompt = prompt_and_image cur_megaservice = self.megaservice initial_inputs={"text": prompt} - print("HELPPPPPP", prompt) + parameters = LLMParams( max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, top_k=chat_request.top_k if chat_request.top_k else 10, diff --git a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py index 04fbd22743..ccff2a75f1 100644 --- a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py +++ b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py @@ -50,10 +50,13 @@ async def lvm_add(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> TextDoc: if isinstance(request, LVMSearchedMultimodalDoc): print("request is the output of multimodal retriever") text = req_dict["initial_query"] + text += "opea project!" + else: print("request is from user.") text = req_dict["prompt"] - text += "opea project!" + text = f"\nUSER: {text}\nASSISTANT:" + res = {} res['text'] = text return res @@ -87,20 +90,20 @@ def tearDownClass(cls): cls.lvm.stop() cls.gateway.stop() - # async def test_service_builder_schedule(self): - # result_dict, _ = await self.service_builder.schedule(initial_inputs={"text": "hello, "}) - # self.assertEqual(result_dict[self.lvm.name]["text"], "hello, opea project!") + async def test_service_builder_schedule(self): + result_dict, _ = await self.service_builder.schedule(initial_inputs={"text": "hello, "}) + self.assertEqual(result_dict[self.lvm.name]["text"], "hello, opea project!") - # async def test_follow_up_query_service_builder_schedule(self): - # result_dict, _ = await self.follow_up_query_service_builder.schedule(initial_inputs={"prompt": "chao, ", "image" : "some image"}) - # print(result_dict) - # self.assertEqual(result_dict[self.lvm.name]["text"], "chao, opea project!") + async def test_follow_up_query_service_builder_schedule(self): + result_dict, _ = await self.follow_up_query_service_builder.schedule(initial_inputs={"prompt": "chao, ", "image" : "some image"}) + # print(result_dict) + self.assertEqual(result_dict[self.lvm.name]["text"], "\nUSER: chao, \nASSISTANT:") - # def test_multimodal_rag_qna_with_videos_gateway(self): - # json_data = {"messages" : "hello, "} - # response = requests.post("http://0.0.0.0:9898/v1/mmragvideoqna", json=json_data) - # response = response.json() - # self.assertEqual(response['choices'][-1]['message']['content'], "hello, opea project!") + def test_multimodal_rag_qna_with_videos_gateway(self): + json_data = {"messages" : "hello, "} + response = requests.post("http://0.0.0.0:9898/v1/mmragvideoqna", json=json_data) + response = response.json() + self.assertEqual(response['choices'][-1]['message']['content'], "hello, opea project!") def test_follow_up_qna_with_videos_gateway(self): json_data = { @@ -127,7 +130,7 @@ def test_follow_up_qna_with_videos_gateway(self): } response = requests.post("http://0.0.0.0:9898/v1/mmragvideoqna", json=json_data) response = response.json() - self.assertEqual(response['choices'][-1]['message']['content'], "hello, opea project!") + self.assertEqual(response['choices'][-1]['message']['content'], "\nUSER: hello, \nASSISTANT: opea project! 
\nUSER: chao, \n\nASSISTANT:") if __name__ == "__main__": unittest.main() From 407ff2bf6a709559281d6190a52975a0005496b7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 18:16:48 +0000 Subject: [PATCH 31/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/cores/mega/gateway.py | 36 ++++--- ...t_multimodalrag_qna_with_videos_gateway.py | 100 ++++++++++-------- 2 files changed, 78 insertions(+), 58 deletions(-) diff --git a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index a7e580c05e..54b54a6740 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -584,13 +584,20 @@ def parser_input(data, TypeClass, key): print("response is ", response) return response + class MultimodalRAGQnAWithVideosGateway(Gateway): def __init__(self, multimodal_rag_megaservice, lvm_megaservice, host="0.0.0.0", port=9999): self.lvm_megaservice = lvm_megaservice super().__init__( - multimodal_rag_megaservice, host, port, str(MegaServiceEndpoint.MULTIMODAL_RAG_QNA_WITH_VIDEOS), ChatCompletionRequest, ChatCompletionResponse + multimodal_rag_megaservice, + host, + port, + str(MegaServiceEndpoint.MULTIMODAL_RAG_QNA_WITH_VIDEOS), + ChatCompletionRequest, + ChatCompletionResponse, ) - # this overrides _handle_message method of Gateway + + # this overrides _handle_message method of Gateway def _handle_message(self, messages): images = [] messages_dicts = [] @@ -625,7 +632,7 @@ def _handle_message(self, messages): messages_dicts.append(messages_dict) else: raise ValueError(f"Unknown role: {msg_role}") - + if system_prompt: prompt = system_prompt + "\n" for messages_dict in messages_dicts: @@ -633,7 +640,7 @@ def _handle_message(self, messages): if isinstance(message, tuple): text, image_list = message if i == 0: - # do not add role for the very first message. + # do not add role for the very first message. # this will be added by llava_server if text: prompt += text + "\n" @@ -663,7 +670,7 @@ def _handle_message(self, messages): images.append(img_b64_str) else: if i == 0: - # do not add role for the very first message. + # do not add role for the very first message. # this will be added by llava_server if message: prompt += role.upper() + ": " + message + "\n" @@ -676,14 +683,12 @@ def _handle_message(self, messages): return prompt, images else: return prompt - + async def handle_request(self, request: Request): data = await request.json() stream_opt = bool(data.get("stream", False)) if stream_opt == True: - print( - f"[ MultimodalRAGQnAWithVideosGateway ] stream=True not used, this has not support streaming yet!" - ) + print("[ MultimodalRAGQnAWithVideosGateway ] stream=True not used, this has not support streaming yet!") stream_opt = False chat_request = ChatCompletionRequest.model_validate(data) # Multimodal RAG QnA With Videos has not yet accepts image as input during QnA. @@ -692,12 +697,12 @@ async def handle_request(self, request: Request): # print(f"This request include image, thus it is a follow-up query. Using lvm megaservice") prompt, images = prompt_and_image cur_megaservice = self.lvm_megaservice - initial_inputs={"prompt": prompt, "image": images[0]} + initial_inputs = {"prompt": prompt, "image": images[0]} else: # print(f"This is the first query, requiring multimodal retrieval. 
Using multimodal rag megaservice") prompt = prompt_and_image cur_megaservice = self.megaservice - initial_inputs={"text": prompt} + initial_inputs = {"text": prompt} parameters = LLMParams( max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, @@ -712,11 +717,12 @@ async def handle_request(self, request: Request): initial_inputs=initial_inputs, llm_parameters=parameters ) for node, response in result_dict.items(): - # the last microservice in this megaservice is LVM. + # the last microservice in this megaservice is LVM. # checking if LVM returns StreamingResponse - # Currently, LVM with LLAVA has not yet supported streaming. + # Currently, LVM with LLAVA has not yet supported streaming. # @TODO: Will need to test this once LVM with LLAVA supports streaming - if ( isinstance(response, StreamingResponse) + if ( + isinstance(response, StreamingResponse) and node == runtime_graph.all_leaves()[-1] and self.megaservice.services[node].service_type == ServiceType.LVM ): @@ -732,4 +738,4 @@ async def handle_request(self, request: Request): finish_reason="stop", ) ) - return ChatCompletionResponse(model="multimodalragqnawithvideos", choices=choices, usage=usage) \ No newline at end of file + return ChatCompletionResponse(model="multimodalragqnawithvideos", choices=choices, usage=usage) diff --git a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py index ccff2a75f1..e2e731de0b 100644 --- a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py +++ b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py @@ -4,18 +4,24 @@ import json import unittest from typing import Union -from comps import ServiceOrchestrator, TextDoc, opea_microservices, register_microservice -from comps import ( - LVMDoc, - MultimodalDoc, - EmbedDoc, - EmbedMultimodalDoc, - SearchedMultimodalDoc, - LVMSearchedMultimodalDoc, - MultimodalRAGQnAWithVideosGateway - ) + import requests +from comps import ( + EmbedDoc, + EmbedMultimodalDoc, + LVMDoc, + LVMSearchedMultimodalDoc, + MultimodalDoc, + MultimodalRAGQnAWithVideosGateway, + SearchedMultimodalDoc, + ServiceOrchestrator, + TextDoc, + opea_microservices, + register_microservice, +) + + @register_microservice(name="mm_embedding", host="0.0.0.0", port=8083, endpoint="/v1/mm_embedding") async def mm_embedding_add(request: MultimodalDoc) -> EmbedDoc: req = request.model_dump_json() @@ -33,16 +39,19 @@ async def mm_retriever_add(request: EmbedMultimodalDoc) -> SearchedMultimodalDoc req_dict = json.loads(req) text = req_dict["text"] res = {} - res['retrieved_docs'] = [] - res['initial_query'] = text - res['top_n'] = 1 - res['metadata'] = [{ - "b64_img_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", - "transcript_for_inference": "yellow image" - }] - res['chat_template'] = "The caption of the image is: '{context}'. {question}" + res["retrieved_docs"] = [] + res["initial_query"] = text + res["top_n"] = 1 + res["metadata"] = [ + { + "b64_img_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", + "transcript_for_inference": "yellow image", + } + ] + res["chat_template"] = "The caption of the image is: '{context}'. 
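
The `chat_template` field returned by this mock retriever illustrates how retrieved context is meant to be folded into the LVM prompt. A hedged sketch of that convention (the real consumer lives in the LVM microservice, so the helper name here is hypothetical):

```python
chat_template = "The caption of the image is: '{context}'. {question}"

def apply_chat_template(template, transcript, question):
    # context: the retrieved transcript_for_inference; question: the user's query.
    return template.format(context=transcript, question=question)

print(apply_chat_template(chat_template, "yellow image", "What is shown?"))
# -> "The caption of the image is: 'yellow image'. What is shown?"
```
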
{question}" return res + @register_microservice(name="lvm", host="0.0.0.0", port=8085, endpoint="/v1/lvm") async def lvm_add(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> TextDoc: req = request.model_dump_json() @@ -51,14 +60,14 @@ async def lvm_add(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> TextDoc: print("request is the output of multimodal retriever") text = req_dict["initial_query"] text += "opea project!" - + else: print("request is from user.") text = req_dict["prompt"] text = f"\nUSER: {text}\nASSISTANT:" - + res = {} - res['text'] = text + res["text"] = text return res @@ -74,14 +83,18 @@ def setUpClass(cls): cls.service_builder = ServiceOrchestrator() - cls.service_builder.add(opea_microservices["mm_embedding"]).add(opea_microservices["mm_retriever"]).add(opea_microservices["lvm"]) + cls.service_builder.add(opea_microservices["mm_embedding"]).add(opea_microservices["mm_retriever"]).add( + opea_microservices["lvm"] + ) cls.service_builder.flow_to(cls.mm_embedding, cls.mm_retriever) cls.service_builder.flow_to(cls.mm_retriever, cls.lvm) cls.follow_up_query_service_builder = ServiceOrchestrator() cls.follow_up_query_service_builder.add(cls.lvm) - cls.gateway = MultimodalRAGQnAWithVideosGateway(cls.service_builder, cls.follow_up_query_service_builder, port=9898) + cls.gateway = MultimodalRAGQnAWithVideosGateway( + cls.service_builder, cls.follow_up_query_service_builder, port=9898 + ) @classmethod def tearDownClass(cls): @@ -95,15 +108,17 @@ async def test_service_builder_schedule(self): self.assertEqual(result_dict[self.lvm.name]["text"], "hello, opea project!") async def test_follow_up_query_service_builder_schedule(self): - result_dict, _ = await self.follow_up_query_service_builder.schedule(initial_inputs={"prompt": "chao, ", "image" : "some image"}) + result_dict, _ = await self.follow_up_query_service_builder.schedule( + initial_inputs={"prompt": "chao, ", "image": "some image"} + ) # print(result_dict) self.assertEqual(result_dict[self.lvm.name]["text"], "\nUSER: chao, \nASSISTANT:") - + def test_multimodal_rag_qna_with_videos_gateway(self): - json_data = {"messages" : "hello, "} + json_data = {"messages": "hello, "} response = requests.post("http://0.0.0.0:9898/v1/mmragvideoqna", json=json_data) response = response.json() - self.assertEqual(response['choices'][-1]['message']['content'], "hello, opea project!") + self.assertEqual(response["choices"][-1]["message"]["content"], "hello, opea project!") def test_follow_up_qna_with_videos_gateway(self): json_data = { @@ -111,26 +126,25 @@ def test_follow_up_qna_with_videos_gateway(self): { "role": "user", "content": [ - { - "type": "text", - "text": "hello, " - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - }, - {'role': 'assistant', 'content': 'opea project! '}, - {'role': 'user', 'content': 'chao, '} + {"type": "text", "text": "hello, "}, + { + "type": "image_url", + "image_url": {"url": "https://www.ilankelman.org/stopsigns/australia.jpg"}, + }, + ], + }, + {"role": "assistant", "content": "opea project! "}, + {"role": "user", "content": "chao, "}, ], - "max_tokens": 300 + "max_tokens": 300, } response = requests.post("http://0.0.0.0:9898/v1/mmragvideoqna", json=json_data) response = response.json() - self.assertEqual(response['choices'][-1]['message']['content'], "\nUSER: hello, \nASSISTANT: opea project! 
\nUSER: chao, \n\nASSISTANT:") + self.assertEqual( + response["choices"][-1]["message"]["content"], + "\nUSER: hello, \nASSISTANT: opea project! \nUSER: chao, \n\nASSISTANT:", + ) + if __name__ == "__main__": unittest.main() From 7478b06c84517c87c6c204d35b137589745bfe8f Mon Sep 17 00:00:00 2001 From: Tiep Le Date: Thu, 5 Sep 2024 18:40:50 +0000 Subject: [PATCH 32/37] add more test to increase coverage Signed-off-by: Tiep Le --- ...t_multimodalrag_qna_with_videos_gateway.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py index e2e731de0b..03c5fcc52e 100644 --- a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py +++ b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py @@ -145,6 +145,42 @@ def test_follow_up_qna_with_videos_gateway(self): "\nUSER: hello, \nASSISTANT: opea project! \nUSER: chao, \n\nASSISTANT:", ) + def test_handle_message(self): + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "hello, "}, + { + "type": "image_url", + "image_url": {"url": "https://www.ilankelman.org/stopsigns/australia.jpg"}, + }, + ], + }, + {"role": "assistant", "content": "opea project! "}, + {"role": "user", "content": "chao, "}, + ] + prompt, images = self.gateway._handle_message(messages) + self.assertEqual(prompt, "hello, \nASSISTANT: opea project! \nUSER: chao, \n") + + def test_handle_message_with_system_prompt(self): + messages = [ + {"role" : "system", "content" : "System Prompt"}, + { + "role": "user", + "content": [ + {"type": "text", "text": "hello, "}, + { + "type": "image_url", + "image_url": {"url": "https://www.ilankelman.org/stopsigns/australia.jpg"}, + }, + ], + }, + {"role": "assistant", "content": "opea project! "}, + {"role": "user", "content": "chao, "}, + ] + prompt, images = self.gateway._handle_message(messages) + self.assertEqual(prompt, "System Prompt\nhello, \nASSISTANT: opea project! \nUSER: chao, \n") if __name__ == "__main__": unittest.main() From 117b8f24e5dfc0a93889cd7188ade6435d94506f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 18:42:04 +0000 Subject: [PATCH 33/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py index 03c5fcc52e..0289c0b95f 100644 --- a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py +++ b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py @@ -165,7 +165,7 @@ def test_handle_message(self): def test_handle_message_with_system_prompt(self): messages = [ - {"role" : "system", "content" : "System Prompt"}, + {"role": "system", "content": "System Prompt"}, { "role": "user", "content": [ @@ -182,5 +182,6 @@ def test_handle_message_with_system_prompt(self): prompt, images = self.gateway._handle_message(messages) self.assertEqual(prompt, "System Prompt\nhello, \nASSISTANT: opea project! 
\nUSER: chao, \n") + if __name__ == "__main__": unittest.main() From b787eda7ab13cc1c5927055f07b87b785c996ae5 Mon Sep 17 00:00:00 2001 From: Tiep Le Date: Thu, 5 Sep 2024 18:58:38 +0000 Subject: [PATCH 34/37] cosmetic Signed-off-by: Tiep Le --- comps/cores/mega/gateway.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index 54b54a6740..fe97a1e241 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -687,7 +687,7 @@ def _handle_message(self, messages): async def handle_request(self, request: Request): data = await request.json() stream_opt = bool(data.get("stream", False)) - if stream_opt == True: + if stream_opt: print("[ MultimodalRAGQnAWithVideosGateway ] stream=True not used, this has not support streaming yet!") stream_opt = False chat_request = ChatCompletionRequest.model_validate(data) From f9fd4b4a0896d5ef85c85d7351e0358ba2db8a2e Mon Sep 17 00:00:00 2001 From: Tiep Le Date: Thu, 5 Sep 2024 19:43:07 +0000 Subject: [PATCH 35/37] add more test Signed-off-by: Tiep Le --- ...t_multimodalrag_qna_with_videos_gateway.py | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py index 0289c0b95f..8ab917d78d 100644 --- a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py +++ b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py @@ -4,7 +4,7 @@ import json import unittest from typing import Union - +from fastapi import Request import requests from comps import ( @@ -182,6 +182,32 @@ def test_handle_message_with_system_prompt(self): prompt, images = self.gateway._handle_message(messages) self.assertEqual(prompt, "System Prompt\nhello, \nASSISTANT: opea project! \nUSER: chao, \n") + async def test_handle_request(self): + json_data = { + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "hello, "}, + { + "type": "image_url", + "image_url": {"url": "https://www.ilankelman.org/stopsigns/australia.jpg"}, + }, + ], + }, + {"role": "assistant", "content": "opea project! "}, + {"role": "user", "content": "chao, "}, + ], + "max_tokens": 300, + } + mock_request = Request(scope={"type":"http"}) + mock_request._json = json_data + res = await self.gateway.handle_request(mock_request) + res = json.loads(res.json()) + self.assertEqual( + res["choices"][-1]["message"]["content"], + "\nUSER: hello, \nASSISTANT: opea project! 
\nUSER: chao, \n\nASSISTANT:", + ) if __name__ == "__main__": unittest.main() From 50dcd3185f4b178062c23402e44869b4753c75ba Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 19:44:39 +0000 Subject: [PATCH 36/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../mega/test_multimodalrag_qna_with_videos_gateway.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py index 8ab917d78d..b858cd99c4 100644 --- a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py +++ b/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py @@ -4,8 +4,9 @@ import json import unittest from typing import Union -from fastapi import Request + import requests +from fastapi import Request from comps import ( EmbedDoc, @@ -200,7 +201,7 @@ async def test_handle_request(self): ], "max_tokens": 300, } - mock_request = Request(scope={"type":"http"}) + mock_request = Request(scope={"type": "http"}) mock_request._json = json_data res = await self.gateway.handle_request(mock_request) res = json.loads(res.json()) @@ -209,5 +210,6 @@ async def test_handle_request(self): "\nUSER: hello, \nASSISTANT: opea project! \nUSER: chao, \n\nASSISTANT:", ) + if __name__ == "__main__": unittest.main() From 693b1c9e9c813a442027670f7e174fb97e71b858 Mon Sep 17 00:00:00 2001 From: Tiep Le Date: Fri, 6 Sep 2024 01:16:22 +0000 Subject: [PATCH 37/37] update name of gateway Signed-off-by: Tiep Le --- comps/__init__.py | 2 +- comps/cores/mega/constants.py | 2 +- comps/cores/mega/gateway.py | 8 ++++---- ...teway.py => test_multimodalrag_with_videos_gateway.py} | 8 ++++---- 4 files changed, 10 insertions(+), 10 deletions(-) rename tests/cores/mega/{test_multimodalrag_qna_with_videos_gateway.py => test_multimodalrag_with_videos_gateway.py} (97%) diff --git a/comps/__init__.py b/comps/__init__.py index 280fc8510a..85c8456c03 100644 --- a/comps/__init__.py +++ b/comps/__init__.py @@ -46,7 +46,7 @@ RetrievalToolGateway, FaqGenGateway, VisualQnAGateway, - MultimodalRAGQnAWithVideosGateway, + MultimodalRAGWithVideosGateway, ) # Telemetry diff --git a/comps/cores/mega/constants.py b/comps/cores/mega/constants.py index aae54fd179..b95a56b081 100644 --- a/comps/cores/mega/constants.py +++ b/comps/cores/mega/constants.py @@ -42,7 +42,7 @@ class MegaServiceEndpoint(Enum): CODE_TRANS = "/v1/codetrans" DOC_SUMMARY = "/v1/docsum" SEARCH_QNA = "/v1/searchqna" - MULTIMODAL_RAG_QNA_WITH_VIDEOS = "/v1/mmragvideoqna" + MULTIMODAL_RAG_WITH_VIDEOS = "/v1/mmragvideoqna" TRANSLATION = "/v1/translation" RETRIEVALTOOL = "/v1/retrievaltool" FAQ_GEN = "/v1/faqgen" diff --git a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index fe97a1e241..97f169c426 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -585,14 +585,14 @@ def parser_input(data, TypeClass, key): return response -class MultimodalRAGQnAWithVideosGateway(Gateway): +class MultimodalRAGWithVideosGateway(Gateway): def __init__(self, multimodal_rag_megaservice, lvm_megaservice, host="0.0.0.0", port=9999): self.lvm_megaservice = lvm_megaservice super().__init__( multimodal_rag_megaservice, host, port, - str(MegaServiceEndpoint.MULTIMODAL_RAG_QNA_WITH_VIDEOS), + str(MegaServiceEndpoint.MULTIMODAL_RAG_WITH_VIDEOS), ChatCompletionRequest, ChatCompletionResponse, ) @@ -688,7 +688,7 @@ async 
def handle_request(self, request: Request):
         data = await request.json()
         stream_opt = bool(data.get("stream", False))
         if stream_opt:
-            print("[ MultimodalRAGQnAWithVideosGateway ] stream=True not used, this has not support streaming yet!")
+            print("[ MultimodalRAGWithVideosGateway ] stream=True not used, streaming is not yet supported!")
             stream_opt = False
         chat_request = ChatCompletionRequest.model_validate(data)
         # Multimodal RAG QnA With Videos has not yet accepts image as input during QnA.
@@ -738,4 +738,4 @@ async def handle_request(self, request: Request):
                     finish_reason="stop",
                 )
             )
-        return ChatCompletionResponse(model="multimodalragqnawithvideos", choices=choices, usage=usage)
+        return ChatCompletionResponse(model="multimodalragwithvideos", choices=choices, usage=usage)
diff --git a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py b/tests/cores/mega/test_multimodalrag_with_videos_gateway.py
similarity index 97%
rename from tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py
rename to tests/cores/mega/test_multimodalrag_with_videos_gateway.py
index 8ab917d78d..3926decd55 100644
--- a/tests/cores/mega/test_multimodalrag_qna_with_videos_gateway.py
+++ b/tests/cores/mega/test_multimodalrag_with_videos_gateway.py
@@ -13,7 +13,7 @@
     LVMDoc,
     LVMSearchedMultimodalDoc,
     MultimodalDoc,
-    MultimodalRAGQnAWithVideosGateway,
+    MultimodalRAGWithVideosGateway,
     SearchedMultimodalDoc,
     ServiceOrchestrator,
     TextDoc,
@@ -92,7 +92,7 @@ def setUpClass(cls):
         cls.follow_up_query_service_builder = ServiceOrchestrator()
         cls.follow_up_query_service_builder.add(cls.lvm)
 
-        cls.gateway = MultimodalRAGQnAWithVideosGateway(
+        cls.gateway = MultimodalRAGWithVideosGateway(
             cls.service_builder, cls.follow_up_query_service_builder, port=9898
         )
 
@@ -114,13 +114,13 @@ async def test_follow_up_query_service_builder_schedule(self):
         # print(result_dict)
         self.assertEqual(result_dict[self.lvm.name]["text"], "\nUSER: chao, \nASSISTANT:")
 
-    def test_multimodal_rag_qna_with_videos_gateway(self):
+    def test_multimodal_rag_with_videos_gateway(self):
         json_data = {"messages": "hello, "}
         response = requests.post("http://0.0.0.0:9898/v1/mmragvideoqna", json=json_data)
         response = response.json()
         self.assertEqual(response["choices"][-1]["message"]["content"], "hello, opea project!")
 
-    def test_follow_up_qna_with_videos_gateway(self):
+    def test_follow_up_mm_rag_with_videos_gateway(self):
         json_data = {
             "messages": [
                 {