Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add VideoRAGQnA as MMRAG usecase in Example #744

Merged
merged 34 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
7c7eada
Initial commit
BaoHuiling Aug 19, 2024
8622376
refactor ui
BaoHuiling Aug 20, 2024
2eae251
save work
BaoHuiling Aug 30, 2024
9190a6c
save work
BaoHuiling Aug 30, 2024
bd96334
move to sub-folder
BaoHuiling Aug 30, 2024
a74e33e
Merge branch 'opea-project:main' into example-dev
BaoHuiling Sep 2, 2024
e5ff19b
save
BaoHuiling Sep 2, 2024
ed36fff
workable with fake microservice
BaoHuiling Sep 3, 2024
3f741a1
Merge branch 'opea-project:main' into example-dev
BaoHuiling Sep 4, 2024
8404b1f
update dataprep
BaoHuiling Sep 4, 2024
f5842ce
update dataprep
BaoHuiling Sep 5, 2024
2d32fd1
Merge branch 'opea-project:main' into example-dev
BaoHuiling Sep 5, 2024
4019931
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 5, 2024
7d8e334
add test script
BaoHuiling Sep 6, 2024
77648ea
Merge branch 'main' into example-dev
kevinintel Sep 6, 2024
808e434
mv test script
BaoHuiling Sep 6, 2024
d8b4ea6
add readme
BaoHuiling Sep 6, 2024
692dbd7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 6, 2024
ec43c8e
rm type
BaoHuiling Sep 6, 2024
ca9499c
add insertion after dataprep
BaoHuiling Sep 7, 2024
af7f581
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 7, 2024
7462278
rename script
BaoHuiling Sep 10, 2024
094b8ea
Merge branch 'main' into example-dev
BaoHuiling Sep 10, 2024
5bac304
rm upper folder
BaoHuiling Sep 10, 2024
c8045b3
Merge branch 'main' into example-dev
kevinintel Sep 10, 2024
db54560
Merge branch 'main' into example-dev
BaoHuiling Sep 10, 2024
43990ea
update file structure
BaoHuiling Sep 11, 2024
17e7439
Merge branch 'main' into example-dev
BaoHuiling Sep 11, 2024
5e10972
Merge branch 'main' into example-dev
BaoHuiling Sep 11, 2024
91eefd7
use repo/tag in compose
BaoHuiling Sep 11, 2024
7308218
rename image
BaoHuiling Sep 11, 2024
6f26119
update name in script
BaoHuiling Sep 11, 2024
e6c8033
update readme
BaoHuiling Sep 11, 2024
565f82d
Merge branch 'main' into example-dev
BaoHuiling Sep 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MultiModalRAGQnA/VideoRAGQnA/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Placeholder
33 changes: 33 additions & 0 deletions MultiModalRAGQnA/VideoRAGQnA/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@


# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

# System deps: libGL for video/image decoding, jemalloc headers for allocator
# tuning, git to clone GenAIComps below. Remove the apt lists in the SAME
# layer so they never land in the image (matches the UI Dockerfile).
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    git && \
    rm -rf /var/lib/apt/lists/*

# Create an unprivileged user; the service runs as this user below.
RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

WORKDIR /home/user/

RUN git clone https://github.com/opea-project/GenAIComps.git

WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt

COPY ./videoragqna.py /home/user/videoragqna.py

# Make the cloned GenAIComps package importable by videoragqna.py.
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps

USER user

WORKDIR /home/user

ENTRYPOINT ["python", "videoragqna.py"]
54 changes: 54 additions & 0 deletions MultiModalRAGQnA/VideoRAGQnA/docker/docker_build_compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Build-only compose file: each service defines how its image is built.
# NOTE(review): build contexts (`ui`, `GenAIComps`) are resolved relative to
# this file's directory — confirm GenAIComps is checked out there at build time.
services:
  # Megaservice image; also the `extends` base that shares the proxy build
  # args with every other service below.
  videoragqna:
    build:
      args:
        http_proxy: ${http_proxy}
        https_proxy: ${https_proxy}
        no_proxy: ${no_proxy}
      dockerfile: ./Dockerfile
    image: ${REGISTRY:-opea}/videoragqna:${TAG:-latest}
  # Streamlit front end (see ui/docker/Dockerfile).
  videoragqna-ui:
    build:
      context: ui
      dockerfile: ./docker/Dockerfile
    extends: videoragqna
    image: ${REGISTRY:-opea}/videoragqna-xeon-ui:${TAG:-latest}
  dataprep: # TODO: test this when merged
    build:
      context: GenAIComps
      dockerfile: comps/dataprep/vdms/multimodal_langchain/docker/Dockerfile
    extends: videoragqna
    image: ${REGISTRY:-opea}/dataprep-vdms:${TAG:-latest}
  # CLIP-based multimodal embedding microservice.
  embedding:
    build:
      context: GenAIComps
      dockerfile: comps/embeddings/multimodal_clip/docker/Dockerfile
    extends: videoragqna
    image: ${REGISTRY:-opea}/embedding-multimodal:${TAG:-latest}
  # VDMS-backed retriever microservice.
  retriever:
    build:
      context: GenAIComps
      dockerfile: comps/retrievers/langchain/vdms/docker/Dockerfile
    extends: videoragqna
    image: ${REGISTRY:-opea}/retriever-vdms:${TAG:-latest}
  # Video reranking microservice.
  reranking:
    build:
      context: GenAIComps
      dockerfile: comps/reranks/video-rag-qna/docker/Dockerfile
    extends: videoragqna
    image: ${REGISTRY:-opea}/reranking-videoragqna:${TAG:-latest}
  # Video-LLaMA model server (backend for the lvm wrapper below).
  lvm-video-llama:
    build:
      context: GenAIComps
      dockerfile: comps/lvms/video-llama/server/docker/Dockerfile
    extends: videoragqna
    image: ${REGISTRY:-opea}/video-llama-lvm-server:${TAG:-latest}
  # LVM microservice wrapper exposed to the megaservice.
  lvm:
    build:
      context: GenAIComps
      dockerfile: comps/lvms/video-llama/Dockerfile
    extends: videoragqna
    image: ${REGISTRY:-opea}/lvm-video-llama:${TAG:-latest}
21 changes: 21 additions & 0 deletions MultiModalRAGQnA/VideoRAGQnA/docker/ui/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.9-slim

WORKDIR /app

# curl is required by the HEALTHCHECK below; drop apt lists in the same layer
# to keep the image small.
RUN apt-get update && apt-get install -y curl && \
    rm -rf /var/lib/apt/lists/*


RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir streamlit

COPY ui.py /app/ui.py

EXPOSE 5173

# Streamlit serves a built-in health endpoint at /_stcore/health.
HEALTHCHECK CMD curl --fail http://localhost:5173/_stcore/health

ENTRYPOINT ["streamlit", "run", "ui.py", "--server.port=5173", "--server.address=0.0.0.0"]
207 changes: 207 additions & 0 deletions MultiModalRAGQnA/VideoRAGQnA/docker/ui/ui.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import json
import os
import time
from io import BytesIO

import requests
import streamlit as st

# Megaservice endpoint that answers chat requests (override via env var).
BACKEND_SERVICE_ENDPOINT = os.getenv("BACKEND_SERVICE_ENDPOINT", "http://localhost:8888/v1/videoragqna")
# Health probe checked once at startup before the UI accepts input.
BACKEND_HEALTH_CHECK_ENDPOINT = os.getenv("BACKEND_HEALTH_CHECK_ENDPOINT", "http://localhost:8888/v1/health_check")


def perform_health_check():
    """GET the backend health endpoint and return the raw HTTP response."""
    return requests.get(BACKEND_HEALTH_CHECK_ENDPOINT, headers={"accept": "application/json"})


def download_video(url):
    """Download a video and return its content, or None on failure.

    Args:
        url: HTTP(S) URL of the video to fetch.

    Returns:
        io.BytesIO wrapping the response body on HTTP 200; otherwise None,
        with the error surfaced in the Streamlit UI.
    """
    try:
        # A timeout keeps a stalled download from hanging the UI forever;
        # without it requests.get can block indefinitely.
        response = requests.get(url, timeout=60)
    except requests.RequestException as exc:
        # Network failures previously propagated out of this function even
        # though callers expect None on failure.
        st.error(f"Failed to download video: {exc}")
        return None
    if response.status_code == 200:
        return BytesIO(response.content)
    st.error(f"Failed to download video. Status code: {response.status_code}")
    return None


def play_video(url, offset):
    """Download the video at `url` and render it starting `offset` seconds in."""
    with st.spinner("Loading Video ..."):
        content = download_video(url)
        if content:
            st.video(content, start_time=int(offset))


def clear_chat_history():
    """Reset the example selector and restore the assistant's greeting."""
    # st.session_state supports key access interchangeably with attribute access.
    st.session_state["example_video"] = "Enter Text"
    st.session_state["messages"] = [{"role": "assistant", "content": "How may I assist you today?"}]


def handle_selectbox_change():
    """Copy the chosen example prompt into the pending prompt and chat history."""
    selected = st.session_state.example_video
    if selected is not None:
        st.session_state["prompt"] = selected
        st.session_state.messages.append({"role": "user", "content": selected})


def handle_chat_input():
    """Store the user's typed prompt and append it to the chat history."""
    # (Removed a leftover debug print of st.session_state.custom_prompt.)
    prompt = st.session_state.custom_prompt
    st.session_state["prompt"] = prompt
    st.session_state.messages.append({"role": "user", "content": prompt})


def handle_message(col):
    """Stream the assistant's reply for the latest user prompt.

    Posts the pending prompt to the backend with stream=True. The FIRST chunk
    is expected to be a JSON object carrying `video_url` and `chunk_start`
    (used to play the retrieved clip in `col`); every later chunk is plain
    generated text appended to the answer. The finished answer (with timing)
    is appended to the chat history.

    Args:
        col: Streamlit column in which the retrieved video is played.
    """
    params = None  # parsed metadata from the first streamed chunk
    full_response = ""

    # Generate a new response if last message is not from assistant
    if st.session_state.messages[-1]["role"] != "assistant":
        # Handle user messages here
        with st.chat_message("assistant"):
            placeholder = st.empty()
            start = time.time()
            prompt = st.session_state["prompt"]
            # NOTE(review): "stream" is sent as the string "True", not a bool —
            # presumably what the backend expects; confirm against the gateway.
            request_data = {"messages": prompt, "stream": "True"}
            try:
                response = requests.post(BACKEND_SERVICE_ENDPOINT, data=json.dumps(request_data), stream=True)
                response.raise_for_status()
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        if params is None:
                            try:
                                # First chunk: single-quoted dict text from the
                                # backend, normalized to JSON before parsing.
                                chunk_str = chunk.decode("utf-8").replace("'", '"')
                                params = json.loads(chunk_str)

                                video_url = params["video_url"]
                                chunk_start = params["chunk_start"]
                                print("VIDEO NAME USED IN PLAYBACK: ", video_url)

                                video_name = video_url.split("/")[-1]
                                full_response += f"Most relevant retrieved video is **{video_name}** \n\n"
                                placeholder.markdown(full_response)

                                with col:
                                    play_video(video_url, chunk_start)

                            except json.JSONDecodeError:
                                # First chunk wasn't parseable metadata; log and
                                # keep waiting (params stays None).
                                print("In the param decode error branch")
                                print(chunk.decode("utf-8"))
                        else:
                            # Subsequent chunks: plain answer text, rendered
                            # incrementally as it streams in.
                            new_text = chunk.decode("utf-8")
                            # print(new_text, end=" ", flush=True)
                            full_response += new_text
                            placeholder.markdown(full_response)
                # Fake response
                # video_url = "https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4"
                # chunk_start=0
                # video_name = video_url.split('/')[-1]
                # full_response += f"Most relevant retrieved video is **{video_name}** \n\n"
                # placeholder.markdown(full_response)
                # with col:
                #     play_video(video_url, chunk_start)
                # for i in range(10):
                #     full_response += f"new_text {i} "
                #     time.sleep(1)
                #     placeholder.markdown(full_response)

            except requests.HTTPError as http_err:
                st.error(f"HTTP error occurred: {http_err}")
            except requests.RequestException as req_err:
                st.error(f"Error occurred: {req_err}")
            except Exception as err:
                st.error(f"An unexpected error occurred: {err}")

            end = time.time()
            full_response += f"\n\n🚀 Generated in {(end - start):.4f} seconds."
            placeholder.markdown(full_response)

        message = {"role": "assistant", "content": full_response}

        st.session_state.messages.append(message)


def display_messages():
    """Render every stored message as a chat bubble, oldest first."""
    for entry in st.session_state.messages:
        with st.chat_message(entry["role"]):
            st.write(entry["content"])


def main():
    """Build the Streamlit page: health check, prompt widgets, chat, video pane."""
    st.set_page_config(initial_sidebar_state="collapsed", layout="wide")
    st.title("Video RAG QnA")
    # Inline CSS: center the title and constrain the playback video size.
    title_alignment = """
    <style>
    h1 {
    text-align: center
    }

    video.stVideo {
    width: 200px;
    height: 500px;
    }
    </style>
    """
    st.markdown(title_alignment, unsafe_allow_html=True)
    st.sidebar.button("Clear Chat History", on_click=clear_chat_history)

    placeholder = st.empty()

    # check server health
    # Runs only once per session; the flag in session_state skips re-checking
    # on every Streamlit rerun.
    if "health_check" not in st.session_state.keys():
        with st.spinner("Checking health of the server..."):
            time.sleep(1)
            response = perform_health_check()
            if response.status_code == 200:
                placeholder.success("Server is healthy!", icon="✅")
                time.sleep(1)
                placeholder.empty()  # Remove the message
                st.session_state["health_check"] = True
            else:
                st.error(f"Server health check failed with status code {response.status_code}")
                st.stop()

    # Initialize conversation state
    if "messages" not in st.session_state.keys():
        st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]
    if "prompt" not in st.session_state.keys():
        st.session_state["prompt"] = ""

    # Left column (2/3): prompts and chat; right column (1/3): video playback.
    col1, col2 = st.columns([2, 1])

    with col1:
        st.selectbox(
            "Example Prompts",
            (
                "Man wearing glasses",
                "People reading item description",
                "Man holding red shopping basket",
                "Was there any person wearing a blue shirt seen today?",
                "Was there any person wearing a blue shirt seen in the last 6 hours?",
                "Was there any person wearing a blue shirt seen last Sunday?",
                "Was a person wearing glasses seen in the last 30 minutes?",
                "Was a person wearing glasses seen in the last 72 hours?",
            ),
            key="example_video",
            index=None,
            placeholder="--- Options ---",
            on_change=handle_selectbox_change,
        )

    # Free-text prompt entry (chat_input pins itself to the page bottom).
    st.chat_input(disabled=False, key="custom_prompt", on_submit=handle_chat_input)

    with col1:
        display_messages()
        handle_message(col2)


# Entry point when run via `streamlit run ui.py`.
if __name__ == "__main__":
    main()
68 changes: 68 additions & 0 deletions MultiModalRAGQnA/VideoRAGQnA/docker/videoragqna.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os

from comps import MicroService, ServiceOrchestrator, ServiceType, VideoRAGQnAGateway

# Host/port of the megaservice gateway itself.
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
# Remote microservice endpoints, one pair per pipeline stage; all are
# overridable through environment variables for container deployment.
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
LVM_SERVICE_HOST_IP = os.getenv("LVM_SERVICE_HOST_IP", "0.0.0.0")
LVM_SERVICE_PORT = int(os.getenv("LVM_SERVICE_PORT", 9000))


class VideoRAGQnAService:
    """Megaservice that chains embedding -> retriever -> rerank -> lvm behind
    a single VideoRAGQnA gateway."""

    def __init__(self, host="0.0.0.0", port=8888):
        self.host = host
        self.port = port
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        """Register the four remote microservices, wire them in dataflow
        order, and expose the pipeline through the VideoRAGQnA gateway."""
        # (name, host, port, endpoint, service type) for each stage, in order.
        stage_specs = [
            ("embedding", EMBEDDING_SERVICE_HOST_IP, EMBEDDING_SERVICE_PORT, "/v1/embeddings", ServiceType.EMBEDDING),
            ("retriever", RETRIEVER_SERVICE_HOST_IP, RETRIEVER_SERVICE_PORT, "/v1/retrieval", ServiceType.RETRIEVER),
            ("rerank", RERANK_SERVICE_HOST_IP, RERANK_SERVICE_PORT, "/v1/reranking", ServiceType.RERANK),
            ("lvm", LVM_SERVICE_HOST_IP, LVM_SERVICE_PORT, "/v1/lvm", ServiceType.LVM),
        ]
        stages = []
        for name, host, port, endpoint, stype in stage_specs:
            service = MicroService(
                name=name,
                host=host,
                port=port,
                endpoint=endpoint,
                use_remote_service=True,
                service_type=stype,
            )
            self.megaservice.add(service)
            stages.append(service)
        # Connect each stage to the next: embedding -> retriever -> rerank -> lvm.
        for upstream, downstream in zip(stages, stages[1:]):
            self.megaservice.flow_to(upstream, downstream)
        self.gateway = VideoRAGQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)


if __name__ == "__main__":
    # Build the pipeline and start the gateway on the configured host/port.
    videoragqna = VideoRAGQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
    videoragqna.add_remote_service()
Loading
Loading