diff --git a/VideoRAGQnA/Dockerfile b/VideoRAGQnA/Dockerfile
new file mode 100644
index 0000000000..7eadbfb8bf
--- /dev/null
+++ b/VideoRAGQnA/Dockerfile
@@ -0,0 +1,33 @@
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM python:3.11-slim
+
+RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
+ libgl1-mesa-glx \
+ libjemalloc-dev \
+ git
+
+RUN useradd -m -s /bin/bash user && \
+ mkdir -p /home/user && \
+ chown -R user /home/user/
+
+WORKDIR /home/user/
+
+RUN git clone https://github.com/opea-project/GenAIComps.git
+
+WORKDIR /home/user/GenAIComps
+RUN pip install --no-cache-dir --upgrade pip && \
+ pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
+
+COPY ./videoragqna.py /home/user/videoragqna.py
+
+ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
+
+USER user
+
+WORKDIR /home/user
+
+ENTRYPOINT ["python", "videoragqna.py"]
diff --git a/VideoRAGQnA/README.md b/VideoRAGQnA/README.md
new file mode 100644
index 0000000000..ed5066221c
--- /dev/null
+++ b/VideoRAGQnA/README.md
@@ -0,0 +1,107 @@
+# VideoRAGQnA Application
+
+Video RAG QnA is a framework that retrieves videos based on a user prompt. It uses only video embeddings to perform vector similarity search in Intel's VDMS vector database and runs all operations on Intel Xeon CPUs. The pipeline supports long-form videos and time-based search.
+
+VideoRAGQnA is implemented on top of [GenAIComps](https://github.com/opea-project/GenAIComps), with the architecture flow chart shown below:
+
+```mermaid
+---
+config:
+ flowchart:
+ nodeSpacing: 100
+ rankSpacing: 100
+ curve: linear
+ theme: base
+ themeVariables:
+ fontSize: 42px
+---
+flowchart LR
+ %% Colors %%
+ classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
+ classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
+ classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
+ classDef invisible fill:transparent,stroke:transparent;
+ style VideoRAGQnA-MegaService stroke:#000000
+ %% Subgraphs %%
+ subgraph VideoRAGQnA-MegaService["VideoRAGQnA-MegaService"]
+ direction LR
+    EM([Embedding]):::blue
+    RET([Retrieval]):::blue
+    RER([Rerank]):::blue
+    LLM([LLM]):::blue
+ end
+ subgraph User Interface
+ direction TB
+ a([User Input Query]):::orchid
+    UI([UI server]):::orchid
+    Ingest([Ingest]):::orchid
+ end
+  subgraph VideoRAGQnA Gateway
+    direction LR
+    invisible1[ ]:::invisible
+    GW([VideoRAGQnA Gateway]):::orange
+ end
+ subgraph .
+    X([OPEA Microservice]):::blue
+ Y{{Open Source Service}}
+ Z([OPEA Gateway]):::orange
+ Z1([UI]):::orchid
+ end
+
+    LOCAL_RER{{Reranking service}}
+    CLIP_EM{{Embedding service}}
+    VDB{{Vector DB}}
+    V_RET{{Retriever service}}
+    Ingest{{Ingest data}}
+    DP([Data Preparation]):::blue
+    LVM_gen{{LLM Service}}
+
+ %% Data Preparation flow
+ %% Ingest data flow
+ direction LR
+ Ingest[Ingest data] -->|a| DP
+ DP <-.->|b| CLIP_EM
+
+ %% Questions interaction
+ direction LR
+ a[User Input Query] -->|1| UI
+ UI -->|2| GW
+ GW <==>|3| VideoRAGQnA-MegaService
+ EM ==>|4| RET
+ RET ==>|5| RER
+ RER ==>|6| LLM
+
+
+ %% Embedding service flow
+ direction TB
+ EM <-.->|3'| CLIP_EM
+ RET <-.->|4'| V_RET
+ RER <-.->|5'| LOCAL_RER
+ LLM <-.->|6'| LVM_gen
+
+ direction TB
+ %% Vector DB interaction
+ V_RET <-.->|d|VDB
+ DP <-.->|d|VDB
+```
+
+This VideoRAGQnA use case performs RAG using LangChain, the Intel VDMS vector database, and Text Generation Inference on Intel Xeon Scalable Processors.
+
+## Deploy VideoRAGQnA Service
+
+The VideoRAGQnA service can be effortlessly deployed on Intel Xeon Scalable Processors.
+
+### Required Models
+
+By default, the embedding and LVM models are set to the values listed below:
+
+| Service | Model |
+| --------- | ---------------------------- |
+| Embedding | openai/clip-vit-base-patch32 |
+| LVM | DAMO-NLP-SG/Video-LLaMA |
+
+### Deploy VideoRAGQnA on Xeon
+
+For full deployment instructions, please check the [Guide](docker_compose/intel/cpu/xeon/README.md).
+
+Currently we support deploying VideoRAGQnA services with Docker Compose, using Docker images `built from source`. Find the corresponding [compose.yaml](./docker_compose/intel/cpu/xeon/compose.yaml).
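+
+As a minimal sketch of the overall flow (the full steps, including ingesting sample videos between the two `docker compose up` commands, are in the guide above):
+
+```bash
+cd GenAIExamples/VideoRAGQnA/docker_compose/intel/cpu/xeon/
+source set_env.sh
+docker volume create video-llama-model
+docker compose up vdms-vector-db dataprep -d  # start the vector DB and dataprep first, then ingest videos
+docker compose up -d                          # bring up the remaining services
+```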
diff --git a/VideoRAGQnA/assets/img/video-rag-qna.gif b/VideoRAGQnA/assets/img/video-rag-qna.gif
new file mode 100644
index 0000000000..45bf7a462a
Binary files /dev/null and b/VideoRAGQnA/assets/img/video-rag-qna.gif differ
diff --git a/VideoRAGQnA/docker_compose/intel/cpu/xeon/README.md b/VideoRAGQnA/docker_compose/intel/cpu/xeon/README.md
new file mode 100644
index 0000000000..830e686dec
--- /dev/null
+++ b/VideoRAGQnA/docker_compose/intel/cpu/xeon/README.md
@@ -0,0 +1,347 @@
+# Build Mega Service of VideoRAGQnA on Xeon
+
+This document outlines the deployment process for a VideoRAGQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Xeon server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `lvm`. We will publish the Docker images to Docker Hub soon, which will simplify the deployment process for this service.
+
+VideoRAGQnA is a pipeline that retrieves videos based on a user prompt. It uses only video embeddings to perform vector similarity search in Intel's VDMS vector database and runs all operations on Intel Xeon CPUs. The pipeline supports long-form videos and time-based search.
+
+## 🚀 Ports used by the microservices
+
+```
+dataprep
+========
+Port 6007 - Open to 0.0.0.0/0
+
+vdms-vector-db
+===============
+Port 8001 - Open to 0.0.0.0/0
+
+embedding
+=========
+Port 6000 - Open to 0.0.0.0/0
+
+retriever
+=========
+Port 7000 - Open to 0.0.0.0/0
+
+reranking
+=========
+Port 8000 - Open to 0.0.0.0/0
+
+lvm video-llama
+===============
+Port 9009 - Open to 0.0.0.0/0
+
+lvm
+===
+Port 9000 - Open to 0.0.0.0/0
+
+videoragqna-xeon-backend-server
+===============================
+Port 8888 - Open to 0.0.0.0/0
+
+videoragqna-xeon-ui-server
+==========================
+Port 5173 - Open to 0.0.0.0/0
+```
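+
+If any of these ports are already in use on the host, the corresponding container will fail to start. As an optional sanity check (a minimal sketch; it assumes `ss` is available on the host), you can verify that the ports are free before deploying:
+
+```bash
+# Report any listeners already bound to the ports used by this example
+for p in 6007 8001 6000 7000 8000 9009 9000 8888 5173; do
+  ss -ltn "( sport = :$p )" | grep -q LISTEN && echo "Port $p is already in use"
+done
+```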
+
+## 🚀 Build Docker Images
+
+First of all, you need to build the Docker images locally. Start by cloning the GenAIComps repository:
+
+```bash
+git clone https://github.com/opea-project/GenAIComps.git
+cd GenAIComps
+```
+
+### 1. Build Embedding Image
+
+```bash
+docker build -t opea/embedding-multimodal:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal_clip/Dockerfile .
+```
+
+### 2. Build Retriever Image
+
+```bash
+docker build -t opea/retriever-vdms:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/vdms/langchain/Dockerfile .
+```
+
+### 3. Build Rerank Image
+
+```bash
+docker build -t opea/reranking-videoragqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/video-rag-qna/Dockerfile .
+```
+
+### 4. Build LVM Image (Xeon)
+
+```bash
+docker build -t opea/video-llama-lvm-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/video-llama/dependency/Dockerfile .
+
+# LVM Service Image
+docker build -t opea/lvm-video-llama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/video-llama/Dockerfile .
+```
+
+### 5. Build Dataprep Image
+
+```bash
+docker build -t opea/dataprep-vdms:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/vdms/multimodal_langchain/Dockerfile .
+cd ..
+```
+
+### 6. Build MegaService Docker Image
+
+To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `videoragqna.py` Python script.
+
+Build MegaService Docker image via below command:
+
+```bash
+git clone https://github.com/opea-project/GenAIExamples.git
+cd GenAIExamples/VideoRAGQnA/
+docker build -t opea/videoragqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+```
+
+### 7. Build UI Docker Image
+
+Build frontend Docker image via below command:
+
+```bash
+cd ui
+docker build -t opea/videoragqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
+```
+
+Then run the command `docker images`; you should see the following 8 Docker images:
+
+1. `opea/dataprep-vdms:latest`
+2. `opea/embedding-multimodal:latest`
+3. `opea/retriever-vdms:latest`
+4. `opea/reranking-videoragqna:latest`
+5. `opea/video-llama-lvm-server:latest`
+6. `opea/lvm-video-llama:latest`
+7. `opea/videoragqna:latest`
+8. `opea/videoragqna-ui:latest`
+
+## 🚀 Start Microservices
+
+### Setup Environment Variables
+
+Since `compose.yaml` consumes some environment variables, you need to set them up in advance as shown below.
+
+**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**
+
+> Replace External_Public_IP below with the actual IPv4 value
+
+```
+export host_ip="External_Public_IP"
+```
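+
+If the address of the server's primary network interface is what you want, you can also detect it automatically (this is what `set_env.sh` does):
+
+```bash
+export host_ip=$(hostname -I | awk '{print $1}')
+```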
+
+**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable**
+
+> Replace Your_Huggingface_API_Token below with your actual Hugging Face API token value
+
+```
+export your_hf_api_token="Your_Huggingface_API_Token"
+```
+
+**Append the value of the public IP address to the no_proxy list**
+
+```
+export your_no_proxy=${your_no_proxy},"External_Public_IP"
+```
+
+Then you can run the commands below or `source set_env.sh` to set all the variables:
+
+```bash
+export no_proxy=${your_no_proxy}
+export http_proxy=${your_http_proxy}
+export https_proxy=${your_https_proxy}
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export EMBEDDING_SERVICE_HOST_IP=${host_ip}
+export RETRIEVER_SERVICE_HOST_IP=${host_ip}
+export RERANK_SERVICE_HOST_IP=${host_ip}
+export LVM_SERVICE_HOST_IP=${host_ip}
+
+export LVM_ENDPOINT="http://${host_ip}:9009"
+export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoragqna"
+export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
+export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos"
+
+export VDMS_HOST=${host_ip}
+export VDMS_PORT=8001
+export INDEX_NAME="mega-videoragqna"
+export LLM_DOWNLOAD="True"
+export USECLIP=1
+
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+```
+
+Note: Please replace `host_ip` with your external IP address; do not use localhost.
+
+### Start all the services with Docker Containers
+
+Before running the Docker Compose command, you need to be in the folder that contains the compose YAML file. To avoid re-downloading the model, we manage the volume separately using an [external volume](https://docs.docker.com/reference/compose-file/volumes/#external).
+
+There are two parts to the pipeline:
+
+- The first is data preparation, with which you can add your videos into the database.
+- The second is the megaservice, which serves as the main service: it takes the user query and consumes the microservices (embedding, retrieval, reranking, and LVM) to produce the response.
+
+In the deployment steps, you need to start the VDMS DB and dataprep services first, then insert some sample data into the database. After that you can bring up the megaservice.
+
+```bash
+cd GenAIExamples/VideoRAGQnA/docker_compose/intel/cpu/xeon/
+
+docker volume create video-llama-model
+docker compose up vdms-vector-db dataprep -d
+sleep 1m # wait for the services to be ready
+
+# Insert some sample data to the DB
+curl -X POST http://${host_ip}:6007/v1/dataprep \
+ -H "Content-Type: multipart/form-data" \
+ -F "files=@./data/op_1_0320241830.mp4"
+
+# Bring up all the other services
+docker compose up -d
+# wait until all the services are up. The LVM server will download models, so it takes ~1.5 hours to get ready.
+```
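+
+Rather than waiting a fixed amount of time, you can watch the service logs for readiness. A minimal sketch, using the same "Uvicorn running on" signal that `tests/test_compose_on_xeon.sh` checks for:
+
+```bash
+# Block until the LVM backend reports that its HTTP server is serving
+until docker logs video-llama-lvm-server 2>&1 | grep -q "Uvicorn running on"; do
+  echo "Waiting for video-llama-lvm-server to finish downloading models..."
+  sleep 60
+done
+```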
+
+### Validate Microservices
+
+1. Dataprep Microservice
+
+Once the microservice is up, ingest the video files into the vector store using the dataprep microservice. Both single and multiple file uploads are supported.
+
+```bash
+# Single file upload
+curl -X POST ${DATAPREP_SERVICE_ENDPOINT} \
+ -H "Content-Type: multipart/form-data" \
+ -F "files=@./file1.mp4"
+# Multiple file upload
+curl -X POST ${DATAPREP_SERVICE_ENDPOINT} \
+ -H "Content-Type: multipart/form-data" \
+ -F "files=@./file1.mp4" \
+ -F "files=@./file2.mp4" \
+ -F "files=@./file3.mp4"
+```
+
+Use the methods below to list and download the videos available in the microservice. The download endpoint is also used by the LVM and UI services.
+
+```bash
+# List available videos
+curl -X 'GET' ${DATAPREP_GET_VIDEO_LIST_ENDPOINT} -H 'accept: application/json'
+# Download available video
+curl -X 'GET' ${DATAPREP_GET_FILE_ENDPOINT}/video_name.mp4 -H 'accept: application/json'
+```
+
+2. Embedding Microservice
+
+```bash
+curl http://${host_ip}:6000/v1/embeddings \
+ -X POST \
+ -d '{"text":"Sample text"}' \
+ -H 'Content-Type: application/json'
+```
+
+3. Retriever Microservice
+
+To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector
+is determined by the embedding model.
+Here we use the model `openai/clip-vit-base-patch32`, whose vector size is 512.
+
+Check the vector dimension of your embedding model and set the `your_embedding` dimension to match it.
+
+```bash
+export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
+curl http://${host_ip}:7000/v1/retrieval \
+ -X POST \
+ -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
+ -H 'Content-Type: application/json'
+```
+
+4. Reranking Microservice
+
+```bash
+curl http://${host_ip}:8000/v1/reranking \
+ -X 'POST' \
+ -H 'accept: application/json' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "retrieved_docs": [{"doc": [{"text": "this is the retrieved text"}]}],
+ "initial_query": "this is the query",
+ "top_n": 1,
+ "metadata": [
+ {"other_key": "value", "video":"top_video_name", "timestamp":"20"}
+ ]
+ }'
+```
+
+5. LVM backend Service
+
+On first startup, this service will take some time to download the LLM model files. After the download is finished, the service will be ready.
+
+Use `docker logs video-llama-lvm-server` to check if the download is finished.
+
+```bash
+curl -X POST \
+ "http://${host_ip}:9009/generate?video_url=silence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" \
+ -H "accept: */*" \
+ -d ''
+```
+
+> To avoid re-downloading the model on restart, please see [here](#clean-microservices)
+
+6. LVM Microservice
+
+This service depends on the LVM backend service above. It can take a long time to become ready on first startup; please wait for the backend to finish before calling it.
+
+```bash
+curl http://${host_ip}:9000/v1/lvm\
+ -X POST \
+ -d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}' \
+ -H 'Content-Type: application/json'
+```
+
+> Please note that the local video file will be deleted after completion to conserve disk space.
+
+7. MegaService
+
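+Before sending a query, you can optionally confirm that the backend is up via its health-check endpoint (the UI performs the same check), assuming `BACKEND_HEALTH_CHECK_ENDPOINT` was exported as shown earlier:
+
+```bash
+curl -X GET ${BACKEND_HEALTH_CHECK_ENDPOINT} -H 'accept: application/json'
+```
+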
+```bash
+curl http://${host_ip}:8888/v1/videoragqna -H "Content-Type: application/json" -d '{
+ "messages": "What is the man doing?",
+ "stream": "True"
+ }'
+```
+
+> Please note that the megaservice supports only streaming output. The first streamed chunk carries the retrieved video metadata (`video_url` and `chunk_start`), followed by the generated text.
+
+## 🚀 Launch the UI
+
+To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
+
+```yaml
+ videoragqna-xeon-ui-server:
+ image: opea/videoragqna-ui:latest
+ ...
+ ports:
+ - "80:5173" # port map to host port 80
+```
+
+Here is an example of running videoragqna:
+
+![project-screenshot](../../../../assets/img/video-rag-qna.gif)
+
+## Clean Microservices
+
+All the allocated resources can be easily removed by running:
+
+```bash
+docker compose -f compose.yaml down
+```
+
+If you plan to restart the service in the future, the above command is enough. The model files are saved in the Docker volume `video-llama-model` and will be preserved on your server. The next time you restart the service, set `export LLM_DOWNLOAD="False"` before starting to reuse the volume.
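+
+For example, a minimal restart sequence that reuses the cached model might look like this (assuming the `video-llama-model` volume still exists):
+
+```bash
+cd GenAIExamples/VideoRAGQnA/docker_compose/intel/cpu/xeon/
+export LLM_DOWNLOAD="False"  # reuse the model already stored in the video-llama-model volume
+docker compose up -d
+```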
+
+To clean the volume:
+
+```bash
+docker volume rm video-llama-model
+```
diff --git a/VideoRAGQnA/docker_compose/intel/cpu/xeon/compose.yaml b/VideoRAGQnA/docker_compose/intel/cpu/xeon/compose.yaml
new file mode 100644
index 0000000000..a379e08af1
--- /dev/null
+++ b/VideoRAGQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -0,0 +1,151 @@
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+services:
+ vdms-vector-db:
+ image: intellabs/vdms:v2.8.0
+ container_name: vdms-vector-db
+ ports:
+ - "8001:55555"
+ dataprep:
+ image: ${REGISTRY:-opea}/dataprep-vdms:${TAG:-latest}
+ container_name: dataprep-vdms-server
+ depends_on:
+ - vdms-vector-db
+ ports:
+ - "6007:6007"
+ environment:
+ no_proxy: ${no_proxy}
+ http_proxy: ${http_proxy}
+ https_proxy: ${https_proxy}
+ VDMS_HOST: ${VDMS_HOST}
+ VDMS_PORT: ${VDMS_PORT}
+ INDEX_NAME: ${INDEX_NAME}
+ entrypoint: sh -c 'sleep 15 && python ingest_videos.py'
+ volumes:
+ - /home/$USER/.cache/clip:/home/user/.cache/clip
+ - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
+ embedding:
+ image: ${REGISTRY:-opea}/embedding-multimodal:${TAG:-latest}
+ container_name: embedding-multimodal-server
+ ports:
+ - "6000:6000"
+ ipc: host
+ environment:
+ no_proxy: ${no_proxy}
+ http_proxy: ${http_proxy}
+ https_proxy: ${https_proxy}
+ volumes:
+ - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
+ restart: unless-stopped
+ retriever:
+ image: ${REGISTRY:-opea}/retriever-vdms:${TAG:-latest}
+ container_name: retriever-vdms-server
+ depends_on:
+ - vdms-vector-db
+ ports:
+ - "7000:7000"
+ ipc: host
+ environment:
+ no_proxy: ${no_proxy}
+ http_proxy: ${http_proxy}
+ https_proxy: ${https_proxy}
+ INDEX_NAME: ${INDEX_NAME}
+ VDMS_HOST: ${VDMS_HOST}
+ VDMS_PORT: ${VDMS_PORT}
+ USECLIP: ${USECLIP}
+ entrypoint: sh -c 'sleep 30 && python retriever_vdms.py'
+ restart: unless-stopped
+ volumes:
+ - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
+ reranking:
+ image: ${REGISTRY:-opea}/reranking-videoragqna:${TAG:-latest}
+ container_name: reranking-videoragqna-server
+ ports:
+ - "8000:8000"
+ ipc: host
+ environment:
+ no_proxy: ${no_proxy}
+ http_proxy: ${http_proxy}
+ https_proxy: ${https_proxy}
+ CHUNK_DURATION: ${CHUNK_DURATION}
+ FILE_SERVER_ENDPOINT: ${DATAPREP_GET_FILE_ENDPOINT}
+ DATAPREP_GET_VIDEO_LIST_ENDPOINT: ${DATAPREP_GET_VIDEO_LIST_ENDPOINT}
+ restart: unless-stopped
+ lvm-video-llama:
+ image: ${REGISTRY:-opea}/video-llama-lvm-server:${TAG:-latest}
+ container_name: video-llama-lvm-server
+ ports:
+ - "9009:9009"
+ ipc: host
+ environment:
+ http_proxy: ${http_proxy}
+ https_proxy: ${https_proxy}
+ no_proxy: ${no_proxy}
+ llm_download: ${LLM_DOWNLOAD}
+ volumes:
+ - "/home/$USER/.cache:/home/user/.cache"
+ - video-llama-model:/home/user/model
+ restart: unless-stopped
+ lvm:
+ image: ${REGISTRY:-opea}/lvm-video-llama:${TAG:-latest}
+ container_name: lvm-video-llama
+ ports:
+ - "9000:9000"
+ ipc: host
+ environment:
+ http_proxy: ${http_proxy}
+ https_proxy: ${https_proxy}
+ no_proxy: ${no_proxy}
+ LVM_ENDPOINT: ${LVM_ENDPOINT}
+ restart: unless-stopped
+ depends_on:
+ - lvm-video-llama
+ videoragqna-xeon-backend-server:
+ image: ${REGISTRY:-opea}/videoragqna:${TAG:-latest}
+ container_name: videoragqna-xeon-backend-server
+ depends_on:
+ - vdms-vector-db
+ - dataprep
+ - embedding
+ - retriever
+ - reranking
+ - lvm-video-llama
+ - lvm
+ ports:
+ - "8888:8888"
+ entrypoint: sh -c 'sleep 45 && python videoragqna.py'
+ environment:
+ http_proxy: ${http_proxy}
+ https_proxy: ${https_proxy}
+ no_proxy: ${no_proxy}
+ MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+ EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
+ RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP}
+ RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP}
+ LVM_SERVICE_HOST_IP: ${LVM_SERVICE_HOST_IP}
+ ipc: host
+ restart: always
+ videoragqna-xeon-ui-server:
+ image: ${REGISTRY:-opea}/videoragqna-ui:${TAG:-latest}
+ container_name: videoragqna-xeon-ui-server
+ depends_on:
+ - videoragqna-xeon-backend-server
+ ports:
+ - "5173:5173"
+ environment:
+ https_proxy: ${https_proxy}
+ http_proxy: ${http_proxy}
+ no_proxy: ${no_proxy}
+ BACKEND_SERVICE_ENDPOINT: ${BACKEND_SERVICE_ENDPOINT}
+ BACKEND_HEALTH_CHECK_ENDPOINT: ${BACKEND_HEALTH_CHECK_ENDPOINT}
+ ipc: host
+ restart: always
+volumes:
+ video-llama-model:
+ external: true
+networks:
+ default:
+ driver: bridge
diff --git a/VideoRAGQnA/docker_compose/intel/cpu/xeon/data/op_1_0320241830.mp4 b/VideoRAGQnA/docker_compose/intel/cpu/xeon/data/op_1_0320241830.mp4
new file mode 100644
index 0000000000..29c5dffcdb
Binary files /dev/null and b/VideoRAGQnA/docker_compose/intel/cpu/xeon/data/op_1_0320241830.mp4 differ
diff --git a/VideoRAGQnA/docker_compose/intel/cpu/xeon/set_env.sh b/VideoRAGQnA/docker_compose/intel/cpu/xeon/set_env.sh
new file mode 100644
index 0000000000..8897e57099
--- /dev/null
+++ b/VideoRAGQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+host_ip=$(hostname -I | awk '{print $1}')
+
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export EMBEDDING_SERVICE_HOST_IP=${host_ip}
+export RETRIEVER_SERVICE_HOST_IP=${host_ip}
+export RERANK_SERVICE_HOST_IP=${host_ip}
+export LVM_SERVICE_HOST_IP=${host_ip}
+
+export LVM_ENDPOINT="http://${host_ip}:9009"
+export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoragqna"
+export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
+export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos"
+
+export VDMS_HOST=${host_ip}
+export VDMS_PORT=8001
+export INDEX_NAME="mega-videoragqna"
+export USECLIP=1
+export LLM_DOWNLOAD="True" # Set to "False" before redeploy LVM server to avoid model download
diff --git a/VideoRAGQnA/docker_image_build/build.yaml b/VideoRAGQnA/docker_image_build/build.yaml
new file mode 100644
index 0000000000..cbcf03ccd0
--- /dev/null
+++ b/VideoRAGQnA/docker_image_build/build.yaml
@@ -0,0 +1,55 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+ videoragqna:
+ build:
+ args:
+ http_proxy: ${http_proxy}
+ https_proxy: ${https_proxy}
+ no_proxy: ${no_proxy}
+ context: ../
+ dockerfile: ./Dockerfile
+ image: ${REGISTRY:-opea}/videoragqna:${TAG:-latest}
+ videoragqna-xeon-ui-server:
+ build:
+ context: ../ui
+ dockerfile: ./docker/Dockerfile
+ extends: videoragqna
+ image: ${REGISTRY:-opea}/videoragqna-ui:${TAG:-latest}
+ dataprep:
+ build:
+ context: GenAIComps
+ dockerfile: comps/dataprep/vdms/multimodal_langchain/Dockerfile
+ extends: videoragqna
+ image: ${REGISTRY:-opea}/dataprep-vdms:${TAG:-latest}
+ embedding:
+ build:
+ context: GenAIComps
+ dockerfile: comps/embeddings/multimodal_clip/Dockerfile
+ extends: videoragqna
+ image: ${REGISTRY:-opea}/embedding-multimodal:${TAG:-latest}
+ retriever:
+ build:
+ context: GenAIComps
+ dockerfile: comps/retrievers/vdms/langchain/Dockerfile
+ extends: videoragqna
+ image: ${REGISTRY:-opea}/retriever-vdms:${TAG:-latest}
+ reranking:
+ build:
+ context: GenAIComps
+ dockerfile: comps/reranks/video-rag-qna/Dockerfile
+ extends: videoragqna
+ image: ${REGISTRY:-opea}/reranking-videoragqna:${TAG:-latest}
+ lvm-video-llama:
+ build:
+ context: GenAIComps
+ dockerfile: comps/lvms/video-llama/dependency/Dockerfile
+ extends: videoragqna
+ image: ${REGISTRY:-opea}/video-llama-lvm-server:${TAG:-latest}
+ lvm:
+ build:
+ context: GenAIComps
+ dockerfile: comps/lvms/video-llama/Dockerfile
+ extends: videoragqna
+ image: ${REGISTRY:-opea}/lvm-video-llama:${TAG:-latest}
diff --git a/VideoRAGQnA/tests/test_compose_on_xeon.sh b/VideoRAGQnA/tests/test_compose_on_xeon.sh
new file mode 100755
index 0000000000..7dc4df845a
--- /dev/null
+++ b/VideoRAGQnA/tests/test_compose_on_xeon.sh
@@ -0,0 +1,252 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -x
+IMAGE_REPO=${IMAGE_REPO:-"opea"}
+IMAGE_TAG=${IMAGE_TAG:-"latest"}
+echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
+echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
+export REGISTRY=${IMAGE_REPO}
+export TAG=${IMAGE_TAG}
+
+WORKPATH=$(dirname "$PWD")
+LOG_PATH="$WORKPATH/tests"
+ip_address=$(hostname -I | awk '{print $1}')
+
+function build_docker_images() {
+ cd $WORKPATH/docker_image_build
+ git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
+
+ echo "Build all the images with --no-cache, check docker_image_build.log for details..."
+ service_list="videoragqna videoragqna-xeon-ui-server dataprep embedding retriever reranking lvm-video-llama lvm"
+ docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
+
+ docker pull intellabs/vdms:v2.8.0
+ docker images && sleep 1s
+}
+
+
+function start_services() {
+ cd $WORKPATH/docker_compose/intel/cpu/xeon/
+
+ source set_env.sh
+ docker volume create video-llama-model
+ docker compose up vdms-vector-db dataprep -d
+ sleep 30s
+
+ # Insert some sample data to the DB
+ HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep \
+ -H "Content-Type: multipart/form-data" \
+ -F "files=@./data/op_1_0320241830.mp4")
+
+ if [ "$HTTP_STATUS" -eq 200 ]; then
+ echo "Inserted some data at the beginning."
+ else
+ echo "Inserted failed at the beginning. Received status was $HTTP_STATUS"
+ docker logs dataprep-vdms-server >> ${LOG_PATH}/dataprep.log
+ exit 1
+ fi
+    # Bring up all the other services
+ docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
+ sleep 1m
+
+ # List of containers running uvicorn
+ list=("dataprep-vdms-server" "embedding-multimodal-server" "retriever-vdms-server" "reranking-videoragqna-server" "video-llama-lvm-server" "lvm-video-llama" "videoragqna-xeon-backend-server")
+
+ # Define the maximum time limit in seconds
+ TIME_LIMIT=5400
+ start_time=$(date +%s)
+
+ check_condition() {
+ local item=$1
+
+ if docker logs $item 2>&1 | grep -q "Uvicorn running on"; then
+ return 0
+ else
+ return 1
+ fi
+ }
+
+ # Main loop
+ while [[ ${#list[@]} -gt 0 ]]; do
+ # Get the current time
+ current_time=$(date +%s)
+ elapsed_time=$((current_time - start_time))
+
+ # Exit if time exceeds the limit
+ if (( elapsed_time >= TIME_LIMIT )); then
+ echo "Time limit exceeded."
+ break
+ fi
+
+ # Iterate through the list
+ for i in "${!list[@]}"; do
+ item=${list[i]}
+ if check_condition "$item"; then
+ echo "Condition met for $item, removing from list."
+ unset list[i]
+ else
+ echo "Condition not met for $item, keeping in list."
+ fi
+ done
+
+ # Clean up the list to remove empty elements
+ list=("${list[@]}")
+
+ # Check if the list is empty
+ if [[ ${#list[@]} -eq 0 ]]; then
+ echo "List is empty. Exiting."
+ break
+ fi
+ sleep 5m
+ done
+
+ if docker logs videoragqna-xeon-ui-server 2>&1 | grep -q "Streamlit app"; then
+ return 0
+ else
+ return 1
+ fi
+
+}
+
+function validate_services() {
+ local URL="$1"
+ local EXPECTED_RESULT="$2"
+ local SERVICE_NAME="$3"
+ local DOCKER_NAME="$4"
+ local INPUT_DATA="$5"
+
+ local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
+ if [ "$HTTP_STATUS" -eq 200 ]; then
+ echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
+
+ local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
+
+ if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
+ echo "[ $SERVICE_NAME ] Content is as expected."
+ else
+ echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
+ docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
+ exit 1
+ fi
+ else
+ echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
+ docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
+ exit 1
+ fi
+ sleep 1s
+}
+
+function validate_microservices() {
+ # Check if the microservices are running correctly.
+    cd $WORKPATH/docker_compose/intel/cpu/xeon/data
+
+ # dataprep microservice
+ HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep \
+ -H "Content-Type: multipart/form-data" \
+ -F "files=@./op_1_0320241830.mp4")
+
+ if [ "$HTTP_STATUS" -eq 200 ]; then
+ echo "Dataprep microservice is running correctly."
+ else
+ echo "Dataprep microservice is not running correctly. Received status was $HTTP_STATUS"
+ docker logs dataprep-vdms-server >> ${LOG_PATH}/dataprep.log
+ exit 1
+ fi
+
+ # Embedding Microservice
+ validate_services \
+ "${ip_address}:6000/v1/embeddings" \
+ "Sample text" \
+ "embedding" \
+ "embedding-multimodal-server" \
+ '{"text":"Sample text"}'
+
+ # Retriever Microservice
+ export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
+ validate_services \
+ "${ip_address}:7000/v1/retrieval" \
+ "retrieved_docs" \
+ "retriever" \
+ "retriever-vdms-server" \
+ "{\"text\":\"test\",\"embedding\":${your_embedding}}"
+
+ # Reranking Microservice
+ validate_services \
+ "${ip_address}:8000/v1/reranking" \
+ "video_url" \
+ "reranking" \
+ "reranking-videoragqna-server" \
+ '{
+ "retrieved_docs": [{"doc": [{"text": "retrieved text"}]}],
+ "initial_query": "query",
+ "top_n": 1,
+ "metadata": [
+ {"other_key": "value", "video":"top_video_name", "timestamp":"20"}
+ ]
+ }'
+
+ # LVM Microservice
+ validate_services \
+ "${ip_address}:9000/v1/lvm" \
+ "silence" \
+ "lvm" \
+ "lvm-video-llama" \
+ '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}'
+
+ sleep 1s
+}
+
+function validate_megaservice() {
+ validate_services \
+ "${ip_address}:8888/v1/videoragqna" \
+ "man" \
+ "videoragqna-xeon-backend-server" \
+ "videoragqna-xeon-backend-server" \
+ '{"messages":"What is the man doing?","stream":"True"}'
+}
+
+function validate_frontend() {
+ HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X GET http://${ip_address}:5173/_stcore/health)
+
+ if [ "$HTTP_STATUS" -eq 200 ]; then
+ echo "Frontend is running correctly."
+ local CONTENT=$(curl -s -X GET http://${ip_address}:5173/_stcore/health)
+ if echo "$CONTENT" | grep -q "ok"; then
+ echo "Frontend Content is as expected."
+ else
+ echo "Frontend Content does not match the expected result: $CONTENT"
+ docker logs videoragqna-xeon-ui-server >> ${LOG_PATH}/ui.log
+ exit 1
+ fi
+ else
+ echo "Frontend is not running correctly. Received status was $HTTP_STATUS"
+ docker logs videoragqna-xeon-ui-server >> ${LOG_PATH}/ui.log
+ exit 1
+ fi
+}
+
+function stop_docker() {
+ cd $WORKPATH/docker_compose/intel/cpu/xeon/
+ docker compose stop && docker compose rm -f
+ docker volume rm video-llama-model
+}
+
+function main() {
+
+ stop_docker
+
+ if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+ start_services
+
+ validate_microservices
+ validate_megaservice
+ validate_frontend
+
+ stop_docker
+ echo y | docker system prune
+
+}
+
+main
diff --git a/VideoRAGQnA/ui/docker/Dockerfile b/VideoRAGQnA/ui/docker/Dockerfile
new file mode 100644
index 0000000000..7220b28ac6
--- /dev/null
+++ b/VideoRAGQnA/ui/docker/Dockerfile
@@ -0,0 +1,21 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM python:3.9-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y curl && \
+ rm -rf /var/lib/apt/lists/*
+
+
+RUN pip install --no-cache-dir --upgrade pip && \
+ pip install --no-cache-dir streamlit
+
+COPY ui.py /app/ui.py
+
+EXPOSE 5173
+
+HEALTHCHECK CMD curl --fail http://localhost:5173/_stcore/health
+
+ENTRYPOINT ["streamlit", "run", "ui.py", "--server.port=5173", "--server.address=0.0.0.0"]
\ No newline at end of file
diff --git a/VideoRAGQnA/ui/ui.py b/VideoRAGQnA/ui/ui.py
new file mode 100644
index 0000000000..9f9c8724e4
--- /dev/null
+++ b/VideoRAGQnA/ui/ui.py
@@ -0,0 +1,207 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import os
+import time
+from io import BytesIO
+
+import requests
+import streamlit as st
+
+BACKEND_SERVICE_ENDPOINT = os.getenv("BACKEND_SERVICE_ENDPOINT", "http://localhost:8888/v1/videoragqna")
+BACKEND_HEALTH_CHECK_ENDPOINT = os.getenv("BACKEND_HEALTH_CHECK_ENDPOINT", "http://localhost:8888/v1/health_check")
+
+
+def perform_health_check():
+ url = BACKEND_HEALTH_CHECK_ENDPOINT
+ response = requests.get(url, headers={"accept": "application/json"})
+ return response
+
+
+def download_video(url):
+ """Download video from URL and return as bytes."""
+ response = requests.get(url)
+ if response.status_code == 200:
+ return BytesIO(response.content)
+ else:
+ st.error(f"Failed to download video. Status code: {response.status_code}")
+ return None
+
+
+def play_video(url, offset):
+ """Play video from URL with specified offset."""
+ with st.spinner("Loading Video ..."):
+ video_bytes = download_video(url)
+ if video_bytes:
+ st.video(video_bytes, start_time=int(offset))
+
+
+def clear_chat_history():
+ st.session_state.example_video = "Enter Text"
+ st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]
+
+
+def handle_selectbox_change():
+ prompt = st.session_state.example_video
+
+ if prompt is not None:
+ st.session_state["prompt"] = prompt
+ st.session_state.messages.append({"role": "user", "content": prompt})
+
+
+def handle_chat_input():
+ print("st.session_state.custom_prompt update", st.session_state.custom_prompt)
+ prompt = st.session_state.custom_prompt
+
+ st.session_state["prompt"] = prompt
+ st.session_state.messages.append({"role": "user", "content": prompt})
+
+
+def handle_message(col):
+ params = None
+ full_response = ""
+
+ # Generate a new response if last message is not from assistant
+ if st.session_state.messages[-1]["role"] != "assistant":
+ # Handle user messages here
+ with st.chat_message("assistant"):
+ placeholder = st.empty()
+ start = time.time()
+ prompt = st.session_state["prompt"]
+ request_data = {"messages": prompt, "stream": "True"}
+ try:
+ response = requests.post(BACKEND_SERVICE_ENDPOINT, data=json.dumps(request_data), stream=True)
+ response.raise_for_status()
+ for chunk in response.iter_content(chunk_size=8192):
+ if chunk:
+ if params is None:
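+                            # The first streamed chunk carries the retrieved video metadata (video_url, chunk_start) as JSON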
+ try:
+ chunk_str = chunk.decode("utf-8").replace("'", '"')
+ params = json.loads(chunk_str)
+
+ video_url = params["video_url"]
+ chunk_start = params["chunk_start"]
+ print("VIDEO NAME USED IN PLAYBACK: ", video_url)
+
+ video_name = video_url.split("/")[-1]
+ full_response += f"Most relevant retrieved video is **{video_name}** \n\n"
+ placeholder.markdown(full_response)
+
+ with col:
+ play_video(video_url, chunk_start)
+
+ except json.JSONDecodeError:
+ print("In the param decode error branch")
+ print(chunk.decode("utf-8"))
+ else:
+ new_text = chunk.decode("utf-8")
+ # print(new_text, end=" ", flush=True)
+ full_response += new_text
+ placeholder.markdown(full_response)
+ # Fake response
+ # video_url = "https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4"
+ # chunk_start=0
+ # video_name = video_url.split('/')[-1]
+ # full_response += f"Most relevant retrieved video is **{video_name}** \n\n"
+ # placeholder.markdown(full_response)
+ # with col:
+ # play_video(video_url, chunk_start)
+ # for i in range(10):
+ # full_response += f"new_text {i} "
+ # time.sleep(1)
+ # placeholder.markdown(full_response)
+
+ except requests.HTTPError as http_err:
+ st.error(f"HTTP error occurred: {http_err}")
+ except requests.RequestException as req_err:
+ st.error(f"Error occurred: {req_err}")
+ except Exception as err:
+ st.error(f"An unexpected error occurred: {err}")
+
+ end = time.time()
+            full_response += f"\n\nGenerated in {(end - start):.4f} seconds."
+ placeholder.markdown(full_response)
+
+ message = {"role": "assistant", "content": full_response}
+
+ st.session_state.messages.append(message)
+
+
+def display_messages():
+ # Display chat messages
+ for message in st.session_state.messages:
+ with st.chat_message(message["role"]):
+ st.write(message["content"])
+
+
+def main():
+ st.set_page_config(initial_sidebar_state="collapsed", layout="wide")
+ st.title("Video RAG QnA")
+ title_alignment = """
+
+ """
+ st.markdown(title_alignment, unsafe_allow_html=True)
+ st.sidebar.button("Clear Chat History", on_click=clear_chat_history)
+
+ placeholder = st.empty()
+
+ # check server health
+ if "health_check" not in st.session_state.keys():
+ with st.spinner("Checking health of the server..."):
+ time.sleep(1)
+ response = perform_health_check()
+ if response.status_code == 200:
+ placeholder.success("Server is healthy!", icon="ā
")
+ time.sleep(1)
+ placeholder.empty() # Remove the message
+ st.session_state["health_check"] = True
+ else:
+ st.error(f"Server health check failed with status code {response.status_code}")
+ st.stop()
+
+ # Initialize conversation state
+ if "messages" not in st.session_state.keys():
+ st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]
+ if "prompt" not in st.session_state.keys():
+ st.session_state["prompt"] = ""
+
+ col1, col2 = st.columns([2, 1])
+
+ with col1:
+ st.selectbox(
+ "Example Prompts",
+ (
+ "Man wearing glasses",
+ "People reading item description",
+ "Man holding red shopping basket",
+ "Was there any person wearing a blue shirt seen today?",
+ "Was there any person wearing a blue shirt seen in the last 6 hours?",
+ "Was there any person wearing a blue shirt seen last Sunday?",
+ "Was a person wearing glasses seen in the last 30 minutes?",
+ "Was a person wearing glasses seen in the last 72 hours?",
+ ),
+ key="example_video",
+ index=None,
+ placeholder="--- Options ---",
+ on_change=handle_selectbox_change,
+ )
+
+ st.chat_input(disabled=False, key="custom_prompt", on_submit=handle_chat_input)
+
+ with col1:
+ display_messages()
+ handle_message(col2)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/VideoRAGQnA/videoragqna.py b/VideoRAGQnA/videoragqna.py
new file mode 100644
index 0000000000..9288821c11
--- /dev/null
+++ b/VideoRAGQnA/videoragqna.py
@@ -0,0 +1,68 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+from comps import MicroService, ServiceOrchestrator, ServiceType, VideoRAGQnAGateway
+
+MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
+MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
+EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
+EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
+RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
+RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
+RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
+RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
+LVM_SERVICE_HOST_IP = os.getenv("LVM_SERVICE_HOST_IP", "0.0.0.0")
+LVM_SERVICE_PORT = int(os.getenv("LVM_SERVICE_PORT", 9000))
+
+
+class VideoRAGQnAService:
+ def __init__(self, host="0.0.0.0", port=8888):
+ self.host = host
+ self.port = port
+ self.megaservice = ServiceOrchestrator()
+
+ def add_remote_service(self):
+ embedding = MicroService(
+ name="embedding",
+ host=EMBEDDING_SERVICE_HOST_IP,
+ port=EMBEDDING_SERVICE_PORT,
+ endpoint="/v1/embeddings",
+ use_remote_service=True,
+ service_type=ServiceType.EMBEDDING,
+ )
+ retriever = MicroService(
+ name="retriever",
+ host=RETRIEVER_SERVICE_HOST_IP,
+ port=RETRIEVER_SERVICE_PORT,
+ endpoint="/v1/retrieval",
+ use_remote_service=True,
+ service_type=ServiceType.RETRIEVER,
+ )
+ rerank = MicroService(
+ name="rerank",
+ host=RERANK_SERVICE_HOST_IP,
+ port=RERANK_SERVICE_PORT,
+ endpoint="/v1/reranking",
+ use_remote_service=True,
+ service_type=ServiceType.RERANK,
+ )
+ lvm = MicroService(
+ name="lvm",
+ host=LVM_SERVICE_HOST_IP,
+ port=LVM_SERVICE_PORT,
+ endpoint="/v1/lvm",
+ use_remote_service=True,
+ service_type=ServiceType.LVM,
+ )
+ self.megaservice.add(embedding).add(retriever).add(rerank).add(lvm)
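+        # Wire the microservices into a linear pipeline: embedding -> retriever -> rerank -> lvm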
+ self.megaservice.flow_to(embedding, retriever)
+ self.megaservice.flow_to(retriever, rerank)
+ self.megaservice.flow_to(rerank, lvm)
+ self.gateway = VideoRAGQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
+
+
+if __name__ == "__main__":
+ videoragqna = VideoRAGQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
+ videoragqna.add_remote_service()