diff --git a/VisualQnA/docker/gaudi/README.md b/VisualQnA/docker/gaudi/README.md
index 72d5efb07..27974bdff 100644
--- a/VisualQnA/docker/gaudi/README.md
+++ b/VisualQnA/docker/gaudi/README.md
@@ -24,9 +24,8 @@ docker build --no-cache -t opea/lvm-tgi:latest --build-arg https_proxy=$https_pr
 Since TGI Gaudi has not supported llava-next in main branch, we'll need to build it from a PR branch for now.
 
 ```bash
-git clone https://github.com/yuanwu2017/tgi-gaudi.git
+git clone https://github.com/huggingface/tgi-gaudi.git
 cd tgi-gaudi/
-git checkout v2.0.4
 docker build -t opea/llava-tgi:latest .
 cd ../
 ```
diff --git a/VisualQnA/serving/Dockerfile b/VisualQnA/serving/Dockerfile
deleted file mode 100644
index 63712b9db..000000000
--- a/VisualQnA/serving/Dockerfile
+++ /dev/null
@@ -1,44 +0,0 @@
-
-
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-# SCRIPT USAGE NOTICE: By downloading and using any script file included
-# with the associated software package (such as files with .bat, .cmd, or
-# .JS extensions, Docker files, or any other type of file that, when executed,
-# automatically downloads and/or installs files onto your system) (the “Script File”),
-# it is your obligation to review the Script File to understand what files (e.g.,
-# other software, AI models, AI Datasets) the Script File will download to your system
-# (“Downloaded Files”). Furthermore, by downloading and using the Downloaded Files,
-# even if they are installed through a silent install, you agree to any and all
-# terms and conditions associated with such files, including but not limited to,
-# license terms, notices, or disclaimers.
-
-
-# HABANA environment
-FROM vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1 AS hpu
-RUN rm -rf /etc/ssh/ssh_host*
-
-# Set environment variables
-ENV LANG=en_US.UTF-8
-ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana
-
-# Install required branch
-RUN git clone https://github.com/lkk12014402/optimum-habana.git /optimum-habana -b enable_llava_generation
-
-RUN useradd -m -s /bin/bash user && \
-    mkdir -p /home/user && \
-    chown -R user /home/user/
-
-USER user
-
-COPY requirements.txt /tmp/requirements.txt
-
-# Install dependency
-RUN pip install --no-cache-dir -U -r /tmp/requirements.txt
-
-# work dir should contains the server
-WORKDIR /llava_server
-COPY llava_server /llava_server
-
-ENTRYPOINT ["python", "llava_server.py"]
diff --git a/VisualQnA/serving/llava_server/llava_server.py b/VisualQnA/serving/llava_server/llava_server.py
deleted file mode 100644
index 483a1a9c3..000000000
--- a/VisualQnA/serving/llava_server/llava_server.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-import argparse
-import base64
-import time
-from io import BytesIO
-
-import PIL.Image
-import requests
-import torch
-import uvicorn
-from fastapi import FastAPI, Request
-from fastapi.responses import JSONResponse, Response, StreamingResponse
-from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
-from transformers import pipeline
-
-model_name_or_path = None
-model_dtype = None
-use_hpu_graphs = True
-
-generator = None
-
-app = FastAPI()
-
-
-@app.get("/health")
-async def health() -> Response:
-    """Health check."""
-    return Response(status_code=200)
-
-
-@app.post("/generate")
-async def generate(request: Request) -> Response:  # FIXME batch_size=1 for now, only accept single image
-    request_dict = await request.json()
-    prompt = request_dict.pop("prompt")
-    # image_path = request_dict.pop("image_path")
-    img_b64_str = request_dict.pop("image")  # image is an encoded base64 string
-    max_new_tokens = request_dict.pop("max_new_tokens", 100)
-    # image = PIL.Image.open(requests.get(image_path, stream=True, timeout=3000).raw)
-    image = PIL.Image.open(BytesIO(base64.b64decode(img_b64_str)))
-
-    generate_kwargs = {
-        "lazy_mode": True,
-        "hpu_graphs": use_hpu_graphs,
-        "max_new_tokens": max_new_tokens,
-        "ignore_eos": False,
-    }
-
-    start = time.time()
-    result = generator(image, prompt=prompt, batch_size=1, generate_kwargs=generate_kwargs)
-    end = time.time()
-    result = result[0]["generated_text"].split("ASSISTANT: ")[-1]
-    print(f"result = {result}, time = {(end-start) * 1000 }ms")
-    ret = {"text": result}
-    return JSONResponse(ret)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--host", type=str, default="0.0.0.0")
-    parser.add_argument("--port", type=int, default=8000)
-    parser.add_argument("--model_name_or_path", type=str, default="llava-hf/llava-1.5-7b-hf")
-    parser.add_argument("--use_hpu_graphs", default=True, action="store_true")
-    parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.")
-    parser.add_argument("--bf16", default=True, action="store_true")
-
-    args = parser.parse_args()
-    adapt_transformers_to_gaudi()
-
-    if args.bf16:
-        model_dtype = torch.bfloat16
-    else:
-        model_dtype = torch.float32
-
-    model_name_or_path = args.model_name_or_path
-
-    generator = pipeline(
-        "image-to-text",
-        model=args.model_name_or_path,
-        torch_dtype=model_dtype,
-        device="hpu",
-    )
-
-    # warmup
-    generate_kwargs = {
-        "lazy_mode": True,
-        "hpu_graphs": args.use_hpu_graphs,
-        "max_new_tokens": 100,
-        "ignore_eos": False,
-    }
-    if args.use_hpu_graphs:
-        from habana_frameworks.torch.hpu import wrap_in_hpu_graph
-
-        generator.model = wrap_in_hpu_graph(generator.model)
-    image_paths = ["https://llava-vl.github.io/static/images/view.jpg"]
-    images = []
-    for image_path in image_paths:
-        images.append(PIL.Image.open(requests.get(image_path, stream=True, timeout=3000).raw))
-    for i in range(args.warmup):
-        generator(
-            images,
-            prompt="<image>\nUSER: What's the content of the image?\nASSISTANT:",
-            batch_size=1,
-            generate_kwargs=generate_kwargs,
-        )
-
-    uvicorn.run(
-        app,
-        host=args.host,
-        port=args.port,
-        log_level="debug",
-    )
diff --git a/VisualQnA/serving/requirements.txt b/VisualQnA/serving/requirements.txt
deleted file mode 100644
index a93c455d6..000000000
--- a/VisualQnA/serving/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-eager
-fastapi
-optimum[habana]
-uvicorn
diff --git a/VisualQnA/ui/gradio/app.py b/VisualQnA/ui/gradio/app.py
deleted file mode 100644
index ecd4f5a78..000000000
--- a/VisualQnA/ui/gradio/app.py
+++ /dev/null
@@ -1,290 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-# Some code adapted from https://github.com/haotian-liu/LLaVA/blob/main/llava/serve/gradio_web_server.py
-# and https://github.com/haotian-liu/LLaVA/blob/main/llava/conversation.py
-
-import argparse
-import base64
-import os
-from io import BytesIO
-
-import gradio as gr
-import requests
-
-title_markdown = """
-# 🌋 LLaVA demo on Gaudi2
-"""
-
-tos_markdown = """
-### Terms of use
-By using this service, users are required to agree to the following terms:
-The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
-Please click the "Flag" button if you get any inappropriate answer! We will collect those to keep improving our moderator.
-For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
-"""
-
-title_markdown_cn = """
-# 🌋 在 Gaudi2 上展示 LLaVA
-"""
-
-tos_markdown_cn = """
-### 使用条款
-使用本服务即表示用户同意以下条款:
-本服务为研究预览版,仅供非商业用途。它仅提供有限的安全措施,可能会生成冒犯性内容。严禁将本服务用于任何非法、有害、暴力、种族歧视或色情的目的。本服务可能会收集用户对话数据以用于未来研究。
-为获得最佳体验,请使用台式电脑访问本演示,因为移动设备可能会影响其质量。
-"""
-
-block_css = """
-
-#buttons button {
-    min-width: min(120px,100%);
-}
-
-.upload-container .wrap,
-.upload-container .wrap .or {
-    color: #1f2937;
-}
-
-
-.upload-container .wrap .icon-wrap {
-    color: #e5e7eb;
-    margin-top: 4rem;
-    width: 4rem;
-    height: 3rem;
-}
-
-
-"""
-
-no_change_btn = gr.Button()
-enable_btn = gr.Button(interactive=True)
-disable_btn = gr.Button(interactive=False)
-
-
-def process_image(image, return_pil=False, image_format="PNG", max_len=1344, min_len=672):
-    if max(image.size) > max_len:
-        max_hw, min_hw = max(image.size), min(image.size)
-        aspect_ratio = max_hw / min_hw
-        shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw))
-        longest_edge = int(shortest_edge * aspect_ratio)
-        W, H = image.size
-        if H > W:
-            H, W = longest_edge, shortest_edge
-        else:
-            H, W = shortest_edge, longest_edge
-        image = image.resize((W, H))
-    if return_pil:
-        return image
-    else:
-        buffered = BytesIO()
-        image.save(buffered, format=image_format)
-        img_b64_str = base64.b64encode(buffered.getvalue()).decode()
-        return img_b64_str
-
-
-def handle_llava_request(text, image, max_new_tokens, chat_history):
-    print(f"text: {text}, image: {image}, max_new_tokens: {max_new_tokens}\n")
-    img_b64_str = process_image(image, return_pil=False, image_format="JPEG")
-    img_str = f'<img src="data:image/jpeg;base64,{img_b64_str}" alt="user upload image" />'
-
-    # skip embedding the image in latter messages
-    if len(chat_history) < 1:
-        msg = img_str + text.replace("<image>", "").strip()
-    else:
-        msg = text.replace("<image>", "").strip()
-
-    req_dict = {"prompt": f"<image>\nUSER: {text}\nASSISTANT:", "image": img_b64_str, "max_new_tokens": max_new_tokens}
-    result = requests.post(f"{args.worker_addr}/generate", json=req_dict, proxies={"http": None})
-    answer = result.json()["text"]
-
-    chat_history.append([msg, answer])
-    return [chat_history] + [enable_btn]
-
-
-def clear_history(chat_history, image, text):
-    chat_history = []
-    image = None
-    text = None
-    return [chat_history, image, text] + [disable_btn]
-
-
-def build_demo_cn(embed_mode, cur_dir=None, concurrency_count=10):
-    textbox = gr.Textbox(show_label=False, placeholder="输入文字并按回车键", container=False)
-    with gr.Blocks(title="LLaVA", theme=gr.themes.Default(), css=block_css) as demo:
-        # demo.add(custom_html)
-
-        state = gr.State()
-
-        if not embed_mode:
-            gr.Markdown(title_markdown_cn)
-
-        with gr.Row():
-            with gr.Column(scale=3):
-                imagebox = gr.Image(type="pil", label="图片", interactive=True, elem_id="my_imagebox")
-
-                if cur_dir is None:
-                    cur_dir = os.path.dirname(os.path.abspath(__file__))
-                gr.Examples(
-                    examples=[
-                        [f"{cur_dir}/resources/extreme_ironing.jpg", "这张图片有什么不寻常之处?"],
-                        [
-                            f"{cur_dir}/resources/waterview.jpg",
-                            "当我去那里访问时,我应该注意哪些事情?",
-                        ],
-                    ],
-                    label="请选择一个示例",
-                    inputs=[imagebox, textbox],
-                )
-
-                with gr.Accordion("参数", open=False) as parameter_row:
-                    max_output_tokens = gr.Slider(
-                        minimum=0,
-                        maximum=1024,
-                        value=512,
-                        step=64,
-                        interactive=True,
-                        label="最大输出标记数",
-                    )
-
-            with gr.Column(scale=8):
-                chatbot = gr.Chatbot(
-                    elem_id="chatbot",
-                    label="LLaVA聊天机器人",
-                    height=650,
-                    layout="panel",
-                )
-                with gr.Row():
-                    with gr.Column(scale=8):
-                        textbox.render()
-                    with gr.Column(scale=1, min_width=50):
-                        submit_btn = gr.Button(value="发送", variant="primary")
-                with gr.Row(elem_id="buttons") as button_row:
-                    clear_btn = gr.Button(value="🗑️ 清除", interactive=False)
-
-        if not embed_mode:
-            gr.Markdown(tos_markdown_cn)
-
-        btn_list = [clear_btn]
-
-        clear_btn.click(
-            clear_history,
-            [chatbot, imagebox, textbox],
-            [chatbot, imagebox, textbox] + btn_list,
-        )
-
-        textbox.submit(
-            handle_llava_request,
-            [textbox, imagebox, max_output_tokens, chatbot],
-            [chatbot] + btn_list,
-        )
-
-        submit_btn.click(
-            handle_llava_request,
-            [textbox, imagebox, max_output_tokens, chatbot],
-            [chatbot] + btn_list,
-        )
-
-    return demo
-
-
-def build_demo(embed_mode, cur_dir=None, concurrency_count=10):
-    textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
-    with gr.Blocks(title="LLaVA", theme=gr.themes.Default(), css=block_css) as demo:
-        state = gr.State()
-
-        if not embed_mode:
-            gr.Markdown(title_markdown)
-
-        with gr.Row():
-            with gr.Column(scale=3):
-                imagebox = gr.Image(type="pil")
-
-                if cur_dir is None:
-                    cur_dir = os.path.dirname(os.path.abspath(__file__))
-                gr.Examples(
-                    examples=[
-                        [f"{cur_dir}/resources/extreme_ironing.jpg", "What is unusual about this image?"],
-                        [
-                            f"{cur_dir}/resources/waterview.jpg",
-                            "What are the things I should be cautious about when I visit here?",
-                        ],
-                    ],
-                    inputs=[imagebox, textbox],
-                )
-
-                with gr.Accordion("Parameters", open=False) as parameter_row:
-                    max_output_tokens = gr.Slider(
-                        minimum=0,
-                        maximum=1024,
-                        value=512,
-                        step=64,
-                        interactive=True,
-                        label="Max output tokens",
-                    )
-
-            with gr.Column(scale=8):
-                chatbot = gr.Chatbot(
-                    elem_id="chatbot",
-                    label="LLaVA Chatbot",
-                    height=650,
-                    layout="panel",
-                )
-                with gr.Row():
-                    with gr.Column(scale=8):
-                        textbox.render()
-                    with gr.Column(scale=1, min_width=50):
-                        submit_btn = gr.Button(value="Send", variant="primary")
-                with gr.Row(elem_id="buttons") as button_row:
-                    clear_btn = gr.Button(value="🗑️ Clear", interactive=False)
-
-        if not embed_mode:
-            gr.Markdown(tos_markdown)
-
-        btn_list = [clear_btn]
-
-        clear_btn.click(
-            clear_history,
-            [chatbot, imagebox, textbox],
-            [chatbot, imagebox, textbox] + btn_list,
-        )
-
-        textbox.submit(
-            handle_llava_request,
-            [textbox, imagebox, max_output_tokens, chatbot],
-            [chatbot] + btn_list,
-        )
-
-        submit_btn.click(
-            handle_llava_request,
-            [textbox, imagebox, max_output_tokens, chatbot],
-            [chatbot] + btn_list,
-        )
-
-    return demo
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    # frontend host and port
-    parser.add_argument("--host", type=str, default="0.0.0.0")
-    parser.add_argument("--port", type=int)
-    parser.add_argument("--lang", type=str, default="En")
-
-    # backend worker address
-    parser.add_argument(
-        "--worker-addr", type=str, default="http://localhost:8085", help="The worker address of the LLaVA server."
-    )
-    parser.add_argument("--share", action="store_true")
-    parser.add_argument("--embed", action="store_true")
-    parser.add_argument("--concurrency-count", type=int, default=16)
-
-    args = parser.parse_args()
-    print(args)
-    selectedLang = args.lang
-    if selectedLang == "CN":
-        demo = build_demo_cn(args.embed, concurrency_count=args.concurrency_count)
-    else:
-        demo = build_demo(args.embed, concurrency_count=args.concurrency_count)
-
-    demo.queue(api_open=False).launch(server_name=args.host, server_port=args.port, share=args.share)
diff --git a/VisualQnA/ui/gradio/requirements.txt b/VisualQnA/ui/gradio/requirements.txt
deleted file mode 100644
index 39d06f8dc..000000000
--- a/VisualQnA/ui/gradio/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-gradio
-gradio_client
-requests
diff --git a/VisualQnA/ui/gradio/resources/extreme_ironing.jpg b/VisualQnA/ui/gradio/resources/extreme_ironing.jpg
deleted file mode 100644
index 638b07883..000000000
Binary files a/VisualQnA/ui/gradio/resources/extreme_ironing.jpg and /dev/null differ
diff --git a/VisualQnA/ui/gradio/resources/waterview.jpg b/VisualQnA/ui/gradio/resources/waterview.jpg
deleted file mode 100644
index 6f44ebaba..000000000
Binary files a/VisualQnA/ui/gradio/resources/waterview.jpg and /dev/null differ