diff --git a/examples/AudioQnA/README.md b/examples/AudioQnA/README.md
deleted file mode 100644
index 918a7997..00000000
--- a/examples/AudioQnA/README.md
+++ /dev/null
@@ -1,48 +0,0 @@
-# AudioQnA Accuracy Evaluation
-
-## Dataset
-
-
-We evaluate ASR accuracy on the LibriSpeech test set ([andreagasparini/librispeech_test_only](https://huggingface.co/datasets/andreagasparini/librispeech_test_only)), which contains 2620 audio-text records.
-
-## Metrics
-
-We evaluate the WER (Word Error Rate) metric of the ASR microservice.
-
-## Evaluation
-
-### Launch ASR microservice
-
-Launch the ASR microservice with the following commands. For more details, please refer to the [documentation](https://github.com/opea-project/GenAIComps/tree/main/comps/asr).
-
-```bash
-git clone https://github.com/opea-project/GenAIComps
-cd GenAIComps
-docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
-# change --model_name_or_path to the model you want to evaluate
-docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper:latest --model_name_or_path "openai/whisper-tiny"
-```
-
-### Evaluate
-
-Install dependencies:
-
-```
-pip install -r requirements.txt
-```
-
-Evaluate the ASR accuracy, either locally or against the running microservice:
-```py
-# validate the local (offline) model
-# python local_eval.py
-# validate the online ASR microservice accuracy
-python online_eval.py
-```
-
-### Performance Result
-Here are the tested results for your reference:
-| Model | WER (%) |
-| --- | ---- |
-|whisper-large-v2| 2.87|
-|whisper-large| 2.7 |
-|whisper-medium| 3.45 |
diff --git a/examples/AudioQnA/local_eval.py b/examples/AudioQnA/local_eval.py
deleted file mode 100644
index 1ef7b6df..00000000
--- a/examples/AudioQnA/local_eval.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-import torch
-from datasets import load_dataset
-from evaluate import load
-from transformers import WhisperForConditionalGeneration, WhisperProcessor
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-MODEL_NAME = "openai/whisper-large-v2"
-
-librispeech_test_clean = load_dataset(
-    "andreagasparini/librispeech_test_only", "clean", split="test", trust_remote_code=True
-)
-processor = WhisperProcessor.from_pretrained(MODEL_NAME)
-model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)
-
-
-def map_to_pred(batch):
-    audio = batch["audio"]
-    input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
-    batch["reference"] = processor.tokenizer._normalize(batch["text"])
-
-    with torch.no_grad():
-        predicted_ids = model.generate(input_features.to(device))[0]
-    transcription = processor.decode(predicted_ids)
-    batch["prediction"] = processor.tokenizer._normalize(transcription)
-    return batch
-
-
-result = librispeech_test_clean.map(map_to_pred)
-
-wer = load("wer")
-print(100 * wer.compute(references=result["reference"], predictions=result["prediction"]))
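For reference, the WER metric used by both evaluation scripts can be sanity-checked in isolation. The following is a minimal sketch using the same `evaluate`/`jiwer` packages listed in requirements.txt; the reference and prediction strings are made-up examples, not dataset records.

```py
# Minimal sketch: sanity-check the WER metric on hand-written strings.
# Assumes `evaluate` and `jiwer` are installed (see requirements.txt).
from evaluate import load

wer = load("wer")
references = ["the quick brown fox jumps over the lazy dog"]
predictions = ["the quick brown fox jumped over a lazy dog"]
# Two of the nine reference words differ ("jumps" -> "jumped", "the" -> "a"),
# so the expected WER is 2/9 ~= 0.222, i.e. 22.2 when multiplied by 100.
print(100 * wer.compute(references=references, predictions=predictions))
```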
diff --git a/examples/AudioQnA/online_eval.py b/examples/AudioQnA/online_eval.py
deleted file mode 100644
index a7854c95..00000000
--- a/examples/AudioQnA/online_eval.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-import base64
-import json
-
-import requests
-import torch
-from datasets import load_dataset
-from evaluate import load
-from pydub import AudioSegment
-from transformers import WhisperForConditionalGeneration, WhisperProcessor
-
-MODEL_NAME = "openai/whisper-large-v2"
-processor = WhisperProcessor.from_pretrained(MODEL_NAME)
-
-librispeech_test_clean = load_dataset(
-    "andreagasparini/librispeech_test_only", "clean", split="test", trust_remote_code=True
-)
-
-
-def map_to_pred(batch):
-    batch["reference"] = processor.tokenizer._normalize(batch["text"])
-
-    file_path = batch["file"]
-    # rewrite the file path to the local LibriSpeech test-clean layout
-    pidx = file_path.rfind("/")
-    sidx = file_path.rfind(".")
-
-    file_path_prefix = file_path[: pidx + 1]
-    file_path_suffix = file_path[sidx:]
-    file_path_mid = file_path[pidx + 1 : sidx]
-    splits = file_path_mid.split("-")
-    file_path_mid = f"LibriSpeech/test-clean/{splits[0]}/{splits[1]}/{file_path_mid}"
-
-    file_path = file_path_prefix + file_path_mid + file_path_suffix
-
-    audio = AudioSegment.from_file(file_path)
-    audio.export("tmp.wav", format="wav")
-    with open("tmp.wav", "rb") as f:
-        test_audio_base64_str = base64.b64encode(f.read()).decode("utf-8")
-
-    inputs = {"audio": test_audio_base64_str}
-    endpoint = "http://localhost:7066/v1/asr"
-    response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None})
-
-    result_str = response.json()["asr_result"]
-
-    batch["prediction"] = processor.tokenizer._normalize(result_str)
-    return batch
-
-
-result = librispeech_test_clean.map(map_to_pred)
-
-wer = load("wer")
-print(100 * wer.compute(references=result["reference"], predictions=result["prediction"]))
diff --git a/examples/AudioQnA/requirements.txt b/examples/AudioQnA/requirements.txt
deleted file mode 100644
index c3f6c51a..00000000
--- a/examples/AudioQnA/requirements.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-datasets
-evaluate
-jiwer
-librosa
-pydub
-soundfile
-torch
-transformers
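The request/response contract that online_eval.py relies on can also be exercised with a short standalone script. The sketch below assumes the opea/whisper container from the AudioQnA README is listening on localhost:7066 and that `sample.wav` is any local audio file you supply; the payload and the `asr_result` field mirror the evaluation script above.

```py
# Minimal sketch: send one audio file to the ASR microservice and print the transcript.
# Assumes the opea/whisper container is running on localhost:7066 (see the README above)
# and that "sample.wav" is a local audio file you provide.
import base64
import json

import requests

with open("sample.wav", "rb") as f:
    audio_b64 = base64.b64encode(f.read()).decode("utf-8")

response = requests.post(
    "http://localhost:7066/v1/asr",
    data=json.dumps({"audio": audio_b64}),
    headers={"Content-Type": "application/json"},
    proxies={"http": None},  # bypass any configured HTTP proxy for localhost
)
response.raise_for_status()
print(response.json()["asr_result"])
```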
diff --git a/examples/CodeGen/README.md b/examples/CodeGen/README.md
deleted file mode 100644
index 5d118967..00000000
--- a/examples/CodeGen/README.md
+++ /dev/null
@@ -1,92 +0,0 @@
-# CodeGen Accuracy Evaluation
-
-## Evaluation Framework
-We evaluate accuracy with [bigcode-evaluation-harness](https://github.com/bigcode-project/bigcode-evaluation-harness), a framework for evaluating code generation models.
-
-
-## Evaluation FAQs
-
-### Launch CodeGen microservice
-Please refer to the [CodeGen Examples](https://github.com/opea-project/GenAIExamples/tree/main/CodeGen) and follow the guide to deploy the CodeGen megaservice.
-
-Use the following cURL command to test the CodeGen service and ensure that it has started properly:
-```bash
-export CODEGEN_ENDPOINT="http://${your_ip}:7778/v1/codegen"
-curl $CODEGEN_ENDPOINT \
-  -H "Content-Type: application/json" \
-  -d '{"messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}'
-
-```
-
-
-### Generation and Evaluation
-
-For evaluating models on coding tasks, and coding LLMs in particular, we follow [bigcode-evaluation-harness](https://github.com/bigcode-project/bigcode-evaluation-harness) and provide both command-line and function-call usage. [HumanEval](https://huggingface.co/datasets/openai_humaneval), [HumanEval+](https://huggingface.co/datasets/evalplus/humanevalplus), [InstructHumanEval](https://huggingface.co/datasets/codeparrot/instructhumaneval), [APPS](https://huggingface.co/datasets/codeparrot/apps), [MBPP](https://huggingface.co/datasets/mbpp), [MBPP+](https://huggingface.co/datasets/evalplus/mbppplus), and [DS-1000](https://github.com/HKUNLP/DS-1000/) are available, in both completion (left-to-right) and insertion (FIM) mode.
-#### Command line usage
-
-```shell
-cd evals/evaluation/bigcode_evaluation_harness/examples
-python main.py --model Qwen/CodeQwen1.5-7B-Chat \
-    --tasks humaneval \
-    --codegen_url $CODEGEN_ENDPOINT \
-    --max_length_generation 2048 \
-    --batch_size 1 \
-    --save_generations \
-    --save_references \
-    --allow_code_execution
-```
-
-***Note:*** Currently, our framework is designed to execute tasks in full. To ensure the accuracy of results, we advise against using the `limit` or `limit_start` parameters to restrict the number of test samples.
-
-
-### Accuracy Result
-Here is the tested result for your reference:
-```json
-{
-  "humaneval": {
-    "pass@1": 0.7195121951219512
-  },
-  "config": {
-    "prefix": "",
-    "do_sample": true,
-    "temperature": 0.2,
-    "top_k": 0,
-    "top_p": 0.95,
-    "n_samples": 1,
-    "eos": "<|endoftext|>",
-    "seed": 0,
-    "model": "Qwen/CodeQwen1.5-7B-Chat",
-    "modeltype": "causal",
-    "peft_model": null,
-    "revision": null,
-    "use_auth_token": false,
-    "trust_remote_code": false,
-    "tasks": "humaneval",
-    "instruction_tokens": null,
-    "batch_size": 1,
-    "max_length_generation": 2048,
-    "precision": "fp32",
-    "load_in_8bit": false,
-    "load_in_4bit": false,
-    "left_padding": false,
-    "limit": null,
-    "limit_start": 0,
-    "save_every_k_tasks": -1,
-    "postprocess": true,
-    "allow_code_execution": true,
-    "generation_only": false,
-    "load_generations_path": null,
-    "load_data_path": null,
-    "metric_output_path": "evaluation_results.json",
-    "save_generations": true,
-    "load_generations_intermediate_paths": null,
-    "save_generations_path": "generations.json",
-    "save_references": true,
-    "save_references_path": "references.json",
-    "prompt": "prompt",
-    "max_memory_per_gpu": null,
-    "check_references": false,
-    "codegen_url": "http://192.168.123.104:31234/v1/codegen"
-  }
-}
-```
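Before launching a full harness run, the same smoke test shown with cURL above can be issued from Python. This is a minimal sketch, not part of the original example; the endpoint comes from `CODEGEN_ENDPOINT`, and the response is printed raw because its exact schema depends on the deployed megaservice.

```py
# Minimal sketch: smoke-test the CodeGen endpoint from Python before running
# the bigcode-evaluation-harness. Mirrors the cURL command above.
import os

import requests

endpoint = os.getenv("CODEGEN_ENDPOINT", "http://localhost:7778/v1/codegen")
payload = {
    "messages": "Implement a high-level API for a TODO list application. "
    "The API takes as input an operation request and updates the TODO list in place. "
    "If the request is invalid, raise an exception."
}
response = requests.post(endpoint, json=payload, timeout=300)
response.raise_for_status()
# The response schema depends on the deployed megaservice, so print it raw.
print(response.text)
```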
diff --git a/examples/FaqGen/README.md b/examples/FaqGen/README.md
deleted file mode 100644
index 70d66744..00000000
--- a/examples/FaqGen/README.md
+++ /dev/null
@@ -1,63 +0,0 @@
-# FaqGen Performance Evaluation
-
-## Dataset
-We evaluate performance on the QA dataset [Squad_v2](https://huggingface.co/datasets/rajpurkar/squad_v2). We generate FAQs from the "context" column of the validation split, which contains 1204 unique records.
-
-First, download the dataset and put it at "./data".
-
-Extract the unique "context" entries, which will be saved to 'data/sqv2_context.json':
-```
-python get_context.py
-```
-
-## Generate FAQs
-
-### Launch FaqGen microservice
-Please refer to the [FaqGen microservice](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/faq-generation/tgi) to set up a microservice endpoint.
-```
-export FAQ_ENDPOINT="http://${your_ip}:9000/v1/faqgen"
-```
-
-### Generate FAQs with microservice
-Use the microservice endpoint to generate FAQs for the dataset.
-```
-python generate_FAQ.py
-```
-
-Post-process the output to extract the generated FAQs, which will be saved to 'data/sqv2_faq.json'.
-```
-python post_process_FAQ.py
-```
-
-## Evaluate with Ragas
-
-### Launch TGI service
-We use "mistralai/Mixtral-8x7B-Instruct-v0.1" as the LLM judge to evaluate the generated FAQs. First we need to launch an LLM endpoint on Gaudi.
-```
-export HUGGING_FACE_HUB_TOKEN="your_huggingface_token"
-bash launch_tgi.sh
-```
-Get the endpoint:
-```
-export LLM_ENDPOINT="http://${ip_address}:8082"
-```
-
-Verify the service:
-```bash
-curl http://${ip_address}:8082/generate \
-  -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
-  -H 'Content-Type: application/json'
-```
-
-### Evaluate
-Evaluate the generated FAQs with the LLM judge:
-```
-python evaluate.py
-```
-
-### Performance Result
-Here is the tested result for your reference:
-| answer_relevancy | faithfulness | context_utilization | reference_free_rubrics_score |
-| ---- | ---- |---- |---- |
-| 0.7191 | 0.9681 | 0.8964 | 4.4125|
diff --git a/examples/FaqGen/evaluate.py b/examples/FaqGen/evaluate.py
deleted file mode 100644
index a082d093..00000000
--- a/examples/FaqGen/evaluate.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-import json
-import os
-
-from langchain_community.embeddings import HuggingFaceBgeEmbeddings
-
-from evals.metrics.ragas import RagasMetric
-
-llm_endpoint = os.getenv("LLM_ENDPOINT", "http://0.0.0.0:8082")
-
-f = open("data/sqv2_context.json", "r")
-sqv2_context = json.load(f)
-
-f = open("data/sqv2_faq.json", "r")
-sqv2_faq = json.load(f)
-
-templ = """Create a concise FAQs (frequently asked questions and answers) for following text:
-    TEXT: {text}
-    Do not use any prefix or suffix to the FAQ.
-    """
-
-number = 1204
-question = []
-answer = []
-ground_truth = ["None"] * number
-contexts = []
-for i in range(number):
-    inputs = sqv2_context[str(i)]
-    inputs_faq = templ.format_map({"text": inputs})
-    actual_output = sqv2_faq[str(i)]
-
-    question.append(inputs_faq)
-    answer.append(actual_output)
-    contexts.append([inputs_faq])
-
-embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en-v1.5")
-metrics_faq = ["answer_relevancy", "faithfulness", "context_utilization", "reference_free_rubrics_score"]
-metric = RagasMetric(threshold=0.5, model=llm_endpoint, embeddings=embeddings, metrics=metrics_faq)
-
-test_case = {"question": question, "answer": answer, "ground_truth": ground_truth, "contexts": contexts}
-
-metric.measure(test_case)
-print(metric.score)
diff --git a/examples/FaqGen/generate_FAQ.py b/examples/FaqGen/generate_FAQ.py
deleted file mode 100644
index 2ed70b9e..00000000
--- a/examples/FaqGen/generate_FAQ.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-import json
-import os
-import time
-
-import requests
-
-llm_endpoint = os.getenv("FAQ_ENDPOINT", "http://0.0.0.0:9000/v1/faqgen")
-
-f = open("data/sqv2_context.json", "r")
-sqv2_context = json.load(f)
-
-start_time = time.time()
-headers = {"Content-Type": "application/json"}
-for i in range(1204):
-    start_time_tmp = time.time()
-    print(i)
-    inputs = sqv2_context[str(i)]
-    data = {"query": inputs, "max_new_tokens": 128}
-    response = requests.post(llm_endpoint, json=data, headers=headers)
-    f = open(f"data/result/sqv2_faq_{i}", "w")
-    f.write(inputs)
-    f.write(str(response.content, encoding="utf-8"))
-    f.close()
-    print(f"Cost {time.time()-start_time_tmp} seconds")
-print(f"\nFinished! Total time: {time.time()-start_time} seconds\n")
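Because generate_FAQ.py issues 1204 sequential requests, it can be worth smoke-testing a single request first. The sketch below reuses the payload shape from generate_FAQ.py against the FAQ_ENDPOINT described in the README; the sample context is invented for illustration.

```py
# Minimal sketch: send one FAQ-generation request before running the full 1204-record loop.
# Assumes the FaqGen microservice from the README above is reachable at FAQ_ENDPOINT.
import os

import requests

endpoint = os.getenv("FAQ_ENDPOINT", "http://0.0.0.0:9000/v1/faqgen")
sample_context = "The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris."
data = {"query": sample_context, "max_new_tokens": 128}

response = requests.post(endpoint, json=data, headers={"Content-Type": "application/json"}, timeout=120)
response.raise_for_status()
# Print the raw body; it contains the streamed LLM output that
# post_process_FAQ.py later parses to extract the final FAQ text.
print(response.text)
```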
diff --git a/examples/FaqGen/get_context.py b/examples/FaqGen/get_context.py
deleted file mode 100644
index 8cb73a05..00000000
--- a/examples/FaqGen/get_context.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-import json
-import os
-
-import pandas as pd
-
-data_path = "./data"
-data = pd.read_parquet(os.path.join(data_path, "squad_v2/squad_v2/validation-00000-of-00001.parquet"))
-sq_context = list(data["context"].unique())
-sq_context_d = dict()
-for i in range(len(sq_context)):
-    sq_context_d[i] = sq_context[i]
-
-with open(os.path.join(data_path, "sqv2_context.json"), "w") as outfile:
-    json.dump(sq_context_d, outfile)
diff --git a/examples/FaqGen/launch_tgi.sh b/examples/FaqGen/launch_tgi.sh
deleted file mode 100644
index b3e04bbb..00000000
--- a/examples/FaqGen/launch_tgi.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-max_input_tokens=3072
-max_total_tokens=4096
-port_number=8082
-model_name="mistralai/Mixtral-8x7B-Instruct-v0.1"
-volume="./data"
-docker run -it \
-    --name="tgi_Mixtral" \
-    -p $port_number:80 \
-    -v $volume:/data \
-    --runtime=habana \
-    --restart always \
-    -e HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN \
-    -e HABANA_VISIBLE_DEVICES=all \
-    -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
-    -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true \
-    --cap-add=sys_nice \
-    --ipc=host \
-    -e HTTPS_PROXY=$https_proxy \
-    -e HTTP_PROXY=$http_proxy \
-    ghcr.io/huggingface/tgi-gaudi:2.0.1 \
-    --model-id $model_name \
-    --max-input-tokens $max_input_tokens \
-    --max-total-tokens $max_total_tokens \
-    --sharded true \
-    --num-shard 2
diff --git a/examples/FaqGen/post_process_FAQ.py b/examples/FaqGen/post_process_FAQ.py
deleted file mode 100644
index 83e6b835..00000000
--- a/examples/FaqGen/post_process_FAQ.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-import json
-
-faq_dict = {}
-fails = []
-for i in range(1204):
-    data = open(f"data/result/sqv2_faq_{i}", "r").readlines()
-    result = data[-6][6:]
-    # print(result)
-    if "LLMChain/final_output" not in result:
-        print(f"error1: fail for {i}")
-        fails.append(i)
-        continue
-    try:
-        result2 = json.loads(result)
-        result3 = result2["ops"][0]["value"]["text"]
-        faq_dict[str(i)] = result3
-    except Exception:
-        print(f"error2: fail for {i}")
-        fails.append(i)
-        continue
-with open("data/sqv2_faq.json", "w") as outfile:
-    json.dump(faq_dict, outfile)
-print("Failure index:")
-print(fails)
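As a quick sanity check before running evaluate.py, the post-processed output can be compared against the extracted contexts. This is a minimal sketch assuming the `data/sqv2_context.json` and `data/sqv2_faq.json` files produced by get_context.py and post_process_FAQ.py above.

```py
# Minimal sketch: verify that post-processing produced a FAQ for every context
# before running evaluate.py. Uses the files written by get_context.py and
# post_process_FAQ.py above.
import json

with open("data/sqv2_context.json", "r") as f:
    contexts = json.load(f)
with open("data/sqv2_faq.json", "r") as f:
    faqs = json.load(f)

missing = [k for k in contexts if k not in faqs]
print(f"{len(faqs)}/{len(contexts)} contexts have a generated FAQ")
if missing:
    print(f"Missing indices (re-run generation or post-processing for these): {missing[:10]} ...")
```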