From 04fe29e9e2503f07c18589bf3ad5f1fb2b80a1c7 Mon Sep 17 00:00:00 2001 From: Xinyao Wang Date: Tue, 3 Dec 2024 10:56:11 +0800 Subject: [PATCH 1/2] refactor example Signed-off-by: Xinyao Wang --- comps/3rd_parties/tgi/docker/README.md | 15 +++++ comps/3rd_parties/tgi/kubernetes/README.md | 0 .../vllm/docker}/Dockerfile.intel_gpu | 0 .../vllm/docker}/build_docker_vllm.sh | 0 .../docker}/build_docker_vllm_openvino.sh | 0 .../vllm/docker}/launch_vllm_service.sh | 0 .../docker}/launch_vllm_service_openvino.sh | 0 comps/3rd_parties/vllm/kubernetes/README.md | 0 .../faq-generation_tgi_langchain.yaml} | 0 .../faq-generation_vllm_langchain.yaml} | 0 .../summarization_tgi_langchain.yaml} | 0 .../summarization_vllm_langchain.yaml} | 0 .../text-generation_native_langchain.yaml} | 0 .../text-generation_native_llama_index.yaml} | 0 .../text-generation_predictionguard.yaml} | 0 .../docker_compose/text-generation_tgi.yaml} | 0 .../text-generation_vllm_langchain.yaml} | 0 .../text-generation_vllm_llama_index.yaml} | 0 comps/llms/deployment/kubernetes/README.md | 0 .../faq-generation/tgi/langchain/Dockerfile | 0 .../faq-generation/tgi/langchain/README.md | 0 .../faq-generation/tgi/langchain/__init__.py | 0 .../tgi/langchain/entrypoint.sh | 0 .../faq-generation/tgi/langchain/llm.py | 0 .../tgi/langchain/requirements-runtime.txt | 0 .../tgi/langchain/requirements.txt | 0 .../faq-generation/vllm/langchain/Dockerfile | 0 .../faq-generation/vllm/langchain/README.md | 0 .../faq-generation/vllm/langchain/__init__.py | 0 .../vllm/langchain/entrypoint.sh | 0 .../faq-generation/vllm/langchain/llm.py | 0 .../vllm/langchain/requirements-runtime.txt | 0 .../vllm/langchain/requirements.txt | 0 comps/llms/{ => src}/requirements.txt | 0 .../summarization/tgi/langchain/Dockerfile | 0 .../summarization/tgi/langchain/README.md | 0 .../summarization/tgi/langchain/__init__.py | 0 .../summarization/tgi/langchain/entrypoint.sh | 0 .../summarization/tgi/langchain/llm.py | 0 .../tgi/langchain/requirements-runtime.txt | 0 .../tgi/langchain/requirements.txt | 0 .../summarization/vllm/langchain/Dockerfile | 0 .../summarization/vllm/langchain/README.md | 0 .../summarization/vllm/langchain/__init__.py | 0 .../vllm/langchain/entrypoint.sh | 0 .../summarization/vllm/langchain/llm.py | 0 .../vllm/langchain/requirements-runtime.txt | 0 .../vllm/langchain/requirements.txt | 0 .../llms/{ => src}/text-generation/README.md | 0 .../native/langchain/Dockerfile | 0 .../native/langchain/README.md | 0 .../text-generation/native/langchain/llm.py | 0 .../native/langchain/requirements.txt | 0 .../native/langchain/template.py | 0 .../text-generation/native/langchain/utils.py | 0 .../native/llama_index/Dockerfile | 0 .../native/llama_index/README.md | 0 .../text-generation/native/llama_index/llm.py | 0 .../native/llama_index/requirements.txt | 0 .../native/llama_index/template.py | 0 .../native/llama_index/utils.py | 0 .../ollama/langchain/Dockerfile | 0 .../ollama/langchain/README.md | 0 .../ollama/langchain/__init__.py | 0 .../ollama/langchain/entrypoint.sh | 0 .../text-generation/ollama/langchain/llm.py | 0 .../ollama/langchain/requirements-runtime.txt | 0 .../ollama/langchain/requirements.txt | 0 .../predictionguard/Dockerfile | 0 .../text-generation/predictionguard/README.md | 0 .../predictionguard/__init__.py | 0 .../predictionguard/entrypoint.sh | 0 .../predictionguard/llm_predictionguard.py | 0 .../predictionguard/requirements.txt | 0 .../{ => src}/text-generation/tgi/Dockerfile | 0 .../{ => src}/text-generation/tgi/README.md | 23 ++----- .../{ 
=> src}/text-generation/tgi/__init__.py | 0 .../text-generation/tgi/entrypoint.sh | 0 .../text-generation/tgi/launch_tgi_service.sh | 0 .../llms/{ => src}/text-generation/tgi/llm.py | 0 .../tgi/requirements-runtime.txt | 0 .../text-generation/tgi/requirements.txt | 0 .../{ => src}/text-generation/tgi/template.py | 0 .../text-generation/vllm/langchain/Dockerfile | 0 .../text-generation/vllm/langchain/README.md | 0 .../langchain/build_docker_microservice.sh | 0 .../vllm/langchain/entrypoint.sh | 0 .../vllm/langchain/launch_microservice.sh | 0 .../text-generation/vllm/langchain/llm.py | 0 .../text-generation/vllm/langchain/query.sh | 0 .../vllm/langchain/requirements-runtime.txt | 0 .../vllm/langchain/requirements.txt | 0 .../vllm/langchain/template.py | 0 .../vllm/llama_index/Dockerfile | 0 .../vllm/llama_index/README.md | 0 .../llama_index/build_docker_microservice.sh | 0 .../vllm/llama_index/entrypoint.sh | 0 .../vllm/llama_index/launch_microservice.sh | 0 .../text-generation/vllm/llama_index/llm.py | 0 .../text-generation/vllm/llama_index/query.sh | 0 .../vllm/llama_index/requirements-runtime.txt | 0 .../vllm/llama_index/requirements.txt | 0 comps/llms/{ => src}/utils/lm-eval/Dockerfile | 0 comps/llms/{ => src}/utils/lm-eval/README.md | 0 .../{ => src}/utils/lm-eval/requirements.txt | 0 .../{ => src}/utils/lm-eval/self_hosted_hf.py | 0 .../dependency/build_docker_vllm.sh | 43 ------------- .../dependency/build_docker_vllm_openvino.sh | 10 --- .../dependency/launch_vllm_service.sh | 44 ------------- .../launch_vllm_service_openvino.sh | 61 ------------------- 110 files changed, 20 insertions(+), 176 deletions(-) create mode 100644 comps/3rd_parties/tgi/docker/README.md create mode 100644 comps/3rd_parties/tgi/kubernetes/README.md rename comps/{llms/text-generation/vllm/langchain/dependency => 3rd_parties/vllm/docker}/Dockerfile.intel_gpu (100%) rename comps/{llms/text-generation/vllm/langchain/dependency => 3rd_parties/vllm/docker}/build_docker_vllm.sh (100%) rename comps/{llms/text-generation/vllm/langchain/dependency => 3rd_parties/vllm/docker}/build_docker_vllm_openvino.sh (100%) rename comps/{llms/text-generation/vllm/langchain/dependency => 3rd_parties/vllm/docker}/launch_vllm_service.sh (100%) rename comps/{llms/text-generation/vllm/langchain/dependency => 3rd_parties/vllm/docker}/launch_vllm_service_openvino.sh (100%) create mode 100644 comps/3rd_parties/vllm/kubernetes/README.md rename comps/llms/{faq-generation/tgi/langchain/docker_compose_llm.yaml => deployment/docker_compose/faq-generation_tgi_langchain.yaml} (100%) rename comps/llms/{faq-generation/vllm/langchain/docker_compose_llm.yaml => deployment/docker_compose/faq-generation_vllm_langchain.yaml} (100%) rename comps/llms/{summarization/tgi/langchain/docker_compose_llm.yaml => deployment/docker_compose/summarization_tgi_langchain.yaml} (100%) rename comps/llms/{summarization/vllm/langchain/docker_compose_llm.yaml => deployment/docker_compose/summarization_vllm_langchain.yaml} (100%) rename comps/llms/{text-generation/native/langchain/docker_compose_llm.yaml => deployment/docker_compose/text-generation_native_langchain.yaml} (100%) rename comps/llms/{text-generation/native/llama_index/docker_compose_llm.yaml => deployment/docker_compose/text-generation_native_llama_index.yaml} (100%) rename comps/llms/{text-generation/predictionguard/docker_compose_llm.yaml => deployment/docker_compose/text-generation_predictionguard.yaml} (100%) rename comps/llms/{text-generation/tgi/docker_compose_llm.yaml => 
deployment/docker_compose/text-generation_tgi.yaml} (100%) rename comps/llms/{text-generation/vllm/langchain/docker_compose_llm.yaml => deployment/docker_compose/text-generation_vllm_langchain.yaml} (100%) rename comps/llms/{text-generation/vllm/llama_index/docker_compose_llm.yaml => deployment/docker_compose/text-generation_vllm_llama_index.yaml} (100%) create mode 100644 comps/llms/deployment/kubernetes/README.md rename comps/llms/{ => src}/faq-generation/tgi/langchain/Dockerfile (100%) rename comps/llms/{ => src}/faq-generation/tgi/langchain/README.md (100%) rename comps/llms/{ => src}/faq-generation/tgi/langchain/__init__.py (100%) rename comps/llms/{ => src}/faq-generation/tgi/langchain/entrypoint.sh (100%) rename comps/llms/{ => src}/faq-generation/tgi/langchain/llm.py (100%) rename comps/llms/{ => src}/faq-generation/tgi/langchain/requirements-runtime.txt (100%) rename comps/llms/{ => src}/faq-generation/tgi/langchain/requirements.txt (100%) rename comps/llms/{ => src}/faq-generation/vllm/langchain/Dockerfile (100%) rename comps/llms/{ => src}/faq-generation/vllm/langchain/README.md (100%) rename comps/llms/{ => src}/faq-generation/vllm/langchain/__init__.py (100%) rename comps/llms/{ => src}/faq-generation/vllm/langchain/entrypoint.sh (100%) rename comps/llms/{ => src}/faq-generation/vllm/langchain/llm.py (100%) rename comps/llms/{ => src}/faq-generation/vllm/langchain/requirements-runtime.txt (100%) rename comps/llms/{ => src}/faq-generation/vllm/langchain/requirements.txt (100%) rename comps/llms/{ => src}/requirements.txt (100%) rename comps/llms/{ => src}/summarization/tgi/langchain/Dockerfile (100%) rename comps/llms/{ => src}/summarization/tgi/langchain/README.md (100%) rename comps/llms/{ => src}/summarization/tgi/langchain/__init__.py (100%) rename comps/llms/{ => src}/summarization/tgi/langchain/entrypoint.sh (100%) rename comps/llms/{ => src}/summarization/tgi/langchain/llm.py (100%) rename comps/llms/{ => src}/summarization/tgi/langchain/requirements-runtime.txt (100%) rename comps/llms/{ => src}/summarization/tgi/langchain/requirements.txt (100%) rename comps/llms/{ => src}/summarization/vllm/langchain/Dockerfile (100%) rename comps/llms/{ => src}/summarization/vllm/langchain/README.md (100%) rename comps/llms/{ => src}/summarization/vllm/langchain/__init__.py (100%) rename comps/llms/{ => src}/summarization/vllm/langchain/entrypoint.sh (100%) rename comps/llms/{ => src}/summarization/vllm/langchain/llm.py (100%) rename comps/llms/{ => src}/summarization/vllm/langchain/requirements-runtime.txt (100%) rename comps/llms/{ => src}/summarization/vllm/langchain/requirements.txt (100%) rename comps/llms/{ => src}/text-generation/README.md (100%) rename comps/llms/{ => src}/text-generation/native/langchain/Dockerfile (100%) rename comps/llms/{ => src}/text-generation/native/langchain/README.md (100%) rename comps/llms/{ => src}/text-generation/native/langchain/llm.py (100%) rename comps/llms/{ => src}/text-generation/native/langchain/requirements.txt (100%) rename comps/llms/{ => src}/text-generation/native/langchain/template.py (100%) rename comps/llms/{ => src}/text-generation/native/langchain/utils.py (100%) rename comps/llms/{ => src}/text-generation/native/llama_index/Dockerfile (100%) rename comps/llms/{ => src}/text-generation/native/llama_index/README.md (100%) rename comps/llms/{ => src}/text-generation/native/llama_index/llm.py (100%) rename comps/llms/{ => src}/text-generation/native/llama_index/requirements.txt (100%) rename comps/llms/{ => 
src}/text-generation/native/llama_index/template.py (100%) rename comps/llms/{ => src}/text-generation/native/llama_index/utils.py (100%) rename comps/llms/{ => src}/text-generation/ollama/langchain/Dockerfile (100%) rename comps/llms/{ => src}/text-generation/ollama/langchain/README.md (100%) rename comps/llms/{ => src}/text-generation/ollama/langchain/__init__.py (100%) rename comps/llms/{ => src}/text-generation/ollama/langchain/entrypoint.sh (100%) rename comps/llms/{ => src}/text-generation/ollama/langchain/llm.py (100%) rename comps/llms/{ => src}/text-generation/ollama/langchain/requirements-runtime.txt (100%) rename comps/llms/{ => src}/text-generation/ollama/langchain/requirements.txt (100%) rename comps/llms/{ => src}/text-generation/predictionguard/Dockerfile (100%) rename comps/llms/{ => src}/text-generation/predictionguard/README.md (100%) rename comps/llms/{ => src}/text-generation/predictionguard/__init__.py (100%) rename comps/llms/{ => src}/text-generation/predictionguard/entrypoint.sh (100%) rename comps/llms/{ => src}/text-generation/predictionguard/llm_predictionguard.py (100%) rename comps/llms/{ => src}/text-generation/predictionguard/requirements.txt (100%) rename comps/llms/{ => src}/text-generation/tgi/Dockerfile (100%) rename comps/llms/{ => src}/text-generation/tgi/README.md (87%) rename comps/llms/{ => src}/text-generation/tgi/__init__.py (100%) rename comps/llms/{ => src}/text-generation/tgi/entrypoint.sh (100%) rename comps/llms/{ => src}/text-generation/tgi/launch_tgi_service.sh (100%) rename comps/llms/{ => src}/text-generation/tgi/llm.py (100%) rename comps/llms/{ => src}/text-generation/tgi/requirements-runtime.txt (100%) rename comps/llms/{ => src}/text-generation/tgi/requirements.txt (100%) rename comps/llms/{ => src}/text-generation/tgi/template.py (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/Dockerfile (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/README.md (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/build_docker_microservice.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/entrypoint.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/launch_microservice.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/llm.py (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/query.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/requirements-runtime.txt (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/requirements.txt (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/template.py (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/Dockerfile (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/README.md (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/build_docker_microservice.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/entrypoint.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/launch_microservice.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/llm.py (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/query.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/requirements-runtime.txt (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/requirements.txt (100%) rename comps/llms/{ => src}/utils/lm-eval/Dockerfile (100%) rename comps/llms/{ => src}/utils/lm-eval/README.md (100%) rename comps/llms/{ => 
src}/utils/lm-eval/requirements.txt (100%)
 rename comps/llms/{ => src}/utils/lm-eval/self_hosted_hf.py (100%)
 delete mode 100644 comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm.sh
 delete mode 100644 comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm_openvino.sh
 delete mode 100644 comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service.sh
 delete mode 100644 comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service_openvino.sh

diff --git a/comps/3rd_parties/tgi/docker/README.md b/comps/3rd_parties/tgi/docker/README.md
new file mode 100644
index 0000000000..9b7b5f0bfa
--- /dev/null
+++ b/comps/3rd_parties/tgi/docker/README.md
@@ -0,0 +1,15 @@
+## Launch TGI endpoint
+
+```bash
+export HF_TOKEN=${your_hf_api_token}
+docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model}
+```
+
+## Verify the TGI Service
+
+```bash
+curl http://${your_ip}:8008/v1/chat/completions \
+ -X POST \
+ -d '{"model": ${your_hf_llm_model}, "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
+ -H 'Content-Type: application/json'
+```
diff --git a/comps/3rd_parties/tgi/kubernetes/README.md b/comps/3rd_parties/tgi/kubernetes/README.md
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu b/comps/3rd_parties/vllm/docker/Dockerfile.intel_gpu
similarity index 100%
rename from comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu
rename to comps/3rd_parties/vllm/docker/Dockerfile.intel_gpu
diff --git a/comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm.sh b/comps/3rd_parties/vllm/docker/build_docker_vllm.sh
similarity index 100%
rename from comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm.sh
rename to comps/3rd_parties/vllm/docker/build_docker_vllm.sh
diff --git a/comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm_openvino.sh b/comps/3rd_parties/vllm/docker/build_docker_vllm_openvino.sh
similarity index 100%
rename from comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm_openvino.sh
rename to comps/3rd_parties/vllm/docker/build_docker_vllm_openvino.sh
diff --git a/comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service.sh b/comps/3rd_parties/vllm/docker/launch_vllm_service.sh
similarity index 100%
rename from comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service.sh
rename to comps/3rd_parties/vllm/docker/launch_vllm_service.sh
diff --git a/comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service_openvino.sh b/comps/3rd_parties/vllm/docker/launch_vllm_service_openvino.sh
similarity index 100%
rename from comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service_openvino.sh
rename to comps/3rd_parties/vllm/docker/launch_vllm_service_openvino.sh
diff --git a/comps/3rd_parties/vllm/kubernetes/README.md b/comps/3rd_parties/vllm/kubernetes/README.md
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/comps/llms/faq-generation/tgi/langchain/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/faq-generation_tgi_langchain.yaml
similarity index 100%
rename from comps/llms/faq-generation/tgi/langchain/docker_compose_llm.yaml
rename to comps/llms/deployment/docker_compose/faq-generation_tgi_langchain.yaml
diff --git
a/comps/llms/faq-generation/vllm/langchain/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/faq-generation_vllm_langchain.yaml similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/faq-generation_vllm_langchain.yaml diff --git a/comps/llms/summarization/tgi/langchain/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/summarization_tgi_langchain.yaml similarity index 100% rename from comps/llms/summarization/tgi/langchain/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/summarization_tgi_langchain.yaml diff --git a/comps/llms/summarization/vllm/langchain/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/summarization_vllm_langchain.yaml similarity index 100% rename from comps/llms/summarization/vllm/langchain/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/summarization_vllm_langchain.yaml diff --git a/comps/llms/text-generation/native/langchain/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/text-generation_native_langchain.yaml similarity index 100% rename from comps/llms/text-generation/native/langchain/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/text-generation_native_langchain.yaml diff --git a/comps/llms/text-generation/native/llama_index/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/text-generation_native_llama_index.yaml similarity index 100% rename from comps/llms/text-generation/native/llama_index/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/text-generation_native_llama_index.yaml diff --git a/comps/llms/text-generation/predictionguard/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/text-generation_predictionguard.yaml similarity index 100% rename from comps/llms/text-generation/predictionguard/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/text-generation_predictionguard.yaml diff --git a/comps/llms/text-generation/tgi/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/text-generation_tgi.yaml similarity index 100% rename from comps/llms/text-generation/tgi/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/text-generation_tgi.yaml diff --git a/comps/llms/text-generation/vllm/langchain/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/text-generation_vllm_langchain.yaml similarity index 100% rename from comps/llms/text-generation/vllm/langchain/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/text-generation_vllm_langchain.yaml diff --git a/comps/llms/text-generation/vllm/llama_index/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/text-generation_vllm_llama_index.yaml similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/text-generation_vllm_llama_index.yaml diff --git a/comps/llms/deployment/kubernetes/README.md b/comps/llms/deployment/kubernetes/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/llms/faq-generation/tgi/langchain/Dockerfile b/comps/llms/src/faq-generation/tgi/langchain/Dockerfile similarity index 100% rename from comps/llms/faq-generation/tgi/langchain/Dockerfile rename to comps/llms/src/faq-generation/tgi/langchain/Dockerfile diff --git a/comps/llms/faq-generation/tgi/langchain/README.md b/comps/llms/src/faq-generation/tgi/langchain/README.md similarity index 100% 
rename from comps/llms/faq-generation/tgi/langchain/README.md rename to comps/llms/src/faq-generation/tgi/langchain/README.md diff --git a/comps/llms/faq-generation/tgi/langchain/__init__.py b/comps/llms/src/faq-generation/tgi/langchain/__init__.py similarity index 100% rename from comps/llms/faq-generation/tgi/langchain/__init__.py rename to comps/llms/src/faq-generation/tgi/langchain/__init__.py diff --git a/comps/llms/faq-generation/tgi/langchain/entrypoint.sh b/comps/llms/src/faq-generation/tgi/langchain/entrypoint.sh similarity index 100% rename from comps/llms/faq-generation/tgi/langchain/entrypoint.sh rename to comps/llms/src/faq-generation/tgi/langchain/entrypoint.sh diff --git a/comps/llms/faq-generation/tgi/langchain/llm.py b/comps/llms/src/faq-generation/tgi/langchain/llm.py similarity index 100% rename from comps/llms/faq-generation/tgi/langchain/llm.py rename to comps/llms/src/faq-generation/tgi/langchain/llm.py diff --git a/comps/llms/faq-generation/tgi/langchain/requirements-runtime.txt b/comps/llms/src/faq-generation/tgi/langchain/requirements-runtime.txt similarity index 100% rename from comps/llms/faq-generation/tgi/langchain/requirements-runtime.txt rename to comps/llms/src/faq-generation/tgi/langchain/requirements-runtime.txt diff --git a/comps/llms/faq-generation/tgi/langchain/requirements.txt b/comps/llms/src/faq-generation/tgi/langchain/requirements.txt similarity index 100% rename from comps/llms/faq-generation/tgi/langchain/requirements.txt rename to comps/llms/src/faq-generation/tgi/langchain/requirements.txt diff --git a/comps/llms/faq-generation/vllm/langchain/Dockerfile b/comps/llms/src/faq-generation/vllm/langchain/Dockerfile similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/Dockerfile rename to comps/llms/src/faq-generation/vllm/langchain/Dockerfile diff --git a/comps/llms/faq-generation/vllm/langchain/README.md b/comps/llms/src/faq-generation/vllm/langchain/README.md similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/README.md rename to comps/llms/src/faq-generation/vllm/langchain/README.md diff --git a/comps/llms/faq-generation/vllm/langchain/__init__.py b/comps/llms/src/faq-generation/vllm/langchain/__init__.py similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/__init__.py rename to comps/llms/src/faq-generation/vllm/langchain/__init__.py diff --git a/comps/llms/faq-generation/vllm/langchain/entrypoint.sh b/comps/llms/src/faq-generation/vllm/langchain/entrypoint.sh similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/entrypoint.sh rename to comps/llms/src/faq-generation/vllm/langchain/entrypoint.sh diff --git a/comps/llms/faq-generation/vllm/langchain/llm.py b/comps/llms/src/faq-generation/vllm/langchain/llm.py similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/llm.py rename to comps/llms/src/faq-generation/vllm/langchain/llm.py diff --git a/comps/llms/faq-generation/vllm/langchain/requirements-runtime.txt b/comps/llms/src/faq-generation/vllm/langchain/requirements-runtime.txt similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/requirements-runtime.txt rename to comps/llms/src/faq-generation/vllm/langchain/requirements-runtime.txt diff --git a/comps/llms/faq-generation/vllm/langchain/requirements.txt b/comps/llms/src/faq-generation/vllm/langchain/requirements.txt similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/requirements.txt rename to 
comps/llms/src/faq-generation/vllm/langchain/requirements.txt diff --git a/comps/llms/requirements.txt b/comps/llms/src/requirements.txt similarity index 100% rename from comps/llms/requirements.txt rename to comps/llms/src/requirements.txt diff --git a/comps/llms/summarization/tgi/langchain/Dockerfile b/comps/llms/src/summarization/tgi/langchain/Dockerfile similarity index 100% rename from comps/llms/summarization/tgi/langchain/Dockerfile rename to comps/llms/src/summarization/tgi/langchain/Dockerfile diff --git a/comps/llms/summarization/tgi/langchain/README.md b/comps/llms/src/summarization/tgi/langchain/README.md similarity index 100% rename from comps/llms/summarization/tgi/langchain/README.md rename to comps/llms/src/summarization/tgi/langchain/README.md diff --git a/comps/llms/summarization/tgi/langchain/__init__.py b/comps/llms/src/summarization/tgi/langchain/__init__.py similarity index 100% rename from comps/llms/summarization/tgi/langchain/__init__.py rename to comps/llms/src/summarization/tgi/langchain/__init__.py diff --git a/comps/llms/summarization/tgi/langchain/entrypoint.sh b/comps/llms/src/summarization/tgi/langchain/entrypoint.sh similarity index 100% rename from comps/llms/summarization/tgi/langchain/entrypoint.sh rename to comps/llms/src/summarization/tgi/langchain/entrypoint.sh diff --git a/comps/llms/summarization/tgi/langchain/llm.py b/comps/llms/src/summarization/tgi/langchain/llm.py similarity index 100% rename from comps/llms/summarization/tgi/langchain/llm.py rename to comps/llms/src/summarization/tgi/langchain/llm.py diff --git a/comps/llms/summarization/tgi/langchain/requirements-runtime.txt b/comps/llms/src/summarization/tgi/langchain/requirements-runtime.txt similarity index 100% rename from comps/llms/summarization/tgi/langchain/requirements-runtime.txt rename to comps/llms/src/summarization/tgi/langchain/requirements-runtime.txt diff --git a/comps/llms/summarization/tgi/langchain/requirements.txt b/comps/llms/src/summarization/tgi/langchain/requirements.txt similarity index 100% rename from comps/llms/summarization/tgi/langchain/requirements.txt rename to comps/llms/src/summarization/tgi/langchain/requirements.txt diff --git a/comps/llms/summarization/vllm/langchain/Dockerfile b/comps/llms/src/summarization/vllm/langchain/Dockerfile similarity index 100% rename from comps/llms/summarization/vllm/langchain/Dockerfile rename to comps/llms/src/summarization/vllm/langchain/Dockerfile diff --git a/comps/llms/summarization/vllm/langchain/README.md b/comps/llms/src/summarization/vllm/langchain/README.md similarity index 100% rename from comps/llms/summarization/vllm/langchain/README.md rename to comps/llms/src/summarization/vllm/langchain/README.md diff --git a/comps/llms/summarization/vllm/langchain/__init__.py b/comps/llms/src/summarization/vllm/langchain/__init__.py similarity index 100% rename from comps/llms/summarization/vllm/langchain/__init__.py rename to comps/llms/src/summarization/vllm/langchain/__init__.py diff --git a/comps/llms/summarization/vllm/langchain/entrypoint.sh b/comps/llms/src/summarization/vllm/langchain/entrypoint.sh similarity index 100% rename from comps/llms/summarization/vllm/langchain/entrypoint.sh rename to comps/llms/src/summarization/vllm/langchain/entrypoint.sh diff --git a/comps/llms/summarization/vllm/langchain/llm.py b/comps/llms/src/summarization/vllm/langchain/llm.py similarity index 100% rename from comps/llms/summarization/vllm/langchain/llm.py rename to comps/llms/src/summarization/vllm/langchain/llm.py diff --git 
a/comps/llms/summarization/vllm/langchain/requirements-runtime.txt b/comps/llms/src/summarization/vllm/langchain/requirements-runtime.txt similarity index 100% rename from comps/llms/summarization/vllm/langchain/requirements-runtime.txt rename to comps/llms/src/summarization/vllm/langchain/requirements-runtime.txt diff --git a/comps/llms/summarization/vllm/langchain/requirements.txt b/comps/llms/src/summarization/vllm/langchain/requirements.txt similarity index 100% rename from comps/llms/summarization/vllm/langchain/requirements.txt rename to comps/llms/src/summarization/vllm/langchain/requirements.txt diff --git a/comps/llms/text-generation/README.md b/comps/llms/src/text-generation/README.md similarity index 100% rename from comps/llms/text-generation/README.md rename to comps/llms/src/text-generation/README.md diff --git a/comps/llms/text-generation/native/langchain/Dockerfile b/comps/llms/src/text-generation/native/langchain/Dockerfile similarity index 100% rename from comps/llms/text-generation/native/langchain/Dockerfile rename to comps/llms/src/text-generation/native/langchain/Dockerfile diff --git a/comps/llms/text-generation/native/langchain/README.md b/comps/llms/src/text-generation/native/langchain/README.md similarity index 100% rename from comps/llms/text-generation/native/langchain/README.md rename to comps/llms/src/text-generation/native/langchain/README.md diff --git a/comps/llms/text-generation/native/langchain/llm.py b/comps/llms/src/text-generation/native/langchain/llm.py similarity index 100% rename from comps/llms/text-generation/native/langchain/llm.py rename to comps/llms/src/text-generation/native/langchain/llm.py diff --git a/comps/llms/text-generation/native/langchain/requirements.txt b/comps/llms/src/text-generation/native/langchain/requirements.txt similarity index 100% rename from comps/llms/text-generation/native/langchain/requirements.txt rename to comps/llms/src/text-generation/native/langchain/requirements.txt diff --git a/comps/llms/text-generation/native/langchain/template.py b/comps/llms/src/text-generation/native/langchain/template.py similarity index 100% rename from comps/llms/text-generation/native/langchain/template.py rename to comps/llms/src/text-generation/native/langchain/template.py diff --git a/comps/llms/text-generation/native/langchain/utils.py b/comps/llms/src/text-generation/native/langchain/utils.py similarity index 100% rename from comps/llms/text-generation/native/langchain/utils.py rename to comps/llms/src/text-generation/native/langchain/utils.py diff --git a/comps/llms/text-generation/native/llama_index/Dockerfile b/comps/llms/src/text-generation/native/llama_index/Dockerfile similarity index 100% rename from comps/llms/text-generation/native/llama_index/Dockerfile rename to comps/llms/src/text-generation/native/llama_index/Dockerfile diff --git a/comps/llms/text-generation/native/llama_index/README.md b/comps/llms/src/text-generation/native/llama_index/README.md similarity index 100% rename from comps/llms/text-generation/native/llama_index/README.md rename to comps/llms/src/text-generation/native/llama_index/README.md diff --git a/comps/llms/text-generation/native/llama_index/llm.py b/comps/llms/src/text-generation/native/llama_index/llm.py similarity index 100% rename from comps/llms/text-generation/native/llama_index/llm.py rename to comps/llms/src/text-generation/native/llama_index/llm.py diff --git a/comps/llms/text-generation/native/llama_index/requirements.txt 
b/comps/llms/src/text-generation/native/llama_index/requirements.txt similarity index 100% rename from comps/llms/text-generation/native/llama_index/requirements.txt rename to comps/llms/src/text-generation/native/llama_index/requirements.txt diff --git a/comps/llms/text-generation/native/llama_index/template.py b/comps/llms/src/text-generation/native/llama_index/template.py similarity index 100% rename from comps/llms/text-generation/native/llama_index/template.py rename to comps/llms/src/text-generation/native/llama_index/template.py diff --git a/comps/llms/text-generation/native/llama_index/utils.py b/comps/llms/src/text-generation/native/llama_index/utils.py similarity index 100% rename from comps/llms/text-generation/native/llama_index/utils.py rename to comps/llms/src/text-generation/native/llama_index/utils.py diff --git a/comps/llms/text-generation/ollama/langchain/Dockerfile b/comps/llms/src/text-generation/ollama/langchain/Dockerfile similarity index 100% rename from comps/llms/text-generation/ollama/langchain/Dockerfile rename to comps/llms/src/text-generation/ollama/langchain/Dockerfile diff --git a/comps/llms/text-generation/ollama/langchain/README.md b/comps/llms/src/text-generation/ollama/langchain/README.md similarity index 100% rename from comps/llms/text-generation/ollama/langchain/README.md rename to comps/llms/src/text-generation/ollama/langchain/README.md diff --git a/comps/llms/text-generation/ollama/langchain/__init__.py b/comps/llms/src/text-generation/ollama/langchain/__init__.py similarity index 100% rename from comps/llms/text-generation/ollama/langchain/__init__.py rename to comps/llms/src/text-generation/ollama/langchain/__init__.py diff --git a/comps/llms/text-generation/ollama/langchain/entrypoint.sh b/comps/llms/src/text-generation/ollama/langchain/entrypoint.sh similarity index 100% rename from comps/llms/text-generation/ollama/langchain/entrypoint.sh rename to comps/llms/src/text-generation/ollama/langchain/entrypoint.sh diff --git a/comps/llms/text-generation/ollama/langchain/llm.py b/comps/llms/src/text-generation/ollama/langchain/llm.py similarity index 100% rename from comps/llms/text-generation/ollama/langchain/llm.py rename to comps/llms/src/text-generation/ollama/langchain/llm.py diff --git a/comps/llms/text-generation/ollama/langchain/requirements-runtime.txt b/comps/llms/src/text-generation/ollama/langchain/requirements-runtime.txt similarity index 100% rename from comps/llms/text-generation/ollama/langchain/requirements-runtime.txt rename to comps/llms/src/text-generation/ollama/langchain/requirements-runtime.txt diff --git a/comps/llms/text-generation/ollama/langchain/requirements.txt b/comps/llms/src/text-generation/ollama/langchain/requirements.txt similarity index 100% rename from comps/llms/text-generation/ollama/langchain/requirements.txt rename to comps/llms/src/text-generation/ollama/langchain/requirements.txt diff --git a/comps/llms/text-generation/predictionguard/Dockerfile b/comps/llms/src/text-generation/predictionguard/Dockerfile similarity index 100% rename from comps/llms/text-generation/predictionguard/Dockerfile rename to comps/llms/src/text-generation/predictionguard/Dockerfile diff --git a/comps/llms/text-generation/predictionguard/README.md b/comps/llms/src/text-generation/predictionguard/README.md similarity index 100% rename from comps/llms/text-generation/predictionguard/README.md rename to comps/llms/src/text-generation/predictionguard/README.md diff --git a/comps/llms/text-generation/predictionguard/__init__.py 
b/comps/llms/src/text-generation/predictionguard/__init__.py
similarity index 100%
rename from comps/llms/text-generation/predictionguard/__init__.py
rename to comps/llms/src/text-generation/predictionguard/__init__.py
diff --git a/comps/llms/text-generation/predictionguard/entrypoint.sh b/comps/llms/src/text-generation/predictionguard/entrypoint.sh
similarity index 100%
rename from comps/llms/text-generation/predictionguard/entrypoint.sh
rename to comps/llms/src/text-generation/predictionguard/entrypoint.sh
diff --git a/comps/llms/text-generation/predictionguard/llm_predictionguard.py b/comps/llms/src/text-generation/predictionguard/llm_predictionguard.py
similarity index 100%
rename from comps/llms/text-generation/predictionguard/llm_predictionguard.py
rename to comps/llms/src/text-generation/predictionguard/llm_predictionguard.py
diff --git a/comps/llms/text-generation/predictionguard/requirements.txt b/comps/llms/src/text-generation/predictionguard/requirements.txt
similarity index 100%
rename from comps/llms/text-generation/predictionguard/requirements.txt
rename to comps/llms/src/text-generation/predictionguard/requirements.txt
diff --git a/comps/llms/text-generation/tgi/Dockerfile b/comps/llms/src/text-generation/tgi/Dockerfile
similarity index 100%
rename from comps/llms/text-generation/tgi/Dockerfile
rename to comps/llms/src/text-generation/tgi/Dockerfile
diff --git a/comps/llms/text-generation/tgi/README.md b/comps/llms/src/text-generation/tgi/README.md
similarity index 87%
rename from comps/llms/text-generation/tgi/README.md
rename to comps/llms/src/text-generation/tgi/README.md
index 5addf65078..9cfe6509e3 100644
--- a/comps/llms/text-generation/tgi/README.md
+++ b/comps/llms/src/text-generation/tgi/README.md
@@ -12,23 +12,10 @@ To start the LLM microservice, you need to install python packages first.
 pip install -r requirements.txt
 ```

-### 1.2 Start LLM Service
+### 1.2 Start 3rd-party TGI Service
+Please refer to [3rd-party TGI](../../../../3rd_parties/tgi/docker/README.md) to start a LLM endpoint and verify.

-```bash
-export HF_TOKEN=${your_hf_api_token}
-docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model}
-```
-
-### 1.3 Verify the TGI Service
-
-```bash
-curl http://${your_ip}:8008/v1/chat/completions \
- -X POST \
- -d '{"model": ${your_hf_llm_model}, "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
- -H 'Content-Type: application/json'
-```
-
-### 1.4 Start LLM Service with Python Script
+### 1.3 Start LLM Service with Python Script

 ```bash
 export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
@@ -72,8 +59,8 @@ docker run -d --name="llm-tgi-server" -p 9000:9000 --ipc=host -e http_proxy=$htt
 ### 2.4 Run Docker with Docker Compose (Option B)

 ```bash
-cd text-generation/tgi
-docker compose -f docker_compose_llm.yaml up -d
+cd comps/llms/deployment/docker_compose/
+docker compose -f text-generation_tgi.yaml up -d
 ```

 ## 🚀3.
Consume LLM Service diff --git a/comps/llms/text-generation/tgi/__init__.py b/comps/llms/src/text-generation/tgi/__init__.py similarity index 100% rename from comps/llms/text-generation/tgi/__init__.py rename to comps/llms/src/text-generation/tgi/__init__.py diff --git a/comps/llms/text-generation/tgi/entrypoint.sh b/comps/llms/src/text-generation/tgi/entrypoint.sh similarity index 100% rename from comps/llms/text-generation/tgi/entrypoint.sh rename to comps/llms/src/text-generation/tgi/entrypoint.sh diff --git a/comps/llms/text-generation/tgi/launch_tgi_service.sh b/comps/llms/src/text-generation/tgi/launch_tgi_service.sh similarity index 100% rename from comps/llms/text-generation/tgi/launch_tgi_service.sh rename to comps/llms/src/text-generation/tgi/launch_tgi_service.sh diff --git a/comps/llms/text-generation/tgi/llm.py b/comps/llms/src/text-generation/tgi/llm.py similarity index 100% rename from comps/llms/text-generation/tgi/llm.py rename to comps/llms/src/text-generation/tgi/llm.py diff --git a/comps/llms/text-generation/tgi/requirements-runtime.txt b/comps/llms/src/text-generation/tgi/requirements-runtime.txt similarity index 100% rename from comps/llms/text-generation/tgi/requirements-runtime.txt rename to comps/llms/src/text-generation/tgi/requirements-runtime.txt diff --git a/comps/llms/text-generation/tgi/requirements.txt b/comps/llms/src/text-generation/tgi/requirements.txt similarity index 100% rename from comps/llms/text-generation/tgi/requirements.txt rename to comps/llms/src/text-generation/tgi/requirements.txt diff --git a/comps/llms/text-generation/tgi/template.py b/comps/llms/src/text-generation/tgi/template.py similarity index 100% rename from comps/llms/text-generation/tgi/template.py rename to comps/llms/src/text-generation/tgi/template.py diff --git a/comps/llms/text-generation/vllm/langchain/Dockerfile b/comps/llms/src/text-generation/vllm/langchain/Dockerfile similarity index 100% rename from comps/llms/text-generation/vllm/langchain/Dockerfile rename to comps/llms/src/text-generation/vllm/langchain/Dockerfile diff --git a/comps/llms/text-generation/vllm/langchain/README.md b/comps/llms/src/text-generation/vllm/langchain/README.md similarity index 100% rename from comps/llms/text-generation/vllm/langchain/README.md rename to comps/llms/src/text-generation/vllm/langchain/README.md diff --git a/comps/llms/text-generation/vllm/langchain/build_docker_microservice.sh b/comps/llms/src/text-generation/vllm/langchain/build_docker_microservice.sh similarity index 100% rename from comps/llms/text-generation/vllm/langchain/build_docker_microservice.sh rename to comps/llms/src/text-generation/vllm/langchain/build_docker_microservice.sh diff --git a/comps/llms/text-generation/vllm/langchain/entrypoint.sh b/comps/llms/src/text-generation/vllm/langchain/entrypoint.sh similarity index 100% rename from comps/llms/text-generation/vllm/langchain/entrypoint.sh rename to comps/llms/src/text-generation/vllm/langchain/entrypoint.sh diff --git a/comps/llms/text-generation/vllm/langchain/launch_microservice.sh b/comps/llms/src/text-generation/vllm/langchain/launch_microservice.sh similarity index 100% rename from comps/llms/text-generation/vllm/langchain/launch_microservice.sh rename to comps/llms/src/text-generation/vllm/langchain/launch_microservice.sh diff --git a/comps/llms/text-generation/vllm/langchain/llm.py b/comps/llms/src/text-generation/vllm/langchain/llm.py similarity index 100% rename from comps/llms/text-generation/vllm/langchain/llm.py rename to 
comps/llms/src/text-generation/vllm/langchain/llm.py diff --git a/comps/llms/text-generation/vllm/langchain/query.sh b/comps/llms/src/text-generation/vllm/langchain/query.sh similarity index 100% rename from comps/llms/text-generation/vllm/langchain/query.sh rename to comps/llms/src/text-generation/vllm/langchain/query.sh diff --git a/comps/llms/text-generation/vllm/langchain/requirements-runtime.txt b/comps/llms/src/text-generation/vllm/langchain/requirements-runtime.txt similarity index 100% rename from comps/llms/text-generation/vllm/langchain/requirements-runtime.txt rename to comps/llms/src/text-generation/vllm/langchain/requirements-runtime.txt diff --git a/comps/llms/text-generation/vllm/langchain/requirements.txt b/comps/llms/src/text-generation/vllm/langchain/requirements.txt similarity index 100% rename from comps/llms/text-generation/vllm/langchain/requirements.txt rename to comps/llms/src/text-generation/vllm/langchain/requirements.txt diff --git a/comps/llms/text-generation/vllm/langchain/template.py b/comps/llms/src/text-generation/vllm/langchain/template.py similarity index 100% rename from comps/llms/text-generation/vllm/langchain/template.py rename to comps/llms/src/text-generation/vllm/langchain/template.py diff --git a/comps/llms/text-generation/vllm/llama_index/Dockerfile b/comps/llms/src/text-generation/vllm/llama_index/Dockerfile similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/Dockerfile rename to comps/llms/src/text-generation/vllm/llama_index/Dockerfile diff --git a/comps/llms/text-generation/vllm/llama_index/README.md b/comps/llms/src/text-generation/vllm/llama_index/README.md similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/README.md rename to comps/llms/src/text-generation/vllm/llama_index/README.md diff --git a/comps/llms/text-generation/vllm/llama_index/build_docker_microservice.sh b/comps/llms/src/text-generation/vllm/llama_index/build_docker_microservice.sh similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/build_docker_microservice.sh rename to comps/llms/src/text-generation/vllm/llama_index/build_docker_microservice.sh diff --git a/comps/llms/text-generation/vllm/llama_index/entrypoint.sh b/comps/llms/src/text-generation/vllm/llama_index/entrypoint.sh similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/entrypoint.sh rename to comps/llms/src/text-generation/vllm/llama_index/entrypoint.sh diff --git a/comps/llms/text-generation/vllm/llama_index/launch_microservice.sh b/comps/llms/src/text-generation/vllm/llama_index/launch_microservice.sh similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/launch_microservice.sh rename to comps/llms/src/text-generation/vllm/llama_index/launch_microservice.sh diff --git a/comps/llms/text-generation/vllm/llama_index/llm.py b/comps/llms/src/text-generation/vllm/llama_index/llm.py similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/llm.py rename to comps/llms/src/text-generation/vllm/llama_index/llm.py diff --git a/comps/llms/text-generation/vllm/llama_index/query.sh b/comps/llms/src/text-generation/vllm/llama_index/query.sh similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/query.sh rename to comps/llms/src/text-generation/vllm/llama_index/query.sh diff --git a/comps/llms/text-generation/vllm/llama_index/requirements-runtime.txt b/comps/llms/src/text-generation/vllm/llama_index/requirements-runtime.txt similarity index 100% rename from 
comps/llms/text-generation/vllm/llama_index/requirements-runtime.txt rename to comps/llms/src/text-generation/vllm/llama_index/requirements-runtime.txt diff --git a/comps/llms/text-generation/vllm/llama_index/requirements.txt b/comps/llms/src/text-generation/vllm/llama_index/requirements.txt similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/requirements.txt rename to comps/llms/src/text-generation/vllm/llama_index/requirements.txt diff --git a/comps/llms/utils/lm-eval/Dockerfile b/comps/llms/src/utils/lm-eval/Dockerfile similarity index 100% rename from comps/llms/utils/lm-eval/Dockerfile rename to comps/llms/src/utils/lm-eval/Dockerfile diff --git a/comps/llms/utils/lm-eval/README.md b/comps/llms/src/utils/lm-eval/README.md similarity index 100% rename from comps/llms/utils/lm-eval/README.md rename to comps/llms/src/utils/lm-eval/README.md diff --git a/comps/llms/utils/lm-eval/requirements.txt b/comps/llms/src/utils/lm-eval/requirements.txt similarity index 100% rename from comps/llms/utils/lm-eval/requirements.txt rename to comps/llms/src/utils/lm-eval/requirements.txt diff --git a/comps/llms/utils/lm-eval/self_hosted_hf.py b/comps/llms/src/utils/lm-eval/self_hosted_hf.py similarity index 100% rename from comps/llms/utils/lm-eval/self_hosted_hf.py rename to comps/llms/src/utils/lm-eval/self_hosted_hf.py diff --git a/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm.sh b/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm.sh deleted file mode 100644 index c94dd72372..0000000000 --- a/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Set default values -default_hw_mode="cpu" - -# Assign arguments to variable -hw_mode=${1:-$default_hw_mode} - -# Check if all required arguments are provided -if [ "$#" -lt 0 ] || [ "$#" -gt 1 ]; then - echo "Usage: $0 [hw_mode]" - echo "Please customize the arguments you want to use. - - hw_mode: The hardware mode for the Ray Gaudi endpoint, with the default being 'cpu', and the optional selection can be 'cpu' and 'hpu'." - exit 1 -fi - -# Build the docker image for vLLM based on the hardware mode -if [ "$hw_mode" = "hpu" ]; then - git clone https://github.com/HabanaAI/vllm-fork.git - cd ./vllm-fork/ - git checkout 3c39626 - docker build -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy - cd .. - rm -rf vllm-fork -else - git clone https://github.com/vllm-project/vllm.git - cd ./vllm/ - docker build -f Dockerfile.cpu -t opea/vllm-cpu:latest --shm-size=128g . 
--build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -fi diff --git a/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm_openvino.sh b/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm_openvino.sh deleted file mode 100644 index d42878ebad..0000000000 --- a/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm_openvino.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -BASEDIR="$( cd "$( dirname "$0" )" && pwd )" -git clone https://github.com/vllm-project/vllm.git vllm -cd ./vllm/ && git checkout v0.6.1 -docker build -t vllm-openvino:latest -f Dockerfile.openvino . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -cd $BASEDIR && rm -rf vllm diff --git a/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service.sh b/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service.sh deleted file mode 100644 index d3363aa403..0000000000 --- a/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Set default values -default_port=8008 -default_model=$LLM_MODEL -default_hw_mode="cpu" -default_parallel_number=1 -default_block_size=128 -default_max_num_seqs=256 -default_max_seq_len_to_capture=2048 - -# Assign arguments to variables -port_number=${1:-$default_port} -model_name=${2:-$default_model} -hw_mode=${3:-$default_hw_mode} -parallel_number=${4:-$default_parallel_number} -block_size=${5:-$default_block_size} -max_num_seqs=${6:-$default_max_num_seqs} -max_seq_len_to_capture=${7:-$default_max_seq_len_to_capture} - -# Check if all required arguments are provided -if [ "$#" -lt 0 ] || [ "$#" -gt 4 ]; then - echo "Usage: $0 [port_number] [model_name] [hw_mode] [parallel_number]" - echo "port_number: The port number assigned to the vLLM CPU endpoint, with the default being 8080." - echo "model_name: The model name utilized for LLM, with the default set to 'meta-llama/Meta-Llama-3-8B-Instruct'." 
- echo "hw_mode: The hardware mode utilized for LLM, with the default set to 'cpu', and the optional selection can be 'hpu'" - echo "parallel_number: parallel nodes number for 'hpu' mode" - echo "block_size: default set to 128 for better performance on HPU" - echo "max_num_seqs: default set to 256 for better performance on HPU" - echo "max_seq_len_to_capture: default set to 2048 for better performance on HPU" - exit 1 -fi - -# Set the volume variable -volume=$PWD/data - -# Build the Docker run command based on hardware mode -if [ "$hw_mode" = "hpu" ]; then - docker run -d --rm --runtime=habana --name="vllm-service" -p $port_number:80 -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} opea/vllm-gaudi:latest --enforce-eager --model $model_name --tensor-parallel-size $parallel_number --host 0.0.0.0 --port 80 --block-size $block_size --max-num-seqs $max_num_seqs --max-seq_len-to-capture $max_seq_len_to_capture -else - docker run -d --rm --name="vllm-service" -p $port_number:80 --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e VLLM_CPU_KVCACHE_SPACE=40 opea/vllm-cpu:latest --model $model_name --host 0.0.0.0 --port 80 -fi diff --git a/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service_openvino.sh b/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service_openvino.sh deleted file mode 100644 index 18ce714dae..0000000000 --- a/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service_openvino.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -# Set default values - - -default_port=8008 -default_model="meta-llama/Llama-2-7b-hf" -swap_space=50 - -while getopts ":hm:p:" opt; do - case $opt in - h) - echo "Usage: $0 [-h] [-m model] [-p port]" - echo "Options:" - echo " -h Display this help message" - echo " -m model Model (default: meta-llama/Llama-2-7b-hf)" - echo " -p port Port (default: 8000)" - exit 0 - ;; - m) - model=$OPTARG - ;; - p) - port=$OPTARG - ;; - \?) - echo "Invalid option: -$OPTARG" >&2 - exit 1 - ;; - esac -done - -# Assign arguments to variables -model_name=${model:-$default_model} -port_number=${port:-$default_port} - - -# Set the Huggingface cache directory variable -HF_CACHE_DIR=$HOME/.cache/huggingface - -# Start the model server using Openvino as the backend inference engine. -# Provide the container name that is unique and meaningful, typically one that includes the model name. 
-
-docker run -d --rm --name="vllm-openvino-server" \
- -p $port_number:80 \
- --ipc=host \
- -e HTTPS_PROXY=$https_proxy \
- -e HTTP_PROXY=$https_proxy \
- -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \
- -v $HOME/.cache/huggingface:/home/user/.cache/huggingface \
- vllm-openvino:latest /bin/bash -c "\
- cd / && \
- export VLLM_CPU_KVCACHE_SPACE=50 && \
- python3 -m vllm.entrypoints.openai.api_server \
- --model \"$model_name\" \
- --host 0.0.0.0 \
- --port 80"

From 6c48526b67f6cd49121d794e7cc7e8846bf7f747 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 3 Dec 2024 03:46:15 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 comps/llms/src/text-generation/tgi/README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/comps/llms/src/text-generation/tgi/README.md b/comps/llms/src/text-generation/tgi/README.md
index 9cfe6509e3..c3b8901b93 100644
--- a/comps/llms/src/text-generation/tgi/README.md
+++ b/comps/llms/src/text-generation/tgi/README.md
@@ -13,7 +13,8 @@ pip install -r requirements.txt
 ```

 ### 1.2 Start 3rd-party TGI Service
-Please refer to [3rd-party TGI](../../../../3rd_parties/tgi/docker/README.md) to start a LLM endpoint and verify.
+
+Please refer to [3rd-party TGI](../../../../3rd_parties/tgi/docker/README.md) to start a LLM endpoint and verify.

 ### 1.3 Start LLM Service with Python Script