From 04fe29e9e2503f07c18589bf3ad5f1fb2b80a1c7 Mon Sep 17 00:00:00 2001 From: Xinyao Wang Date: Tue, 3 Dec 2024 10:56:11 +0800 Subject: [PATCH 1/2] refactor example Signed-off-by: Xinyao Wang --- comps/3rd_parties/tgi/docker/README.md | 15 +++++ comps/3rd_parties/tgi/kubernetes/README.md | 0 .../vllm/docker}/Dockerfile.intel_gpu | 0 .../vllm/docker}/build_docker_vllm.sh | 0 .../docker}/build_docker_vllm_openvino.sh | 0 .../vllm/docker}/launch_vllm_service.sh | 0 .../docker}/launch_vllm_service_openvino.sh | 0 comps/3rd_parties/vllm/kubernetes/README.md | 0 .../faq-generation_tgi_langchain.yaml} | 0 .../faq-generation_vllm_langchain.yaml} | 0 .../summarization_tgi_langchain.yaml} | 0 .../summarization_vllm_langchain.yaml} | 0 .../text-generation_native_langchain.yaml} | 0 .../text-generation_native_llama_index.yaml} | 0 .../text-generation_predictionguard.yaml} | 0 .../docker_compose/text-generation_tgi.yaml} | 0 .../text-generation_vllm_langchain.yaml} | 0 .../text-generation_vllm_llama_index.yaml} | 0 comps/llms/deployment/kubernetes/README.md | 0 .../faq-generation/tgi/langchain/Dockerfile | 0 .../faq-generation/tgi/langchain/README.md | 0 .../faq-generation/tgi/langchain/__init__.py | 0 .../tgi/langchain/entrypoint.sh | 0 .../faq-generation/tgi/langchain/llm.py | 0 .../tgi/langchain/requirements-runtime.txt | 0 .../tgi/langchain/requirements.txt | 0 .../faq-generation/vllm/langchain/Dockerfile | 0 .../faq-generation/vllm/langchain/README.md | 0 .../faq-generation/vllm/langchain/__init__.py | 0 .../vllm/langchain/entrypoint.sh | 0 .../faq-generation/vllm/langchain/llm.py | 0 .../vllm/langchain/requirements-runtime.txt | 0 .../vllm/langchain/requirements.txt | 0 comps/llms/{ => src}/requirements.txt | 0 .../summarization/tgi/langchain/Dockerfile | 0 .../summarization/tgi/langchain/README.md | 0 .../summarization/tgi/langchain/__init__.py | 0 .../summarization/tgi/langchain/entrypoint.sh | 0 .../summarization/tgi/langchain/llm.py | 0 .../tgi/langchain/requirements-runtime.txt | 0 .../tgi/langchain/requirements.txt | 0 .../summarization/vllm/langchain/Dockerfile | 0 .../summarization/vllm/langchain/README.md | 0 .../summarization/vllm/langchain/__init__.py | 0 .../vllm/langchain/entrypoint.sh | 0 .../summarization/vllm/langchain/llm.py | 0 .../vllm/langchain/requirements-runtime.txt | 0 .../vllm/langchain/requirements.txt | 0 .../llms/{ => src}/text-generation/README.md | 0 .../native/langchain/Dockerfile | 0 .../native/langchain/README.md | 0 .../text-generation/native/langchain/llm.py | 0 .../native/langchain/requirements.txt | 0 .../native/langchain/template.py | 0 .../text-generation/native/langchain/utils.py | 0 .../native/llama_index/Dockerfile | 0 .../native/llama_index/README.md | 0 .../text-generation/native/llama_index/llm.py | 0 .../native/llama_index/requirements.txt | 0 .../native/llama_index/template.py | 0 .../native/llama_index/utils.py | 0 .../ollama/langchain/Dockerfile | 0 .../ollama/langchain/README.md | 0 .../ollama/langchain/__init__.py | 0 .../ollama/langchain/entrypoint.sh | 0 .../text-generation/ollama/langchain/llm.py | 0 .../ollama/langchain/requirements-runtime.txt | 0 .../ollama/langchain/requirements.txt | 0 .../predictionguard/Dockerfile | 0 .../text-generation/predictionguard/README.md | 0 .../predictionguard/__init__.py | 0 .../predictionguard/entrypoint.sh | 0 .../predictionguard/llm_predictionguard.py | 0 .../predictionguard/requirements.txt | 0 .../{ => src}/text-generation/tgi/Dockerfile | 0 .../{ => src}/text-generation/tgi/README.md | 23 ++----- .../{ 
=> src}/text-generation/tgi/__init__.py | 0 .../text-generation/tgi/entrypoint.sh | 0 .../text-generation/tgi/launch_tgi_service.sh | 0 .../llms/{ => src}/text-generation/tgi/llm.py | 0 .../tgi/requirements-runtime.txt | 0 .../text-generation/tgi/requirements.txt | 0 .../{ => src}/text-generation/tgi/template.py | 0 .../text-generation/vllm/langchain/Dockerfile | 0 .../text-generation/vllm/langchain/README.md | 0 .../langchain/build_docker_microservice.sh | 0 .../vllm/langchain/entrypoint.sh | 0 .../vllm/langchain/launch_microservice.sh | 0 .../text-generation/vllm/langchain/llm.py | 0 .../text-generation/vllm/langchain/query.sh | 0 .../vllm/langchain/requirements-runtime.txt | 0 .../vllm/langchain/requirements.txt | 0 .../vllm/langchain/template.py | 0 .../vllm/llama_index/Dockerfile | 0 .../vllm/llama_index/README.md | 0 .../llama_index/build_docker_microservice.sh | 0 .../vllm/llama_index/entrypoint.sh | 0 .../vllm/llama_index/launch_microservice.sh | 0 .../text-generation/vllm/llama_index/llm.py | 0 .../text-generation/vllm/llama_index/query.sh | 0 .../vllm/llama_index/requirements-runtime.txt | 0 .../vllm/llama_index/requirements.txt | 0 comps/llms/{ => src}/utils/lm-eval/Dockerfile | 0 comps/llms/{ => src}/utils/lm-eval/README.md | 0 .../{ => src}/utils/lm-eval/requirements.txt | 0 .../{ => src}/utils/lm-eval/self_hosted_hf.py | 0 .../dependency/build_docker_vllm.sh | 43 ------------- .../dependency/build_docker_vllm_openvino.sh | 10 --- .../dependency/launch_vllm_service.sh | 44 ------------- .../launch_vllm_service_openvino.sh | 61 ------------------- 110 files changed, 20 insertions(+), 176 deletions(-) create mode 100644 comps/3rd_parties/tgi/docker/README.md create mode 100644 comps/3rd_parties/tgi/kubernetes/README.md rename comps/{llms/text-generation/vllm/langchain/dependency => 3rd_parties/vllm/docker}/Dockerfile.intel_gpu (100%) rename comps/{llms/text-generation/vllm/langchain/dependency => 3rd_parties/vllm/docker}/build_docker_vllm.sh (100%) rename comps/{llms/text-generation/vllm/langchain/dependency => 3rd_parties/vllm/docker}/build_docker_vllm_openvino.sh (100%) rename comps/{llms/text-generation/vllm/langchain/dependency => 3rd_parties/vllm/docker}/launch_vllm_service.sh (100%) rename comps/{llms/text-generation/vllm/langchain/dependency => 3rd_parties/vllm/docker}/launch_vllm_service_openvino.sh (100%) create mode 100644 comps/3rd_parties/vllm/kubernetes/README.md rename comps/llms/{faq-generation/tgi/langchain/docker_compose_llm.yaml => deployment/docker_compose/faq-generation_tgi_langchain.yaml} (100%) rename comps/llms/{faq-generation/vllm/langchain/docker_compose_llm.yaml => deployment/docker_compose/faq-generation_vllm_langchain.yaml} (100%) rename comps/llms/{summarization/tgi/langchain/docker_compose_llm.yaml => deployment/docker_compose/summarization_tgi_langchain.yaml} (100%) rename comps/llms/{summarization/vllm/langchain/docker_compose_llm.yaml => deployment/docker_compose/summarization_vllm_langchain.yaml} (100%) rename comps/llms/{text-generation/native/langchain/docker_compose_llm.yaml => deployment/docker_compose/text-generation_native_langchain.yaml} (100%) rename comps/llms/{text-generation/native/llama_index/docker_compose_llm.yaml => deployment/docker_compose/text-generation_native_llama_index.yaml} (100%) rename comps/llms/{text-generation/predictionguard/docker_compose_llm.yaml => deployment/docker_compose/text-generation_predictionguard.yaml} (100%) rename comps/llms/{text-generation/tgi/docker_compose_llm.yaml => 
deployment/docker_compose/text-generation_tgi.yaml} (100%) rename comps/llms/{text-generation/vllm/langchain/docker_compose_llm.yaml => deployment/docker_compose/text-generation_vllm_langchain.yaml} (100%) rename comps/llms/{text-generation/vllm/llama_index/docker_compose_llm.yaml => deployment/docker_compose/text-generation_vllm_llama_index.yaml} (100%) create mode 100644 comps/llms/deployment/kubernetes/README.md rename comps/llms/{ => src}/faq-generation/tgi/langchain/Dockerfile (100%) rename comps/llms/{ => src}/faq-generation/tgi/langchain/README.md (100%) rename comps/llms/{ => src}/faq-generation/tgi/langchain/__init__.py (100%) rename comps/llms/{ => src}/faq-generation/tgi/langchain/entrypoint.sh (100%) rename comps/llms/{ => src}/faq-generation/tgi/langchain/llm.py (100%) rename comps/llms/{ => src}/faq-generation/tgi/langchain/requirements-runtime.txt (100%) rename comps/llms/{ => src}/faq-generation/tgi/langchain/requirements.txt (100%) rename comps/llms/{ => src}/faq-generation/vllm/langchain/Dockerfile (100%) rename comps/llms/{ => src}/faq-generation/vllm/langchain/README.md (100%) rename comps/llms/{ => src}/faq-generation/vllm/langchain/__init__.py (100%) rename comps/llms/{ => src}/faq-generation/vllm/langchain/entrypoint.sh (100%) rename comps/llms/{ => src}/faq-generation/vllm/langchain/llm.py (100%) rename comps/llms/{ => src}/faq-generation/vllm/langchain/requirements-runtime.txt (100%) rename comps/llms/{ => src}/faq-generation/vllm/langchain/requirements.txt (100%) rename comps/llms/{ => src}/requirements.txt (100%) rename comps/llms/{ => src}/summarization/tgi/langchain/Dockerfile (100%) rename comps/llms/{ => src}/summarization/tgi/langchain/README.md (100%) rename comps/llms/{ => src}/summarization/tgi/langchain/__init__.py (100%) rename comps/llms/{ => src}/summarization/tgi/langchain/entrypoint.sh (100%) rename comps/llms/{ => src}/summarization/tgi/langchain/llm.py (100%) rename comps/llms/{ => src}/summarization/tgi/langchain/requirements-runtime.txt (100%) rename comps/llms/{ => src}/summarization/tgi/langchain/requirements.txt (100%) rename comps/llms/{ => src}/summarization/vllm/langchain/Dockerfile (100%) rename comps/llms/{ => src}/summarization/vllm/langchain/README.md (100%) rename comps/llms/{ => src}/summarization/vllm/langchain/__init__.py (100%) rename comps/llms/{ => src}/summarization/vllm/langchain/entrypoint.sh (100%) rename comps/llms/{ => src}/summarization/vllm/langchain/llm.py (100%) rename comps/llms/{ => src}/summarization/vllm/langchain/requirements-runtime.txt (100%) rename comps/llms/{ => src}/summarization/vllm/langchain/requirements.txt (100%) rename comps/llms/{ => src}/text-generation/README.md (100%) rename comps/llms/{ => src}/text-generation/native/langchain/Dockerfile (100%) rename comps/llms/{ => src}/text-generation/native/langchain/README.md (100%) rename comps/llms/{ => src}/text-generation/native/langchain/llm.py (100%) rename comps/llms/{ => src}/text-generation/native/langchain/requirements.txt (100%) rename comps/llms/{ => src}/text-generation/native/langchain/template.py (100%) rename comps/llms/{ => src}/text-generation/native/langchain/utils.py (100%) rename comps/llms/{ => src}/text-generation/native/llama_index/Dockerfile (100%) rename comps/llms/{ => src}/text-generation/native/llama_index/README.md (100%) rename comps/llms/{ => src}/text-generation/native/llama_index/llm.py (100%) rename comps/llms/{ => src}/text-generation/native/llama_index/requirements.txt (100%) rename comps/llms/{ => 
src}/text-generation/native/llama_index/template.py (100%) rename comps/llms/{ => src}/text-generation/native/llama_index/utils.py (100%) rename comps/llms/{ => src}/text-generation/ollama/langchain/Dockerfile (100%) rename comps/llms/{ => src}/text-generation/ollama/langchain/README.md (100%) rename comps/llms/{ => src}/text-generation/ollama/langchain/__init__.py (100%) rename comps/llms/{ => src}/text-generation/ollama/langchain/entrypoint.sh (100%) rename comps/llms/{ => src}/text-generation/ollama/langchain/llm.py (100%) rename comps/llms/{ => src}/text-generation/ollama/langchain/requirements-runtime.txt (100%) rename comps/llms/{ => src}/text-generation/ollama/langchain/requirements.txt (100%) rename comps/llms/{ => src}/text-generation/predictionguard/Dockerfile (100%) rename comps/llms/{ => src}/text-generation/predictionguard/README.md (100%) rename comps/llms/{ => src}/text-generation/predictionguard/__init__.py (100%) rename comps/llms/{ => src}/text-generation/predictionguard/entrypoint.sh (100%) rename comps/llms/{ => src}/text-generation/predictionguard/llm_predictionguard.py (100%) rename comps/llms/{ => src}/text-generation/predictionguard/requirements.txt (100%) rename comps/llms/{ => src}/text-generation/tgi/Dockerfile (100%) rename comps/llms/{ => src}/text-generation/tgi/README.md (87%) rename comps/llms/{ => src}/text-generation/tgi/__init__.py (100%) rename comps/llms/{ => src}/text-generation/tgi/entrypoint.sh (100%) rename comps/llms/{ => src}/text-generation/tgi/launch_tgi_service.sh (100%) rename comps/llms/{ => src}/text-generation/tgi/llm.py (100%) rename comps/llms/{ => src}/text-generation/tgi/requirements-runtime.txt (100%) rename comps/llms/{ => src}/text-generation/tgi/requirements.txt (100%) rename comps/llms/{ => src}/text-generation/tgi/template.py (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/Dockerfile (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/README.md (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/build_docker_microservice.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/entrypoint.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/launch_microservice.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/llm.py (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/query.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/requirements-runtime.txt (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/requirements.txt (100%) rename comps/llms/{ => src}/text-generation/vllm/langchain/template.py (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/Dockerfile (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/README.md (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/build_docker_microservice.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/entrypoint.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/launch_microservice.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/llm.py (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/query.sh (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/requirements-runtime.txt (100%) rename comps/llms/{ => src}/text-generation/vllm/llama_index/requirements.txt (100%) rename comps/llms/{ => src}/utils/lm-eval/Dockerfile (100%) rename comps/llms/{ => src}/utils/lm-eval/README.md (100%) rename comps/llms/{ => 
src}/utils/lm-eval/requirements.txt (100%)
 rename comps/llms/{ => src}/utils/lm-eval/self_hosted_hf.py (100%)
 delete mode 100644 comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm.sh
 delete mode 100644 comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm_openvino.sh
 delete mode 100644 comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service.sh
 delete mode 100644 comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service_openvino.sh

diff --git a/comps/3rd_parties/tgi/docker/README.md b/comps/3rd_parties/tgi/docker/README.md
new file mode 100644
index 0000000000..9b7b5f0bfa
--- /dev/null
+++ b/comps/3rd_parties/tgi/docker/README.md
@@ -0,0 +1,15 @@
+## Launch TGI endpoint
+
+```bash
+export HF_TOKEN=${your_hf_api_token}
+docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model}
+```
+
+## Verify the TGI Service
+
+```bash
+curl http://${your_ip}:8008/v1/chat/completions \
+ -X POST \
+ -d '{"model": ${your_hf_llm_model}, "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
+ -H 'Content-Type: application/json'
+```
diff --git a/comps/3rd_parties/tgi/kubernetes/README.md b/comps/3rd_parties/tgi/kubernetes/README.md
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu b/comps/3rd_parties/vllm/docker/Dockerfile.intel_gpu
similarity index 100%
rename from comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu
rename to comps/3rd_parties/vllm/docker/Dockerfile.intel_gpu
diff --git a/comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm.sh b/comps/3rd_parties/vllm/docker/build_docker_vllm.sh
similarity index 100%
rename from comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm.sh
rename to comps/3rd_parties/vllm/docker/build_docker_vllm.sh
diff --git a/comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm_openvino.sh b/comps/3rd_parties/vllm/docker/build_docker_vllm_openvino.sh
similarity index 100%
rename from comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm_openvino.sh
rename to comps/3rd_parties/vllm/docker/build_docker_vllm_openvino.sh
diff --git a/comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service.sh b/comps/3rd_parties/vllm/docker/launch_vllm_service.sh
similarity index 100%
rename from comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service.sh
rename to comps/3rd_parties/vllm/docker/launch_vllm_service.sh
diff --git a/comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service_openvino.sh b/comps/3rd_parties/vllm/docker/launch_vllm_service_openvino.sh
similarity index 100%
rename from comps/llms/text-generation/vllm/langchain/dependency/launch_vllm_service_openvino.sh
rename to comps/3rd_parties/vllm/docker/launch_vllm_service_openvino.sh
diff --git a/comps/3rd_parties/vllm/kubernetes/README.md b/comps/3rd_parties/vllm/kubernetes/README.md
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/comps/llms/faq-generation/tgi/langchain/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/faq-generation_tgi_langchain.yaml
similarity index 100%
rename from comps/llms/faq-generation/tgi/langchain/docker_compose_llm.yaml
rename to comps/llms/deployment/docker_compose/faq-generation_tgi_langchain.yaml
diff --git
a/comps/llms/faq-generation/vllm/langchain/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/faq-generation_vllm_langchain.yaml similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/faq-generation_vllm_langchain.yaml diff --git a/comps/llms/summarization/tgi/langchain/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/summarization_tgi_langchain.yaml similarity index 100% rename from comps/llms/summarization/tgi/langchain/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/summarization_tgi_langchain.yaml diff --git a/comps/llms/summarization/vllm/langchain/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/summarization_vllm_langchain.yaml similarity index 100% rename from comps/llms/summarization/vllm/langchain/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/summarization_vllm_langchain.yaml diff --git a/comps/llms/text-generation/native/langchain/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/text-generation_native_langchain.yaml similarity index 100% rename from comps/llms/text-generation/native/langchain/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/text-generation_native_langchain.yaml diff --git a/comps/llms/text-generation/native/llama_index/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/text-generation_native_llama_index.yaml similarity index 100% rename from comps/llms/text-generation/native/llama_index/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/text-generation_native_llama_index.yaml diff --git a/comps/llms/text-generation/predictionguard/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/text-generation_predictionguard.yaml similarity index 100% rename from comps/llms/text-generation/predictionguard/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/text-generation_predictionguard.yaml diff --git a/comps/llms/text-generation/tgi/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/text-generation_tgi.yaml similarity index 100% rename from comps/llms/text-generation/tgi/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/text-generation_tgi.yaml diff --git a/comps/llms/text-generation/vllm/langchain/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/text-generation_vllm_langchain.yaml similarity index 100% rename from comps/llms/text-generation/vllm/langchain/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/text-generation_vllm_langchain.yaml diff --git a/comps/llms/text-generation/vllm/llama_index/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/text-generation_vllm_llama_index.yaml similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/text-generation_vllm_llama_index.yaml diff --git a/comps/llms/deployment/kubernetes/README.md b/comps/llms/deployment/kubernetes/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/llms/faq-generation/tgi/langchain/Dockerfile b/comps/llms/src/faq-generation/tgi/langchain/Dockerfile similarity index 100% rename from comps/llms/faq-generation/tgi/langchain/Dockerfile rename to comps/llms/src/faq-generation/tgi/langchain/Dockerfile diff --git a/comps/llms/faq-generation/tgi/langchain/README.md b/comps/llms/src/faq-generation/tgi/langchain/README.md similarity index 100% 
rename from comps/llms/faq-generation/tgi/langchain/README.md rename to comps/llms/src/faq-generation/tgi/langchain/README.md diff --git a/comps/llms/faq-generation/tgi/langchain/__init__.py b/comps/llms/src/faq-generation/tgi/langchain/__init__.py similarity index 100% rename from comps/llms/faq-generation/tgi/langchain/__init__.py rename to comps/llms/src/faq-generation/tgi/langchain/__init__.py diff --git a/comps/llms/faq-generation/tgi/langchain/entrypoint.sh b/comps/llms/src/faq-generation/tgi/langchain/entrypoint.sh similarity index 100% rename from comps/llms/faq-generation/tgi/langchain/entrypoint.sh rename to comps/llms/src/faq-generation/tgi/langchain/entrypoint.sh diff --git a/comps/llms/faq-generation/tgi/langchain/llm.py b/comps/llms/src/faq-generation/tgi/langchain/llm.py similarity index 100% rename from comps/llms/faq-generation/tgi/langchain/llm.py rename to comps/llms/src/faq-generation/tgi/langchain/llm.py diff --git a/comps/llms/faq-generation/tgi/langchain/requirements-runtime.txt b/comps/llms/src/faq-generation/tgi/langchain/requirements-runtime.txt similarity index 100% rename from comps/llms/faq-generation/tgi/langchain/requirements-runtime.txt rename to comps/llms/src/faq-generation/tgi/langchain/requirements-runtime.txt diff --git a/comps/llms/faq-generation/tgi/langchain/requirements.txt b/comps/llms/src/faq-generation/tgi/langchain/requirements.txt similarity index 100% rename from comps/llms/faq-generation/tgi/langchain/requirements.txt rename to comps/llms/src/faq-generation/tgi/langchain/requirements.txt diff --git a/comps/llms/faq-generation/vllm/langchain/Dockerfile b/comps/llms/src/faq-generation/vllm/langchain/Dockerfile similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/Dockerfile rename to comps/llms/src/faq-generation/vllm/langchain/Dockerfile diff --git a/comps/llms/faq-generation/vllm/langchain/README.md b/comps/llms/src/faq-generation/vllm/langchain/README.md similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/README.md rename to comps/llms/src/faq-generation/vllm/langchain/README.md diff --git a/comps/llms/faq-generation/vllm/langchain/__init__.py b/comps/llms/src/faq-generation/vllm/langchain/__init__.py similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/__init__.py rename to comps/llms/src/faq-generation/vllm/langchain/__init__.py diff --git a/comps/llms/faq-generation/vllm/langchain/entrypoint.sh b/comps/llms/src/faq-generation/vllm/langchain/entrypoint.sh similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/entrypoint.sh rename to comps/llms/src/faq-generation/vllm/langchain/entrypoint.sh diff --git a/comps/llms/faq-generation/vllm/langchain/llm.py b/comps/llms/src/faq-generation/vllm/langchain/llm.py similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/llm.py rename to comps/llms/src/faq-generation/vllm/langchain/llm.py diff --git a/comps/llms/faq-generation/vllm/langchain/requirements-runtime.txt b/comps/llms/src/faq-generation/vllm/langchain/requirements-runtime.txt similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/requirements-runtime.txt rename to comps/llms/src/faq-generation/vllm/langchain/requirements-runtime.txt diff --git a/comps/llms/faq-generation/vllm/langchain/requirements.txt b/comps/llms/src/faq-generation/vllm/langchain/requirements.txt similarity index 100% rename from comps/llms/faq-generation/vllm/langchain/requirements.txt rename to 
comps/llms/src/faq-generation/vllm/langchain/requirements.txt diff --git a/comps/llms/requirements.txt b/comps/llms/src/requirements.txt similarity index 100% rename from comps/llms/requirements.txt rename to comps/llms/src/requirements.txt diff --git a/comps/llms/summarization/tgi/langchain/Dockerfile b/comps/llms/src/summarization/tgi/langchain/Dockerfile similarity index 100% rename from comps/llms/summarization/tgi/langchain/Dockerfile rename to comps/llms/src/summarization/tgi/langchain/Dockerfile diff --git a/comps/llms/summarization/tgi/langchain/README.md b/comps/llms/src/summarization/tgi/langchain/README.md similarity index 100% rename from comps/llms/summarization/tgi/langchain/README.md rename to comps/llms/src/summarization/tgi/langchain/README.md diff --git a/comps/llms/summarization/tgi/langchain/__init__.py b/comps/llms/src/summarization/tgi/langchain/__init__.py similarity index 100% rename from comps/llms/summarization/tgi/langchain/__init__.py rename to comps/llms/src/summarization/tgi/langchain/__init__.py diff --git a/comps/llms/summarization/tgi/langchain/entrypoint.sh b/comps/llms/src/summarization/tgi/langchain/entrypoint.sh similarity index 100% rename from comps/llms/summarization/tgi/langchain/entrypoint.sh rename to comps/llms/src/summarization/tgi/langchain/entrypoint.sh diff --git a/comps/llms/summarization/tgi/langchain/llm.py b/comps/llms/src/summarization/tgi/langchain/llm.py similarity index 100% rename from comps/llms/summarization/tgi/langchain/llm.py rename to comps/llms/src/summarization/tgi/langchain/llm.py diff --git a/comps/llms/summarization/tgi/langchain/requirements-runtime.txt b/comps/llms/src/summarization/tgi/langchain/requirements-runtime.txt similarity index 100% rename from comps/llms/summarization/tgi/langchain/requirements-runtime.txt rename to comps/llms/src/summarization/tgi/langchain/requirements-runtime.txt diff --git a/comps/llms/summarization/tgi/langchain/requirements.txt b/comps/llms/src/summarization/tgi/langchain/requirements.txt similarity index 100% rename from comps/llms/summarization/tgi/langchain/requirements.txt rename to comps/llms/src/summarization/tgi/langchain/requirements.txt diff --git a/comps/llms/summarization/vllm/langchain/Dockerfile b/comps/llms/src/summarization/vllm/langchain/Dockerfile similarity index 100% rename from comps/llms/summarization/vllm/langchain/Dockerfile rename to comps/llms/src/summarization/vllm/langchain/Dockerfile diff --git a/comps/llms/summarization/vllm/langchain/README.md b/comps/llms/src/summarization/vllm/langchain/README.md similarity index 100% rename from comps/llms/summarization/vllm/langchain/README.md rename to comps/llms/src/summarization/vllm/langchain/README.md diff --git a/comps/llms/summarization/vllm/langchain/__init__.py b/comps/llms/src/summarization/vllm/langchain/__init__.py similarity index 100% rename from comps/llms/summarization/vllm/langchain/__init__.py rename to comps/llms/src/summarization/vllm/langchain/__init__.py diff --git a/comps/llms/summarization/vllm/langchain/entrypoint.sh b/comps/llms/src/summarization/vllm/langchain/entrypoint.sh similarity index 100% rename from comps/llms/summarization/vllm/langchain/entrypoint.sh rename to comps/llms/src/summarization/vllm/langchain/entrypoint.sh diff --git a/comps/llms/summarization/vllm/langchain/llm.py b/comps/llms/src/summarization/vllm/langchain/llm.py similarity index 100% rename from comps/llms/summarization/vllm/langchain/llm.py rename to comps/llms/src/summarization/vllm/langchain/llm.py diff --git 
a/comps/llms/summarization/vllm/langchain/requirements-runtime.txt b/comps/llms/src/summarization/vllm/langchain/requirements-runtime.txt similarity index 100% rename from comps/llms/summarization/vllm/langchain/requirements-runtime.txt rename to comps/llms/src/summarization/vllm/langchain/requirements-runtime.txt diff --git a/comps/llms/summarization/vllm/langchain/requirements.txt b/comps/llms/src/summarization/vllm/langchain/requirements.txt similarity index 100% rename from comps/llms/summarization/vllm/langchain/requirements.txt rename to comps/llms/src/summarization/vllm/langchain/requirements.txt diff --git a/comps/llms/text-generation/README.md b/comps/llms/src/text-generation/README.md similarity index 100% rename from comps/llms/text-generation/README.md rename to comps/llms/src/text-generation/README.md diff --git a/comps/llms/text-generation/native/langchain/Dockerfile b/comps/llms/src/text-generation/native/langchain/Dockerfile similarity index 100% rename from comps/llms/text-generation/native/langchain/Dockerfile rename to comps/llms/src/text-generation/native/langchain/Dockerfile diff --git a/comps/llms/text-generation/native/langchain/README.md b/comps/llms/src/text-generation/native/langchain/README.md similarity index 100% rename from comps/llms/text-generation/native/langchain/README.md rename to comps/llms/src/text-generation/native/langchain/README.md diff --git a/comps/llms/text-generation/native/langchain/llm.py b/comps/llms/src/text-generation/native/langchain/llm.py similarity index 100% rename from comps/llms/text-generation/native/langchain/llm.py rename to comps/llms/src/text-generation/native/langchain/llm.py diff --git a/comps/llms/text-generation/native/langchain/requirements.txt b/comps/llms/src/text-generation/native/langchain/requirements.txt similarity index 100% rename from comps/llms/text-generation/native/langchain/requirements.txt rename to comps/llms/src/text-generation/native/langchain/requirements.txt diff --git a/comps/llms/text-generation/native/langchain/template.py b/comps/llms/src/text-generation/native/langchain/template.py similarity index 100% rename from comps/llms/text-generation/native/langchain/template.py rename to comps/llms/src/text-generation/native/langchain/template.py diff --git a/comps/llms/text-generation/native/langchain/utils.py b/comps/llms/src/text-generation/native/langchain/utils.py similarity index 100% rename from comps/llms/text-generation/native/langchain/utils.py rename to comps/llms/src/text-generation/native/langchain/utils.py diff --git a/comps/llms/text-generation/native/llama_index/Dockerfile b/comps/llms/src/text-generation/native/llama_index/Dockerfile similarity index 100% rename from comps/llms/text-generation/native/llama_index/Dockerfile rename to comps/llms/src/text-generation/native/llama_index/Dockerfile diff --git a/comps/llms/text-generation/native/llama_index/README.md b/comps/llms/src/text-generation/native/llama_index/README.md similarity index 100% rename from comps/llms/text-generation/native/llama_index/README.md rename to comps/llms/src/text-generation/native/llama_index/README.md diff --git a/comps/llms/text-generation/native/llama_index/llm.py b/comps/llms/src/text-generation/native/llama_index/llm.py similarity index 100% rename from comps/llms/text-generation/native/llama_index/llm.py rename to comps/llms/src/text-generation/native/llama_index/llm.py diff --git a/comps/llms/text-generation/native/llama_index/requirements.txt 
b/comps/llms/src/text-generation/native/llama_index/requirements.txt similarity index 100% rename from comps/llms/text-generation/native/llama_index/requirements.txt rename to comps/llms/src/text-generation/native/llama_index/requirements.txt diff --git a/comps/llms/text-generation/native/llama_index/template.py b/comps/llms/src/text-generation/native/llama_index/template.py similarity index 100% rename from comps/llms/text-generation/native/llama_index/template.py rename to comps/llms/src/text-generation/native/llama_index/template.py diff --git a/comps/llms/text-generation/native/llama_index/utils.py b/comps/llms/src/text-generation/native/llama_index/utils.py similarity index 100% rename from comps/llms/text-generation/native/llama_index/utils.py rename to comps/llms/src/text-generation/native/llama_index/utils.py diff --git a/comps/llms/text-generation/ollama/langchain/Dockerfile b/comps/llms/src/text-generation/ollama/langchain/Dockerfile similarity index 100% rename from comps/llms/text-generation/ollama/langchain/Dockerfile rename to comps/llms/src/text-generation/ollama/langchain/Dockerfile diff --git a/comps/llms/text-generation/ollama/langchain/README.md b/comps/llms/src/text-generation/ollama/langchain/README.md similarity index 100% rename from comps/llms/text-generation/ollama/langchain/README.md rename to comps/llms/src/text-generation/ollama/langchain/README.md diff --git a/comps/llms/text-generation/ollama/langchain/__init__.py b/comps/llms/src/text-generation/ollama/langchain/__init__.py similarity index 100% rename from comps/llms/text-generation/ollama/langchain/__init__.py rename to comps/llms/src/text-generation/ollama/langchain/__init__.py diff --git a/comps/llms/text-generation/ollama/langchain/entrypoint.sh b/comps/llms/src/text-generation/ollama/langchain/entrypoint.sh similarity index 100% rename from comps/llms/text-generation/ollama/langchain/entrypoint.sh rename to comps/llms/src/text-generation/ollama/langchain/entrypoint.sh diff --git a/comps/llms/text-generation/ollama/langchain/llm.py b/comps/llms/src/text-generation/ollama/langchain/llm.py similarity index 100% rename from comps/llms/text-generation/ollama/langchain/llm.py rename to comps/llms/src/text-generation/ollama/langchain/llm.py diff --git a/comps/llms/text-generation/ollama/langchain/requirements-runtime.txt b/comps/llms/src/text-generation/ollama/langchain/requirements-runtime.txt similarity index 100% rename from comps/llms/text-generation/ollama/langchain/requirements-runtime.txt rename to comps/llms/src/text-generation/ollama/langchain/requirements-runtime.txt diff --git a/comps/llms/text-generation/ollama/langchain/requirements.txt b/comps/llms/src/text-generation/ollama/langchain/requirements.txt similarity index 100% rename from comps/llms/text-generation/ollama/langchain/requirements.txt rename to comps/llms/src/text-generation/ollama/langchain/requirements.txt diff --git a/comps/llms/text-generation/predictionguard/Dockerfile b/comps/llms/src/text-generation/predictionguard/Dockerfile similarity index 100% rename from comps/llms/text-generation/predictionguard/Dockerfile rename to comps/llms/src/text-generation/predictionguard/Dockerfile diff --git a/comps/llms/text-generation/predictionguard/README.md b/comps/llms/src/text-generation/predictionguard/README.md similarity index 100% rename from comps/llms/text-generation/predictionguard/README.md rename to comps/llms/src/text-generation/predictionguard/README.md diff --git a/comps/llms/text-generation/predictionguard/__init__.py 
b/comps/llms/src/text-generation/predictionguard/__init__.py
similarity index 100%
rename from comps/llms/text-generation/predictionguard/__init__.py
rename to comps/llms/src/text-generation/predictionguard/__init__.py
diff --git a/comps/llms/text-generation/predictionguard/entrypoint.sh b/comps/llms/src/text-generation/predictionguard/entrypoint.sh
similarity index 100%
rename from comps/llms/text-generation/predictionguard/entrypoint.sh
rename to comps/llms/src/text-generation/predictionguard/entrypoint.sh
diff --git a/comps/llms/text-generation/predictionguard/llm_predictionguard.py b/comps/llms/src/text-generation/predictionguard/llm_predictionguard.py
similarity index 100%
rename from comps/llms/text-generation/predictionguard/llm_predictionguard.py
rename to comps/llms/src/text-generation/predictionguard/llm_predictionguard.py
diff --git a/comps/llms/text-generation/predictionguard/requirements.txt b/comps/llms/src/text-generation/predictionguard/requirements.txt
similarity index 100%
rename from comps/llms/text-generation/predictionguard/requirements.txt
rename to comps/llms/src/text-generation/predictionguard/requirements.txt
diff --git a/comps/llms/text-generation/tgi/Dockerfile b/comps/llms/src/text-generation/tgi/Dockerfile
similarity index 100%
rename from comps/llms/text-generation/tgi/Dockerfile
rename to comps/llms/src/text-generation/tgi/Dockerfile
diff --git a/comps/llms/text-generation/tgi/README.md b/comps/llms/src/text-generation/tgi/README.md
similarity index 87%
rename from comps/llms/text-generation/tgi/README.md
rename to comps/llms/src/text-generation/tgi/README.md
index 5addf65078..9cfe6509e3 100644
--- a/comps/llms/text-generation/tgi/README.md
+++ b/comps/llms/src/text-generation/tgi/README.md
@@ -12,23 +12,10 @@ To start the LLM microservice, you need to install python packages first.
 pip install -r requirements.txt
 ```

-### 1.2 Start LLM Service
+### 1.2 Start 3rd-party TGI Service
+Please refer to [3rd-party TGI](../../../../3rd_parties/tgi/docker/README.md) to start a LLM endpoint and verify.

-```bash
-export HF_TOKEN=${your_hf_api_token}
-docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model}
-```
-
-### 1.3 Verify the TGI Service
-
-```bash
-curl http://${your_ip}:8008/v1/chat/completions \
- -X POST \
- -d '{"model": ${your_hf_llm_model}, "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
- -H 'Content-Type: application/json'
-```
-
-### 1.4 Start LLM Service with Python Script
+### 1.3 Start LLM Service with Python Script

 ```bash
 export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
@@ -72,8 +59,8 @@ docker run -d --name="llm-tgi-server" -p 9000:9000 --ipc=host -e http_proxy=$htt
 ### 2.4 Run Docker with Docker Compose (Option B)

 ```bash
-cd text-generation/tgi
-docker compose -f docker_compose_llm.yaml up -d
+cd comps/llms/deployment/docker_compose/
+docker compose -f text-generation_tgi.yaml up -d
 ```

 ## 🚀3.
Consume LLM Service diff --git a/comps/llms/text-generation/tgi/__init__.py b/comps/llms/src/text-generation/tgi/__init__.py similarity index 100% rename from comps/llms/text-generation/tgi/__init__.py rename to comps/llms/src/text-generation/tgi/__init__.py diff --git a/comps/llms/text-generation/tgi/entrypoint.sh b/comps/llms/src/text-generation/tgi/entrypoint.sh similarity index 100% rename from comps/llms/text-generation/tgi/entrypoint.sh rename to comps/llms/src/text-generation/tgi/entrypoint.sh diff --git a/comps/llms/text-generation/tgi/launch_tgi_service.sh b/comps/llms/src/text-generation/tgi/launch_tgi_service.sh similarity index 100% rename from comps/llms/text-generation/tgi/launch_tgi_service.sh rename to comps/llms/src/text-generation/tgi/launch_tgi_service.sh diff --git a/comps/llms/text-generation/tgi/llm.py b/comps/llms/src/text-generation/tgi/llm.py similarity index 100% rename from comps/llms/text-generation/tgi/llm.py rename to comps/llms/src/text-generation/tgi/llm.py diff --git a/comps/llms/text-generation/tgi/requirements-runtime.txt b/comps/llms/src/text-generation/tgi/requirements-runtime.txt similarity index 100% rename from comps/llms/text-generation/tgi/requirements-runtime.txt rename to comps/llms/src/text-generation/tgi/requirements-runtime.txt diff --git a/comps/llms/text-generation/tgi/requirements.txt b/comps/llms/src/text-generation/tgi/requirements.txt similarity index 100% rename from comps/llms/text-generation/tgi/requirements.txt rename to comps/llms/src/text-generation/tgi/requirements.txt diff --git a/comps/llms/text-generation/tgi/template.py b/comps/llms/src/text-generation/tgi/template.py similarity index 100% rename from comps/llms/text-generation/tgi/template.py rename to comps/llms/src/text-generation/tgi/template.py diff --git a/comps/llms/text-generation/vllm/langchain/Dockerfile b/comps/llms/src/text-generation/vllm/langchain/Dockerfile similarity index 100% rename from comps/llms/text-generation/vllm/langchain/Dockerfile rename to comps/llms/src/text-generation/vllm/langchain/Dockerfile diff --git a/comps/llms/text-generation/vllm/langchain/README.md b/comps/llms/src/text-generation/vllm/langchain/README.md similarity index 100% rename from comps/llms/text-generation/vllm/langchain/README.md rename to comps/llms/src/text-generation/vllm/langchain/README.md diff --git a/comps/llms/text-generation/vllm/langchain/build_docker_microservice.sh b/comps/llms/src/text-generation/vllm/langchain/build_docker_microservice.sh similarity index 100% rename from comps/llms/text-generation/vllm/langchain/build_docker_microservice.sh rename to comps/llms/src/text-generation/vllm/langchain/build_docker_microservice.sh diff --git a/comps/llms/text-generation/vllm/langchain/entrypoint.sh b/comps/llms/src/text-generation/vllm/langchain/entrypoint.sh similarity index 100% rename from comps/llms/text-generation/vllm/langchain/entrypoint.sh rename to comps/llms/src/text-generation/vllm/langchain/entrypoint.sh diff --git a/comps/llms/text-generation/vllm/langchain/launch_microservice.sh b/comps/llms/src/text-generation/vllm/langchain/launch_microservice.sh similarity index 100% rename from comps/llms/text-generation/vllm/langchain/launch_microservice.sh rename to comps/llms/src/text-generation/vllm/langchain/launch_microservice.sh diff --git a/comps/llms/text-generation/vllm/langchain/llm.py b/comps/llms/src/text-generation/vllm/langchain/llm.py similarity index 100% rename from comps/llms/text-generation/vllm/langchain/llm.py rename to 
comps/llms/src/text-generation/vllm/langchain/llm.py diff --git a/comps/llms/text-generation/vllm/langchain/query.sh b/comps/llms/src/text-generation/vllm/langchain/query.sh similarity index 100% rename from comps/llms/text-generation/vllm/langchain/query.sh rename to comps/llms/src/text-generation/vllm/langchain/query.sh diff --git a/comps/llms/text-generation/vllm/langchain/requirements-runtime.txt b/comps/llms/src/text-generation/vllm/langchain/requirements-runtime.txt similarity index 100% rename from comps/llms/text-generation/vllm/langchain/requirements-runtime.txt rename to comps/llms/src/text-generation/vllm/langchain/requirements-runtime.txt diff --git a/comps/llms/text-generation/vllm/langchain/requirements.txt b/comps/llms/src/text-generation/vllm/langchain/requirements.txt similarity index 100% rename from comps/llms/text-generation/vllm/langchain/requirements.txt rename to comps/llms/src/text-generation/vllm/langchain/requirements.txt diff --git a/comps/llms/text-generation/vllm/langchain/template.py b/comps/llms/src/text-generation/vllm/langchain/template.py similarity index 100% rename from comps/llms/text-generation/vllm/langchain/template.py rename to comps/llms/src/text-generation/vllm/langchain/template.py diff --git a/comps/llms/text-generation/vllm/llama_index/Dockerfile b/comps/llms/src/text-generation/vllm/llama_index/Dockerfile similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/Dockerfile rename to comps/llms/src/text-generation/vllm/llama_index/Dockerfile diff --git a/comps/llms/text-generation/vllm/llama_index/README.md b/comps/llms/src/text-generation/vllm/llama_index/README.md similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/README.md rename to comps/llms/src/text-generation/vllm/llama_index/README.md diff --git a/comps/llms/text-generation/vllm/llama_index/build_docker_microservice.sh b/comps/llms/src/text-generation/vllm/llama_index/build_docker_microservice.sh similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/build_docker_microservice.sh rename to comps/llms/src/text-generation/vllm/llama_index/build_docker_microservice.sh diff --git a/comps/llms/text-generation/vllm/llama_index/entrypoint.sh b/comps/llms/src/text-generation/vllm/llama_index/entrypoint.sh similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/entrypoint.sh rename to comps/llms/src/text-generation/vllm/llama_index/entrypoint.sh diff --git a/comps/llms/text-generation/vllm/llama_index/launch_microservice.sh b/comps/llms/src/text-generation/vllm/llama_index/launch_microservice.sh similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/launch_microservice.sh rename to comps/llms/src/text-generation/vllm/llama_index/launch_microservice.sh diff --git a/comps/llms/text-generation/vllm/llama_index/llm.py b/comps/llms/src/text-generation/vllm/llama_index/llm.py similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/llm.py rename to comps/llms/src/text-generation/vllm/llama_index/llm.py diff --git a/comps/llms/text-generation/vllm/llama_index/query.sh b/comps/llms/src/text-generation/vllm/llama_index/query.sh similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/query.sh rename to comps/llms/src/text-generation/vllm/llama_index/query.sh diff --git a/comps/llms/text-generation/vllm/llama_index/requirements-runtime.txt b/comps/llms/src/text-generation/vllm/llama_index/requirements-runtime.txt similarity index 100% rename from 
comps/llms/text-generation/vllm/llama_index/requirements-runtime.txt rename to comps/llms/src/text-generation/vllm/llama_index/requirements-runtime.txt diff --git a/comps/llms/text-generation/vllm/llama_index/requirements.txt b/comps/llms/src/text-generation/vllm/llama_index/requirements.txt similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/requirements.txt rename to comps/llms/src/text-generation/vllm/llama_index/requirements.txt diff --git a/comps/llms/utils/lm-eval/Dockerfile b/comps/llms/src/utils/lm-eval/Dockerfile similarity index 100% rename from comps/llms/utils/lm-eval/Dockerfile rename to comps/llms/src/utils/lm-eval/Dockerfile diff --git a/comps/llms/utils/lm-eval/README.md b/comps/llms/src/utils/lm-eval/README.md similarity index 100% rename from comps/llms/utils/lm-eval/README.md rename to comps/llms/src/utils/lm-eval/README.md diff --git a/comps/llms/utils/lm-eval/requirements.txt b/comps/llms/src/utils/lm-eval/requirements.txt similarity index 100% rename from comps/llms/utils/lm-eval/requirements.txt rename to comps/llms/src/utils/lm-eval/requirements.txt diff --git a/comps/llms/utils/lm-eval/self_hosted_hf.py b/comps/llms/src/utils/lm-eval/self_hosted_hf.py similarity index 100% rename from comps/llms/utils/lm-eval/self_hosted_hf.py rename to comps/llms/src/utils/lm-eval/self_hosted_hf.py diff --git a/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm.sh b/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm.sh deleted file mode 100644 index c94dd72372..0000000000 --- a/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Set default values -default_hw_mode="cpu" - -# Assign arguments to variable -hw_mode=${1:-$default_hw_mode} - -# Check if all required arguments are provided -if [ "$#" -lt 0 ] || [ "$#" -gt 1 ]; then - echo "Usage: $0 [hw_mode]" - echo "Please customize the arguments you want to use. - - hw_mode: The hardware mode for the Ray Gaudi endpoint, with the default being 'cpu', and the optional selection can be 'cpu' and 'hpu'." - exit 1 -fi - -# Build the docker image for vLLM based on the hardware mode -if [ "$hw_mode" = "hpu" ]; then - git clone https://github.com/HabanaAI/vllm-fork.git - cd ./vllm-fork/ - git checkout 3c39626 - docker build -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy - cd .. - rm -rf vllm-fork -else - git clone https://github.com/vllm-project/vllm.git - cd ./vllm/ - docker build -f Dockerfile.cpu -t opea/vllm-cpu:latest --shm-size=128g . 
--build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -fi diff --git a/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm_openvino.sh b/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm_openvino.sh deleted file mode 100644 index d42878ebad..0000000000 --- a/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm_openvino.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -BASEDIR="$( cd "$( dirname "$0" )" && pwd )" -git clone https://github.com/vllm-project/vllm.git vllm -cd ./vllm/ && git checkout v0.6.1 -docker build -t vllm-openvino:latest -f Dockerfile.openvino . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -cd $BASEDIR && rm -rf vllm diff --git a/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service.sh b/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service.sh deleted file mode 100644 index d3363aa403..0000000000 --- a/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Set default values -default_port=8008 -default_model=$LLM_MODEL -default_hw_mode="cpu" -default_parallel_number=1 -default_block_size=128 -default_max_num_seqs=256 -default_max_seq_len_to_capture=2048 - -# Assign arguments to variables -port_number=${1:-$default_port} -model_name=${2:-$default_model} -hw_mode=${3:-$default_hw_mode} -parallel_number=${4:-$default_parallel_number} -block_size=${5:-$default_block_size} -max_num_seqs=${6:-$default_max_num_seqs} -max_seq_len_to_capture=${7:-$default_max_seq_len_to_capture} - -# Check if all required arguments are provided -if [ "$#" -lt 0 ] || [ "$#" -gt 4 ]; then - echo "Usage: $0 [port_number] [model_name] [hw_mode] [parallel_number]" - echo "port_number: The port number assigned to the vLLM CPU endpoint, with the default being 8080." - echo "model_name: The model name utilized for LLM, with the default set to 'meta-llama/Meta-Llama-3-8B-Instruct'." 
- echo "hw_mode: The hardware mode utilized for LLM, with the default set to 'cpu', and the optional selection can be 'hpu'" - echo "parallel_number: parallel nodes number for 'hpu' mode" - echo "block_size: default set to 128 for better performance on HPU" - echo "max_num_seqs: default set to 256 for better performance on HPU" - echo "max_seq_len_to_capture: default set to 2048 for better performance on HPU" - exit 1 -fi - -# Set the volume variable -volume=$PWD/data - -# Build the Docker run command based on hardware mode -if [ "$hw_mode" = "hpu" ]; then - docker run -d --rm --runtime=habana --name="vllm-service" -p $port_number:80 -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} opea/vllm-gaudi:latest --enforce-eager --model $model_name --tensor-parallel-size $parallel_number --host 0.0.0.0 --port 80 --block-size $block_size --max-num-seqs $max_num_seqs --max-seq_len-to-capture $max_seq_len_to_capture -else - docker run -d --rm --name="vllm-service" -p $port_number:80 --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e VLLM_CPU_KVCACHE_SPACE=40 opea/vllm-cpu:latest --model $model_name --host 0.0.0.0 --port 80 -fi diff --git a/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service_openvino.sh b/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service_openvino.sh deleted file mode 100644 index 18ce714dae..0000000000 --- a/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service_openvino.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -# Set default values - - -default_port=8008 -default_model="meta-llama/Llama-2-7b-hf" -swap_space=50 - -while getopts ":hm:p:" opt; do - case $opt in - h) - echo "Usage: $0 [-h] [-m model] [-p port]" - echo "Options:" - echo " -h Display this help message" - echo " -m model Model (default: meta-llama/Llama-2-7b-hf)" - echo " -p port Port (default: 8000)" - exit 0 - ;; - m) - model=$OPTARG - ;; - p) - port=$OPTARG - ;; - \?) - echo "Invalid option: -$OPTARG" >&2 - exit 1 - ;; - esac -done - -# Assign arguments to variables -model_name=${model:-$default_model} -port_number=${port:-$default_port} - - -# Set the Huggingface cache directory variable -HF_CACHE_DIR=$HOME/.cache/huggingface - -# Start the model server using Openvino as the backend inference engine. -# Provide the container name that is unique and meaningful, typically one that includes the model name. 
-
-docker run -d --rm --name="vllm-openvino-server" \
- -p $port_number:80 \
- --ipc=host \
- -e HTTPS_PROXY=$https_proxy \
- -e HTTP_PROXY=$https_proxy \
- -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \
- -v $HOME/.cache/huggingface:/home/user/.cache/huggingface \
- vllm-openvino:latest /bin/bash -c "\
- cd / && \
- export VLLM_CPU_KVCACHE_SPACE=50 && \
- python3 -m vllm.entrypoints.openai.api_server \
- --model \"$model_name\" \
- --host 0.0.0.0 \
- --port 80"

From 6c48526b67f6cd49121d794e7cc7e8846bf7f747 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 3 Dec 2024 03:46:15 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 comps/llms/src/text-generation/tgi/README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/comps/llms/src/text-generation/tgi/README.md b/comps/llms/src/text-generation/tgi/README.md
index 9cfe6509e3..c3b8901b93 100644
--- a/comps/llms/src/text-generation/tgi/README.md
+++ b/comps/llms/src/text-generation/tgi/README.md
@@ -13,7 +13,8 @@ pip install -r requirements.txt
 ```

 ### 1.2 Start 3rd-party TGI Service
-Please refer to [3rd-party TGI](../../../../3rd_parties/tgi/docker/README.md) to start a LLM endpoint and verify.
+
+Please refer to [3rd-party TGI](../../../../3rd_parties/tgi/docker/README.md) to start a LLM endpoint and verify.

 ### 1.3 Start LLM Service with Python Script