From 36fb9a987d31e813398c39e2383988ef4597adbb Mon Sep 17 00:00:00 2001 From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com> Date: Tue, 10 Sep 2024 11:07:48 +0800 Subject: [PATCH] [ChatQnA] Update benchmarking manifests (#766) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../four_gaudi/chatqna_config_map.yaml | 23 - .../four_gaudi/chatqna_mega_service_run.yaml | 55 -- .../four_gaudi/dataprep-microservice_run.yaml | 75 -- .../four_gaudi/embedding-dependency_run.yaml | 62 -- .../embedding-microservice_run.yaml | 54 -- .../four_gaudi/llm-dependency_run.yaml | 88 -- .../four_gaudi/llm-microservice_run.yaml | 54 -- .../oob_four_gaudi_with_rerank.yaml | 734 +++++++++++++++++ .../four_gaudi/reranking-dependency_run.yaml | 85 -- .../reranking-microservice_run.yaml | 54 -- .../retrieval-microservice_run.yaml | 72 -- .../with_rerank/four_gaudi/vector-db_run.yaml | 48 -- .../single_gaudi/chatqna_config_map.yaml | 23 - .../chatqna_mega_service_run.yaml | 55 -- .../dataprep-microservice_run.yaml | 75 -- .../embedding-dependency_run.yaml | 62 -- .../embedding-microservice_run.yaml | 54 -- .../single_gaudi/llm-dependency_run.yaml | 88 -- .../single_gaudi/llm-microservice_run.yaml | 54 -- .../oob_single_gaudi_with_rerank.yaml | 734 +++++++++++++++++ .../reranking-dependency_run.yaml | 85 -- .../reranking-microservice_run.yaml | 54 -- .../retrieval-microservice_run.yaml | 72 -- .../single_gaudi/vector-db_run.yaml | 48 -- .../two_gaudi/chatqna_config_map.yaml | 23 - .../two_gaudi/chatqna_mega_service_run.yaml | 55 -- .../two_gaudi/dataprep-microservice_run.yaml | 75 -- .../two_gaudi/embedding-dependency_run.yaml | 62 -- .../two_gaudi/embedding-microservice_run.yaml | 54 -- .../two_gaudi/llm-dependency_run.yaml | 88 -- .../two_gaudi/llm-microservice_run.yaml | 54 -- .../two_gaudi/oob_two_gaudi_with_rerank.yaml | 734 +++++++++++++++++ .../two_gaudi/reranking-dependency_run.yaml | 85 -- .../two_gaudi/reranking-microservice_run.yaml | 54 -- .../two_gaudi/retrieval-microservice_run.yaml | 72 -- .../with_rerank/two_gaudi/vector-db_run.yaml | 48 -- .../four_gaudi/chatqna_config_map.yaml | 23 - .../four_gaudi/chatqna_mega_service_run.yaml | 55 -- .../four_gaudi/dataprep-microservice_run.yaml | 75 -- .../four_gaudi/embedding-dependency_run.yaml | 62 -- .../embedding-microservice_run.yaml | 54 -- .../four_gaudi/llm-dependency_run.yaml | 88 -- .../four_gaudi/llm-microservice_run.yaml | 54 -- .../oob_four_gaudi_without_rerank.yaml | 734 +++++++++++++++++ .../four_gaudi/reranking-dependency_run.yaml | 85 -- .../reranking-microservice_run.yaml | 54 -- .../retrieval-microservice_run.yaml | 72 -- .../four_gaudi/vector-db_run.yaml | 48 -- .../single_gaudi/chatqna_config_map.yaml | 23 - .../chatqna_mega_service_run.yaml | 55 -- .../dataprep-microservice_run.yaml | 75 -- .../embedding-dependency_run.yaml | 62 -- .../embedding-microservice_run.yaml | 54 -- .../single_gaudi/llm-dependency_run.yaml | 88 -- .../single_gaudi/llm-microservice_run.yaml | 54 -- .../oob_single_gaudi_without_rerank.yaml | 583 +++++++++++++ .../retrieval-microservice_run.yaml | 72 -- .../single_gaudi/vector-db_run.yaml | 48 -- .../two_gaudi/chatqna_config_map.yaml | 23 - .../two_gaudi/chatqna_mega_service_run.yaml | 55 -- .../two_gaudi/dataprep-microservice_run.yaml | 75 -- .../two_gaudi/embedding-dependency_run.yaml | 62 -- .../two_gaudi/embedding-microservice_run.yaml | 54 -- .../two_gaudi/llm-dependency_run.yaml | 88 -- .../two_gaudi/llm-microservice_run.yaml | 54 -- .../oob_two_gaudi_without_rerank.yaml | 583 +++++++++++++ .../two_gaudi/retrieval-microservice_run.yaml | 72 -- .../two_gaudi/vector-db_run.yaml | 48 -- .../four_gaudi/chatqna_config_map.yaml | 23 - .../four_gaudi/chatqna_mega_service_run.yaml | 62 -- .../four_gaudi/dataprep-microservice_run.yaml | 75 -- .../four_gaudi/embedding-dependency_run.yaml | 69 -- .../embedding-microservice_run.yaml | 59 -- .../four_gaudi/llm-dependency_run.yaml | 88 -- .../four_gaudi/llm-microservice_run.yaml | 59 -- .../four_gaudi/reranking-dependency_run.yaml | 85 -- .../reranking-microservice_run.yaml | 59 -- .../retrieval-microservice_run.yaml | 79 -- .../tuned_four_gaudi_with_rerank.yaml | 770 ++++++++++++++++++ .../with_rerank/four_gaudi/vector-db_run.yaml | 48 -- .../single_gaudi/chatqna_config_map.yaml | 23 - .../chatqna_mega_service_run.yaml | 62 -- .../dataprep-microservice_run.yaml | 75 -- .../embedding-dependency_run.yaml | 69 -- .../embedding-microservice_run.yaml | 59 -- .../single_gaudi/llm-dependency_run.yaml | 88 -- .../single_gaudi/llm-microservice_run.yaml | 59 -- .../reranking-dependency_run.yaml | 85 -- .../reranking-microservice_run.yaml | 59 -- .../retrieval-microservice_run.yaml | 79 -- .../tuned_single_gaudi_with_rerank.yaml | 770 ++++++++++++++++++ .../single_gaudi/vector-db_run.yaml | 48 -- .../two_gaudi/chatqna_config_map.yaml | 23 - .../two_gaudi/chatqna_mega_service_run.yaml | 62 -- .../two_gaudi/dataprep-microservice_run.yaml | 75 -- .../two_gaudi/embedding-dependency_run.yaml | 69 -- .../two_gaudi/embedding-microservice_run.yaml | 59 -- .../two_gaudi/llm-dependency_run.yaml | 88 -- .../two_gaudi/llm-microservice_run.yaml | 59 -- .../two_gaudi/reranking-dependency_run.yaml | 85 -- .../two_gaudi/reranking-microservice_run.yaml | 59 -- .../two_gaudi/retrieval-microservice_run.yaml | 79 -- .../tuned_two_gaudi_with_rerank.yaml | 770 ++++++++++++++++++ .../with_rerank/two_gaudi/vector-db_run.yaml | 48 -- .../four_gaudi/chatqna_config_map.yaml | 23 - .../four_gaudi/chatqna_mega_service_run.yaml | 62 -- .../four_gaudi/dataprep-microservice_run.yaml | 75 -- .../four_gaudi/embedding-dependency_run.yaml | 69 -- .../embedding-microservice_run.yaml | 59 -- .../four_gaudi/llm-dependency_run.yaml | 88 -- .../four_gaudi/llm-microservice_run.yaml | 59 -- .../retrieval-microservice_run.yaml | 79 -- .../tuned_four_gaudi_without_rerank.yaml | 614 ++++++++++++++ .../four_gaudi/vector-db_run.yaml | 48 -- .../single_gaudi/chatqna_config_map.yaml | 23 - .../chatqna_mega_service_run.yaml | 62 -- .../dataprep-microservice_run.yaml | 75 -- .../embedding-dependency_run.yaml | 69 -- .../embedding-microservice_run.yaml | 59 -- .../single_gaudi/llm-dependency_run.yaml | 88 -- .../single_gaudi/llm-microservice_run.yaml | 59 -- .../retrieval-microservice_run.yaml | 79 -- .../tuned_single_gaudi_without_rerank.yaml | 614 ++++++++++++++ .../single_gaudi/vector-db_run.yaml | 48 -- .../two_gaudi/chatqna_config_map.yaml | 23 - .../two_gaudi/chatqna_mega_service_run.yaml | 62 -- .../two_gaudi/dataprep-microservice_run.yaml | 75 -- .../two_gaudi/embedding-dependency_run.yaml | 69 -- .../two_gaudi/embedding-microservice_run.yaml | 59 -- .../two_gaudi/llm-dependency_run.yaml | 88 -- .../two_gaudi/llm-microservice_run.yaml | 59 -- .../two_gaudi/retrieval-microservice_run.yaml | 79 -- .../tuned_two_gaudi_without_rerank.yaml | 614 ++++++++++++++ .../two_gaudi/vector-db_run.yaml | 48 -- 134 files changed, 8254 insertions(+), 7546 deletions(-) delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 98422525f4..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 42a20871db..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3af5b9859c..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 130089f874..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 31 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 3056dbc1d1..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..f8684c239e --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml @@ -0,0 +1,734 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc + namespace: default +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 31 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc + namespace: default +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc + namespace: default +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd14..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml deleted file mode 100644 index 0723d46a8d..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index ac6c12fdc5..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 98422525f4..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 42a20871db..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3af5b9859c..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 093d2264bc..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 7 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 3056dbc1d1..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..b05326a30d --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml @@ -0,0 +1,734 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc + namespace: default +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 7 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc + namespace: default +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc + namespace: default +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd14..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml deleted file mode 100644 index 0723d46a8d..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index ac6c12fdc5..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 98422525f4..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 42a20871db..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3af5b9859c..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 9499f04ed4..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 15 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 3056dbc1d1..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..13d8345129 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml @@ -0,0 +1,734 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc + namespace: default +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 15 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc + namespace: default +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc + namespace: default +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd14..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml deleted file mode 100644 index 0723d46a8d..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index ac6c12fdc5..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 687fdc51e5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 42a20871db..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3af5b9859c..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 64b4197db1..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 32 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 3056dbc1d1..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..e010496b8f --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml @@ -0,0 +1,734 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc + namespace: default +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 32 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc + namespace: default +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc + namespace: default +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd14..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml deleted file mode 100644 index 0723d46a8d..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index ac6c12fdc5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 687fdc51e5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 42a20871db..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3af5b9859c..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml deleted file mode 100644 index bbf9d6aeb3..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 8 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 3056dbc1d1..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..b75e8f291a --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml @@ -0,0 +1,583 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc + namespace: default +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 8 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc + namespace: default +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index ac6c12fdc5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 687fdc51e5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 42a20871db..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3af5b9859c..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml deleted file mode 100644 index e78da3e38a..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 16 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 3056dbc1d1..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..fa62ef84d7 --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml @@ -0,0 +1,583 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc + namespace: default +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 16 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc + namespace: default +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index ac6c12fdc5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 30cacdffe7..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 69dbd7af96..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 80 - memory: 20000Mi - requests: - cpu: 80 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 348aa7a23e..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 6903ee5060..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 31 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 7cc6ad1233..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd14..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml deleted file mode 100644 index 193350b0d7..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index 25314a7824..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..373b46c8a1 --- /dev/null +++ b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml @@ -0,0 +1,770 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc + namespace: default +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 31 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc + namespace: default +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc + namespace: default +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 4a3d3f5631..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index f27ffcad00..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 80 - memory: 20000Mi - requests: - cpu: 80 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index f23ba0b4fa..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 6de12aa6db..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 7 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 1d9e291122..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd14..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml deleted file mode 100644 index 25f6a00b38..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index 298abd73a0..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..9d2f0ee96d --- /dev/null +++ b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml @@ -0,0 +1,770 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc + namespace: default +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 7 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc + namespace: default +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc + namespace: default +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index da8d9e8f13..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 485d73402c..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 80 - memory: 20000Mi - requests: - cpu: 80 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3822537c40..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 550e338e15..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 15 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 49a67fd2ea..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd14..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml deleted file mode 100644 index bec1c8b2cb..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index b6799fc60a..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..4ed98c347c --- /dev/null +++ b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml @@ -0,0 +1,770 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc + namespace: default +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 15 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc + namespace: default +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc + namespace: default +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 22c8c4d462..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 69dbd7af96..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 80 - memory: 20000Mi - requests: - cpu: 80 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 348aa7a23e..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml deleted file mode 100644 index ebee24319e..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 32 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 7cc6ad1233..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index 25314a7824..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..10c0963a7a --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml @@ -0,0 +1,614 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc + namespace: default +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 32 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc + namespace: default +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index cfe155580b..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index f27ffcad00..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 80 - memory: 20000Mi - requests: - cpu: 80 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index f23ba0b4fa..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 6fd539c954..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 8 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 1d9e291122..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index 298abd73a0..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..1388453a2b --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml @@ -0,0 +1,614 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc + namespace: default +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 8 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc + namespace: default +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index b95d4edecc..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 485d73402c..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 80 - memory: 20000Mi - requests: - cpu: 80 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3822537c40..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 466008735f..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 16 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 49a67fd2ea..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index b6799fc60a..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..b38a502538 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml @@ -0,0 +1,614 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc + namespace: default +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc + namespace: default +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 16 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc + namespace: default +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc + namespace: default +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001