From da5fa68f70ee4f9c27ef1988f31021b714c9661f Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Mon, 9 Sep 2024 00:04:40 -0700 Subject: [PATCH 1/3] updated manifests --- .../four_gaudi/chatqna_config_map.yaml | 23 - .../four_gaudi/chatqna_mega_service_run.yaml | 55 -- .../four_gaudi/dataprep-microservice_run.yaml | 75 -- .../four_gaudi/embedding-dependency_run.yaml | 62 -- .../embedding-microservice_run.yaml | 54 -- .../four_gaudi/llm-dependency_run.yaml | 88 -- .../four_gaudi/llm-microservice_run.yaml | 54 -- .../oob_four_gaudi_with_rerank.yaml | 725 +++++++++++++++++ .../four_gaudi/reranking-dependency_run.yaml | 85 -- .../reranking-microservice_run.yaml | 54 -- .../retrieval-microservice_run.yaml | 72 -- .../with_rerank/four_gaudi/vector-db_run.yaml | 48 -- .../single_gaudi/chatqna_config_map.yaml | 23 - .../chatqna_mega_service_run.yaml | 55 -- .../dataprep-microservice_run.yaml | 75 -- .../embedding-dependency_run.yaml | 62 -- .../embedding-microservice_run.yaml | 54 -- .../single_gaudi/llm-dependency_run.yaml | 88 -- .../single_gaudi/llm-microservice_run.yaml | 54 -- .../oob_single_gaudi_with_rerank.yaml | 725 +++++++++++++++++ .../reranking-dependency_run.yaml | 85 -- .../reranking-microservice_run.yaml | 54 -- .../retrieval-microservice_run.yaml | 72 -- .../single_gaudi/vector-db_run.yaml | 48 -- .../two_gaudi/chatqna_config_map.yaml | 23 - .../two_gaudi/chatqna_mega_service_run.yaml | 55 -- .../two_gaudi/dataprep-microservice_run.yaml | 75 -- .../two_gaudi/embedding-dependency_run.yaml | 62 -- .../two_gaudi/embedding-microservice_run.yaml | 54 -- .../two_gaudi/llm-dependency_run.yaml | 88 -- .../two_gaudi/llm-microservice_run.yaml | 54 -- .../two_gaudi/oob_two_gaudi_with_rerank.yaml | 725 +++++++++++++++++ .../two_gaudi/reranking-dependency_run.yaml | 85 -- .../two_gaudi/reranking-microservice_run.yaml | 54 -- .../two_gaudi/retrieval-microservice_run.yaml | 72 -- .../with_rerank/two_gaudi/vector-db_run.yaml | 48 -- .../four_gaudi/chatqna_config_map.yaml | 23 - .../four_gaudi/chatqna_mega_service_run.yaml | 55 -- .../four_gaudi/dataprep-microservice_run.yaml | 75 -- .../four_gaudi/embedding-dependency_run.yaml | 62 -- .../embedding-microservice_run.yaml | 54 -- .../four_gaudi/llm-dependency_run.yaml | 88 -- .../four_gaudi/llm-microservice_run.yaml | 54 -- .../oob_four_gaudi_without_rerank.yaml | 725 +++++++++++++++++ .../four_gaudi/reranking-dependency_run.yaml | 85 -- .../reranking-microservice_run.yaml | 54 -- .../retrieval-microservice_run.yaml | 72 -- .../four_gaudi/vector-db_run.yaml | 48 -- .../single_gaudi/chatqna_config_map.yaml | 23 - .../chatqna_mega_service_run.yaml | 55 -- .../dataprep-microservice_run.yaml | 75 -- .../embedding-dependency_run.yaml | 62 -- .../embedding-microservice_run.yaml | 54 -- .../single_gaudi/llm-dependency_run.yaml | 88 -- .../single_gaudi/llm-microservice_run.yaml | 54 -- .../oob_single_gaudi_without_rerank.yaml | 576 +++++++++++++ .../retrieval-microservice_run.yaml | 72 -- .../single_gaudi/vector-db_run.yaml | 48 -- .../two_gaudi/chatqna_config_map.yaml | 23 - .../two_gaudi/chatqna_mega_service_run.yaml | 55 -- .../two_gaudi/dataprep-microservice_run.yaml | 75 -- .../two_gaudi/embedding-dependency_run.yaml | 62 -- .../two_gaudi/embedding-microservice_run.yaml | 54 -- .../two_gaudi/llm-dependency_run.yaml | 88 -- .../two_gaudi/llm-microservice_run.yaml | 54 -- .../oob_two_gaudi_without_rerank.yaml | 576 +++++++++++++ .../two_gaudi/retrieval-microservice_run.yaml | 72 -- .../two_gaudi/vector-db_run.yaml | 48 -- .../four_gaudi/chatqna_config_map.yaml | 23 - .../four_gaudi/chatqna_mega_service_run.yaml | 62 -- .../four_gaudi/dataprep-microservice_run.yaml | 75 -- .../four_gaudi/embedding-dependency_run.yaml | 69 -- .../embedding-microservice_run.yaml | 59 -- .../four_gaudi/llm-dependency_run.yaml | 88 -- .../four_gaudi/llm-microservice_run.yaml | 59 -- .../four_gaudi/reranking-dependency_run.yaml | 85 -- .../reranking-microservice_run.yaml | 59 -- .../retrieval-microservice_run.yaml | 79 -- .../tuned_four_gaudi_with_rerank.yaml | 761 ++++++++++++++++++ .../with_rerank/four_gaudi/vector-db_run.yaml | 48 -- .../single_gaudi/chatqna_config_map.yaml | 23 - .../chatqna_mega_service_run.yaml | 62 -- .../dataprep-microservice_run.yaml | 75 -- .../embedding-dependency_run.yaml | 69 -- .../embedding-microservice_run.yaml | 59 -- .../single_gaudi/llm-dependency_run.yaml | 88 -- .../single_gaudi/llm-microservice_run.yaml | 59 -- .../reranking-dependency_run.yaml | 85 -- .../reranking-microservice_run.yaml | 59 -- .../retrieval-microservice_run.yaml | 79 -- .../tuned_single_gaudi_with_rerank.yaml | 761 ++++++++++++++++++ .../single_gaudi/vector-db_run.yaml | 48 -- .../two_gaudi/chatqna_config_map.yaml | 23 - .../two_gaudi/chatqna_mega_service_run.yaml | 62 -- .../two_gaudi/dataprep-microservice_run.yaml | 75 -- .../two_gaudi/embedding-dependency_run.yaml | 69 -- .../two_gaudi/embedding-microservice_run.yaml | 59 -- .../two_gaudi/llm-dependency_run.yaml | 88 -- .../two_gaudi/llm-microservice_run.yaml | 59 -- .../two_gaudi/reranking-dependency_run.yaml | 85 -- .../two_gaudi/reranking-microservice_run.yaml | 59 -- .../two_gaudi/retrieval-microservice_run.yaml | 79 -- .../tuned_two_gaudi_with_rerank.yaml | 761 ++++++++++++++++++ .../with_rerank/two_gaudi/vector-db_run.yaml | 48 -- .../four_gaudi/chatqna_config_map.yaml | 23 - .../four_gaudi/chatqna_mega_service_run.yaml | 62 -- .../four_gaudi/dataprep-microservice_run.yaml | 75 -- .../four_gaudi/embedding-dependency_run.yaml | 69 -- .../embedding-microservice_run.yaml | 59 -- .../four_gaudi/llm-dependency_run.yaml | 88 -- .../four_gaudi/llm-microservice_run.yaml | 59 -- .../retrieval-microservice_run.yaml | 79 -- .../tuned_four_gaudi_without_rerank.yaml | 607 ++++++++++++++ .../four_gaudi/vector-db_run.yaml | 48 -- .../single_gaudi/chatqna_config_map.yaml | 23 - .../chatqna_mega_service_run.yaml | 62 -- .../dataprep-microservice_run.yaml | 75 -- .../embedding-dependency_run.yaml | 69 -- .../embedding-microservice_run.yaml | 59 -- .../single_gaudi/llm-dependency_run.yaml | 88 -- .../single_gaudi/llm-microservice_run.yaml | 59 -- .../retrieval-microservice_run.yaml | 79 -- .../tuned_single_gaudi_without_rerank.yaml | 607 ++++++++++++++ .../single_gaudi/vector-db_run.yaml | 48 -- .../two_gaudi/chatqna_config_map.yaml | 23 - .../two_gaudi/chatqna_mega_service_run.yaml | 62 -- .../two_gaudi/dataprep-microservice_run.yaml | 75 -- .../two_gaudi/embedding-dependency_run.yaml | 69 -- .../two_gaudi/embedding-microservice_run.yaml | 59 -- .../two_gaudi/llm-dependency_run.yaml | 88 -- .../two_gaudi/llm-microservice_run.yaml | 59 -- .../two_gaudi/retrieval-microservice_run.yaml | 79 -- .../tuned_two_gaudi_without_rerank.yaml | 607 ++++++++++++++ .../two_gaudi/vector-db_run.yaml | 48 -- 134 files changed, 8156 insertions(+), 7546 deletions(-) delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 98422525f4..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 42a20871db..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3af5b9859c..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 130089f874..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 31 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 3056dbc1d1..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..a6183c7933 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml @@ -0,0 +1,725 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 31 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- + + diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd14..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml deleted file mode 100644 index 0723d46a8d..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index ac6c12fdc5..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 98422525f4..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 42a20871db..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3af5b9859c..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 093d2264bc..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 7 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 3056dbc1d1..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..927ab2e0c1 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml @@ -0,0 +1,725 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 7 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- + + diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd14..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml deleted file mode 100644 index 0723d46a8d..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index ac6c12fdc5..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 98422525f4..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 42a20871db..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3af5b9859c..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 9499f04ed4..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 15 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 3056dbc1d1..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..ad5e7de8b9 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml @@ -0,0 +1,725 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 15 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- + + diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd14..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml deleted file mode 100644 index 0723d46a8d..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index ac6c12fdc5..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 687fdc51e5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 42a20871db..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3af5b9859c..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 64b4197db1..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 32 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 3056dbc1d1..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..3150795b21 --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml @@ -0,0 +1,725 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 32 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- + + diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd14..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml deleted file mode 100644 index 0723d46a8d..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index ac6c12fdc5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 687fdc51e5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 42a20871db..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3af5b9859c..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml deleted file mode 100644 index bbf9d6aeb3..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 8 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 3056dbc1d1..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..be4a6cb823 --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml @@ -0,0 +1,576 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 8 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- + + diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index ac6c12fdc5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 687fdc51e5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 42a20871db..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3af5b9859c..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml deleted file mode 100644 index e78da3e38a..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 16 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 3056dbc1d1..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..fd8b153f85 --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml @@ -0,0 +1,576 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 16 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- + + diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index ac6c12fdc5..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 30cacdffe7..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 69dbd7af96..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 80 - memory: 20000Mi - requests: - cpu: 80 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 348aa7a23e..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 6903ee5060..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 31 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 7cc6ad1233..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd14..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml deleted file mode 100644 index 193350b0d7..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index 25314a7824..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..e2c7a5dd7f --- /dev/null +++ b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml @@ -0,0 +1,761 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 31 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- + + diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 4a3d3f5631..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index f27ffcad00..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 80 - memory: 20000Mi - requests: - cpu: 80 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index f23ba0b4fa..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 6de12aa6db..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 7 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 1d9e291122..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd14..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml deleted file mode 100644 index 25f6a00b38..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index 298abd73a0..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..a0e8f7a832 --- /dev/null +++ b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml @@ -0,0 +1,761 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 7 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- + + diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index da8d9e8f13..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 485d73402c..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 80 - memory: 20000Mi - requests: - cpu: 80 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3822537c40..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 550e338e15..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 15 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 49a67fd2ea..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd14..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml deleted file mode 100644 index bec1c8b2cb..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index b6799fc60a..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..ea7c231511 --- /dev/null +++ b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml @@ -0,0 +1,761 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 15 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- + + diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 22c8c4d462..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 69dbd7af96..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 80 - memory: 20000Mi - requests: - cpu: 80 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 348aa7a23e..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml deleted file mode 100644 index ebee24319e..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 32 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 7cc6ad1233..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index 25314a7824..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..25237a2100 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml @@ -0,0 +1,607 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 32 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- + + diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index cfe155580b..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index f27ffcad00..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 80 - memory: 20000Mi - requests: - cpu: 80 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index f23ba0b4fa..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 6fd539c954..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 8 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 1d9e291122..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index 298abd73a0..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..a96cf8ed32 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml @@ -0,0 +1,607 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 8 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- + + diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml deleted file mode 100644 index 368c800e49..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index b95d4edecc..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce5..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 485d73402c..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 80 - memory: 20000Mi - requests: - cpu: 80 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml deleted file mode 100644 index 3822537c40..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 466008735f..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 16 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml deleted file mode 100644 index 49a67fd2ea..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index b6799fc60a..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..aaeb4bace3 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml @@ -0,0 +1,607 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 16 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- + + diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe7..0000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 From beeeacea5bb31614907342e0197e86fa81bbf0c0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 07:05:02 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml | 2 -- .../with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml | 2 -- .../oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml | 2 -- .../four_gaudi/oob_four_gaudi_without_rerank.yaml | 2 -- .../single_gaudi/oob_single_gaudi_without_rerank.yaml | 2 -- .../without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml | 2 -- .../with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml | 2 -- .../single_gaudi/tuned_single_gaudi_with_rerank.yaml | 2 -- .../with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml | 2 -- .../four_gaudi/tuned_four_gaudi_without_rerank.yaml | 2 -- .../single_gaudi/tuned_single_gaudi_without_rerank.yaml | 2 -- .../two_gaudi/tuned_two_gaudi_without_rerank.yaml | 2 -- 12 files changed, 24 deletions(-) diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml index a6183c7933..5034096eb3 100644 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml @@ -721,5 +721,3 @@ spec: --- - - diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml index 927ab2e0c1..2b5d9bc719 100644 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml @@ -721,5 +721,3 @@ spec: --- - - diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml index ad5e7de8b9..d6343640af 100644 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml @@ -721,5 +721,3 @@ spec: --- - - diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml index 3150795b21..989e541030 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml @@ -721,5 +721,3 @@ spec: --- - - diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml index be4a6cb823..38a9d099d6 100644 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml @@ -572,5 +572,3 @@ spec: --- - - diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml index fd8b153f85..eba1cce9ac 100644 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml @@ -572,5 +572,3 @@ spec: --- - - diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml index e2c7a5dd7f..940bc0c9f5 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml @@ -757,5 +757,3 @@ spec: --- - - diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml index a0e8f7a832..406b27ce30 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml @@ -757,5 +757,3 @@ spec: --- - - diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml index ea7c231511..385fa4fc57 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml @@ -757,5 +757,3 @@ spec: --- - - diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml index 25237a2100..34da35baae 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml @@ -603,5 +603,3 @@ spec: --- - - diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml index a96cf8ed32..2998668652 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml @@ -603,5 +603,3 @@ spec: --- - - diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml index aaeb4bace3..d995aab854 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml @@ -603,5 +603,3 @@ spec: --- - - From 6602ed0f1910043ace18cd47c9e9e87805233783 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Mon, 9 Sep 2024 00:16:09 -0700 Subject: [PATCH 3/3] updated namespace --- .../four_gaudi/oob_four_gaudi_with_rerank.yaml | 11 +++++++++++ .../single_gaudi/oob_single_gaudi_with_rerank.yaml | 11 +++++++++++ .../two_gaudi/oob_two_gaudi_with_rerank.yaml | 11 +++++++++++ .../four_gaudi/oob_four_gaudi_without_rerank.yaml | 11 +++++++++++ .../single_gaudi/oob_single_gaudi_without_rerank.yaml | 9 +++++++++ .../two_gaudi/oob_two_gaudi_without_rerank.yaml | 9 +++++++++ .../four_gaudi/tuned_four_gaudi_with_rerank.yaml | 11 +++++++++++ .../single_gaudi/tuned_single_gaudi_with_rerank.yaml | 11 +++++++++++ .../two_gaudi/tuned_two_gaudi_with_rerank.yaml | 11 +++++++++++ .../four_gaudi/tuned_four_gaudi_without_rerank.yaml | 9 +++++++++ .../tuned_single_gaudi_without_rerank.yaml | 9 +++++++++ .../two_gaudi/tuned_two_gaudi_without_rerank.yaml | 9 +++++++++ 12 files changed, 122 insertions(+) diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml index 5034096eb3..f8684c239e 100644 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml @@ -72,6 +72,7 @@ kind: Service apiVersion: v1 metadata: name: chatqna-backend-server-svc + namespace: default spec: type: NodePort selector: @@ -147,6 +148,7 @@ kind: Service apiVersion: v1 metadata: name: dataprep-svc + namespace: default spec: type: ClusterIP selector: @@ -220,6 +222,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -279,6 +282,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-svc + namespace: default spec: type: ClusterIP selector: @@ -372,6 +376,7 @@ kind: Service apiVersion: v1 metadata: name: llm-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -431,6 +436,7 @@ kind: Service apiVersion: v1 metadata: name: llm-svc + namespace: default spec: type: ClusterIP selector: @@ -521,6 +527,7 @@ kind: Service apiVersion: v1 metadata: name: reranking-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -580,6 +587,7 @@ kind: Service apiVersion: v1 metadata: name: reranking-svc + namespace: default spec: type: ClusterIP selector: @@ -657,6 +665,7 @@ kind: Service apiVersion: v1 metadata: name: retriever-svc + namespace: default spec: type: ClusterIP selector: @@ -677,6 +686,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -707,6 +717,7 @@ apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: type: ClusterIP selector: diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml index 2b5d9bc719..b05326a30d 100644 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml @@ -72,6 +72,7 @@ kind: Service apiVersion: v1 metadata: name: chatqna-backend-server-svc + namespace: default spec: type: NodePort selector: @@ -147,6 +148,7 @@ kind: Service apiVersion: v1 metadata: name: dataprep-svc + namespace: default spec: type: ClusterIP selector: @@ -220,6 +222,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -279,6 +282,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-svc + namespace: default spec: type: ClusterIP selector: @@ -372,6 +376,7 @@ kind: Service apiVersion: v1 metadata: name: llm-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -431,6 +436,7 @@ kind: Service apiVersion: v1 metadata: name: llm-svc + namespace: default spec: type: ClusterIP selector: @@ -521,6 +527,7 @@ kind: Service apiVersion: v1 metadata: name: reranking-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -580,6 +587,7 @@ kind: Service apiVersion: v1 metadata: name: reranking-svc + namespace: default spec: type: ClusterIP selector: @@ -657,6 +665,7 @@ kind: Service apiVersion: v1 metadata: name: retriever-svc + namespace: default spec: type: ClusterIP selector: @@ -677,6 +686,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -707,6 +717,7 @@ apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: type: ClusterIP selector: diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml index d6343640af..13d8345129 100644 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml @@ -72,6 +72,7 @@ kind: Service apiVersion: v1 metadata: name: chatqna-backend-server-svc + namespace: default spec: type: NodePort selector: @@ -147,6 +148,7 @@ kind: Service apiVersion: v1 metadata: name: dataprep-svc + namespace: default spec: type: ClusterIP selector: @@ -220,6 +222,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -279,6 +282,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-svc + namespace: default spec: type: ClusterIP selector: @@ -372,6 +376,7 @@ kind: Service apiVersion: v1 metadata: name: llm-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -431,6 +436,7 @@ kind: Service apiVersion: v1 metadata: name: llm-svc + namespace: default spec: type: ClusterIP selector: @@ -521,6 +527,7 @@ kind: Service apiVersion: v1 metadata: name: reranking-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -580,6 +587,7 @@ kind: Service apiVersion: v1 metadata: name: reranking-svc + namespace: default spec: type: ClusterIP selector: @@ -657,6 +665,7 @@ kind: Service apiVersion: v1 metadata: name: retriever-svc + namespace: default spec: type: ClusterIP selector: @@ -677,6 +686,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -707,6 +717,7 @@ apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: type: ClusterIP selector: diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml index 989e541030..e010496b8f 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml @@ -72,6 +72,7 @@ kind: Service apiVersion: v1 metadata: name: chatqna-backend-server-svc + namespace: default spec: type: NodePort selector: @@ -147,6 +148,7 @@ kind: Service apiVersion: v1 metadata: name: dataprep-svc + namespace: default spec: type: ClusterIP selector: @@ -220,6 +222,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -279,6 +282,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-svc + namespace: default spec: type: ClusterIP selector: @@ -372,6 +376,7 @@ kind: Service apiVersion: v1 metadata: name: llm-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -431,6 +436,7 @@ kind: Service apiVersion: v1 metadata: name: llm-svc + namespace: default spec: type: ClusterIP selector: @@ -521,6 +527,7 @@ kind: Service apiVersion: v1 metadata: name: reranking-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -580,6 +587,7 @@ kind: Service apiVersion: v1 metadata: name: reranking-svc + namespace: default spec: type: ClusterIP selector: @@ -657,6 +665,7 @@ kind: Service apiVersion: v1 metadata: name: retriever-svc + namespace: default spec: type: ClusterIP selector: @@ -677,6 +686,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -707,6 +717,7 @@ apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: type: ClusterIP selector: diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml index 38a9d099d6..b75e8f291a 100644 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml @@ -72,6 +72,7 @@ kind: Service apiVersion: v1 metadata: name: chatqna-backend-server-svc + namespace: default spec: type: NodePort selector: @@ -147,6 +148,7 @@ kind: Service apiVersion: v1 metadata: name: dataprep-svc + namespace: default spec: type: ClusterIP selector: @@ -220,6 +222,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -279,6 +282,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-svc + namespace: default spec: type: ClusterIP selector: @@ -372,6 +376,7 @@ kind: Service apiVersion: v1 metadata: name: llm-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -431,6 +436,7 @@ kind: Service apiVersion: v1 metadata: name: llm-svc + namespace: default spec: type: ClusterIP selector: @@ -508,6 +514,7 @@ kind: Service apiVersion: v1 metadata: name: retriever-svc + namespace: default spec: type: ClusterIP selector: @@ -528,6 +535,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -558,6 +566,7 @@ apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: type: ClusterIP selector: diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml index eba1cce9ac..fa62ef84d7 100644 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml @@ -72,6 +72,7 @@ kind: Service apiVersion: v1 metadata: name: chatqna-backend-server-svc + namespace: default spec: type: NodePort selector: @@ -147,6 +148,7 @@ kind: Service apiVersion: v1 metadata: name: dataprep-svc + namespace: default spec: type: ClusterIP selector: @@ -220,6 +222,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -279,6 +282,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-svc + namespace: default spec: type: ClusterIP selector: @@ -372,6 +376,7 @@ kind: Service apiVersion: v1 metadata: name: llm-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -431,6 +436,7 @@ kind: Service apiVersion: v1 metadata: name: llm-svc + namespace: default spec: type: ClusterIP selector: @@ -508,6 +514,7 @@ kind: Service apiVersion: v1 metadata: name: retriever-svc + namespace: default spec: type: ClusterIP selector: @@ -528,6 +535,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -558,6 +566,7 @@ apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: type: ClusterIP selector: diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml index 940bc0c9f5..373b46c8a1 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml @@ -79,6 +79,7 @@ kind: Service apiVersion: v1 metadata: name: chatqna-backend-server-svc + namespace: default spec: type: NodePort selector: @@ -154,6 +155,7 @@ kind: Service apiVersion: v1 metadata: name: dataprep-svc + namespace: default spec: type: ClusterIP selector: @@ -234,6 +236,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -298,6 +301,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-svc + namespace: default spec: type: ClusterIP selector: @@ -391,6 +395,7 @@ kind: Service apiVersion: v1 metadata: name: llm-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -455,6 +460,7 @@ kind: Service apiVersion: v1 metadata: name: llm-svc + namespace: default spec: type: ClusterIP selector: @@ -545,6 +551,7 @@ kind: Service apiVersion: v1 metadata: name: reranking-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -609,6 +616,7 @@ kind: Service apiVersion: v1 metadata: name: reranking-svc + namespace: default spec: type: ClusterIP selector: @@ -693,6 +701,7 @@ kind: Service apiVersion: v1 metadata: name: retriever-svc + namespace: default spec: type: ClusterIP selector: @@ -713,6 +722,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -743,6 +753,7 @@ apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: type: ClusterIP selector: diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml index 406b27ce30..9d2f0ee96d 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml @@ -79,6 +79,7 @@ kind: Service apiVersion: v1 metadata: name: chatqna-backend-server-svc + namespace: default spec: type: NodePort selector: @@ -154,6 +155,7 @@ kind: Service apiVersion: v1 metadata: name: dataprep-svc + namespace: default spec: type: ClusterIP selector: @@ -234,6 +236,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -298,6 +301,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-svc + namespace: default spec: type: ClusterIP selector: @@ -391,6 +395,7 @@ kind: Service apiVersion: v1 metadata: name: llm-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -455,6 +460,7 @@ kind: Service apiVersion: v1 metadata: name: llm-svc + namespace: default spec: type: ClusterIP selector: @@ -545,6 +551,7 @@ kind: Service apiVersion: v1 metadata: name: reranking-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -609,6 +616,7 @@ kind: Service apiVersion: v1 metadata: name: reranking-svc + namespace: default spec: type: ClusterIP selector: @@ -693,6 +701,7 @@ kind: Service apiVersion: v1 metadata: name: retriever-svc + namespace: default spec: type: ClusterIP selector: @@ -713,6 +722,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -743,6 +753,7 @@ apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: type: ClusterIP selector: diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml index 385fa4fc57..4ed98c347c 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml @@ -79,6 +79,7 @@ kind: Service apiVersion: v1 metadata: name: chatqna-backend-server-svc + namespace: default spec: type: NodePort selector: @@ -154,6 +155,7 @@ kind: Service apiVersion: v1 metadata: name: dataprep-svc + namespace: default spec: type: ClusterIP selector: @@ -234,6 +236,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -298,6 +301,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-svc + namespace: default spec: type: ClusterIP selector: @@ -391,6 +395,7 @@ kind: Service apiVersion: v1 metadata: name: llm-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -455,6 +460,7 @@ kind: Service apiVersion: v1 metadata: name: llm-svc + namespace: default spec: type: ClusterIP selector: @@ -545,6 +551,7 @@ kind: Service apiVersion: v1 metadata: name: reranking-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -609,6 +616,7 @@ kind: Service apiVersion: v1 metadata: name: reranking-svc + namespace: default spec: type: ClusterIP selector: @@ -693,6 +701,7 @@ kind: Service apiVersion: v1 metadata: name: retriever-svc + namespace: default spec: type: ClusterIP selector: @@ -713,6 +722,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -743,6 +753,7 @@ apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: type: ClusterIP selector: diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml index 34da35baae..10c0963a7a 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml @@ -79,6 +79,7 @@ kind: Service apiVersion: v1 metadata: name: chatqna-backend-server-svc + namespace: default spec: type: NodePort selector: @@ -154,6 +155,7 @@ kind: Service apiVersion: v1 metadata: name: dataprep-svc + namespace: default spec: type: ClusterIP selector: @@ -234,6 +236,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -298,6 +301,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-svc + namespace: default spec: type: ClusterIP selector: @@ -391,6 +395,7 @@ kind: Service apiVersion: v1 metadata: name: llm-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -455,6 +460,7 @@ kind: Service apiVersion: v1 metadata: name: llm-svc + namespace: default spec: type: ClusterIP selector: @@ -539,6 +545,7 @@ kind: Service apiVersion: v1 metadata: name: retriever-svc + namespace: default spec: type: ClusterIP selector: @@ -559,6 +566,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -589,6 +597,7 @@ apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: type: ClusterIP selector: diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml index 2998668652..1388453a2b 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml @@ -79,6 +79,7 @@ kind: Service apiVersion: v1 metadata: name: chatqna-backend-server-svc + namespace: default spec: type: NodePort selector: @@ -154,6 +155,7 @@ kind: Service apiVersion: v1 metadata: name: dataprep-svc + namespace: default spec: type: ClusterIP selector: @@ -234,6 +236,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -298,6 +301,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-svc + namespace: default spec: type: ClusterIP selector: @@ -391,6 +395,7 @@ kind: Service apiVersion: v1 metadata: name: llm-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -455,6 +460,7 @@ kind: Service apiVersion: v1 metadata: name: llm-svc + namespace: default spec: type: ClusterIP selector: @@ -539,6 +545,7 @@ kind: Service apiVersion: v1 metadata: name: retriever-svc + namespace: default spec: type: ClusterIP selector: @@ -559,6 +566,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -589,6 +597,7 @@ apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: type: ClusterIP selector: diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml index d995aab854..b38a502538 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml @@ -79,6 +79,7 @@ kind: Service apiVersion: v1 metadata: name: chatqna-backend-server-svc + namespace: default spec: type: NodePort selector: @@ -154,6 +155,7 @@ kind: Service apiVersion: v1 metadata: name: dataprep-svc + namespace: default spec: type: ClusterIP selector: @@ -234,6 +236,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -298,6 +301,7 @@ kind: Service apiVersion: v1 metadata: name: embedding-svc + namespace: default spec: type: ClusterIP selector: @@ -391,6 +395,7 @@ kind: Service apiVersion: v1 metadata: name: llm-dependency-svc + namespace: default spec: type: ClusterIP selector: @@ -455,6 +460,7 @@ kind: Service apiVersion: v1 metadata: name: llm-svc + namespace: default spec: type: ClusterIP selector: @@ -539,6 +545,7 @@ kind: Service apiVersion: v1 metadata: name: retriever-svc + namespace: default spec: type: ClusterIP selector: @@ -559,6 +566,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -589,6 +597,7 @@ apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: type: ClusterIP selector: