From 3563f5db6be91100a10c2f070fed28820a34ca0c Mon Sep 17 00:00:00 2001 From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com> Date: Tue, 3 Sep 2024 15:24:54 +0800 Subject: [PATCH] [ChatQnA]Update manifests (#716) * update manifests for v0.9 --- ChatQnA/benchmark/four_gaudi/chatqna_config_map.yaml | 2 +- ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml | 4 ++-- ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml | 4 ++-- ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ChatQnA/benchmark/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/four_gaudi/chatqna_config_map.yaml index 24b8e72df..368c800e4 100644 --- a/ChatQnA/benchmark/four_gaudi/chatqna_config_map.yaml +++ b/ChatQnA/benchmark/four_gaudi/chatqna_config_map.yaml @@ -15,7 +15,7 @@ data: TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: {HF_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} EMBEDDING_SERVICE_HOST_IP: embedding-svc RETRIEVER_SERVICE_HOST_IP: retriever-svc RERANK_SERVICE_HOST_IP: reranking-svc diff --git a/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml index 130089f87..127160e33 100644 --- a/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml @@ -35,9 +35,9 @@ spec: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '2048' + - '1024' - --max-total-tokens - - '4096' + - '2048' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens diff --git a/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml index 093d2264b..ae30c6a21 100644 --- a/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml @@ -35,9 +35,9 @@ spec: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '2048' + - '1024' - --max-total-tokens - - '4096' + - '2048' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens diff --git a/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml index 9499f04ed..4d9988441 100644 --- a/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml @@ -35,9 +35,9 @@ spec: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '2048' + - '1024' - --max-total-tokens - - '4096' + - '2048' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens