From 8b60948c7b9ab96c4d12dd361b329ff72b2e0e0b Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Thu, 25 Jul 2024 09:21:06 +0800 Subject: [PATCH] Add auto truncate for embedding and rerank (#449) Signed-off-by: lvliang-intel --- ChatQnA/docker/gaudi/docker_compose.yaml | 2 +- ChatQnA/docker/gaudi/docker_compose_guardrails.yaml | 2 +- ChatQnA/docker/gpu/docker_compose.yaml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ChatQnA/docker/gaudi/docker_compose.yaml b/ChatQnA/docker/gaudi/docker_compose.yaml index 976aee97e..6465a5a4a 100644 --- a/ChatQnA/docker/gaudi/docker_compose.yaml +++ b/ChatQnA/docker/gaudi/docker_compose.yaml @@ -46,7 +46,7 @@ services: MAX_WARMUP_SEQUENCE_LENGTH: 512 INIT_HCCL_ON_ACQUIRE: 0 ENABLE_EXPERIMENTAL_FLAGS: true - command: --model-id ${EMBEDDING_MODEL_ID} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate embedding: image: opea/embedding-tei:latest container_name: embedding-tei-server diff --git a/ChatQnA/docker/gaudi/docker_compose_guardrails.yaml b/ChatQnA/docker/gaudi/docker_compose_guardrails.yaml index 7967f7958..1f8ad93b5 100644 --- a/ChatQnA/docker/gaudi/docker_compose_guardrails.yaml +++ b/ChatQnA/docker/gaudi/docker_compose_guardrails.yaml @@ -80,7 +80,7 @@ services: HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512 - command: --model-id ${EMBEDDING_MODEL_ID} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate embedding: image: opea/embedding-tei:latest container_name: embedding-tei-server diff --git a/ChatQnA/docker/gpu/docker_compose.yaml b/ChatQnA/docker/gpu/docker_compose.yaml index 160d66d3e..74d63e8e3 100644 --- a/ChatQnA/docker/gpu/docker_compose.yaml +++ b/ChatQnA/docker/gpu/docker_compose.yaml @@ -39,7 +39,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - command: --model-id ${EMBEDDING_MODEL_ID} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate deploy: resources: reservations: @@ -97,7 +97,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 - command: --model-id ${RERANK_MODEL_ID} + command: --model-id ${RERANK_MODEL_ID} --auto-truncate deploy: resources: reservations: