From ef8625af9ee2389cf2764032b80b85baf2401d30 Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Wed, 24 Jul 2024 10:58:56 +0800 Subject: [PATCH] Add auto truncate for embedding and rerank Signed-off-by: lvliang-intel --- ChatQnA/docker/gaudi/docker_compose.yaml | 2 +- ChatQnA/docker/gaudi/docker_compose_guardrails.yaml | 2 +- ChatQnA/docker/gpu/docker_compose.yaml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ChatQnA/docker/gaudi/docker_compose.yaml b/ChatQnA/docker/gaudi/docker_compose.yaml index b8a420ec2..9f51b77e5 100644 --- a/ChatQnA/docker/gaudi/docker_compose.yaml +++ b/ChatQnA/docker/gaudi/docker_compose.yaml @@ -46,7 +46,7 @@ services: MAX_WARMUP_SEQUENCE_LENGTH: 512 INIT_HCCL_ON_ACQUIRE: 0 ENABLE_EXPERIMENTAL_FLAGS: true - command: --model-id ${EMBEDDING_MODEL_ID} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate embedding: image: opea/embedding-tei:latest container_name: embedding-tei-server diff --git a/ChatQnA/docker/gaudi/docker_compose_guardrails.yaml b/ChatQnA/docker/gaudi/docker_compose_guardrails.yaml index 37129d99c..5c6591072 100644 --- a/ChatQnA/docker/gaudi/docker_compose_guardrails.yaml +++ b/ChatQnA/docker/gaudi/docker_compose_guardrails.yaml @@ -80,7 +80,7 @@ services: HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512 - command: --model-id ${EMBEDDING_MODEL_ID} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate embedding: image: opea/embedding-tei:latest container_name: embedding-tei-server diff --git a/ChatQnA/docker/gpu/docker_compose.yaml b/ChatQnA/docker/gpu/docker_compose.yaml index ee78c3346..2aa729a2c 100644 --- a/ChatQnA/docker/gpu/docker_compose.yaml +++ b/ChatQnA/docker/gpu/docker_compose.yaml @@ -39,7 +39,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - command: --model-id ${EMBEDDING_MODEL_ID} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate deploy: resources: reservations: @@ -97,7 +97,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 - command: --model-id ${RERANK_MODEL_ID} + command: --model-id ${RERANK_MODEL_ID} --auto-truncate deploy: resources: reservations: