From 3c164f3aa25bae957e436a09628235e1a11d6e8d Mon Sep 17 00:00:00 2001
From: lvliang-intel
Date: Fri, 18 Oct 2024 21:49:36 +0800
Subject: [PATCH] Make rerank run on gaudi for hpu docker compose (#980)

Signed-off-by: lvliang-intel
---
Reviewer notes (this area, between the separator above and the first
file header below, is free-form commentary and not part of the commit
message):

* tei-reranking-service (compose.yaml, compose_guardrails.yaml,
  compose_vllm.yaml, compose_vllm_ray.yaml): the image changes from
  text-embeddings-inference:cpu-1.5 to tei-gaudi:latest; shm_size is
  replaced by runtime: habana, cap_add: SYS_NICE and ipc: host;
  HUGGINGFACEHUB_API_TOKEN is removed from the environment and
  HABANA_VISIBLE_DEVICES, OMPI_MCA_btl_vader_single_copy_mechanism and
  MAX_WARMUP_SEQUENCE_LENGTH are added.
* The service started with ${EMBEDDING_MODEL_ID} gains
  HF_HUB_DISABLE_PROGRESS_BARS and HF_HUB_ENABLE_HF_TRANSFER in all five
  files; compose.yaml and compose_without_rerank.yaml additionally drop
  INIT_HCCL_ON_ACQUIRE and ENABLE_EXPERIMENTAL_FLAGS.
* NOTE(review): tei-gaudi:latest is a floating tag, while the neighbouring
  tgi-gaudi image is pinned to 2.0.5; consider pinning in a follow-up.
* NOTE(review): the From: and Signed-off-by: lines in this copy carry no
  e-mail address; confirm against the original commit before applying
  with git am.
* NOTE(review): line breaks and YAML indentation were rebuilt from the
  Compose nesting (2/4/6 spaces); verify against the target files.

 .../docker_compose/intel/hpu/gaudi/compose.yaml   | 15 ++++++++++-----
 .../intel/hpu/gaudi/compose_guardrails.yaml       | 13 ++++++++++---
 .../intel/hpu/gaudi/compose_vllm.yaml             | 13 ++++++++++---
 .../intel/hpu/gaudi/compose_vllm_ray.yaml         | 13 ++++++++++---
 .../intel/hpu/gaudi/compose_without_rerank.yaml   |  4 ++--
 5 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
index 9036c2ccf..20a8e9ffc 100644
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -40,11 +40,11 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       MAX_WARMUP_SEQUENCE_LENGTH: 512
-      INIT_HCCL_ON_ACQUIRE: 0
-      ENABLE_EXPERIMENTAL_FLAGS: true
     command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
   retriever:
     image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
@@ -65,20 +65,25 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   tei-reranking-service:
-    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+    image: ghcr.io/huggingface/tei-gaudi:latest
     container_name: tei-reranking-gaudi-server
     ports:
       - "8808:80"
     volumes:
       - "./data:/data"
-    shm_size: 1g
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+      MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
     image: ghcr.io/huggingface/tgi-gaudi:2.0.5
diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml
index 3edb7bba7..320ac6140 100644
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml
@@ -79,6 +79,8 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       MAX_WARMUP_SEQUENCE_LENGTH: 512
@@ -102,20 +104,25 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   tei-reranking-service:
-    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+    image: ghcr.io/huggingface/tei-gaudi:latest
     container_name: tei-reranking-gaudi-server
     ports:
       - "8808:80"
     volumes:
       - "./data:/data"
-    shm_size: 1g
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+      MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
     image: ghcr.io/huggingface/tgi-gaudi:2.0.5
diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml
index 162527b10..0d7035cb6 100644
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml
@@ -40,6 +40,8 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       MAX_WARMUP_SEQUENCE_LENGTH: 512
@@ -63,20 +65,25 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   tei-reranking-service:
-    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+    image: ghcr.io/huggingface/tei-gaudi:latest
     container_name: tei-reranking-gaudi-server
     ports:
       - "8808:80"
     volumes:
       - "./data:/data"
-    shm_size: 1g
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+      MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   vllm-service:
     image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml
index f8b9fc7e3..296c5df52 100644
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml
@@ -40,6 +40,8 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       MAX_WARMUP_SEQUENCE_LENGTH: 512
@@ -63,20 +65,25 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   tei-reranking-service:
-    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+    image: ghcr.io/huggingface/tei-gaudi:latest
     container_name: tei-reranking-gaudi-server
     ports:
       - "8808:80"
     volumes:
       - "./data:/data"
-    shm_size: 1g
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+      MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   vllm-ray-service:
     image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest}
diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml
index cf5a7c2ca..1b82d4ef1 100644
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml
@@ -40,11 +40,11 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       MAX_WARMUP_SEQUENCE_LENGTH: 512
-      INIT_HCCL_ON_ACQUIRE: 0
-      ENABLE_EXPERIMENTAL_FLAGS: true
     command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
   retriever:
     image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}