From f4b4ac0d3a762805fe2e1f1a09c8311cadc2114d Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Thu, 25 Jul 2024 08:54:34 +0800 Subject: [PATCH] Update TEI version v1.5 for better performance (#447) Signed-off-by: lvliang-intel --- ChatQnA/docker/aipc/docker_compose.yaml | 4 ++-- ChatQnA/docker/gaudi/docker_compose.yaml | 2 +- ChatQnA/docker/gaudi/docker_compose_guardrails.yaml | 2 +- ChatQnA/docker/gaudi/how_to_validate_service.md | 2 +- ChatQnA/docker/gpu/docker_compose.yaml | 4 ++-- ChatQnA/docker/xeon/docker_compose.yaml | 4 ++-- ChatQnA/kubernetes/README.md | 4 ++-- ChatQnA/tests/test_chatqna_guardrails_on_gaudi.sh | 2 +- ChatQnA/tests/test_chatqna_on_gaudi.sh | 2 +- SearchQnA/docker/gaudi/compose.yaml | 2 +- SearchQnA/docker/xeon/compose.yaml | 4 ++-- SearchQnA/tests/test_searchqna_on_gaudi.sh | 2 +- SearchQnA/tests/test_searchqna_on_xeon.sh | 2 +- 13 files changed, 18 insertions(+), 18 deletions(-) diff --git a/ChatQnA/docker/aipc/docker_compose.yaml b/ChatQnA/docker/aipc/docker_compose.yaml index a0040b5ab..b7df95b24 100644 --- a/ChatQnA/docker/aipc/docker_compose.yaml +++ b/ChatQnA/docker/aipc/docker_compose.yaml @@ -24,7 +24,7 @@ services: REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-server ports: - "6006:80" @@ -73,7 +73,7 @@ services: LANGCHAIN_PROJECT: "opea-retriever-service" restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-reranking-server ports: - "8808:80" diff --git a/ChatQnA/docker/gaudi/docker_compose.yaml b/ChatQnA/docker/gaudi/docker_compose.yaml index b8a420ec2..976aee97e 100644 --- a/ChatQnA/docker/gaudi/docker_compose.yaml +++ b/ChatQnA/docker/gaudi/docker_compose.yaml @@ -83,7 +83,7 @@ services: LANGCHAIN_PROJECT: "opea-retriever-service" restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-reranking-gaudi-server ports: - "8808:80" diff --git a/ChatQnA/docker/gaudi/docker_compose_guardrails.yaml b/ChatQnA/docker/gaudi/docker_compose_guardrails.yaml index 37129d99c..7967f7958 100644 --- a/ChatQnA/docker/gaudi/docker_compose_guardrails.yaml +++ b/ChatQnA/docker/gaudi/docker_compose_guardrails.yaml @@ -117,7 +117,7 @@ services: LANGCHAIN_PROJECT: "opea-retriever-service" restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-reranking-gaudi-server ports: - "8808:80" diff --git a/ChatQnA/docker/gaudi/how_to_validate_service.md b/ChatQnA/docker/gaudi/how_to_validate_service.md index 407bd5369..04f020402 100644 --- a/ChatQnA/docker/gaudi/how_to_validate_service.md +++ b/ChatQnA/docker/gaudi/how_to_validate_service.md @@ -59,7 +59,7 @@ f810f3b4d329 opea/embedding-tei:latest "python e 05c40b636239 ghcr.io/huggingface/tgi-gaudi:1.2.1 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server 174bd43fa6b5 opea/tei-gaudi:latest "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server 74084469aa33 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db -88399dbc9e43 ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, :::8808->80/tcp tei-reranking-gaudi-server +88399dbc9e43 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, :::8808->80/tcp tei-reranking-gaudi-server ``` In this case, `ghcr.io/huggingface/tgi-gaudi:1.2.1` Existed. diff --git a/ChatQnA/docker/gpu/docker_compose.yaml b/ChatQnA/docker/gpu/docker_compose.yaml index ee78c3346..160d66d3e 100644 --- a/ChatQnA/docker/gpu/docker_compose.yaml +++ b/ChatQnA/docker/gpu/docker_compose.yaml @@ -27,7 +27,7 @@ services: REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-server ports: - "8090:80" @@ -83,7 +83,7 @@ services: LANGCHAIN_PROJECT: "opea-retriever-service" restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-reranking-server ports: - "8808:80" diff --git a/ChatQnA/docker/xeon/docker_compose.yaml b/ChatQnA/docker/xeon/docker_compose.yaml index 71818f85f..fe796ce1a 100644 --- a/ChatQnA/docker/xeon/docker_compose.yaml +++ b/ChatQnA/docker/xeon/docker_compose.yaml @@ -27,7 +27,7 @@ services: REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-server ports: - "6006:80" @@ -76,7 +76,7 @@ services: LANGCHAIN_PROJECT: "opea-retriever-service" restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-reranking-server ports: - "8808:80" diff --git a/ChatQnA/kubernetes/README.md b/ChatQnA/kubernetes/README.md index 041b8afcd..e8e88d3db 100644 --- a/ChatQnA/kubernetes/README.md +++ b/ChatQnA/kubernetes/README.md @@ -14,10 +14,10 @@ The ChatQnA application is defined as a Custom Resource (CR) file that the above The ChatQnA uses the below prebuilt images if you choose a Xeon deployment - redis-vector-db: redis/redis-stack:7.2.0-v9 -- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 +- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - embedding: opea/embedding-tei:latest - retriever: opea/retriever-redis:latest -- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 +- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - reranking: opea/reranking-tei:latest - tgi_service: ghcr.io/huggingface/text-generation-inference:1.4 - llm: opea/llm-tgi:latest diff --git a/ChatQnA/tests/test_chatqna_guardrails_on_gaudi.sh b/ChatQnA/tests/test_chatqna_guardrails_on_gaudi.sh index 7db270daa..5c0238e0b 100644 --- a/ChatQnA/tests/test_chatqna_guardrails_on_gaudi.sh +++ b/ChatQnA/tests/test_chatqna_guardrails_on_gaudi.sh @@ -27,7 +27,7 @@ function build_docker_images() { # docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest . docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 - docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 cd $WORKPATH/docker docker build --no-cache -t opea/chatqna-guardrails:latest -f Dockerfile_guardrails . diff --git a/ChatQnA/tests/test_chatqna_on_gaudi.sh b/ChatQnA/tests/test_chatqna_on_gaudi.sh index a8d4741e6..dd9b4336f 100644 --- a/ChatQnA/tests/test_chatqna_on_gaudi.sh +++ b/ChatQnA/tests/test_chatqna_on_gaudi.sh @@ -26,7 +26,7 @@ function build_docker_images() { # docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest . docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 - docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 cd $WORKPATH/docker docker build --no-cache -t opea/chatqna:latest -f Dockerfile . diff --git a/SearchQnA/docker/gaudi/compose.yaml b/SearchQnA/docker/gaudi/compose.yaml index b7198e363..4cf6462b3 100644 --- a/SearchQnA/docker/gaudi/compose.yaml +++ b/SearchQnA/docker/gaudi/compose.yaml @@ -58,7 +58,7 @@ services: GOOGLE_CSE_ID: ${GOOGLE_CSE_ID} restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-reranking-server ports: - "3004:80" diff --git a/SearchQnA/docker/xeon/compose.yaml b/SearchQnA/docker/xeon/compose.yaml index 9a81a989f..ee9402ffb 100644 --- a/SearchQnA/docker/xeon/compose.yaml +++ b/SearchQnA/docker/xeon/compose.yaml @@ -6,7 +6,7 @@ version: "3.8" services: tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-server ports: - "3001:80" @@ -51,7 +51,7 @@ services: GOOGLE_CSE_ID: ${GOOGLE_CSE_ID} restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-reranking-server ports: - "3004:80" diff --git a/SearchQnA/tests/test_searchqna_on_gaudi.sh b/SearchQnA/tests/test_searchqna_on_gaudi.sh index 379a846e8..92710551e 100644 --- a/SearchQnA/tests/test_searchqna_on_gaudi.sh +++ b/SearchQnA/tests/test_searchqna_on_gaudi.sh @@ -25,7 +25,7 @@ function build_docker_images() { # cd tei-gaudi/ # docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest . - docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 cd $WORKPATH/docker docker build --no-cache -t opea/searchqna:latest -f Dockerfile . diff --git a/SearchQnA/tests/test_searchqna_on_xeon.sh b/SearchQnA/tests/test_searchqna_on_xeon.sh index 73a6361f1..a02757b5f 100644 --- a/SearchQnA/tests/test_searchqna_on_xeon.sh +++ b/SearchQnA/tests/test_searchqna_on_xeon.sh @@ -17,7 +17,7 @@ function build_docker_images() { docker build -t opea/web-retriever-chroma:latest -f comps/web_retrievers/langchain/chroma/docker/Dockerfile . docker build -t opea/reranking-tei:latest -f comps/reranks/tei/docker/Dockerfile . docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile . - docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker pull ghcr.io/huggingface/text-generation-inference:1.4 cd $WORKPATH/docker docker build -t opea/searchqna:latest -f Dockerfile .