Upgrade tgi-gaudi to version 2.0.6 (#551)

* Upgrade tgi-gaudi to version 2.0.6
* Fix faqgen test to align with GenAIExamples

Signed-off-by: Lianhao Lu <[email protected]>
opea-project · Nov 14, 2024 · 915baa0 · 915baa0
1 parent 691bbc5
commit 915baa0
Show file tree

Hide file tree

Showing 15 changed files with 22 additions and 18 deletions.
diff --git a/helm-charts/agentqna/gaudi-values.yaml b/helm-charts/agentqna/gaudi-values.yaml
@@ -8,7 +8,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 4

diff --git a/helm-charts/audioqna/gaudi-values.yaml b/helm-charts/audioqna/gaudi-values.yaml
@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

diff --git a/helm-charts/chatqna/gaudi-values.yaml b/helm-charts/chatqna/gaudi-values.yaml
@@ -9,7 +9,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

diff --git a/helm-charts/chatqna/guardrails-gaudi-values.yaml b/helm-charts/chatqna/guardrails-gaudi-values.yaml
@@ -49,7 +49,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1
@@ -81,7 +81,7 @@ tgi-guardrails:
   LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

diff --git a/helm-charts/codegen/gaudi-values.yaml b/helm-charts/codegen/gaudi-values.yaml
@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

diff --git a/helm-charts/codetrans/gaudi-values.yaml b/helm-charts/codetrans/gaudi-values.yaml
@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

diff --git a/helm-charts/common/agent/gaudi-values.yaml b/helm-charts/common/agent/gaudi-values.yaml
@@ -9,7 +9,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 4

diff --git a/helm-charts/common/tgi/gaudi-values.yaml b/helm-charts/common/tgi/gaudi-values.yaml
@@ -9,7 +9,7 @@ accelDevice: "gaudi"
 
 image:
   repository: ghcr.io/huggingface/tgi-gaudi
-  tag: "2.0.5"
+  tag: "2.0.6"
 
 MAX_INPUT_LENGTH: "1024"
 MAX_TOTAL_TOKENS: "2048"

diff --git a/helm-charts/docsum/gaudi-values.yaml b/helm-charts/docsum/gaudi-values.yaml
@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

diff --git a/helm-charts/faqgen/README.md b/helm-charts/faqgen/README.md
@@ -18,8 +18,10 @@ Open another terminal and run the following command to verify the service if wor
 
 ```console
 curl http://localhost:8888/v1/faqgen \
-    -H "Content-Type: application/json" \
-    -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+      -H "Content-Type: multipart/form-data" \
+      -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \
+      -F "max_tokens=32" \
+      -F "stream=false"
 ```
 
 ### Verify the workload through UI

diff --git a/helm-charts/faqgen/gaudi-values.yaml b/helm-charts/faqgen/gaudi-values.yaml
@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

diff --git a/helm-charts/faqgen/templates/tests/test-pod.yaml b/helm-charts/faqgen/templates/tests/test-pod.yaml
@@ -20,8 +20,10 @@ spec:
           max_retry=20;
           for ((i=1; i<=max_retry; i++)); do
             curl http://{{ include "faqgen.fullname" . }}:{{ .Values.service.port }}/v1/faqgen -sS --fail-with-body \
-            -H "Content-Type: application/json" \
-            -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":17}' && break;
+            -H "Content-Type: multipart/form-data" \
+            -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \
+            -F "max_tokens=32" \
+            -F "stream=false" && break;
             curlcode=$?
             if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
           done;

diff --git a/helm-charts/visualqna/gaudi-values.yaml b/helm-charts/visualqna/gaudi-values.yaml
@@ -9,7 +9,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

diff --git a/microservices-connector/config/manifests/tgi_gaudi.yaml b/microservices-connector/config/manifests/tgi_gaudi.yaml
@@ -88,7 +88,7 @@ spec:
                 optional: true
           securityContext:
             {}
-          image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
+          image: "ghcr.io/huggingface/tgi-gaudi:2.0.6"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data

diff --git a/microservices-connector/config/samples/ChatQnA/use_cases.md b/microservices-connector/config/samples/ChatQnA/use_cases.md
@@ -26,7 +26,7 @@ Should you desire to use the Gaudi accelerator, two alternate images are used fo
 For Gaudi:
 
 - tei-embedding-service: ghcr.io/huggingface/tei-gaudi:1.5.0
-- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5
+- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.6
 
 ## Deploy ChatQnA pipeline