Skip to content

Commit

Permalink
helm: allow configuring max input_length and output_tokens
Browse files Browse the repository at this point in the history
Signed-off-by: Lianhao Lu <[email protected]>
  • Loading branch information
lianhao committed Aug 9, 2024
1 parent a8d96e7 commit a270726
Show file tree
Hide file tree
Showing 9 changed files with 21 additions and 34 deletions.
7 changes: 2 additions & 5 deletions helm-charts/chatqna/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,5 @@ tgi:
resources:
limits:
habana.ai/gaudi: 1
extraArgs:
- "--max-input-length"
- "1024"
- "--max-total-tokens"
- "2048"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
7 changes: 2 additions & 5 deletions helm-charts/codegen/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,5 @@ tgi:
resources:
limits:
habana.ai/gaudi: 1
extraArgs:
- "--max-input-length"
- "1024"
- "--max-total-tokens"
- "2048"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
7 changes: 2 additions & 5 deletions helm-charts/codetrans/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,5 @@ tgi:
resources:
limits:
habana.ai/gaudi: 1
extraArgs:
- "--max-input-length"
- "1024"
- "--max-total-tokens"
- "2048"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
7 changes: 2 additions & 5 deletions helm-charts/common/tgi/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,8 @@ image:
repository: ghcr.io/huggingface/tgi-gaudi
tag: "2.0.1"

extraArgs:
- "--max-input-length"
- "1024"
- "--max-total-tokens"
- "2048"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"

resources:
limits:
Expand Down
6 changes: 6 additions & 0 deletions helm-charts/common/tgi/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,9 @@ data:
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
{{- if .Values.MAX_INPUT_LENGTH }}
MAX_INPUT_LENGTH: {{ .Values.MAX_INPUT_LENGTH | quote }}
{{- end }}
{{- if .Values.MAX_TOTAL_TOKENS }}
MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }}
{{- end }}
4 changes: 0 additions & 4 deletions helm-charts/common/tgi/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,6 @@ spec:
{{- end }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
{{- if .Values.extraArgs }}
args:
{{- toYaml .Values.extraArgs | nindent 12}}
{{- end }}
volumeMounts:
- mountPath: /data
name: model-volume
Expand Down
3 changes: 3 additions & 0 deletions helm-charts/common/tgi/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ affinity: {}

LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

MAX_INPUT_LENGTH: ""
MAX_TOTAL_TOKENS: ""

global:
http_proxy: ""
https_proxy: ""
Expand Down
7 changes: 2 additions & 5 deletions helm-charts/docsum/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,5 @@ tgi:
resources:
limits:
habana.ai/gaudi: 1
extraArgs:
- "--max-input-length"
- "1024"
- "--max-total-tokens"
- "2048"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
7 changes: 2 additions & 5 deletions microservices-connector/config/manifests/tgi_gaudi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ data:
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
---
# Source: tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
Expand Down Expand Up @@ -90,11 +92,6 @@ spec:
{}
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
imagePullPolicy: IfNotPresent
args:
- --max-input-length
- "1024"
- --max-total-tokens
- "2048"
volumeMounts:
- mountPath: /data
name: model-volume
Expand Down

0 comments on commit a270726

Please sign in to comment.