Skip to content

Commit

Permalink
helm: allow configuring max input_length and output_tokens
Browse files Browse the repository at this point in the history
Signed-off-by: Lianhao Lu <[email protected]>
  • Loading branch information
lianhao committed Aug 9, 2024
1 parent a8d96e7 commit a270726
Show file tree
Hide file tree
Showing 9 changed files with 21 additions and 34 deletions.
7 changes: 2 additions & 5 deletions helm-charts/chatqna/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,5 @@ tgi:
resources:
limits:
habana.ai/gaudi: 1
extraArgs:
- "--max-input-length"
- "1024"
- "--max-total-tokens"
- "2048"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
7 changes: 2 additions & 5 deletions helm-charts/codegen/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,5 @@ tgi:
resources:
limits:
habana.ai/gaudi: 1
extraArgs:
- "--max-input-length"
- "1024"
- "--max-total-tokens"
- "2048"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
7 changes: 2 additions & 5 deletions helm-charts/codetrans/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,5 @@ tgi:
resources:
limits:
habana.ai/gaudi: 1
extraArgs:
- "--max-input-length"
- "1024"
- "--max-total-tokens"
- "2048"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
7 changes: 2 additions & 5 deletions helm-charts/common/tgi/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,8 @@ image:
repository: ghcr.io/huggingface/tgi-gaudi
tag: "2.0.1"

extraArgs:
- "--max-input-length"
- "1024"
- "--max-total-tokens"
- "2048"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"

resources:
limits:
Expand Down
6 changes: 6 additions & 0 deletions helm-charts/common/tgi/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,9 @@ data:
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
{{- if .Values.MAX_INPUT_LENGTH }}
MAX_INPUT_LENGTH: {{ .Values.MAX_INPUT_LENGTH | quote }}
{{- end }}
{{- if .Values.MAX_TOTAL_TOKENS }}
MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }}
{{- end }}
4 changes: 0 additions & 4 deletions helm-charts/common/tgi/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,6 @@ spec:
{{- end }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
{{- if .Values.extraArgs }}
args:
{{- toYaml .Values.extraArgs | nindent 12}}
{{- end }}
volumeMounts:
- mountPath: /data
name: model-volume
Expand Down
3 changes: 3 additions & 0 deletions helm-charts/common/tgi/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ affinity: {}

LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

MAX_INPUT_LENGTH: ""
MAX_TOTAL_TOKENS: ""

global:
http_proxy: ""
https_proxy: ""
Expand Down
7 changes: 2 additions & 5 deletions helm-charts/docsum/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,5 @@ tgi:
resources:
limits:
habana.ai/gaudi: 1
extraArgs:
- "--max-input-length"
- "1024"
- "--max-total-tokens"
- "2048"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
7 changes: 2 additions & 5 deletions microservices-connector/config/manifests/tgi_gaudi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ data:
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
---
# Source: tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
Expand Down Expand Up @@ -90,11 +92,6 @@ spec:
{}
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
imagePullPolicy: IfNotPresent
args:
- --max-input-length
- "1024"
- --max-total-tokens
- "2048"
volumeMounts:
- mountPath: /data
name: model-volume
Expand Down

0 comments on commit a270726

Please sign in to comment.