Skip to content

Commit

Permalink
Align HPU parameters with docker compose (#528)
Browse files: browse the repository at this point in the history
Signed-off-by: Dolpher Du <[email protected]>
  • Loading branch information
yongfengdu authored Nov 8, 2024
1 parent 809347a commit cb0bd53
Show file tree
Hide file tree
Showing 11 changed files with 149 additions and 60 deletions.
48 changes: 30 additions & 18 deletions helm-charts/chatqna/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,16 @@ tgi:
resources:
limits:
habana.ai/gaudi: 1
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
# Higher limits are needed because rerank adds extra input tokens
MAX_INPUT_LENGTH: "2048"
MAX_TOTAL_TOKENS: "4096"
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
ENABLE_HPU_GRAPH: "true"
LIMIT_HPU_GRAPH: "true"
USE_FLASH_ATTENTION: "true"
FLASH_ATTENTION_RECOMPUTE: "true"

livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
Expand All @@ -34,22 +41,8 @@ tgi:
# (i.e. query context docs have been uploaded with data-prep)
teirerank:
accelDevice: "gaudi"
image:
repository: opea/tei-gaudi
tag: "latest"
resources:
limits:
habana.ai/gaudi: 1
securityContext:
readOnlyRootFilesystem: false
livenessProbe:
timeoutSeconds: 1
readinessProbe:
timeoutSeconds: 1

# Embedding: Second largest bottleneck without rerank
tei:
accelDevice: "gaudi"
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
MAX_WARMUP_SEQUENCE_LENGTH: "512"
image:
repository: ghcr.io/huggingface/tei-gaudi
tag: 1.5.0
Expand All @@ -62,3 +55,22 @@ tei:
timeoutSeconds: 1
readinessProbe:
timeoutSeconds: 1

# Embedding: Second largest bottleneck without rerank
# By default, TEI on Gaudi is disabled; uncomment the block below to enable it.
# tei:
# accelDevice: "gaudi"
# OMPI_MCA_btl_vader_single_copy_mechanism: "none"
# MAX_WARMUP_SEQUENCE_LENGTH: "512"
# image:
# repository: ghcr.io/huggingface/tei-gaudi
# tag: 1.5.0
# resources:
# limits:
# habana.ai/gaudi: 1
# securityContext:
# readOnlyRootFilesystem: false
# livenessProbe:
# timeoutSeconds: 1
# readinessProbe:
# timeoutSeconds: 1
50 changes: 32 additions & 18 deletions helm-charts/chatqna/guardrails-gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,29 @@ guardrails-usvc:
SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"

# gaudi related config
tei:
accelDevice: "gaudi"
image:
repository: ghcr.io/huggingface/tei-gaudi
tag: 1.5.0
resources:
limits:
habana.ai/gaudi: 1
securityContext:
readOnlyRootFilesystem: false
livenessProbe:
timeoutSeconds: 1
readinessProbe:
timeoutSeconds: 1
# TEI runs on CPU by default; uncomment the block below to run it on Gaudi.
# tei:
# accelDevice: "gaudi"
# image:
# repository: ghcr.io/huggingface/tei-gaudi
# tag: 1.5.0
# resources:
# limits:
# habana.ai/gaudi: 1
# securityContext:
# readOnlyRootFilesystem: false
# livenessProbe:
# timeoutSeconds: 1
# readinessProbe:
# timeoutSeconds: 1

teirerank:
accelDevice: "gaudi"
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
MAX_WARMUP_SEQUENCE_LENGTH: "512"
image:
repository: opea/tei-gaudi
tag: "latest"
repository: ghcr.io/huggingface/tei-gaudi
tag: "1.5.0"
resources:
limits:
habana.ai/gaudi: 1
Expand All @@ -50,9 +53,15 @@ tgi:
resources:
limits:
habana.ai/gaudi: 1
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
# Higher limits are needed because rerank adds extra input tokens
MAX_INPUT_LENGTH: "2048"
MAX_TOTAL_TOKENS: "4096"
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
ENABLE_HPU_GRAPH: "true"
LIMIT_HPU_GRAPH: "true"
USE_FLASH_ATTENTION: "true"
FLASH_ATTENTION_RECOMPUTE: "true"
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
Expand All @@ -79,6 +88,11 @@ tgi-guardrails:
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
ENABLE_HPU_GRAPH: "true"
LIMIT_HPU_GRAPH: "true"
USE_FLASH_ATTENTION: "true"
FLASH_ATTENTION_RECOMPUTE: "true"
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
Expand Down
2 changes: 2 additions & 0 deletions helm-charts/common/tei/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

accelDevice: "gaudi"

OMPI_MCA_btl_vader_single_copy_mechanism: "none"
MAX_WARMUP_SEQUENCE_LENGTH: "512"
image:
repository: ghcr.io/huggingface/tei-gaudi
tag: 1.5.0
Expand Down
14 changes: 13 additions & 1 deletion helm-charts/common/tei/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,16 @@ data:
{{- if .Values.global.HF_ENDPOINT }}
HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
{{- end }}
MAX_WARMUP_SEQUENCE_LENGTH: "512"
{{- if .Values.HF_HUB_DISABLE_PROGRESS_BARS }}
HF_HUB_DISABLE_PROGRESS_BARS: {{ .Values.HF_HUB_DISABLE_PROGRESS_BARS | quote }}
{{- end }}
{{- if .Values.HF_HUB_ENABLE_HF_TRANSFER }}
HF_HUB_ENABLE_HF_TRANSFER: {{ .Values.HF_HUB_ENABLE_HF_TRANSFER | quote }}
{{- end }}
# More options for HPU
{{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }}
OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote }}
{{- end }}
{{- if .Values.MAX_WARMUP_SEQUENCE_LENGTH }}
MAX_WARMUP_SEQUENCE_LENGTH: {{ .Values.MAX_WARMUP_SEQUENCE_LENGTH | quote }}
{{- end }}
3 changes: 3 additions & 0 deletions helm-charts/common/tei/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ autoscaling:
port: 2081
shmSize: 1Gi
EMBEDDING_MODEL_ID: "BAAI/bge-base-en-v1.5"
HF_HUB_DISABLE_PROGRESS_BARS: "1"
HF_HUB_ENABLE_HF_TRANSFER: "0"

image:
repository: ghcr.io/huggingface/text-embeddings-inference
pullPolicy: IfNotPresent
Expand Down
6 changes: 4 additions & 2 deletions helm-charts/common/teirerank/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

accelDevice: "gaudi"

OMPI_MCA_btl_vader_single_copy_mechanism: "none"
MAX_WARMUP_SEQUENCE_LENGTH: "512"
image:
repository: opea/tei-gaudi
tag: "latest"
repository: ghcr.io/huggingface/tei-gaudi
tag: 1.5.0

securityContext:
readOnlyRootFilesystem: false
Expand Down
14 changes: 13 additions & 1 deletion helm-charts/common/teirerank/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,16 @@ data:
{{- if .Values.global.HF_ENDPOINT }}
HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
{{- end }}
MAX_WARMUP_SEQUENCE_LENGTH: "512"
{{- if .Values.HF_HUB_DISABLE_PROGRESS_BARS }}
HF_HUB_DISABLE_PROGRESS_BARS: {{ .Values.HF_HUB_DISABLE_PROGRESS_BARS | quote }}
{{- end }}
{{- if .Values.HF_HUB_ENABLE_HF_TRANSFER }}
HF_HUB_ENABLE_HF_TRANSFER: {{ .Values.HF_HUB_ENABLE_HF_TRANSFER | quote }}
{{- end }}
# More options for HPU
{{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }}
OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote }}
{{- end }}
{{- if .Values.MAX_WARMUP_SEQUENCE_LENGTH }}
MAX_WARMUP_SEQUENCE_LENGTH: {{ .Values.MAX_WARMUP_SEQUENCE_LENGTH | quote }}
{{- end }}
3 changes: 3 additions & 0 deletions helm-charts/common/teirerank/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ autoscaling:
port: 2082
shmSize: 1Gi
RERANK_MODEL_ID: "BAAI/bge-reranker-base"
HF_HUB_DISABLE_PROGRESS_BARS: "1"
HF_HUB_ENABLE_HF_TRANSFER: "0"

image:
repository: ghcr.io/huggingface/text-embeddings-inference
pullPolicy: IfNotPresent
Expand Down
5 changes: 5 additions & 0 deletions helm-charts/common/tgi/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ image:
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
ENABLE_HPU_GRAPH: "true"
LIMIT_HPU_GRAPH: "true"
USE_FLASH_ATTENTION: "true"
FLASH_ATTENTION_RECOMPUTE: "true"

resources:
limits:
Expand Down
22 changes: 22 additions & 0 deletions helm-charts/common/tgi/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,25 @@ data:
{{- if .Values.CUDA_GRAPHS }}
CUDA_GRAPHS: {{ .Values.CUDA_GRAPHS | quote }}
{{- end }}
{{- if .Values.HF_HUB_DISABLE_PROGRESS_BARS }}
HF_HUB_DISABLE_PROGRESS_BARS: {{ .Values.HF_HUB_DISABLE_PROGRESS_BARS | quote }}
{{- end }}
{{- if .Values.HF_HUB_ENABLE_HF_TRANSFER }}
HF_HUB_ENABLE_HF_TRANSFER: {{ .Values.HF_HUB_ENABLE_HF_TRANSFER | quote }}
{{- end }}
# More options for HPU
{{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }}
OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote }}
{{- end }}
{{- if .Values.ENABLE_HPU_GRAPH }}
ENABLE_HPU_GRAPH: {{ .Values.ENABLE_HPU_GRAPH | quote }}
{{- end }}
{{- if .Values.LIMIT_HPU_GRAPH }}
LIMIT_HPU_GRAPH: {{ .Values.LIMIT_HPU_GRAPH | quote }}
{{- end }}
{{- if .Values.USE_FLASH_ATTENTION }}
USE_FLASH_ATTENTION: {{ .Values.USE_FLASH_ATTENTION | quote }}
{{- end }}
{{- if .Values.FLASH_ATTENTION_RECOMPUTE }}
FLASH_ATTENTION_RECOMPUTE: {{ .Values.FLASH_ATTENTION_RECOMPUTE | quote }}
{{- end }}
42 changes: 22 additions & 20 deletions helm-charts/common/tgi/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,26 +88,26 @@ startupProbe:
periodSeconds: 5
failureThreshold: 180
timeoutSeconds: 2
#livenessProbe:
# httpGet:
# path: /health
# port: http
# initialDelaySeconds: 5
# periodSeconds: 5
# failureThreshold: 24
#readinessProbe:
# httpGet:
# path: /health
# port: http
# initialDelaySeconds: 5
# periodSeconds: 5
#startupProbe:
# httpGet:
# path: /health
# port: http
# initialDelaySeconds: 5
# periodSeconds: 5
# failureThreshold: 120
# livenessProbe:
# httpGet:
# path: /health
# port: http
# initialDelaySeconds: 5
# periodSeconds: 5
# failureThreshold: 24
# readinessProbe:
# httpGet:
# path: /health
# port: http
# initialDelaySeconds: 5
# periodSeconds: 5
# startupProbe:
# httpGet:
# path: /health
# port: http
# initialDelaySeconds: 5
# periodSeconds: 5
# failureThreshold: 120

nodeSelector: {}

Expand All @@ -120,6 +120,8 @@ LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
MAX_INPUT_LENGTH: ""
MAX_TOTAL_TOKENS: ""
CUDA_GRAPHS: "0"
HF_HUB_DISABLE_PROGRESS_BARS: "1"
HF_HUB_ENABLE_HF_TRANSFER: "0"

global:
http_proxy: ""
Expand Down

0 comments on commit cb0bd53

Please sign in to comment.