# Patch overview (git unified diff). In this copy the original line breaks and
# indentation are collapsed, so each hunk body reads as one run of text.
#
# Files touched:
#   - helm-charts/common/tgi/templates/deployment.yaml
#   - helm-charts/common/tgi/values.yaml
#   - microservices-connector/config/manifests/tgi.yaml
#   - microservices-connector/config/manifests/tgi_gaudi.yaml
#   - microservices-connector/config/manifests/tgi_nv.yaml
#
# What the patch adds:
#   - deployment.yaml: an `args:` list that is emitted only when
#     .Values.extraCmdArgs is set, with every entry piped through `quote`;
#     a volumeMount of the volume named `shm` at /dev/shm; and the `shm`
#     volume itself, an emptyDir with `medium: Memory` whose sizeLimit is
#     rendered from .Values.shmSize.
#   - values.yaml: `shmSize: 1Gi` plus a commented-out example
#     `extraCmdArgs: ["--dtype","bfloat16"]` with a link to the TGI launcher
#     reference.
#   - The three manifests: the same /dev/shm mount and a `shm` emptyDir with
#     `medium: Memory` and a literal `sizeLimit: 1Gi`.
#
# NOTE(review): `sizeLimit: {{ .Values.shmSize }}` is rendered unquoted and
#   without a `default`; confirm the rendered output is acceptable when
#   shmSize is unset or overridden with a bare number.
# NOTE(review): `shmSize` is added to values.yaml without an explanatory
#   comment; consider documenting what it controls (the /dev/shm emptyDir
#   size limit in deployment.yaml).
# NOTE(review): the manifests hard-code 1Gi, which matches the chart default
#   added here; presumably they are generated from the chart - verify they
#   are regenerated whenever the default changes.
# NOTE(review): a `medium: Memory` emptyDir is presumably accounted against
#   the container's memory limit - confirm resource limits leave room for it.
diff --git a/helm-charts/common/tgi/templates/deployment.yaml b/helm-charts/common/tgi/templates/deployment.yaml index 9587bcae6..a40fd749d 100644 --- a/helm-charts/common/tgi/templates/deployment.yaml +++ b/helm-charts/common/tgi/templates/deployment.yaml @@ -48,9 +48,17 @@ spec: {{- end }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- if .Values.extraCmdArgs }} + args: + {{- range .Values.extraCmdArgs }} + - {{ . | quote }} + {{- end }} + {{- end }} volumeMounts: - mountPath: /data name: model-volume + - mountPath: /dev/shm + name: shm - mountPath: /tmp name: tmp ports: @@ -83,6 +91,10 @@ spec: {{- else }} emptyDir: {} {{- end }} + - name: shm + emptyDir: + medium: Memory + sizeLimit: {{ .Values.shmSize }} - name: tmp emptyDir: {} {{- with .Values.nodeSelector }} diff --git a/helm-charts/common/tgi/values.yaml b/helm-charts/common/tgi/values.yaml index 9aa6bae56..d487851e0 100644 --- a/helm-charts/common/tgi/values.yaml +++ b/helm-charts/common/tgi/values.yaml @@ -11,6 +11,11 @@ horizontalPodAutoscaler: maxReplicas: 6 port: 2080 +shmSize: 1Gi + +# Set extraCmdArgs if you need to pass additional parameters to TGI for performance +# Refer to https://huggingface.co/docs/text-generation-inference/en/reference/launcher for more options. 
+# extraCmdArgs: ["--dtype","bfloat16"] image: repository: ghcr.io/huggingface/text-generation-inference diff --git a/microservices-connector/config/manifests/tgi.yaml b/microservices-connector/config/manifests/tgi.yaml index 735de5d31..f1d10d735 100644 --- a/microservices-connector/config/manifests/tgi.yaml +++ b/microservices-connector/config/manifests/tgi.yaml @@ -94,6 +94,8 @@ spec: volumeMounts: - mountPath: /data name: model-volume + - mountPath: /dev/shm + name: shm - mountPath: /tmp name: tmp ports: @@ -124,6 +126,10 @@ spec: hostPath: path: /mnt/opea-models type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi - name: tmp emptyDir: {} --- diff --git a/microservices-connector/config/manifests/tgi_gaudi.yaml b/microservices-connector/config/manifests/tgi_gaudi.yaml index 410327acf..92ac7f871 100644 --- a/microservices-connector/config/manifests/tgi_gaudi.yaml +++ b/microservices-connector/config/manifests/tgi_gaudi.yaml @@ -95,6 +95,8 @@ spec: volumeMounts: - mountPath: /data name: model-volume + - mountPath: /dev/shm + name: shm - mountPath: /tmp name: tmp ports: @@ -126,6 +128,10 @@ spec: hostPath: path: /mnt/opea-models type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi - name: tmp emptyDir: {} --- diff --git a/microservices-connector/config/manifests/tgi_nv.yaml b/microservices-connector/config/manifests/tgi_nv.yaml index 09c634ff7..e4d03cd69 100644 --- a/microservices-connector/config/manifests/tgi_nv.yaml +++ b/microservices-connector/config/manifests/tgi_nv.yaml @@ -93,6 +93,8 @@ spec: volumeMounts: - mountPath: /data name: model-volume + - mountPath: /dev/shm + name: shm - mountPath: /tmp name: tmp ports: @@ -124,6 +126,10 @@ spec: hostPath: path: /mnt/opea-models type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi - name: tmp emptyDir: {} ---