From 71daaf112ed1e3b04bda7ed052e0080641ea4284 Mon Sep 17 00:00:00 2001
From: PeterYang12
Date: Wed, 24 Jul 2024 17:26:15 -0700
Subject: [PATCH] Add Nvidia GPU support for ChatQnA

1. Add Helm chart support
2. Add manifest support

Signed-off-by: PeterYang12
---
 helm-charts/chatqna/README.md         |   2 +
 helm-charts/chatqna/nv-values.yaml    |  52 ++++++++++++
 helm-charts/common/tgi/nv-values.yaml |  60 ++++++++++++++
 manifests/common/tgi_nv.yaml          | 114 ++++++++++++++++++++++++++
 manifests/update_manifests.sh         |   8 +-
 5 files changed, 233 insertions(+), 3 deletions(-)
 create mode 100644 helm-charts/chatqna/nv-values.yaml
 create mode 100644 helm-charts/common/tgi/nv-values.yaml
 create mode 100644 manifests/common/tgi_nv.yaml

diff --git a/helm-charts/chatqna/README.md b/helm-charts/chatqna/README.md
index 5ebc97ca..27768faa 100644
--- a/helm-charts/chatqna/README.md
+++ b/helm-charts/chatqna/README.md
@@ -26,6 +26,8 @@ export MODELNAME="Intel/neural-chat-7b-v3-3"
 helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
 # To use Gaudi device
 #helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-values.yaml
+# To use Nvidia GPU
+#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/nv-values.yaml
 ```
 
 ### IMPORTANT NOTE
diff --git a/helm-charts/chatqna/nv-values.yaml b/helm-charts/chatqna/nv-values.yaml
new file mode 100644
index 00000000..fd58e428
--- /dev/null
+++ b/helm-charts/chatqna/nv-values.yaml
@@ -0,0 +1,52 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for chatqna.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+
+image:
+  repository: opea/chatqna:latest
+  pullPolicy: IfNotPresent
+  # Overrides the image tag whose default is the chart appVersion.
+  # tag: "1.0"
+
+port: 8888
+service:
+  type: ClusterIP
+  port: 8888
+
+securityContext:
+  readOnlyRootFilesystem: true
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+# To override values in subchart tgi
+tgi:
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  # LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
+  image:
+    repository: ghcr.io/huggingface/text-generation-inference
+    tag: "2.0"
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+
+global:
+  http_proxy:
+  https_proxy:
+  no_proxy:
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+  LANGCHAIN_TRACING_V2: false
+  LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
+  # set modelUseHostPath to host directory if you want to use hostPath volume for model storage
+  # comment out modelUseHostPath if you want to download the model from Hugging Face
+  modelUseHostPath: /mnt/opea-models
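The `tgi:` block above works because Helm merges parent-chart values into the `tgi` subchart, so the Nvidia image tag and the `nvidia.com/gpu` limit are declared once at the top level and land on the TGI deployment without touching the common chart. For reference, a minimal install sketch using the new values file (this mirrors the README command; HFTOKEN, MODELDIR, and MODELNAME are the variables exported earlier in that README):

```
# Sketch: install ChatQnA with the Nvidia overrides from nv-values.yaml.
helm install chatqna chatqna \
  --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
  --set global.modelUseHostPath=${MODELDIR} \
  --set tgi.LLM_MODEL_ID=${MODELNAME} \
  -f chatqna/nv-values.yaml
```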
diff --git a/helm-charts/common/tgi/nv-values.yaml b/helm-charts/common/tgi/nv-values.yaml
new file mode 100644
index 00000000..0b69debe
--- /dev/null
+++ b/helm-charts/common/tgi/nv-values.yaml
@@ -0,0 +1,60 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for tgi.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+
+port: 2080
+
+image:
+  repository: ghcr.io/huggingface/text-generation-inference
+  pullPolicy: IfNotPresent
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "2.0"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: true
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+
+resources:
+  limits:
+    nvidia.com/gpu: 1
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+  # set modelUseHostPath to host directory if you want to use hostPath volume for model storage
+  # comment out modelUseHostPath if you want to download the model from Hugging Face
+  modelUseHostPath: /mnt/opea-models
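A quick pre-flight check worth noting here (a sketch, assuming `kubectl` access): the `nvidia.com/gpu: 1` limit above is only schedulable if at least one node advertises that resource, which requires the NVIDIA device plugin to be running in the cluster.

```
# Sketch: list nodes and their allocatable Nvidia GPUs; a <none> GPU column
# means the NVIDIA device plugin is not running on that node.
kubectl get nodes -o custom-columns='NAME:.metadata.name,GPU:.status.allocatable.nvidia\.com/gpu'
```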
diff --git a/manifests/common/tgi_nv.yaml b/manifests/common/tgi_nv.yaml
new file mode 100644
index 00000000..857b59b3
--- /dev/null
+++ b/manifests/common/tgi_nv.yaml
@@ -0,0 +1,114 @@
+---
+# Source: tgi/templates/configmap.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: tgi-config
+  labels:
+    helm.sh/chart: tgi-0.8.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: tgi
+    app.kubernetes.io/version: "2.1.0"
+    app.kubernetes.io/managed-by: Helm
+data:
+  MODEL_ID: "Intel/neural-chat-7b-v3-3"
+  PORT: "2080"
+  HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
+  HF_TOKEN: "insert-your-huggingface-token-here"
+  MAX_INPUT_TOKENS: "1024"
+  MAX_TOTAL_TOKENS: "4096"
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  HABANA_LOGS: "/tmp/habana_logs"
+  NUMBA_CACHE_DIR: "/tmp"
+  TRANSFORMERS_CACHE: "/tmp/transformers_cache"
+  HF_HOME: "/tmp/.cache/huggingface"
+---
+# Source: tgi/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: tgi
+  labels:
+    helm.sh/chart: tgi-0.8.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: tgi
+    app.kubernetes.io/version: "2.1.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 2080
+      protocol: TCP
+      name: tgi
+  selector:
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: tgi
+---
+# Source: tgi/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: tgi
+  labels:
+    helm.sh/chart: tgi-0.8.0
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: tgi
+    app.kubernetes.io/version: "2.1.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: tgi
+      app.kubernetes.io/instance: tgi
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: tgi
+        app.kubernetes.io/instance: tgi
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: tgi
+          envFrom:
+            - configMapRef:
+                name: tgi-config
+            - configMapRef:
+                name: extra-env-config
+                optional: true
+          securityContext:
+            {}
+          image: "ghcr.io/huggingface/text-generation-inference:2.0"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+            - mountPath: /tmp
+              name: tmp
+          ports:
+            - name: http
+              containerPort: 2080
+              protocol: TCP
+          resources:
+            limits:
+              nvidia.com/gpu: 1
+      volumes:
+        - name: model-volume
+          hostPath:
+            path: /mnt/opea-models
+            type: Directory
+        - name: tmp
+          emptyDir: {}
diff --git a/manifests/update_manifests.sh b/manifests/update_manifests.sh
index 512cd8fe..ab3b3e54 100755
--- a/manifests/update_manifests.sh
+++ b/manifests/update_manifests.sh
@@ -14,9 +14,11 @@ function generate_yaml {
   outputdir=$2
 
   helm template $chart ../helm-charts/common/$chart --skip-tests --values ../helm-charts/common/$chart/values.yaml --set global.extraEnvConfig=extra-env-config,noProbe=true > ${outputdir}/$chart.yaml
-  if [ -f ../helm-charts/common/$chart/gaudi-values.yaml ]; then
-    helm template $chart ../helm-charts/common/$chart --skip-tests --values ../helm-charts/common/$chart/gaudi-values.yaml --set global.extraEnvConfig=extra-env-config,noProbe=true > ${outputdir}/${chart}_gaudi.yaml
-  fi
+
+  for f in $(ls ../helm-charts/common/$chart/*-values.yaml 2>/dev/null); do
+    ext=$(basename "$f" | cut -d'-' -f1)
+    helm template $chart ../helm-charts/common/$chart --skip-tests --values "$f" --set global.extraEnvConfig=extra-env-config,noProbe=true > ${outputdir}/${chart}_${ext}.yaml
+  done
 }
 
 mkdir -p $OUTPUTDIR
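The rewritten loop derives the manifest suffix from the values-file prefix, so `gaudi-values.yaml` still yields `tgi_gaudi.yaml` and the new `nv-values.yaml` yields `tgi_nv.yaml`. A one-off equivalent of what the script does for the Nvidia case, using only the flags the script already passes:

```
# Sketch: render the tgi chart with the new Nvidia values file by hand,
# then confirm the GPU limit survives into the generated manifest.
helm template tgi ../helm-charts/common/tgi --skip-tests \
  --values ../helm-charts/common/tgi/nv-values.yaml \
  --set global.extraEnvConfig=extra-env-config,noProbe=true > tgi_nv.yaml
grep -n 'nvidia.com/gpu' tgi_nv.yaml
```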