Add Nvidia GPU support for ChatQnA (#225)
1. Add Helm chart support
2. Add manifest support

Signed-off-by: PeterYang12 <[email protected]>
PeterYang12 authored Jul 30, 2024
1 parent 70205e5 commit 868103b
Showing 5 changed files with 233 additions and 3 deletions.
2 changes: 2 additions & 0 deletions helm-charts/chatqna/README.md
@@ -26,6 +26,8 @@ export MODELNAME="Intel/neural-chat-7b-v3-3"
 helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
 # To use Gaudi device
 #helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-values.yaml
+# To use Nvidia GPU
+#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/nv-values.yaml
 ```
 
 ### IMPORTANT NOTE
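The new option mirrors the existing Gaudi flow. A minimal end-to-end sketch with the Nvidia line uncommented (it assumes the cluster already runs NVIDIA's k8s-device-plugin so that nvidia.com/gpu is a schedulable resource; the token and model directory below are placeholders):

  export HFTOKEN="insert-your-huggingface-token-here"
  export MODELDIR="/mnt/opea-models"
  export MODELNAME="Intel/neural-chat-7b-v3-3"
  helm install chatqna chatqna \
    --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
    --set global.modelUseHostPath=${MODELDIR} \
    --set tgi.LLM_MODEL_ID=${MODELNAME} \
    -f chatqna/nv-values.yaml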
52 changes: 52 additions & 0 deletions helm-charts/chatqna/nv-values.yaml
@@ -0,0 +1,52 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Default values for chatqna.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 1

image:
  repository: opea/chatqna:latest
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  # tag: "1.0"

port: 8888
service:
  type: ClusterIP
  port: 8888

securityContext:
  readOnlyRootFilesystem: true
  allowPrivilegeEscalation: false
  runAsNonRoot: true
  runAsUser: 1000
  capabilities:
    drop:
    - ALL
  seccompProfile:
    type: RuntimeDefault

# To override values in subchart tgi
tgi:
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  # LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
  image:
    repository: ghcr.io/huggingface/text-generation-inference
    tag: "2.0"
  resources:
    limits:
      nvidia.com/gpu: 1

global:
  http_proxy:
  https_proxy:
  no_proxy:
  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
  LANGCHAIN_TRACING_V2: false
  LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
  # Set modelUseHostPath to a host directory if you want to use a hostPath volume for model storage.
  # Comment out modelUseHostPath if you want to download the model from Hugging Face.
  modelUseHostPath: /mnt/opea-models
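After an install with this values file, a quick way to confirm the GPU limit actually landed on the TGI pod (hypothetical verification commands; they assume kubectl targets the right cluster and namespace):

  # GPUs each node advertises via the device plugin
  kubectl get nodes -o jsonpath='{.items[*].status.allocatable.nvidia\.com/gpu}'
  # The nvidia.com/gpu: 1 limit on the running TGI pod
  kubectl describe pod -l app.kubernetes.io/name=tgi | grep -A 2 'Limits:'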
60 changes: 60 additions & 0 deletions helm-charts/common/tgi/nv-values.yaml
@@ -0,0 +1,60 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Default values for tgi.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 1

port: 2080

image:
  repository: ghcr.io/huggingface/text-generation-inference
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: "2.0"

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

podAnnotations: {}

podSecurityContext: {}
  # fsGroup: 2000

securityContext:
  readOnlyRootFilesystem: true
  allowPrivilegeEscalation: false
  runAsNonRoot: true
  runAsUser: 1000
  capabilities:
    drop:
    - ALL
  seccompProfile:
    type: RuntimeDefault

service:
  type: ClusterIP

resources:
  limits:
    nvidia.com/gpu: 1

nodeSelector: {}

tolerations: []

affinity: {}

LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

global:
  http_proxy: ""
  https_proxy: ""
  no_proxy: ""
  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
  # Set modelUseHostPath to a host directory if you want to use a hostPath volume for model storage.
  # Comment out modelUseHostPath if you want to download the model from Hugging Face.
  modelUseHostPath: /mnt/opea-models
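Because LLM_MODEL_ID is a top-level chart value, it can also be overridden per release instead of editing this file. A sketch, assuming the common tgi chart can be installed standalone (the model name is only an example):

  helm install tgi ../helm-charts/common/tgi -f nv-values.yaml \
    --set LLM_MODEL_ID=mistralai/Mistral-7B-Instruct-v0.2 \
    --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN}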
114 changes: 114 additions & 0 deletions manifests/common/tgi_nv.yaml
@@ -0,0 +1,114 @@
---
# Source: tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: ConfigMap
metadata:
  name: tgi-config
  labels:
    helm.sh/chart: tgi-0.8.0
    app.kubernetes.io/name: tgi
    app.kubernetes.io/instance: tgi
    app.kubernetes.io/version: "2.1.0"
    app.kubernetes.io/managed-by: Helm
data:
  MODEL_ID: "Intel/neural-chat-7b-v3-3"
  PORT: "2080"
  HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
  HF_TOKEN: "insert-your-huggingface-token-here"
  MAX_INPUT_TOKENS: "1024"
  MAX_TOTAL_TOKENS: "4096"
  http_proxy: ""
  https_proxy: ""
  no_proxy: ""
  HABANA_LOGS: "/tmp/habana_logs"
  NUMBA_CACHE_DIR: "/tmp"
  TRANSFORMERS_CACHE: "/tmp/transformers_cache"
  HF_HOME: "/tmp/.cache/huggingface"
---
# Source: tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: Service
metadata:
  name: tgi
  labels:
    helm.sh/chart: tgi-0.8.0
    app.kubernetes.io/name: tgi
    app.kubernetes.io/instance: tgi
    app.kubernetes.io/version: "2.1.0"
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 2080
      protocol: TCP
      name: tgi
  selector:
    app.kubernetes.io/name: tgi
    app.kubernetes.io/instance: tgi
---
# Source: tgi/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: tgi
  labels:
    helm.sh/chart: tgi-0.8.0
    app.kubernetes.io/name: tgi
    app.kubernetes.io/instance: tgi
    app.kubernetes.io/version: "2.1.0"
    app.kubernetes.io/managed-by: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: tgi
      app.kubernetes.io/instance: tgi
  template:
    metadata:
      labels:
        app.kubernetes.io/name: tgi
        app.kubernetes.io/instance: tgi
    spec:
      securityContext:
        {}
      containers:
        - name: tgi
          envFrom:
            - configMapRef:
                name: tgi-config
            - configMapRef:
                name: extra-env-config
                optional: true
          securityContext:
            {}
          image: "ghcr.io/huggingface/text-generation-inference:2.0"
          imagePullPolicy: IfNotPresent
          volumeMounts:
            - mountPath: /data
              name: model-volume
            - mountPath: /tmp
              name: tmp
          ports:
            - name: http
              containerPort: 2080
              protocol: TCP
          resources:
            limits:
              nvidia.com/gpu: 1
      volumes:
        - name: model-volume
          hostPath:
            path: /mnt/opea-models
            type: Directory
        - name: tmp
          emptyDir: {}
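To exercise the rendered manifest directly, the usual flow is apply, wait, port-forward, then hit TGI's /generate endpoint (a sketch; the request body follows the public TGI API, and /mnt/opea-models must already exist on the node because the hostPath is declared with type: Directory):

  kubectl apply -f manifests/common/tgi_nv.yaml
  kubectl wait --for=condition=available deployment/tgi --timeout=300s
  kubectl port-forward svc/tgi 8080:80 &
  curl http://localhost:8080/generate -X POST \
    -H 'Content-Type: application/json' \
    -d '{"inputs":"What is deep learning?","parameters":{"max_new_tokens":32}}'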
8 changes: 5 additions & 3 deletions manifests/update_manifests.sh
@@ -14,9 +14,11 @@ function generate_yaml {
   outputdir=$2
 
   helm template $chart ../helm-charts/common/$chart --skip-tests --values ../helm-charts/common/$chart/values.yaml --set global.extraEnvConfig=extra-env-config,noProbe=true > ${outputdir}/$chart.yaml
-  if [ -f ../helm-charts/common/$chart/gaudi-values.yaml ]; then
-    helm template $chart ../helm-charts/common/$chart --skip-tests --values ../helm-charts/common/$chart/gaudi-values.yaml --set global.extraEnvConfig=extra-env-config,noProbe=true > ${outputdir}/${chart}_gaudi.yaml
-  fi
+
+  for f in `ls ../helm-charts/common/$chart/*-values.yaml 2>/dev/null`; do
+    ext=$(basename $f | cut -d'-' -f1)
+    helm template $chart ../helm-charts/common/$chart --skip-tests --values ${f} --set global.extraEnvConfig=extra-env-config,noProbe=true > ${outputdir}/${chart}_${ext}.yaml
+  done
 }
 
 mkdir -p $OUTPUTDIR
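The ext=$(basename $f | cut -d'-' -f1) line derives each output suffix from the values-file name, so gaudi-values.yaml still produces ${chart}_gaudi.yaml while the new nv-values.yaml produces ${chart}_nv.yaml. For example:

  $ basename ../helm-charts/common/tgi/nv-values.yaml | cut -d'-' -f1
  nv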
