Skip to content

Commit

Permalink
Add Nvidia GPU support for ChatQnA
Browse files Browse the repository at this point in the history
1. Add Helm-charts support
2. Add manifests support

Signed-off-by: PeterYang12 <[email protected]>
  • Loading branch information
PeterYang12 authored and daisy-ycguo committed Jul 30, 2024
1 parent 70205e5 commit 71daaf1
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 3 deletions.
2 changes: 2 additions & 0 deletions helm-charts/chatqna/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ export MODELNAME="Intel/neural-chat-7b-v3-3"
helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
# To use Gaudi device
#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-values.yaml
# To use Nvidia GPU
#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/nv-values.yaml
```

### IMPORTANT NOTE
Expand Down
52 changes: 52 additions & 0 deletions helm-charts/chatqna/nv-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Default values for chatqna.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 1

image:
repository: opea/chatqna:latest
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
# tag: "1.0"

port: 8888
service:
type: ClusterIP
port: 8888

securityContext:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
runAsNonRoot: true
runAsUser: 1000
capabilities:
drop:
- ALL
seccompProfile:
type: RuntimeDefault

# To override values in subchart tgi
tgi:
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
# LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
image:
repository: ghcr.io/huggingface/text-generation-inference
tag: "2.0"
resources:
limits:
nvidia.com/gpu: 1

global:
http_proxy:
https_proxy:
no_proxy:
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
LANGCHAIN_TRACING_V2: false
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
# set modelUseHostPath to host directory if you want to use hostPath volume for model storage
# comment out modeluseHostPath if you want to download the model from huggingface
modelUseHostPath: /mnt/opea-models
60 changes: 60 additions & 0 deletions helm-charts/common/tgi/nv-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Default values for tgi.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 1

port: 2080

image:
repository: ghcr.io/huggingface/text-generation-inference
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: "2.0"

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

podAnnotations: {}

podSecurityContext: {}
# fsGroup: 2000

securityContext:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
runAsNonRoot: true
runAsUser: 1000
capabilities:
drop:
- ALL
seccompProfile:
type: RuntimeDefault

service:
type: ClusterIP

resources:
limits:
nvidia.com/gpu: 1

nodeSelector: {}

tolerations: []

affinity: {}

LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

global:
http_proxy: ""
https_proxy: ""
no_proxy: ""
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
# set modelUseHostPath to host directory if you want to use hostPath volume for model storage
# comment out modeluseHostPath if you want to download the model from huggingface
modelUseHostPath: /mnt/opea-models
114 changes: 114 additions & 0 deletions manifests/common/tgi_nv.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
---
# Source: tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: ConfigMap
metadata:
name: tgi-config
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: tgi
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
data:
MODEL_ID: "Intel/neural-chat-7b-v3-3"
PORT: "2080"
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
HF_TOKEN: "insert-your-huggingface-token-here"
MAX_INPUT_TOKENS: "1024"
MAX_TOTAL_TOKENS: "4096"
http_proxy: ""
https_proxy: ""
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
---
# Source: tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: Service
metadata:
name: tgi
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: tgi
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 2080
protocol: TCP
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: tgi
---
# Source: tgi/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
name: tgi
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: tgi
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: tgi
template:
metadata:
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: tgi
spec:
securityContext:
{}
containers:
- name: tgi
envFrom:
- configMapRef:
name: tgi-config
- configMapRef:
name: extra-env-config
optional: true
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:2.0"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /tmp
name: tmp
ports:
- name: http
containerPort: 2080
protocol: TCP
resources:
limits:
nvidia.com/gpu: 1
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
- name: tmp
emptyDir: {}
8 changes: 5 additions & 3 deletions manifests/update_manifests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ function generate_yaml {
outputdir=$2

helm template $chart ../helm-charts/common/$chart --skip-tests --values ../helm-charts/common/$chart/values.yaml --set global.extraEnvConfig=extra-env-config,noProbe=true > ${outputdir}/$chart.yaml
if [ -f ../helm-charts/common/$chart/gaudi-values.yaml ]; then
helm template $chart ../helm-charts/common/$chart --skip-tests --values ../helm-charts/common/$chart/gaudi-values.yaml --set global.extraEnvConfig=extra-env-config,noProbe=true > ${outputdir}/${chart}_gaudi.yaml
fi

for f in `ls ../helm-charts/common/$chart/*-values.yaml 2>/dev/null `; do
ext=$(basename $f | cut -d'-' -f1)
helm template $chart ../helm-charts/common/$chart --skip-tests --values ${f} --set global.extraEnvConfig=extra-env-config,noProbe=true > ${outputdir}/${chart}_${ext}.yaml
done
}

mkdir -p $OUTPUTDIR
Expand Down

0 comments on commit 71daaf1

Please sign in to comment.