helm: Add guardrails llama_guard support (#391)

- Add component of guardrails llama_guard - Add guardrails support in chatqna e2e workload - Remove deprecated TRANSFORMERS_CACHE env in tgi because it results missing downloaded files on tgi-gaudi Signed-off-by: Lianhao Lu <[email protected]>
opea-project · Sep 5, 2024 · 8206a8c · 8206a8c
1 parent 5a2b306
commit 8206a8c
Show file tree

Hide file tree

Showing 21 changed files with 639 additions and 13 deletions.
diff --git a/helm-charts/chatqna/Chart.yaml b/helm-charts/chatqna/Chart.yaml
@@ -6,6 +6,15 @@ name: chatqna
 description: The Helm chart to deploy ChatQnA
 type: application
 dependencies:
+  - name: tgi
+    version: 1.0.0
+    alias: tgi-guardrails
+    repository: "file://../common/tgi"
+    condition: guardrails-usvc.enabled
+  - name: guardrails-usvc
+    version: 1.0.0
+    repository: "file://../common/guardrails-usvc"
+    condition: guardrails-usvc.enabled
   - name: tgi
     version: 1.0.0
     repository: "file://../common/tgi"

diff --git a/helm-charts/chatqna/README.md b/helm-charts/chatqna/README.md
@@ -28,6 +28,10 @@ helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --
 #helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-values.yaml
 # To use Nvidia GPU
 #helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/nv-values.yaml
+# To include guardrail component in chatqna on Xeon
+#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-values.yaml
+# To include guardrail component in chatqna on Gaudi
+#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-gaudi-values.yaml
 ```
 
 ### IMPORTANT NOTE

diff --git a/helm-charts/chatqna/guardrails-gaudi-values.yaml b/helm-charts/chatqna/guardrails-gaudi-values.yaml
@@ -0,0 +1,46 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:  # chatqna image override for the guardrails-enabled deployment
+  repository: opea/chatqna-guardrails
+  tag: "latest"
+
+# Guardrails-related config: turns on the guardrails-usvc dependency, which chatqna/values.yaml disables by default
+guardrails-usvc:
+  enabled: true
+  SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails"  # template expression; the guardrails-usvc ConfigMap renders it with tpl
+  SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
+
+# Gaudi-related config: Gaudi builds of the tei/tgi images, each limited to one habana.ai/gaudi device
+tei:
+  image:
+    repository: ghcr.io/huggingface/tei-gaudi
+    tag: synapse_1.16
+  resources:
+    limits:
+      habana.ai/gaudi: 1
+  securityContext:
+    readOnlyRootFilesystem: false  # NOTE(review): presumably tei-gaudi writes to its root filesystem; confirm
+
+tgi:
+  image:
+    repository: ghcr.io/huggingface/tgi-gaudi
+    tag: "2.0.1"
+  resources:
+    limits:
+      habana.ai/gaudi: 1
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
+  CUDA_GRAPHS: ""  # set to an empty string; NOTE(review): confirm how the tgi chart treats an empty value
+
+tgi-guardrails:  # second tgi instance (alias declared in chatqna/Chart.yaml) configured with the guard model
+  LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"  # same model as guardrails-usvc.SAFETY_GUARD_MODEL_ID above
+  image:
+    repository: ghcr.io/huggingface/tgi-gaudi
+    tag: "2.0.1"
+  resources:
+    limits:
+      habana.ai/gaudi: 1
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
+  CUDA_GRAPHS: ""  # same empty-string setting as the tgi section above
diff --git a/helm-charts/chatqna/guardrails-values.yaml b/helm-charts/chatqna/guardrails-values.yaml
@@ -0,0 +1,14 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:  # chatqna image override for the guardrails-enabled deployment
+  repository: opea/chatqna-guardrails
+  tag: "latest"
+
+# Guardrails-related config: turns on the guardrails-usvc dependency, which chatqna/values.yaml disables by default
+guardrails-usvc:
+  enabled: true
+  SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails"  # template expression; the guardrails-usvc ConfigMap renders it with tpl
+  SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
+tgi-guardrails:  # values for the tgi dependency aliased as tgi-guardrails in chatqna/Chart.yaml
+  LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"  # same model as guardrails-usvc.SAFETY_GUARD_MODEL_ID above
diff --git a/helm-charts/chatqna/templates/deployment.yaml b/helm-charts/chatqna/templates/deployment.yaml
@@ -38,6 +38,8 @@ spec:
               value: {{ .Release.Name }}-retriever-usvc
             - name: EMBEDDING_SERVICE_HOST_IP
               value: {{ .Release.Name }}-embedding-usvc
+            - name: GUARDRAIL_SERVICE_HOST_IP
+              value: {{ .Release.Name }}-guardrails-usvc
           securityContext:
             {{- toYaml .Values.securityContext | nindent 12 }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"

diff --git a/helm-charts/chatqna/values.yaml b/helm-charts/chatqna/values.yaml
@@ -39,6 +39,11 @@ affinity: {}
 tgi:
   LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
 
+# disable guardrails-usvc by default
+# See guardrails-values.yaml for guardrail related options
+guardrails-usvc:
+  enabled: false
+
 global:
   http_proxy: ""
   https_proxy: ""

diff --git a/helm-charts/common/guardrails-usvc/.helmignore b/helm-charts/common/guardrails-usvc/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/common/guardrails-usvc/Chart.yaml b/helm-charts/common/guardrails-usvc/Chart.yaml
@@ -0,0 +1,14 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: guardrails-usvc
+description: The Helm chart for deploying guardrails-usvc as microservice
+type: application
+version: 1.0.0
+appVersion: "v1.0"  # also the fallback image tag in templates/deployment.yaml when image.tag is unset
+dependencies:
+  - name: tgi
+    version: 1.0.0
+    repository: file://../tgi
+    condition: autodependency.enabled  # tgi is pulled in only when this value is true (README Option2)
diff --git a/helm-charts/common/guardrails-usvc/README.md b/helm-charts/common/guardrails-usvc/README.md
@@ -0,0 +1,57 @@
+# guardrails-usvc
+
+Helm chart for deploying the guardrails microservice.
+
+guardrails-usvc depends on TGI; you should set SAFETY_GUARD_ENDPOINT to the TGI endpoint.
+
+## (Option1): Installing the chart separately:
+
+First, you need to install the tgi chart, please refer to the [tgi](../tgi) chart for more information. Please use model `meta-llama/Meta-Llama-Guard-2-8B` during installation.
+
+After you've deployed the tgi chart successfully, please run `kubectl get svc` to get the tgi service endpoint, e.g. `http://tgi`.
+
+To install the chart, run the following:
+
+```console
+cd GenAIInfra/helm-charts/common/guardrails-usvc
+export HFTOKEN="insert-your-huggingface-token-here"
+export SAFETY_GUARD_ENDPOINT="http://tgi"
+export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
+helm dependency update
+helm install guardrails-usvc . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set SAFETY_GUARD_ENDPOINT=${SAFETY_GUARD_ENDPOINT} --set SAFETY_GUARD_MODEL_ID=${SAFETY_GUARD_MODEL_ID} --wait
+```
+
+## (Option2): Installing the chart with dependencies automatically:
+
+```console
+cd GenAIInfra/helm-charts/common/guardrails-usvc
+export HFTOKEN="insert-your-huggingface-token-here"
+helm dependency update
+helm install guardrails-usvc . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set autodependency.enabled=true --wait
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/guardrails-usvc 9090:9090` to expose the guardrails-usvc service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl http://localhost:9090/v1/guardrails \
+    -X POST \
+    -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \
+    -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key                             | Type   | Default                              | Description                                                                                                                                                  |
+| ------------------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `""`                                 | Your own Hugging Face API token                                                                                                                              |
+| global.modelUseHostPath         | string | `"/mnt/opea-models"`                 | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory |
+| image.repository                | string | `"opea/guardrails-usvc"`             |                                                                                                                                                              |
+| service.port                    | string | `"9090"`                             |                                                                                                                                                              |
+| SAFETY_GUARD_ENDPOINT           | string | `""`                                 | LLM endpoint                                                                                                                                                 |
+| SAFETY_GUARD_MODEL_ID           | string | `"meta-llama/Meta-Llama-Guard-2-8B"` | Model ID for the underlying LLM service is using                                                                                                             |
diff --git a/helm-charts/common/guardrails-usvc/templates/_helpers.tpl b/helm-charts/common/guardrails-usvc/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Chart name, or .Values.nameOverride when set; truncated to 63 chars with a trailing "-" removed.
+*/}}
+{{- define "guardrails-usvc.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name: .Values.fullnameOverride when set, otherwise "<release>-<name>".
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If the release name already contains the chart name, the release name alone is used.
+*/}}
+{{- define "guardrails-usvc.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+"<chart name>-<chart version>" for the helm.sh/chart label; "+" is replaced by "_" and the result is cut to 63 chars.
+*/}}
+{{- define "guardrails-usvc.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: chart label, the selector labels, the app version (when set) and the managing service.
+*/}}
+{{- define "guardrails-usvc.labels" -}}
+helm.sh/chart: {{ include "guardrails-usvc.chart" . }}
+{{ include "guardrails-usvc.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels: app name plus release instance; used by the Deployment selector, its pod template and the Service selector.
+*/}}
+{{- define "guardrails-usvc.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "guardrails-usvc.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Service account name: serviceAccount.name, falling back to the fullname when serviceAccount.create is set and to "default" otherwise.
+*/}}
+{{- define "guardrails-usvc.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "guardrails-usvc.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/common/guardrails-usvc/templates/configmap.yaml b/helm-charts/common/guardrails-usvc/templates/configmap.yaml
@@ -0,0 +1,29 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "guardrails-usvc.fullname" . }}-config
+  labels:
+    {{- include "guardrails-usvc.labels" . | nindent 4 }}
+data:
+  {{- if .Values.SAFETY_GUARD_ENDPOINT }}
+  SAFETY_GUARD_ENDPOINT: {{ tpl .Values.SAFETY_GUARD_ENDPOINT . | quote}}  # rendered with tpl, so the value may itself contain template expressions
+  {{- else }}
+  SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi"  # fallback when SAFETY_GUARD_ENDPOINT is unset
+  {{- end }}
+  SAFETY_GUARD_MODEL_ID: {{ .Values.SAFETY_GUARD_MODEL_ID | quote }}
+  HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}  # NOTE(review): token is stored in a ConfigMap, not a Secret
+  HF_HOME: "/tmp/.cache/huggingface"  # under /tmp, which deployment.yaml mounts as an emptyDir volume
+  LOGFLAG: {{ .Values.LOGFLAG | quote }}
+  {{- if .Values.global.HF_ENDPOINT }}
+  HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
+  {{- end }}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  {{- if and (not .Values.SAFETY_GUARD_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }}
+  no_proxy: "{{ .Release.Name }}-tgi,{{ .Values.global.no_proxy }}"  # only the fallback endpoint host is added; a custom SAFETY_GUARD_ENDPOINT host is not
+  {{- else }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}  # global value passed through unchanged
+  {{- end }}
diff --git a/helm-charts/common/guardrails-usvc/templates/deployment.yaml b/helm-charts/common/guardrails-usvc/templates/deployment.yaml
@@ -0,0 +1,79 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "guardrails-usvc.fullname" . }}
+  labels:
+    {{- include "guardrails-usvc.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "guardrails-usvc.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "guardrails-usvc.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}  # container is named after the Helm release, not the chart
+          envFrom:
+            - configMapRef:
+                name: {{ include "guardrails-usvc.fullname" . }}-config
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true  # the pod still starts when this extra ConfigMap does not exist
+            {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          ports:
+            - name: guardrails-usvc
+              containerPort: 9090  # fixed port; service.yaml uses the same number as targetPort
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp  # writable scratch space; the ConfigMap places HF_HOME under /tmp
+              name: tmp
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: tmp
+          emptyDir: {}  # ephemeral volume backing the /tmp mount above
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
diff --git a/helm-charts/common/guardrails-usvc/templates/service.yaml b/helm-charts/common/guardrails-usvc/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "guardrails-usvc.fullname" . }}
+  labels:
+    {{- include "guardrails-usvc.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}  # port exposed by the Service, configurable through values
+      targetPort: 9090  # fixed; matches containerPort in deployment.yaml
+      protocol: TCP
+      name: guardrails-usvc
+  selector:
+    {{- include "guardrails-usvc.selectorLabels" . | nindent 4 }}