diff --git a/helm-charts/common/lvm-uservice/.helmignore b/helm-charts/common/lvm-uservice/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/common/lvm-uservice/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/common/lvm-uservice/Chart.yaml b/helm-charts/common/lvm-uservice/Chart.yaml new file mode 100644 index 000000000..46e4fdb20 --- /dev/null +++ b/helm-charts/common/lvm-uservice/Chart.yaml @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: lvm-uservice +description: The Helm chart for deploying lvm as microservice +type: application +version: 1.0.0 +# The lvm microservice server version +appVersion: "v1.0" +dependencies: + - name: tgi + version: 1.0.0 + repository: file://../tgi + condition: tgi.enabled diff --git a/helm-charts/common/lvm-uservice/README.md b/helm-charts/common/lvm-uservice/README.md new file mode 100644 index 000000000..d28a49ed1 --- /dev/null +++ b/helm-charts/common/lvm-uservice/README.md @@ -0,0 +1,55 @@ +# lvm-uservice + +Helm chart for deploying LVM microservice. + +lvm-uservice depends on TGI; set LVM_ENDPOINT to the TGI service endpoint. + +## (Option1): Installing the chart separately + +First, you need to install the tgi chart, please refer to the [tgi](../tgi) chart for more information. + +After you've deployed the tgi chart successfully, please run `kubectl get svc` to get the tgi service endpoint, e.g. `http://tgi`. 
+ +To install the chart, run the following: + +```console +cd GenAIInfra/helm-charts/common/lvm-uservice +export HFTOKEN="insert-your-huggingface-token-here" +export LVM_ENDPOINT="http://tgi" +helm dependency update +helm install lvm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set LVM_ENDPOINT=${LVM_ENDPOINT} --wait +``` + +## (Option2): Installing the chart with dependencies automatically + +```console +cd GenAIInfra/helm-charts/common/lvm-uservice +export HFTOKEN="insert-your-huggingface-token-here" +helm dependency update +helm install lvm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set tgi.enabled=true --wait +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/lvm-uservice 9399:9399` to expose the lvm-uservice service for access. + +Open another terminal and run the following command to verify that the service is working: + +```console +curl http://localhost:9399/v1/lvm \ + -X POST \ + -d '{"image":"iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC","prompt":"What is this?"}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ------------------------------- | ------ | ---------------- | ------------------------------- | +| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | +| image.repository | string | `"opea/lvm-tgi"` | | +| service.port | string | `"9399"` | | +| LVM_ENDPOINT | string | `""` | LVM endpoint | +| global.monitoring | bool | false | Service usage metrics | diff --git a/helm-charts/common/lvm-uservice/ci-values.yaml b/helm-charts/common/lvm-uservice/ci-values.yaml new file mode 100644 index 000000000..a4c378251 --- /dev/null +++ b/helm-charts/common/lvm-uservice/ci-values.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# 
SPDX-License-Identifier: Apache-2.0 + +# Default values for lvm-uservice. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +tgi: + enabled: true diff --git a/helm-charts/common/lvm-uservice/templates/_helpers.tpl b/helm-charts/common/lvm-uservice/templates/_helpers.tpl new file mode 100644 index 000000000..86c26b0e6 --- /dev/null +++ b/helm-charts/common/lvm-uservice/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "lvm-uservice.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "lvm-uservice.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "lvm-uservice.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "lvm-uservice.labels" -}} +helm.sh/chart: {{ include "lvm-uservice.chart" . }} +{{ include "lvm-uservice.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "lvm-uservice.selectorLabels" -}} +app.kubernetes.io/name: {{ include "lvm-uservice.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "lvm-uservice.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "lvm-uservice.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/common/lvm-uservice/templates/configmap.yaml b/helm-charts/common/lvm-uservice/templates/configmap.yaml new file mode 100644 index 000000000..62f75d323 --- /dev/null +++ b/helm-charts/common/lvm-uservice/templates/configmap.yaml @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "lvm-uservice.fullname" . }}-config + labels: + {{- include "lvm-uservice.labels" . | nindent 4 }} +data: + {{- if .Values.LVM_ENDPOINT }} + LVM_ENDPOINT: {{ .Values.LVM_ENDPOINT | quote}} + {{- else }} + LVM_ENDPOINT: "http://{{ .Release.Name }}-tgi" + {{- end }} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.LVM_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }} + no_proxy: "{{ .Release.Name }}-tgi,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} + LOGFLAG: {{ .Values.LOGFLAG | quote }} diff --git a/helm-charts/common/lvm-uservice/templates/deployment.yaml b/helm-charts/common/lvm-uservice/templates/deployment.yaml new file mode 100644 index 000000000..308158a95 --- /dev/null +++ b/helm-charts/common/lvm-uservice/templates/deployment.yaml @@ -0,0 +1,88 @@ +# Copyright (C) 2024 Intel 
Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "lvm-uservice.fullname" . }} + labels: + {{- include "lvm-uservice.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "lvm-uservice.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "lvm-uservice.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "lvm-uservice.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: lvm-uservice + containerPort: 9399 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "lvm-uservice.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/common/lvm-uservice/templates/service.yaml b/helm-charts/common/lvm-uservice/templates/service.yaml new file mode 100644 index 000000000..37e454dec --- /dev/null +++ b/helm-charts/common/lvm-uservice/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "lvm-uservice.fullname" . }} + labels: + {{- include "lvm-uservice.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 9399 + protocol: TCP + name: lvm-uservice + selector: + {{- include "lvm-uservice.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/common/lvm-uservice/templates/servicemonitor.yaml b/helm-charts/common/lvm-uservice/templates/servicemonitor.yaml new file mode 100644 index 000000000..9fe58419b --- /dev/null +++ b/helm-charts/common/lvm-uservice/templates/servicemonitor.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.global.monitoring }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "lvm-uservice.fullname" . }} + labels: + release: {{ .Values.global.prometheusRelease }} +spec: + selector: + matchLabels: + {{- include "lvm-uservice.selectorLabels" . 
| nindent 6 }} + endpoints: + - port: lvm-uservice + interval: 5s +{{- end }} diff --git a/helm-charts/common/lvm-uservice/templates/tests/test-pod.yaml b/helm-charts/common/lvm-uservice/templates/tests/test-pod.yaml new file mode 100644 index 000000000..7782cdb1e --- /dev/null +++ b/helm-charts/common/lvm-uservice/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "lvm-uservice.fullname" . }}-testpod" + labels: + {{- include "lvm-uservice.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "lvm-uservice.fullname" . }}:{{ .Values.service.port }}/v1/lvm -sS --fail-with-body \ + -X POST \ + -d '{"image":"iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC","prompt":"What is this?"}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/common/lvm-uservice/values.yaml b/helm-charts/common/lvm-uservice/values.yaml new file mode 100644 index 000000000..fdc1be187 --- /dev/null +++ b/helm-charts/common/lvm-uservice/values.yaml @@ -0,0 +1,101 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for lvm-uservice. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +tgi: + enabled: false + LLM_MODEL_ID: llava-hf/llava-v1.6-mistral-7b-hf + MAX_INPUT_LENGTH: "4096" + MAX_TOTAL_TOKENS: "8192" + +replicaCount: 1 +LVM_ENDPOINT: "" + +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. +LOGFLAG: "" + +image: + repository: opea/lvm-tgi + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: false + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + # The default port for lvm service is 9399 + port: 9399 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: lvm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: lvm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: lvm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + + # Install Prometheus serviceMonitor for service + monitoring: false + + # Prometheus Helm install release name for serviceMonitor + prometheusRelease: prometheus-stack diff --git a/helm-charts/visualqna/.helmignore b/helm-charts/visualqna/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/visualqna/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/visualqna/Chart.yaml b/helm-charts/visualqna/Chart.yaml new file mode 100644 index 000000000..8d45ced6d --- /dev/null +++ b/helm-charts/visualqna/Chart.yaml @@ -0,0 +1,20 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: visualqna +description: The Helm chart to deploy VisualQnA +type: application +dependencies: + - name: tgi + version: 1.0.0 + repository: "file://../common/tgi" + - name: lvm-uservice + version: 1.0.0 + repository: "file://../common/lvm-uservice" + - name: ui + alias: visualqna-ui + version: 1.0.0 + repository: "file://../common/ui" +version: 1.0.0 +appVersion: "v1.0" diff --git a/helm-charts/visualqna/README.md b/helm-charts/visualqna/README.md new file mode 100644 index 000000000..77f221cd9 --- /dev/null +++ b/helm-charts/visualqna/README.md @@ -0,0 +1,45 @@ +# VisualQnA + +Helm chart for deploying VisualQnA service. VisualQnA depends on the following services: + +- [lvm-uservice](../common/lvm-uservice/README.md) +- [tgi](../common/tgi/README.md) +- [ui](../common/ui/README.md) + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Curl command and UI are the two options that can be leveraged to verify the result. + +### Verify the workload through curl command + +Run the command `kubectl port-forward svc/visualqna 8888:8888` to expose the service for access. + +Open another terminal and run the following command to verify that the service is working: + +```console +curl http://localhost:8888/v1/visualqna \ + -H "Content-Type: application/json" \ + -d '{"messages": "What is the revenue of Nike in 2023?"}' +``` + +### Verify the workload through UI + +The UI has already been installed via the Helm chart. 
To access it, use the external IP of one of your Kubernetes nodes along with the NGINX port. You can find the NGINX port using the following command: + +```bash +export port=$(kubectl get service visualqna-nginx --output='jsonpath={.spec.ports[0].nodePort}') +echo $port +``` + +Open a browser to access `http://<k8s-node-ip-address>:${port}` to play with the VisualQnA workload. + +## Values + +| Key | Type | Default | Description | +| ----------------- | ------ | ------------------------------------- | -------------------------------------------------------------------------------------- | +| image.repository | string | `"opea/visualqna"` | | +| service.port | string | `"8888"` | | +| tgi.LLM_MODEL_ID | string | `"llava-hf/llava-v1.6-mistral-7b-hf"` | Model ID from https://huggingface.co/, or predownloaded model directory | +| global.monitoring | bool | false | Enable usage metrics for the service components. See ../monitoring.md before enabling! | diff --git a/helm-charts/visualqna/ci-gaudi-values.yaml b/helm-charts/visualqna/ci-gaudi-values.yaml new file mode 120000 index 000000000..7243d31b2 --- /dev/null +++ b/helm-charts/visualqna/ci-gaudi-values.yaml @@ -0,0 +1 @@ +gaudi-values.yaml \ No newline at end of file diff --git a/helm-charts/visualqna/ci-values.yaml b/helm-charts/visualqna/ci-values.yaml new file mode 120000 index 000000000..7d1010096 --- /dev/null +++ b/helm-charts/visualqna/ci-values.yaml @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/helm-charts/visualqna/gaudi-values.yaml b/helm-charts/visualqna/gaudi-values.yaml new file mode 100644 index 000000000..901d58613 --- /dev/null +++ b/helm-charts/visualqna/gaudi-values.yaml @@ -0,0 +1,31 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Accelerate inferencing in heaviest components to improve performance +# by overriding their subchart values + +# TGI: largest bottleneck for VisualQnA +tgi: + accelDevice: "gaudi" + image: + repository: ghcr.io/huggingface/tgi-gaudi + 
tag: "2.0.5" + resources: + limits: + habana.ai/gaudi: 1 + MAX_INPUT_LENGTH: "4096" + MAX_TOTAL_TOKENS: "8192" + CUDA_GRAPHS: "" + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/visualqna/templates/_helpers.tpl b/helm-charts/visualqna/templates/_helpers.tpl new file mode 100644 index 000000000..68b296a94 --- /dev/null +++ b/helm-charts/visualqna/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "visualqna.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "visualqna.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "visualqna.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "visualqna.labels" -}} +helm.sh/chart: {{ include "visualqna.chart" . }} +{{ include "visualqna.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "visualqna.selectorLabels" -}} +app.kubernetes.io/name: {{ include "visualqna.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "visualqna.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "visualqna.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/visualqna/templates/deployment.yaml b/helm-charts/visualqna/templates/deployment.yaml new file mode 100644 index 000000000..9bacfbbbf --- /dev/null +++ b/helm-charts/visualqna/templates/deployment.yaml @@ -0,0 +1,85 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "visualqna.fullname" . }} + labels: + {{- include "visualqna.labels" . | nindent 4 }} + app: {{ include "visualqna.fullname" . }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "visualqna.selectorLabels" . | nindent 6 }} + app: {{ include "visualqna.fullname" . }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "visualqna.selectorLabels" . | nindent 8 }} + app: {{ include "visualqna.fullname" . }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + env: + - name: LVM_SERVICE_HOST_IP + value: {{ .Release.Name }}-lvm-uservice + - name: LVM_SERVICE_PORT + value: {{ index .Values "lvm-uservice" "service" "port" | quote }} # must match the port exposed by the lvm-uservice subchart Service + - name: MEGA_SERVICE_HOST_IP + value: "0.0.0.0" + - name: MEGA_SERVICE_PORT + value: {{ .Values.port | quote }} + {{- if .Values.LOGFLAG }} + - name: LOGFLAG + value: {{ .Values.LOGFLAG | quote }} + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + volumeMounts: + - mountPath: /tmp + name: tmp + ports: + - name: visualqna + containerPort: {{ .Values.port }} + protocol: TCP + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "visualqna.selectorLabels" . | nindent 14 }} + app: {{ include "visualqna.fullname" . 
}} + {{- end }} diff --git a/helm-charts/visualqna/templates/nginx-deployment.yaml b/helm-charts/visualqna/templates/nginx-deployment.yaml new file mode 100644 index 000000000..6d44f7b94 --- /dev/null +++ b/helm-charts/visualqna/templates/nginx-deployment.yaml @@ -0,0 +1,94 @@ +apiVersion: v1 +data: + default.conf: |+ + # Copyright (C) 2024 Intel Corporation + # SPDX-License-Identifier: Apache-2.0 + + + server { + listen 80; + listen [::]:80; + + location /home { + alias /usr/share/nginx/html/index.html; + } + + location / { + proxy_pass http://{{ include "ui.fullname" (index .Subcharts "visualqna-ui") }}:{{ index .Values "visualqna-ui" "service" "port" }}; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + location /v1/visualqna { + proxy_pass http://{{ include "visualqna.fullname" . }}:{{ .Values.service.port }}; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + } + +kind: ConfigMap +metadata: + name: {{ include "visualqna.fullname" . }}-nginx-config +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "visualqna.fullname" . }}-nginx + labels: + {{- include "visualqna.labels" . | nindent 4 }} + app: {{ include "visualqna.fullname" . }}-nginx +spec: + selector: + matchLabels: + {{- include "visualqna.selectorLabels" . | nindent 6 }} + app: {{ include "visualqna.fullname" . }}-nginx + template: + metadata: + labels: + {{- include "visualqna.selectorLabels" . | nindent 8 }} + app: {{ include "visualqna.fullname" . 
}}-nginx + spec: + containers: + - image: nginx:1.27.1 + imagePullPolicy: {{ .Values.image.pullPolicy }} + name: nginx + volumeMounts: + - mountPath: /etc/nginx/conf.d + name: nginx-config-volume + securityContext: {} + volumes: + - configMap: + defaultMode: 420 + name: {{ include "visualqna.fullname" . }}-nginx-config + name: nginx-config-volume + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "visualqna.fullname" . }}-nginx +spec: + ports: + - port: 80 + protocol: TCP + targetPort: 80 + selector: + {{- include "visualqna.selectorLabels" . | nindent 4 }} + app: {{ include "visualqna.fullname" . }}-nginx + type: NodePort diff --git a/helm-charts/visualqna/templates/service.yaml b/helm-charts/visualqna/templates/service.yaml new file mode 100644 index 000000000..ec6fa2113 --- /dev/null +++ b/helm-charts/visualqna/templates/service.yaml @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "visualqna.fullname" . }} + labels: + {{- include "visualqna.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.port }} + protocol: TCP + name: visualqna + selector: + {{- include "visualqna.selectorLabels" . | nindent 4 }} + app: {{ include "visualqna.fullname" . 
}} diff --git a/helm-charts/visualqna/templates/tests/test-pod.yaml b/helm-charts/visualqna/templates/tests/test-pod.yaml new file mode 100644 index 000000000..32c9c5bbb --- /dev/null +++ b/helm-charts/visualqna/templates/tests/test-pod.yaml @@ -0,0 +1,48 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "visualqna.fullname" . }}-testpod" + labels: + {{- include "visualqna.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + set -x + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "visualqna.fullname" . }}:{{ .Values.service.port }}/v1/visualqna -sS --fail-with-body \ + -H 'Content-Type: application/json' \ + -d '{"messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What is in this image?" + }, + { + "type": "image_url", + "image_url": { + "url": "https://www.ilankelman.org/stopsigns/australia.jpg" + } + } + ] + } + ], + "max_tokens": 300 + }' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/visualqna/values.yaml b/helm-charts/visualqna/values.yaml new file mode 100644 index 000000000..829da3a2e --- /dev/null +++ b/helm-charts/visualqna/values.yaml @@ -0,0 +1,78 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for visualqna. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +image: + repository: opea/visualqna + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "latest" + +port: 8888 +service: + type: ClusterIP + port: 8888 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +# This is just to avoid Helm errors when HPA is NOT used +# (use hpa-values.yaml files to actually enable HPA). +horizontalPodAutoscaler: + enabled: false + +# Override values in specific subcharts +tgi: + MAX_INPUT_LENGTH: "4096" + MAX_TOTAL_TOKENS: "8192" + LLM_MODEL_ID: llava-hf/llava-v1.6-mistral-7b-hf + +visualqna-ui: + image: + repository: opea/visualqna-ui + tag: "latest" + BACKEND_SERVICE_ENDPOINT: "/v1/visualqna" + containerPort: 5173 + +# If you would like to switch to traditional UI image +# Uncomment the following lines +# visualqna-ui: +# image: +# repository: "opea/visualqna-ui" +# tag: "latest" +# containerPort: "5173" + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + # set modelUseHostPath or modelUsePVC to use model cache. + modelUseHostPath: "" + # modelUseHostPath: /mnt/opea-models + # modelUsePVC: model-volume + + # Install Prometheus serviceMonitors for service components + monitoring: false + + # Prometheus Helm install release name needed for serviceMonitors + prometheusRelease: prometheus-stack