From 9b5def0c26ae97a4c8a6e52a42c44917e9d79352 Mon Sep 17 00:00:00 2001
From: Lianhao Lu
Date: Tue, 16 Jul 2024 17:19:57 +0800
Subject: [PATCH] Add helm test for chart tgi

- Added helm test for chart tgi
- Updated the chart README

Signed-off-by: Lianhao Lu
---
 helm-charts/common/tgi/README.md              | 31 ++++++++++++++-----
 helm-charts/common/tgi/templates/NOTES.txt    | 16 ----------
 .../common/tgi/templates/tests/test-pod.yaml  | 25 +++++++++++++++
 3 files changed, 48 insertions(+), 24 deletions(-)
 delete mode 100644 helm-charts/common/tgi/templates/NOTES.txt
 create mode 100644 helm-charts/common/tgi/templates/tests/test-pod.yaml

diff --git a/helm-charts/common/tgi/README.md b/helm-charts/common/tgi/README.md
index b7d1d20e..62e4d70c 100644
--- a/helm-charts/common/tgi/README.md
+++ b/helm-charts/common/tgi/README.md
@@ -20,16 +20,31 @@ By default, the tgi service will downloading the "bigscience/bloom-560m" which i
 If you already cached the model locally, you can pass it to container like this example:
 
-MODELDIR=/home/ubuntu/hfmodels
+MODELDIR=/mnt/opea-models
 
 MODELNAME="/data/models--bigscience--bloom-560m"
 
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/tgi 2080:80` to expose the tgi service for access.
+
+Open another terminal and run the following command to verify the service is working:
+
+```console
+curl http://localhost:2080/generate \
+  -X POST \
+  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+  -H 'Content-Type: application/json'
+```
+
 ## Values
 
-| Key                     | Type   | Default                                           | Description |
-| ----------------------- | ------ | ------------------------------------------------- | ----------- |
-| LLM_MODEL_ID            | string | `"bigscience/bloom-560m"`                         | Model id from https://huggingface.co/, or predownloaded model directory |
-| port                    | string | `2080`                                            | Hugging Face Text Generation Inference service port |
-| global.modelUseHostPath | string | `"/mnt/opea-models"`                              | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory |
-| image.repository        | string | `"ghcr.io/huggingface/text-generation-inference"` | |
-| image.tag               | string | `"1.4"`                                           | |
+| Key                             | Type   | Default                                           | Description |
+| ------------------------------- | ------ | ------------------------------------------------- | ----------- |
+| LLM_MODEL_ID                    | string | `"bigscience/bloom-560m"`                         | Model id from https://huggingface.co/, or predownloaded model directory |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here`              | Hugging Face API token |
+| global.modelUseHostPath         | string | `"/mnt/opea-models"`                              | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Setting this to null/empty will force it to download the model. |
+| image.repository                | string | `"ghcr.io/huggingface/text-generation-inference"` | |
+| image.tag                       | string | `"1.4"`                                           | |
 
diff --git a/helm-charts/common/tgi/templates/NOTES.txt b/helm-charts/common/tgi/templates/NOTES.txt
deleted file mode 100644
index ddaeaec8..00000000
--- a/helm-charts/common/tgi/templates/NOTES.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-1. Get the application URL by running these commands:
-{{- if contains "NodePort" .Values.service.type }}
-  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "tgi.fullname" . }})
-  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
-  echo http://$NODE_IP:$NODE_PORT
-{{- else if contains "LoadBalancer" .Values.service.type }}
-     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
-           You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "tgi.fullname" . }}'
-  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "tgi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
-  echo http://$SERVICE_IP:{{ .Values.service.port }}
-{{- else if contains "ClusterIP" .Values.service.type }}
-  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "tgi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
-  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
-  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
-  echo "Visit http://127.0.0.1:8080 to use your application"
-{{- end }}
diff --git a/helm-charts/common/tgi/templates/tests/test-pod.yaml b/helm-charts/common/tgi/templates/tests/test-pod.yaml
new file mode 100644
index 00000000..a4f32782
--- /dev/null
+++ b/helm-charts/common/tgi/templates/tests/test-pod.yaml
@@ -0,0 +1,25 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "tgi.fullname" . }}-testpod"
+  labels:
+    {{- include "tgi.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failed"
+spec:
+  containers:
+    - name: curl
+      image: alpine/curl
+      #image: python:3.10.14
+      command: ['sh', '-c']
+      args:
+        - |
+          curl http://{{ include "tgi.fullname" . }}/generate -sS --fail-with-body \
+            -X POST \
+            -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+            -H 'Content-Type: application/json'
+  restartPolicy: Never
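
For reference, a quick way to exercise the new test hook once this patch is applied is `helm test`. This is a minimal sketch; the release name `tgi`, the chart path, and the `HFTOKEN` variable are assumptions for illustration, not part of the patch:

```console
# From helm-charts/common: install the chart (release name "tgi" is assumed)
helm install tgi ./tgi --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN}
# Once the tgi pod is ready, run the chart's test hook; helm launches the
# test pod defined in templates/tests/test-pod.yaml and reports pass/fail
helm test tgi
```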