From 9b5def0c26ae97a4c8a6e52a42c44917e9d79352 Mon Sep 17 00:00:00 2001
From: Lianhao Lu
Date: Tue, 16 Jul 2024 17:19:57 +0800
Subject: [PATCH] Add helm test for chart tgi

- Added helm test for chart tgi
- Updated the chart README

Signed-off-by: Lianhao Lu
---
 helm-charts/common/tgi/README.md              | 31 ++++++++++++++-----
 helm-charts/common/tgi/templates/NOTES.txt    | 16 ----------
 .../common/tgi/templates/tests/test-pod.yaml  | 25 +++++++++++++++
 3 files changed, 48 insertions(+), 24 deletions(-)
 delete mode 100644 helm-charts/common/tgi/templates/NOTES.txt
 create mode 100644 helm-charts/common/tgi/templates/tests/test-pod.yaml

diff --git a/helm-charts/common/tgi/README.md b/helm-charts/common/tgi/README.md
index b7d1d20e..62e4d70c 100644
--- a/helm-charts/common/tgi/README.md
+++ b/helm-charts/common/tgi/README.md
@@ -20,16 +20,31 @@ By default, the tgi service will downloading the "bigscience/bloom-560m" which i
 If you already cached the model locally, you can pass it to container like this example:
 
-MODELDIR=/home/ubuntu/hfmodels
+MODELDIR=/mnt/opea-models
 
 MODELNAME="/data/models--bigscience--bloom-560m"
 
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/tgi 2080:80` to expose the tgi service for access.
+
+Open another terminal and run the following command to verify the service is working:
+
+```console
+curl http://localhost:2080/generate \
+  -X POST \
+  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+  -H 'Content-Type: application/json'
+```
+
 ## Values
 
-| Key                     | Type   | Default                                           | Description |
-| ----------------------- | ------ | ------------------------------------------------- | ----------- |
-| LLM_MODEL_ID            | string | `"bigscience/bloom-560m"`                         | Model id from https://huggingface.co/, or predownloaded model directory |
-| port                    | string | `2080`                                            | Hugging Face Text Generation Inference service port |
-| global.modelUseHostPath | string | `"/mnt/opea-models"`                              | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory |
-| image.repository        | string | `"ghcr.io/huggingface/text-generation-inference"` | |
-| image.tag               | string | `"1.4"`                                           | |
+| Key                             | Type   | Default                                           | Description |
+| ------------------------------- | ------ | ------------------------------------------------- | ----------- |
+| LLM_MODEL_ID                    | string | `"bigscience/bloom-560m"`                         | Model id from https://huggingface.co/, or predownloaded model directory |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here`              | Hugging Face API token |
+| global.modelUseHostPath         | string | `"/mnt/opea-models"`                              | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Setting this to null/empty will force it to download the model. |
+| image.repository                | string | `"ghcr.io/huggingface/text-generation-inference"` | |
+| image.tag                       | string | `"1.4"`                                           | |
 
diff --git a/helm-charts/common/tgi/templates/NOTES.txt b/helm-charts/common/tgi/templates/NOTES.txt
deleted file mode 100644
index ddaeaec8..00000000
--- a/helm-charts/common/tgi/templates/NOTES.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-1. Get the application URL by running these commands:
-{{- if contains "NodePort" .Values.service.type }}
-  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "tgi.fullname" . }})
-  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
-  echo http://$NODE_IP:$NODE_PORT
-{{- else if contains "LoadBalancer" .Values.service.type }}
-     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
-           You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "tgi.fullname" . }}'
-  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "tgi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
-  echo http://$SERVICE_IP:{{ .Values.service.port }}
-{{- else if contains "ClusterIP" .Values.service.type }}
-  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "tgi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
-  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
-  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
-  echo "Visit http://127.0.0.1:8080 to use your application"
-{{- end }}
diff --git a/helm-charts/common/tgi/templates/tests/test-pod.yaml b/helm-charts/common/tgi/templates/tests/test-pod.yaml
new file mode 100644
index 00000000..a4f32782
--- /dev/null
+++ b/helm-charts/common/tgi/templates/tests/test-pod.yaml
@@ -0,0 +1,25 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "tgi.fullname" . }}-testpod"
+  labels:
+    {{- include "tgi.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failed"
+spec:
+  containers:
+    - name: curl
+      image: alpine/curl
+      #image: python:3.10.14
+      command: ['sh', '-c']
+      args:
+        - |
+          curl http://{{ include "tgi.fullname" . }}/generate -sS --fail-with-body \
+            -X POST \
+            -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+            -H 'Content-Type: application/json'
+  restartPolicy: Never
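
For reference, a quick way to exercise the new test hook once this patch is applied is `helm test`. This is a minimal sketch; the release name `tgi`, the chart path, and the `HFTOKEN` variable are assumptions for illustration, not part of the patch:

```console
# From helm-charts/common: install the chart (release name "tgi" is assumed)
helm install tgi ./tgi --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN}
# Once the tgi pod is ready, run the chart's test hook; helm launches the
# test pod defined in templates/tests/test-pod.yaml and reports pass/fail
helm test tgi
```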