From 419e5bfc857095bbcea56747e3f4feefc6d81311 Mon Sep 17 00:00:00 2001
From: Lianhao Lu
Date: Thu, 18 Jul 2024 10:05:42 +0000
Subject: [PATCH] helm: Update codegen to latest

- Update codegen chart to latest tgi-gaudi version and model
- Add helm test to codegen

Signed-off-by: Lianhao Lu
---
 helm-charts/codegen/README.md                 | 43 ++++++++++++++-----
 helm-charts/codegen/gaudi-values.yaml         |  5 +--
 helm-charts/codegen/templates/NOTES.txt       | 16 -------
 .../templates/tests/test-connection.yaml      | 18 --------
 .../codegen/templates/tests/test-pod.yaml     | 24 +++++++++++
 helm-charts/codegen/values.yaml               |  3 +-
 6 files changed, 59 insertions(+), 50 deletions(-)
 delete mode 100644 helm-charts/codegen/templates/NOTES.txt
 delete mode 100644 helm-charts/codegen/templates/tests/test-connection.yaml
 create mode 100644 helm-charts/codegen/templates/tests/test-pod.yaml

diff --git a/helm-charts/codegen/README.md b/helm-charts/codegen/README.md
index 7a35c3cd..4c448e4a 100644
--- a/helm-charts/codegen/README.md
+++ b/helm-charts/codegen/README.md
@@ -2,7 +2,7 @@
 
 Helm chart for deploying CodeGen service.
 
-CodeGen depends on LLM microservice, refer to llm-uservice for more config details.
+CodeGen depends on the LLM and tgi microservices. Refer to [llm-uservice](../common/llm-uservice) and [tgi](../common/tgi) for more config details.
 
 ## Installing the Chart
 
@@ -14,18 +14,39 @@
 cd GenAIInfra/helm-charts/
 helm dependency update codegen
 export HFTOKEN="insert-your-huggingface-token-here"
 export MODELDIR="/mnt/opea-models"
-export MODELNAME="m-a-p/OpenCodeInterpreter-DS-6.7B"
+export MODELNAME="meta-llama/CodeLlama-7b-hf"
+# To run on Xeon
 helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
-# To use Gaudi device
-# helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values codegen/gaudi-values.yaml
+# To run on Gaudi
+#helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f codegen/gaudi-values.yaml
+```
+
+### IMPORTANT NOTE
+
+1. To use the model `meta-llama/CodeLlama-7b-hf`, you must first go to its [huggingface model card](https://huggingface.co/meta-llama/CodeLlama-7b-hf) and apply for access. Make sure your huggingface token has at least read access to that model.
+
+2. Make sure your `MODELDIR` exists on the node where your workload is scheduled so the downloaded model can be cached for later use. Otherwise, set `global.modelUseHostPath` to `null` if you don't want to cache the model (see the example below).
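+
+For example, an install without the host-side model cache could look like the following. This is only an illustration of the note above; tgi will then re-download the model on every fresh start, and in Helm 3 `--set key=null` should clear the value:
+
+```console
+helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=null --set tgi.LLM_MODEL_ID=${MODELNAME}
+```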
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/codegen 7778:7778` to expose the service for access.
+
+Open another terminal and run the following command to verify the service is working:
+
+```console
+curl http://localhost:7778/v1/codegen \
+  -H "Content-Type: application/json" \
+  -d '{"messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}'
+```
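+
+The chart also ships a Helm test (`templates/tests/test-pod.yaml`) that sends a small codegen request to the service. Assuming the release is named `codegen` as in the install commands above, the same check can be run with Helm's built-in test hook:
+
+```console
+helm test codegen
+# if the test fails, inspect the test pod logs (the pod is typically named codegen-testpod)
+kubectl logs codegen-testpod
+```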
 
 ## Values
 
-| Key                             | Type   | Default                          | Description                                                                                                                                                   |
-| ------------------------------- | ------ | -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| image.repository                | string | `"opea/codegen:latest"`          |                                                                                                                                                               |
-| service.port                    | string | `"7778"`                         |                                                                                                                                                               |
-| global.HUGGINGFACEHUB_API_TOKEN | string | `""`                             | Your own Hugging Face API token                                                                                                                               |
-| global.modelUseHostPath         | string | `"/mnt/opea-models"`             | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory |
-| tgi.LLM_MODEL_ID                | string | `"ise-uiuc/Magicoder-S-DS-6.7B"` | Models id from https://huggingface.co/, or predownloaded model directory                                                                                      |
+| Key                             | Type   | Default                        | Description                                                                                                                                                                                                                       |
+| ------------------------------- | ------ | ------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| image.repository                | string | `"opea/codegen:latest"`        |                                                                                                                                                                                                                                   |
+| service.port                    | string | `"7778"`                       |                                                                                                                                                                                                                                   |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `""`                           | Your own Hugging Face API token                                                                                                                                                                                                   |
+| global.modelUseHostPath         | string | `"/mnt/opea-models"`           | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to the container as the /data directory. Setting this to empty or null forces the model to be downloaded every time. |
+| tgi.LLM_MODEL_ID                | string | `"meta-llama/CodeLlama-7b-hf"` | Model id from https://huggingface.co/, or a pre-downloaded model directory                                                                                                                                                        |
diff --git a/helm-charts/codegen/gaudi-values.yaml b/helm-charts/codegen/gaudi-values.yaml
index 30fc5182..cc0cdea6 100644
--- a/helm-charts/codegen/gaudi-values.yaml
+++ b/helm-charts/codegen/gaudi-values.yaml
@@ -37,11 +37,10 @@ llm-uservice:
     port: 9000
 # To override values in subchart tgi
 tgi:
-  LLM_MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
-  # LLM_MODEL_ID: /data/Magicoder-S-DS-6.7B
+  LLM_MODEL_ID: meta-llama/CodeLlama-7b-hf
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "1.2.1"
+    tag: "2.0.1"
   resources:
     limits:
       habana.ai/gaudi: 1
diff --git a/helm-charts/codegen/templates/NOTES.txt b/helm-charts/codegen/templates/NOTES.txt
deleted file mode 100644
index f4aea374..00000000
--- a/helm-charts/codegen/templates/NOTES.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-1. Get the application URL by running these commands:
-{{- if contains "NodePort" .Values.service.type }}
-  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "codegen.fullname" . }})
-  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
-  echo http://$NODE_IP:$NODE_PORT
-{{- else if contains "LoadBalancer" .Values.service.type }}
-  NOTE: It may take a few minutes for the LoadBalancer IP to be available.
-        You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "codegen.fullname" . }}'
-  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "codegen.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
-  echo http://$SERVICE_IP:{{ .Values.service.port }}
-{{- else if contains "ClusterIP" .Values.service.type }}
-  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "codegen.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
-  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
-  echo "Visit http://127.0.0.1:8080 to use your application"
-  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
-{{- end }}
diff --git a/helm-charts/codegen/templates/tests/test-connection.yaml b/helm-charts/codegen/templates/tests/test-connection.yaml
deleted file mode 100644
index 6eeadf4a..00000000
--- a/helm-charts/codegen/templates/tests/test-connection.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: Pod
-metadata:
-  name: "{{ include "codegen.fullname" . }}-test-connection"
-  labels:
-    {{- include "codegen.labels" . | nindent 4 }}
-  annotations:
-    "helm.sh/hook": test
-spec:
-  containers:
-    - name: wget
-      image: busybox
-      command: ['wget']
-      args: ['{{ include "codegen.fullname" . }}:{{ .Values.service.port }}']
-  restartPolicy: Never
diff --git a/helm-charts/codegen/templates/tests/test-pod.yaml b/helm-charts/codegen/templates/tests/test-pod.yaml
new file mode 100644
index 00000000..898b1568
--- /dev/null
+++ b/helm-charts/codegen/templates/tests/test-pod.yaml
@@ -0,0 +1,24 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "codegen.fullname" . }}-testpod"
+  labels:
+    {{- include "codegen.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    #"helm.sh/hook-delete-policy": "hook-succeeded,hook-failed"
+spec:
+  containers:
+    - name: curl
+      image: alpine/curl
+      #image: python:3.10.14
+      command: ['sh', '-c']
+      args:
+        - |
+          curl http://{{ include "codegen.fullname" . }}:{{ .Values.service.port }}/v1/codegen -sS --fail-with-body \
+            -d '{"messages": "def print_hello_world():"}' \
+            -H 'Content-Type: application/json'
+  restartPolicy: Never
diff --git a/helm-charts/codegen/values.yaml b/helm-charts/codegen/values.yaml
index 06b6cae6..434c4050 100644
--- a/helm-charts/codegen/values.yaml
+++ b/helm-charts/codegen/values.yaml
@@ -37,8 +37,7 @@ llm-uservice:
     port: 9000
 # To override values in subchart tgi
 tgi:
-  LLM_MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
-  # LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
+  LLM_MODEL_ID: meta-llama/CodeLlama-7b-hf
 global:
   http_proxy: