From 419e5bfc857095bbcea56747e3f4feefc6d81311 Mon Sep 17 00:00:00 2001
From: Lianhao Lu
Date: Thu, 18 Jul 2024 10:05:42 +0000
Subject: [PATCH] helm: Update codegen to latest

- Update codegen chart to latest tgi-gaudi version and model
- Add helm test to codegen

Signed-off-by: Lianhao Lu
---
 helm-charts/codegen/README.md                 | 43 ++++++++++++++-----
 helm-charts/codegen/gaudi-values.yaml         |  5 +--
 helm-charts/codegen/templates/NOTES.txt       | 16 -------
 .../templates/tests/test-connection.yaml      | 18 --------
 .../codegen/templates/tests/test-pod.yaml     | 24 +++++++++++
 helm-charts/codegen/values.yaml               |  3 +-
 6 files changed, 59 insertions(+), 50 deletions(-)
 delete mode 100644 helm-charts/codegen/templates/NOTES.txt
 delete mode 100644 helm-charts/codegen/templates/tests/test-connection.yaml
 create mode 100644 helm-charts/codegen/templates/tests/test-pod.yaml

diff --git a/helm-charts/codegen/README.md b/helm-charts/codegen/README.md
index 7a35c3cd..4c448e4a 100644
--- a/helm-charts/codegen/README.md
+++ b/helm-charts/codegen/README.md
@@ -2,7 +2,7 @@
 
 Helm chart for deploying CodeGen service.
 
-CodeGen depends on LLM microservice, refer to llm-uservice for more config details.
+CodeGen depends on the LLM and tgi microservices. Refer to [llm-uservice](../common/llm-uservice) and [tgi](../common/tgi) for more config details.
 
 ## Installing the Chart
 
@@ -14,18 +14,39 @@
 cd GenAIInfra/helm-charts/
 helm dependency update codegen
 export HFTOKEN="insert-your-huggingface-token-here"
 export MODELDIR="/mnt/opea-models"
-export MODELNAME="m-a-p/OpenCodeInterpreter-DS-6.7B"
+export MODELNAME="meta-llama/CodeLlama-7b-hf"
+# To run on Xeon
 helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
-# To use Gaudi device
-# helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values codegen/gaudi-values.yaml
+# To run on Gaudi
+#helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f codegen/gaudi-values.yaml
+```
+
+### IMPORTANT NOTE
+
+1. To use the model `meta-llama/CodeLlama-7b-hf`, you must first go to its [huggingface model card](https://huggingface.co/meta-llama/CodeLlama-7b-hf) and apply for access. Make sure your huggingface token has at least read access to that model.
+
+2. Make sure your `MODELDIR` exists on the node where your workload is scheduled so the downloaded model can be cached for later use. Otherwise, set `global.modelUseHostPath` to `null` if you don't want to cache the model (see the example below).
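+
+For example, an install without the host-side model cache could look like the following. This is only an illustration of the note above; tgi will then re-download the model on every fresh start, and in Helm 3 `--set key=null` should clear the value:
+
+```console
+helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=null --set tgi.LLM_MODEL_ID=${MODELNAME}
+```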
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/codegen 7778:7778` to expose the service for access.
+
+Open another terminal and run the following command to verify the service is working:
+
+```console
+curl http://localhost:7778/v1/codegen \
+  -H "Content-Type: application/json" \
+  -d '{"messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}'
+```
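+
+The chart also ships a Helm test (`templates/tests/test-pod.yaml`) that sends a small codegen request to the service. Assuming the release is named `codegen` as in the install commands above, the same check can be run with Helm's built-in test hook:
+
+```console
+helm test codegen
+# if the test fails, inspect the test pod logs (the pod is typically named codegen-testpod)
+kubectl logs codegen-testpod
+```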
 
 ## Values
 
-| Key                             | Type   | Default                          | Description                                                                                                                                                   |
-| ------------------------------- | ------ | -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| image.repository                | string | `"opea/codegen:latest"`          |                                                                                                                                                               |
-| service.port                    | string | `"7778"`                         |                                                                                                                                                               |
-| global.HUGGINGFACEHUB_API_TOKEN | string | `""`                             | Your own Hugging Face API token                                                                                                                               |
-| global.modelUseHostPath         | string | `"/mnt/opea-models"`             | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory |
-| tgi.LLM_MODEL_ID                | string | `"ise-uiuc/Magicoder-S-DS-6.7B"` | Models id from https://huggingface.co/, or predownloaded model directory                                                                                      |
+| Key                             | Type   | Default                        | Description                                                                                                                                                                                                                       |
+| ------------------------------- | ------ | ------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| image.repository                | string | `"opea/codegen:latest"`        |                                                                                                                                                                                                                                   |
+| service.port                    | string | `"7778"`                       |                                                                                                                                                                                                                                   |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `""`                           | Your own Hugging Face API token                                                                                                                                                                                                   |
+| global.modelUseHostPath         | string | `"/mnt/opea-models"`           | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to the container as the /data directory. Setting this to empty or null forces the model to be downloaded every time. |
+| tgi.LLM_MODEL_ID                | string | `"meta-llama/CodeLlama-7b-hf"` | Model id from https://huggingface.co/, or a pre-downloaded model directory                                                                                                                                                        |
diff --git a/helm-charts/codegen/gaudi-values.yaml b/helm-charts/codegen/gaudi-values.yaml
index 30fc5182..cc0cdea6 100644
--- a/helm-charts/codegen/gaudi-values.yaml
+++ b/helm-charts/codegen/gaudi-values.yaml
@@ -37,11 +37,10 @@ llm-uservice:
     port: 9000
 # To override values in subchart tgi
 tgi:
-  LLM_MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
-  # LLM_MODEL_ID: /data/Magicoder-S-DS-6.7B
+  LLM_MODEL_ID: meta-llama/CodeLlama-7b-hf
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "1.2.1"
+    tag: "2.0.1"
   resources:
     limits:
       habana.ai/gaudi: 1
diff --git a/helm-charts/codegen/templates/NOTES.txt b/helm-charts/codegen/templates/NOTES.txt
deleted file mode 100644
index f4aea374..00000000
--- a/helm-charts/codegen/templates/NOTES.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-1. Get the application URL by running these commands:
-{{- if contains "NodePort" .Values.service.type }}
-  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "codegen.fullname" . }})
-  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
-  echo http://$NODE_IP:$NODE_PORT
-{{- else if contains "LoadBalancer" .Values.service.type }}
-  NOTE: It may take a few minutes for the LoadBalancer IP to be available.
-        You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "codegen.fullname" . }}'
-  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "codegen.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
-  echo http://$SERVICE_IP:{{ .Values.service.port }}
-{{- else if contains "ClusterIP" .Values.service.type }}
-  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "codegen.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
-  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
-  echo "Visit http://127.0.0.1:8080 to use your application"
-  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
-{{- end }}
diff --git a/helm-charts/codegen/templates/tests/test-connection.yaml b/helm-charts/codegen/templates/tests/test-connection.yaml
deleted file mode 100644
index 6eeadf4a..00000000
--- a/helm-charts/codegen/templates/tests/test-connection.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: Pod
-metadata:
-  name: "{{ include "codegen.fullname" . }}-test-connection"
-  labels:
-    {{- include "codegen.labels" . | nindent 4 }}
-  annotations:
-    "helm.sh/hook": test
-spec:
-  containers:
-    - name: wget
-      image: busybox
-      command: ['wget']
-      args: ['{{ include "codegen.fullname" . }}:{{ .Values.service.port }}']
-  restartPolicy: Never
diff --git a/helm-charts/codegen/templates/tests/test-pod.yaml b/helm-charts/codegen/templates/tests/test-pod.yaml
new file mode 100644
index 00000000..898b1568
--- /dev/null
+++ b/helm-charts/codegen/templates/tests/test-pod.yaml
@@ -0,0 +1,24 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "codegen.fullname" . }}-testpod"
+  labels:
+    {{- include "codegen.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    #"helm.sh/hook-delete-policy": "hook-succeeded,hook-failed"
+spec:
+  containers:
+    - name: curl
+      image: alpine/curl
+      #image: python:3.10.14
+      command: ['sh', '-c']
+      args:
+        - |
+          curl http://{{ include "codegen.fullname" . }}:{{ .Values.service.port }}/v1/codegen -sS --fail-with-body \
+            -d '{"messages": "def print_hello_world():"}' \
+            -H 'Content-Type: application/json'
+  restartPolicy: Never
diff --git a/helm-charts/codegen/values.yaml b/helm-charts/codegen/values.yaml
index 06b6cae6..434c4050 100644
--- a/helm-charts/codegen/values.yaml
+++ b/helm-charts/codegen/values.yaml
@@ -37,8 +37,7 @@ llm-uservice:
     port: 9000
 # To override values in subchart tgi
 tgi:
-  LLM_MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
-  # LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
+  LLM_MODEL_ID: meta-llama/CodeLlama-7b-hf
 global:
   http_proxy: