From f5f1e323bba65247f35f93ba01b075d90f0d1ac9 Mon Sep 17 00:00:00 2001 From: Steve Zhang Date: Fri, 30 Aug 2024 13:54:42 +0800 Subject: [PATCH] Revert the LLM model for kubernetes GMC (#675) * revert the LLM model to meta-llama/CodeLlama-7b-hf Signed-off-by: zhlsunshine --- CodeGen/kubernetes/codegen_xeon.yaml | 2 +- CodeGen/tests/test_gmc_on_gaudi.sh | 2 +- CodeGen/tests/test_gmc_on_xeon.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CodeGen/kubernetes/codegen_xeon.yaml b/CodeGen/kubernetes/codegen_xeon.yaml index e4a95cedf..dd1675ce3 100644 --- a/CodeGen/kubernetes/codegen_xeon.yaml +++ b/CodeGen/kubernetes/codegen_xeon.yaml @@ -29,6 +29,6 @@ spec: internalService: serviceName: tgi-service config: - MODEL_ID: HuggingFaceH4/mistral-7b-grok + MODEL_ID: meta-llama/CodeLlama-7b-hf endpoint: /generate isDownstreamService: true diff --git a/CodeGen/tests/test_gmc_on_gaudi.sh b/CodeGen/tests/test_gmc_on_gaudi.sh index ed87d1aee..a63ef201f 100755 --- a/CodeGen/tests/test_gmc_on_gaudi.sh +++ b/CodeGen/tests/test_gmc_on_gaudi.sh @@ -34,7 +34,7 @@ function validate_codegen() { export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) echo "$CLIENT_POD" accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}") - kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log + kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log exit_code=$? 
if [ $exit_code -ne 0 ]; then echo "chatqna failed, please check the logs in ${LOG_PATH}!" diff --git a/CodeGen/tests/test_gmc_on_xeon.sh b/CodeGen/tests/test_gmc_on_xeon.sh index ae3140564..97cb14d78 100755 --- a/CodeGen/tests/test_gmc_on_xeon.sh +++ b/CodeGen/tests/test_gmc_on_xeon.sh @@ -34,7 +34,7 @@ function validate_codegen() { export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) echo "$CLIENT_POD" accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}") - kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log + kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log exit_code=$? if [ $exit_code -ne 0 ]; then echo "chatqna failed, please check the logs in ${LOG_PATH}!"