From a63e150eea1c5f9525efb2f62d534ecc8f777624 Mon Sep 17 00:00:00 2001 From: Lianhao Lu Date: Wed, 25 Sep 2024 16:14:21 +0800 Subject: [PATCH] helm CI: Fix timeout issue in helm CI (#456) - Increase timeout in 'helm test' and display pod status summary - Adapt llm-uservice chart test to parameter 'max_tokens' changes Signed-off-by: Lianhao Lu --- .github/workflows/_helm-e2e.yaml | 3 ++- .github/workflows/scripts/e2e/chart_test.sh | 5 ++++- helm-charts/common/llm-uservice/README.md | 2 +- .../common/llm-uservice/templates/tests/test-pod.yaml | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/_helm-e2e.yaml b/.github/workflows/_helm-e2e.yaml index 8f470e31..9d101e6f 100644 --- a/.github/workflows/_helm-e2e.yaml +++ b/.github/workflows/_helm-e2e.yaml @@ -66,6 +66,7 @@ jobs: echo "RELEASE_NAME=${CHART_NAME}$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV echo "NAMESPACE=${CHART_NAME}-$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV echo "ROLLOUT_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV + echo "TEST_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV echo "should_cleanup=false" >> $GITHUB_ENV echo "skip_validate=false" >> $GITHUB_ENV @@ -130,7 +131,7 @@ jobs: else LOG_PATH=/home/$(whoami)/logs chart=${{ env.CHART_NAME }} - helm test -n $NAMESPACE $RELEASE_NAME --logs |tee ${LOG_PATH}/charts-${chart}.log + helm test -n $NAMESPACE $RELEASE_NAME --logs --timeout "$TEST_TIMEOUT_SECONDS" | tee ${LOG_PATH}/charts-${chart}.log exit_code=$? if [ $exit_code -ne 0 ]; then echo "Chart ${chart} test failed, please check the logs in ${LOG_PATH}!" diff --git a/.github/workflows/scripts/e2e/chart_test.sh b/.github/workflows/scripts/e2e/chart_test.sh index 9b9f0460..98ced91e 100755 --- a/.github/workflows/scripts/e2e/chart_test.sh +++ b/.github/workflows/scripts/e2e/chart_test.sh @@ -61,7 +61,10 @@ function dump_failed_pod_logs() { function dump_all_pod_logs() { namespace=$1 - echo "-----DUMP POD STATUS AND LOG in NS $namespace------" + echo "-----DUMP POD STATUS AND LOG in NS $namespace-----" + echo "------SUMMARY of POD STATUS in NS $namespace------" + kubectl get pods -n $namespace -o wide + echo "--------------------------------------------------" pods=$(kubectl get pods -n $namespace -o jsonpath='{.items[*].metadata.name}') for pod_name in $pods diff --git a/helm-charts/common/llm-uservice/README.md b/helm-charts/common/llm-uservice/README.md index 871d4ed2..d9069d34 100644 --- a/helm-charts/common/llm-uservice/README.md +++ b/helm-charts/common/llm-uservice/README.md @@ -40,7 +40,7 @@ Open another terminal and run the following command to verify the service if wor ```console curl http://localhost:9000/v1/chat/completions \ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/helm-charts/common/llm-uservice/templates/tests/test-pod.yaml b/helm-charts/common/llm-uservice/templates/tests/test-pod.yaml index ccee6022..d4657fc4 100644 --- a/helm-charts/common/llm-uservice/templates/tests/test-pod.yaml +++ b/helm-charts/common/llm-uservice/templates/tests/test-pod.yaml @@ -34,7 +34,7 @@ spec: {{- else }} curl http://{{ include "llm-uservice.fullname" . }}:{{ .Values.service.port }}/v1/chat/completions -sS --fail-with-body \ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' && break; {{- end }} curlcode=$?