From 9e49cc8228866b2eeac8f82ef688bb0608dfdae7 Mon Sep 17 00:00:00 2001 From: irisdingbj Date: Fri, 7 Jun 2024 22:35:17 +0000 Subject: [PATCH] fix wait error Signed-off-by: irisdingbj --- .github/workflows/scripts/e2e/gmc_test.sh | 47 ++++++++++++++++------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/.github/workflows/scripts/e2e/gmc_test.sh b/.github/workflows/scripts/e2e/gmc_test.sh index 228d81a3..c6940927 100755 --- a/.github/workflows/scripts/e2e/gmc_test.sh +++ b/.github/workflows/scripts/e2e/gmc_test.sh @@ -21,8 +21,8 @@ function install_gmc() { kubectl apply -f $(pwd)/config/manager/gmc-manager.yaml # Wait until the gmc controller pod is ready - GMC_CONTROLLER_POD=$(kubectl get pods --namespace=$SYSTEM_NAMESPACE | awk 'NR>1 {print $1; exit}') - wait_until_pod_ready "gmc-controller" $GMC_CONTROLLER_POD $SYSTEM_NAMESPACE + wait_until_pod_ready "gmc-controller" $SYSTEM_NAMESPACE "gmc-controller" + kubectl get pods -n $SYSTEM_NAMESPACE } function validate_gmc() { @@ -43,36 +43,31 @@ function cleanup_gmc() { function validate_chatqna() { - kubectl get pods -n $SYSTEM_NAMESPACE # todo select gaudi or xeon kubectl create ns $APP_NAMESPACE sed -i "s|namespace: chatqa|namespace: $APP_NAMESPACE|g" $(pwd)/config/samples/chatQnA_xeon.yaml kubectl apply -f $(pwd)/config/samples/chatQnA_xeon.yaml - - - output=$(kubectl get pods) - echo $output - # Wait until the router service is ready echo "Waiting for the chatqa router service to be ready..." - ROUTER_POD=$(kubectl get pods --namespace=$APP_NAMESPACE -l app=router-service | awk 'NR>1 {print $1; exit}') - wait_until_pod_ready "chatqna router" $ROUTER_POD $APP_NAMESPACE + wait_until_pod_ready "chatqna router" $APP_NAMESPACE "router-service" + output=$(kubectl get pods -n $APP_NAMESPACE) + echo $output # Wait until the tgi pod is ready TGI_POD_NAME=$(kubectl get pods --namespace=$APP_NAMESPACE | grep ^tgi-service | awk '{print $1}') kubectl describe pod $TGI_POD_NAME -n $APP_NAMESPACE - wait_until_pod_ready "tgi service" $TGI_POD_NAME $APP_NAMESPACE + kubectl wait --for=condition=ready pod/TGI_POD_NAME --namespace=$APP_NAMESPACE --timeout=300s # deploy client pod for testing kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity # wait for client pod ready - CLIENT_POD=$(kubectl get pods --namespace=$APP_NAMESPACE -l app=client-test | awk 'NR>1 {print $1; exit}') - wait_until_pod_ready "client-test" $CLIENT_POD $APP_NAMESPACE + wait_until_pod_ready "client-test" $APP_NAMESPACE "client-test" # giving time to populating data sleep 120 + kubectl get pods -n $APP_NAMESPACE # send request to chatqnA export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) @@ -128,9 +123,33 @@ function init_gmc() { function wait_until_pod_ready() { echo "Waiting for the $1 to be ready..." - kubectl wait --for=condition=ready pod/$2 --namespace=$3 --timeout=300s + max_retries=30 + retry_count=0 + while ! is_pod_ready $2 $3; do + if [ $retry_count -ge $max_retries ]; then + echo "$1 is not ready after waiting for a significant amount of time" + exit 1 + fi + echo "$1 is not ready yet. Retrying in 10 seconds..." + sleep 10 + output=$(kubectl get pods -n $2) + echo $output + retry_count=$((retry_count + 1)) + done } +function is_pod_ready() { + if [ "$2" == "gmc-controller" ]; then + pod_status=$(kubectl get pods -n $1 -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}') + else + pod_status=$(kubectl get pods -n $1 -l app=$2 -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}') + fi + if [ "$pod_status" == "True" ]; then + return 0 + else + return 1 + fi +} if [ $# -eq 0 ]; then echo "Usage: $0 "