From 88caf0df947866ffe609cf60805282970f887429 Mon Sep 17 00:00:00 2001
From: Ying Chun Guo <yingchun.guo@intel.com>
Date: Fri, 26 Jul 2024 14:16:45 +0800
Subject: [PATCH] remove chart_test scripts and add script to dump pod status
 (#227)

Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
---
 .github/workflows/chart-e2e.yaml            |   3 +-
 .github/workflows/scripts/e2e/chart_test.sh | 129 +++++++-------------
 2 files changed, 44 insertions(+), 88 deletions(-)

diff --git a/.github/workflows/chart-e2e.yaml b/.github/workflows/chart-e2e.yaml
index da20c9f1..423ed37e 100644
--- a/.github/workflows/chart-e2e.yaml
+++ b/.github/workflows/chart-e2e.yaml
@@ -120,7 +120,6 @@ jobs:
           helm-charts/update_dependency.sh && helm dependency update ${{ env.CHART_FOLDER}}
           value_file="values.yaml"
           if [ "${{ matrix.hardware }}" == "gaudi" ]; then
-            # EXTRAPARAM="--set tgi.image.tag=1.2.1"
             if [ -f ${{ env.CHART_FOLDER}}/gaudi-values.yaml ]; then
               value_file="gaudi-values.yaml"
             else
@@ -137,6 +136,7 @@ jobs:
               $RELEASE_NAME ${{ env.CHART_FOLDER}} ; then
             echo "Failed to install chart ${{ matrix.example }}"
             echo "skip_validate=true" >> $GITHUB_ENV
+            .github/workflows/scripts/e2e/chart_test.sh dump_pods_status $NAMESPACE
             exit 1
           fi
           sleep 120
@@ -162,6 +162,7 @@ jobs:
             if [[ -f $LOG_PATH/charts-${chart}.log ]] && \
             [[ $(grep -c "^Phase:.*Failed" $LOG_PATH/charts-${chart}.log) != 0 ]]; then
                 teststatus=false
+                .github/workflows/scripts/e2e/chart_test.sh dump_failed_pod_logs $NAMESPACE $LOG_PATH/charts-${chart}.log
             else
                 teststatus=true
             fi
diff --git a/.github/workflows/scripts/e2e/chart_test.sh b/.github/workflows/scripts/e2e/chart_test.sh
index d3a09145..3dd7b13c 100755
--- a/.github/workflows/scripts/e2e/chart_test.sh
+++ b/.github/workflows/scripts/e2e/chart_test.sh
@@ -2,112 +2,67 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-LOG_PATH=.
-USER_ID=$(whoami)
-CHART_MOUNT=/home/$USER_ID/.cache/huggingface/hub
-# IMAGE_REPO is $OPEA_IMAGE_REPO, or else ""
-IMAGE_REPO=${OPEA_IMAGE_REPO:-""}
+#set -xe
 
-function init_codegen() {
-    # insert a prefix before opea/.*, the prefix is IMAGE_REPO
-    find .. -name '*values.yaml' -type f -exec sed -i "s#repository: opea/*#repository: ${IMAGE_REPO}opea/#g" {} \;
-    # set huggingface token
-    find . -name '*values.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
-    # replace the mount dir "Volume: *" with "Volume: $CHART_MOUNT"
-    find . -name '*values.yaml' -type f -exec sed -i "s#modelUseHostPath: .*#modelUseHostPath: $CHART_MOUNT#g" {} \;
-    # replace the pull policy "IfNotPresent" with "Always"
-    find .. -name '*values.yaml' -type f -exec sed -i "s#pullPolicy: IfNotPresent#pullPolicy: Always#g" {} \;
-}
+# Print the pod table of namespace $1, then "kubectl describe" every pod that
+# is not Running or whose ready container count is below the required count.
+function dump_pods_status() {
+    local namespace=$1
+    local pods pod_name ready status ready_count required_count
+    echo "-----DUMP POD STATUS in NS $namespace------"
 
-function init_chatqna() {
-    # replace volume: /mnt with volume: $CHART_MOUNT
-    find . -name '*values.yaml' -type f -exec sed -i "s#modelUseHostPath: .*#modelUseHostPath: $CHART_MOUNT#g" {} \;
-    # replace the repository "image: opea/*" with "image: ${IMAGE_REPO}opea/"
-    find .. -name '*values.yaml' -type f -exec sed -i "s#repository: opea/*#repository: ${IMAGE_REPO}opea/#g" {} \;
-    # set huggingface token
-    find . -name '*values.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
-    # replace the pull policy "IfNotPresent" with "Always"
-    find .. -name '*values.yaml' -type f -exec sed -i "s#pullPolicy: IfNotPresent#pullPolicy: Always#g" {} \;
-}
+    # Stream the table straight to stdout so every pod keeps its own line
+    kubectl get pods -n "$namespace" -owide
+    echo "-----------------------------------"
 
-function validate_codegen() {
-    # validate mega service
-    ip_address=$(kubectl get svc $RELEASE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
-    port=$(kubectl get svc $RELEASE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
-    # Curl the Mega Service
-    curl http://${ip_address}:${port}/v1/codegen \
-    -H "Content-Type: application/json" \
-    -d '{"messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' > $LOG_PATH/curl_codegen.log
-    exit_code=$?
-    if [ $exit_code -ne 0 ]; then
-        echo "Megaservice codegen failed, please check the logs in ${LOG_PATH}!"
-        exit 1
-    fi
+    # Get all pods in the namespace and their statuses
+    pods=$(kubectl get pods -n "$namespace" --no-headers)
 
-    echo "Checking response results, make sure the output is reasonable. "
-    local status=false
-    if [[ -f $LOG_PATH/curl_codegen.log ]] && \
-    [[ $(grep -c "print" $LOG_PATH/curl_codegen.log) != 0 ]]; then
-        status=true
-    fi
+    # Loop through each pod; first three columns: pod name, ready count, status
+    while read -r pod_name ready status _; do
+        # An empty listing still yields one blank line; skip it
+        [[ -n "$pod_name" ]] || continue
 
-    if [ $status == false ]; then
-        echo "Response check failed, please check the logs in artifacts!"
-    else
-        echo "Response check succeed!"
-    fi
+        # READY has the form "<ready>/<required>"
+        ready_count=${ready%%/*}
+        required_count=${ready##*/}
+        # Check if the pod is not in "Running" status or READY count is less than required
+        if [[ "$status" != "Running" || "$ready_count" -lt "$required_count" ]]; then
+            echo "Pod: $pod_name"
+            echo "Details:"
+            kubectl describe pod "$pod_name" -n "$namespace"
+            echo "-----------------------------------"
+        fi
+    done <<< "$pods"
 }
 
-function validate_chatqna() {
-    sleep 60
-    set -xe
-    ip_address=$(kubectl get svc $RELEASE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
-    port=$(kubectl get svc $RELEASE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
-    # Curl the Mega Service
-    curl http://${ip_address}:${port}/v1/chatqna -H "Content-Type: application/json" -d '{
-        "messages": "What is the revenue of Nike in 2023?"}' > ${LOG_PATH}/curl_megaservice.log
-    exit_code=$?
+# Log each non-test pod matching the first failed "TEST SUITE:" of log $2 in ns $1.
+function dump_failed_pod_logs() {
+    local namespace=$1 logfile=$2 failed_test_suite failed_svc_name pod_name
 
-    echo "Checking response results, make sure the output is reasonable. "
-    local status=false
-    if [[ -f $LOG_PATH/curl_megaservice.log ]] && \
-    [[ $(grep -c "billion" $LOG_PATH/curl_megaservice.log) != 0 ]]; then
-        status=true
-    fi
+    failed_test_suite=$(awk '/TEST SUITE:/{suite=$0} /Phase:/{if($2=="Failed"){print suite; exit}}' "$logfile")
+    failed_svc_name=$(echo "$failed_test_suite" | sed 's/^[ \t]*//;s/^TEST SUITE:[ \t]*//;s/-testpod$//')
 
-    if [ $status == false ]; then
-        echo "Response check failed, please check the logs in artifacts!"
-        exit 1
-    else
-        echo "Response check succeed!"
+    if [[ -n $failed_svc_name ]]; then
+        for pod_name in $(kubectl get pods -n "$namespace" --no-headers | grep -v 'testpod' | grep -F -- "$failed_svc_name" | awk '{print $1}'); do
+            echo "------DUMP POD $pod_name LOG in NS $namespace---------"
+            kubectl logs "$pod_name" -n "$namespace"
+        done  # one call per pod: "kubectl logs" accepts a single pod name
     fi
 }
 
+
 if [ $# -eq 0 ]; then
     echo "Usage: $0 <function_name>"
     exit 1
 fi
 
 case "$1" in
-    init_codegen)
-        pushd helm-charts/codegen
-        init_codegen
-        popd
-        ;;
-    validate_codegen)
-        RELEASE_NAME=$2
-        NAMESPACE=$3
-        validate_codegen
-        ;;
-    init_chatqna)
-        pushd helm-charts/chatqna
-        init_chatqna
-        popd
+    dump_pods_status)
+        dump_pods_status $2
         ;;
-    validate_chatqna)
-        RELEASE_NAME=$2
-        NAMESPACE=$3
-        validate_chatqna
+    dump_failed_pod_logs)
+        dump_failed_pod_logs $2 $3
         ;;
     *)
         echo "Unknown function: $1"