Full day fail summary (#29609)

* Update summarize_fail.py * Update recent_fail_summary.yaml * Update summarize_fail.py * Update summarize_fail.py * Update summarize_fail.py * Update summarize_fail.py * Update summarize_fail.py * Update recent_fail_summary.yaml * Update recent_fail_summary.yaml * Create build_fail_defs.yaml * Update build_fail_defs.yaml * Update summarize_fail.py * Update summarize_fail.py * Update summarize_fail.py * Update recent_fail_summary.yaml * Update summarize_fail.py * Update summarize_fail.py * Update recent_fail_summary.yaml * Update summarize_fail.py * Update summarize_fail.py * Update summarize_fail.py * Update summarize_fail.py * Update summarize_fail.py * Update summarize_fail.py * Update recent_fail_summary.yaml * Update recent_fail_summary.yaml * Update summarize_fail.py * Update recent_fail_summary.yaml * Update summarize_fail.py * Update summarize_fail.py * Update recent_fail_summary.yaml * Restyled by prettier-yaml * Restyled by autopep8 * Restyled by isort * Update summarize_fail.py --------- Co-authored-by: Restyled.io <[email protected]>
project-chip · Jan 24, 2024 · 5508689 · 5508689
1 parent ae593cd
commit 5508689
Show file tree

Hide file tree

Showing 3 changed files with 87 additions and 55 deletions.
diff --git a/.github/workflows/recent_fail_summary.yaml b/.github/workflows/recent_fail_summary.yaml
@@ -15,7 +15,7 @@
 name: Recent Fail Summary
 on:
   schedule:
-    - cron: "0 0 * * *"
+    - cron: "10 0 * * *"
   workflow_dispatch:
 
 concurrency:
@@ -25,24 +25,34 @@ jobs:
   list_workflows:
     name: Summarize Recent Workflow Failures
     runs-on: ubuntu-latest
+    permissions: write-all
     steps:
       - uses: actions/checkout@v4
-      - run: pip install pandas python-slugify
+      - run: pip install pandas python-slugify pyyaml tabulate
       - name: Run Summarization Script
         run: python scripts/tools/summarize_fail.py
         env:
           GH_TOKEN: ${{ github.token }}
+      - name: Update Docs
+        uses: test-room-7/action-update-file@v1
+        with:
+          file-path: docs/daily_pass_percentage.md
+          commit-msg: Update daily pass percentage
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          branch: daily_pass_percentage
       - name: Upload Logs
         uses: actions/upload-artifact@v3
         with:
           name: workflow-fail-summary
           path: |
-            run_list.json
+            fail_run_list.json
+            all_run_list.json
             recent_fails.csv
             recent_fails_frequency.csv
             failure_cause_summary.csv
-            workflow_fail_rate.csv
+            workflow_pass_rate.csv
+            workflow_pass_rate.sqlite3
             recent_fails_logs
-            workflow_fail_rate
+            workflow_pass_rate
           retention-days: 5
 
diff --git a/scripts/tools/build_fail_defs.yaml b/scripts/tools/build_fail_defs.yaml
@@ -0,0 +1,20 @@
+CodeQL:
+    No space left on device:
+        short: Ran out of space
+        detail: Exception with signature "No space left on device"
+    Check that the disk containing the database directory has ample free space.:
+        short: Ran out of space
+        detail:
+            Fatal internal error with message indicating that disk space most
+            likely ran out
+Build example:
+    Could not find a version that satisfies the requirement:
+        short: Requirements issue
+        detail: Unable to install a requirements in Python requirements.txt
+    No module named:
+        short: Missing module
+        detail: Expected module was missing
+Full builds:
+    No space left on device:
+        short: Ran out of space
+        detail: Exception with signature "No space left on device
diff --git a/scripts/tools/summarize_fail.py b/scripts/tools/summarize_fail.py
@@ -1,38 +1,31 @@
+import datetime
 import logging
 import os
+import sqlite3
 import subprocess
 
 import pandas as pd
+import yaml
 from slugify import slugify
 
-error_catalog = {
-    "CodeQL": {
-        "No space left on device": {
-            "short": "Ran out of space",
-            "detail": "Exception with signature \"No space left on device\""
-        },
-        "Check that the disk containing the database directory has ample free space.": {
-            "short": "Ran out of space",
-            "detail": "Fatal internal error with message indicating that disk space most likely ran out"
-        }
-    },
-    "Build example": {
-        "Could not find a version that satisfies the requirement": {
-            "short": "Requirements issue",
-            "detail": "Unable to install a requirements in Python requirements.txt"
-        },
-        "No module named": {
-            "short": "Missing module",
-            "detail": "Expected module was missing"
-        }
-    },
-    "Full builds": {
-        "No space left on device": {
-            "short": "Ran out of space",
-            "detail": "Exception with signature \"No space left on device\""
-        }
-    }
-}
+yesterday = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
+
+with open("scripts/tools/build_fail_defs.yaml", "r") as fail_defs:
+    try:
+        error_catalog = yaml.safe_load(fail_defs)
+    except Exception:
+        logging.exception("Could not load fail definition file.")
+
+
+def pass_fail_rate(workflow):
+    logging.info(f"Checking recent pass/fail rate of workflow {workflow}.")
+    workflow_fail_rate_output_path = f"workflow_pass_rate/{slugify(workflow)}"
+    if not os.path.exists(workflow_fail_rate_output_path):
+        os.makedirs(workflow_fail_rate_output_path)
+        subprocess.run(
+            f"gh run list -R project-chip/connectedhomeip -b master -w '{workflow}' -L 500 --created {yesterday} --json conclusion > {workflow_fail_rate_output_path}/run_list.json", shell=True)
+    else:
+        logging.info("This workflow has already been processed.")
 
 
 def process_fail(id, pr, start_time, workflow):
@@ -57,28 +50,20 @@ def process_fail(id, pr, start_time, workflow):
                     root_cause = error_catalog[workflow_category][error_message]["short"]
                     break
 
-    logging.info(f"Checking recent pass/fail rate of workflow {workflow}.")
-    workflow_fail_rate_output_path = f"workflow_pass_rate/{slugify(workflow)}"
-    if not os.path.exists(workflow_fail_rate_output_path):
-        os.makedirs(workflow_fail_rate_output_path)
-        subprocess.run(
-            f"gh run list -R project-chip/connectedhomeip -b master -w '{workflow}' --json conclusion > {workflow_fail_rate_output_path}/run_list.json", shell=True)
-    else:
-        logging.info("This workflow has already been processed.")
-
     return [pr, workflow, root_cause]
 
 
 def main():
-    logging.info("Gathering recent fails information into run_list.json.")
-    subprocess.run("gh run list -R project-chip/connectedhomeip -b master -s failure --json databaseId,displayTitle,startedAt,workflowName > run_list.json", shell=True)
+    logging.info("Gathering recent fails information into fail_run_list.json.")
+    subprocess.run(
+        f"gh run list -R project-chip/connectedhomeip -b master -s failure -L 500 --created {yesterday} --json databaseId,displayTitle,startedAt,workflowName > fail_run_list.json", shell=True)
 
-    logging.info("Reading run_list.json into a DataFrame.")
-    df = pd.read_json("run_list.json")
+    logging.info("Reading fail_run_list.json into a DataFrame.")
+    df = pd.read_json("fail_run_list.json")
 
-    logging.info("Listing recent fails.")
+    logging.info(f"Listing recent fails from {yesterday}.")
     df.columns = ["ID", "Pull Request", "Start Time", "Workflow"]
-    print("Recent Fails:")
+    print(f"Recent Fails From {yesterday}:")
     print(df.to_string(columns=["Pull Request", "Workflow"], index=False))
     print()
     df.to_csv("recent_fails.csv", index=False)
@@ -91,6 +76,17 @@ def main():
     print()
     frequency.to_csv("recent_workflow_fails_frequency.csv")
 
+    logging.info("Gathering recent runs information into all_run_list.json.")
+    subprocess.run(
+        f"gh run list -R project-chip/connectedhomeip -b master -L 5000 --created {yesterday} --json workflowName > all_run_list.json", shell=True)
+
+    logging.info("Reading all_run_list.json into a DataFrame.")
+    all_df = pd.read_json("all_run_list.json")
+
+    logging.info("Gathering pass/fail rate info.")
+    all_df.columns = ["Workflow"]
+    all_df.apply(lambda row: pass_fail_rate(row["Workflow"]), axis=1)
+
     logging.info("Conducting fail information parsing.")
     root_causes = df.apply(lambda row: process_fail(row["ID"], row["Pull Request"],
                            row["Start Time"], row["Workflow"]), axis=1, result_type="expand")
@@ -100,19 +96,25 @@ def main():
     print()
     root_causes.to_csv("failure_cause_summary.csv")
 
-    logging.info("Listing percent fail rate of recent fails by workflow.")
-    fail_rate = {}
+    logging.info("Listing percent pass rate of recent fails by workflow.")
+    pass_rate = {}
     for workflow in next(os.walk("workflow_pass_rate"))[1]:
         try:
             info = pd.read_json(f"workflow_pass_rate/{workflow}/run_list.json")
             info = info[info["conclusion"].str.len() > 0]
-            fail_rate[workflow] = [info.value_counts(normalize=True).mul(100).round()["failure"]]
         except Exception:
             logging.exception(f"Recent runs info for {workflow} was not collected.")
-    fail_rate = pd.DataFrame.from_dict(fail_rate, 'index', columns=["Fail Rate"])
-    print("Recent Fail Rate of Each Workflow:")
-    print(fail_rate.to_string())
-    fail_rate.to_csv("workflow_fail_rate.csv")
+        try:
+            pass_rate[workflow] = [info.value_counts(normalize=True).mul(100).round()["success"]]
+        except Exception:
+            pass_rate[workflow] = [0.0]
+    pass_rate = pd.DataFrame.from_dict(pass_rate, 'index', columns=["Pass Rate"]).sort_values("Pass Rate")
+    print("Recent Pass Rate of Each Workflow:")
+    pass_rate.to_markdown("docs/daily_pass_percentage.md")
+    pass_rate_sql = sqlite3.connect("workflow_pass_rate.sqlite3")
+    pass_rate.to_sql("workflow_pass_rate", pass_rate_sql, if_exists="replace")
+    print(pass_rate.to_string())
+    pass_rate.to_csv("workflow_pass_rate.csv")
 
 
 if __name__ == "__main__":