From a822747ee523a7cfd31450a10e9ca3f66b536f44 Mon Sep 17 00:00:00 2001
From: Albin Larsson <albin.post@gmail.com>
Date: Fri, 15 Nov 2024 15:43:12 +0100
Subject: [PATCH 1/2] allow the check-expected-results script to take
 individual paths as params

+ sorted table-like output
+ summary of failure/success rate
---
 scripts/check-expected-results-count.py | 120 +++++++++++++++++++-----
 1 file changed, 98 insertions(+), 22 deletions(-)

diff --git a/scripts/check-expected-results-count.py b/scripts/check-expected-results-count.py
index b4da1e031f..ebfb82552a 100755
--- a/scripts/check-expected-results-count.py
+++ b/scripts/check-expected-results-count.py
@@ -1,7 +1,9 @@
 #!/usr/bin/env python3
 
+import argparse
 import glob
 import re
+from pathlib import Path
 
 import requests
 
@@ -21,34 +23,108 @@ def execute_sparql_query(endpoint, query):
     }
     data = {"query": query}
     response = requests.post(endpoint, headers=headers, data=data)
-    response.raise_for_status()  # Raise an exception for HTTP errors
+    response.raise_for_status()
     results = response.json()
     return len(results["results"]["bindings"])
 
 
-def verify_result_counts(folder_path, endpoint):
-    for filename in glob.iglob(folder_path + "**/**/*.rq", recursive=True):
-        with open(filename, "r") as file:
+def format_table(headers, rows, col_widths=None):
+    if not col_widths:
+        col_widths = [
+            max(len(str(row[i])) for row in [headers] + rows)
+            for i in range(len(headers))
+        ]
+
+    separator = "+" + "+".join("-" * (width + 2) for width in col_widths) + "+"
+    header = "|" + "|".join(f" {h:<{w}} " for h, w in zip(headers, col_widths)) + "|"
+
+    formatted_rows = []
+    for row in rows:
+        formatted_row = (
+            "|"
+            + "|".join(f" {str(cell):<{w}} " for cell, w in zip(row, col_widths))
+            + "|"
+        )
+        formatted_rows.append(formatted_row)
+
+    return "\n".join([separator, header, separator] + formatted_rows + [separator])
+
+
+def get_status_priority(status):
+    priority = {"FAIL": 0, "ERROR": 1, "NO COUNT": 2, "PASS": 3}
+    return priority.get(status, 4)
+
+
+def verify_result_counts(path, endpoint):
+    paths = (
+        [path]
+        if path.is_file()
+        else glob.iglob(str(path / "**/**/*.rq"), recursive=True)
+    )
+    paths = list(set(paths))
+    results = []
+    stats = {"pass": 0, "fail": 0, "error": 0, "no_count": 0}
+
+    headers = ["File", "Status", "Expected", "Actual", "Notes"]
+
+    for filepath in paths:
+        with open(filepath, "r") as file:
             content = file.read()
             expected_count = get_expected_count(content)
-            if expected_count is not None:
-                try:
-                    actual_count = execute_sparql_query(endpoint, content)
-                    if actual_count == expected_count:
-                        print(
-                            f"{filename}: PASS (Expected: {expected_count}, Actual: {actual_count})"
-                        )
-                    else:
-                        print(
-                            f"{filename}: FAIL (Expected: {expected_count}, Actual: {actual_count})"
-                        )
-                except requests.RequestException as e:
-                    print(f"{filename}: ERROR - {str(e)}")
-            else:
-                print(f"{filename}: No expected count found")
+
+            if expected_count is None:
+                results.append(
+                    [filepath, "NO COUNT", "-", "-", "No expected count found"]
+                )
+                stats["no_count"] += 1
+                continue
+
+            try:
+                actual_count = execute_sparql_query(endpoint, content)
+                if actual_count == expected_count:
+                    results.append(
+                        [filepath, "PASS", expected_count, actual_count, "-"]
+                    )
+                    stats["pass"] += 1
+                else:
+                    results.append(
+                        [
+                            filepath,
+                            "FAIL",
+                            expected_count,
+                            actual_count,
+                            "Count mismatch",
+                        ]
+                    )
+                    stats["fail"] += 1
+            except requests.RequestException as e:
+                results.append([filepath, "ERROR", "-", "-", str(e)])
+                stats["error"] += 1
+
+    # Sort results by status priority and then by filename
+    results.sort(key=lambda x: (get_status_priority(x[1]), x[0]))
+
+    print(format_table(headers, results))
+    print(f"\nSummary:")
+    print(
+        f"Total: {len(results)} | Passed: {stats['pass']} | Failed: {stats['fail']} | "
+        f"Errors: {stats['error']} | No Count: {stats['no_count']}"
+    )
 
 
 if __name__ == "__main__":
-    folder_path = "queries/generators"
-    endpoint = "https://query.wikidata.org/sparql"
-    verify_result_counts(folder_path, endpoint)
+    parser = argparse.ArgumentParser(description="Verify SPARQL query result counts")
+    parser.add_argument(
+        "--path",
+        type=Path,
+        default=Path("queries/generators"),
+        help="Path to single query file or directory of queries",
+    )
+    parser.add_argument(
+        "--endpoint",
+        default="https://query.wikidata.org/sparql",
+        help="SPARQL endpoint URL",
+    )
+
+    args = parser.parse_args()
+    verify_result_counts(args.path, args.endpoint)

From 356f5b33b3051193ddcfce4a42d991c2e5fb933e Mon Sep 17 00:00:00 2001
From: Ainali <ainali.jan@gmail.com>
Date: Fri, 15 Nov 2024 16:09:18 +0100
Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=97=B9=20Add=20words=20to=20jargon?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 jargon.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/jargon.txt b/jargon.txt
index 1fb840034b..6a3bb2cffb 100644
--- a/jargon.txt
+++ b/jargon.txt
@@ -5,6 +5,8 @@ allOrgs
 analytics
 Albin
 Aotearoa
+argparse
+ArgumentParser
 Arial
 austria
 authorsFile
@@ -156,6 +158,7 @@ parentOrgLabel
 parentOrgLabel
 parentOrgLabel
 parentOrgQID
+pathlib
 pdf
 PDFs
 pe