From a822747ee523a7cfd31450a10e9ca3f66b536f44 Mon Sep 17 00:00:00 2001 From: Albin Larsson Date: Fri, 15 Nov 2024 15:43:12 +0100 Subject: [PATCH 1/2] allow the check-expected-results script to take individual paths as params + sorted table-like output + summary of failed/success -rate --- scripts/check-expected-results-count.py | 120 +++++++++++++++++++----- 1 file changed, 98 insertions(+), 22 deletions(-) diff --git a/scripts/check-expected-results-count.py b/scripts/check-expected-results-count.py index b4da1e031f..ebfb82552a 100755 --- a/scripts/check-expected-results-count.py +++ b/scripts/check-expected-results-count.py @@ -1,7 +1,9 @@ #!/usr/bin/env python3 +import argparse import glob import re +from pathlib import Path import requests @@ -21,34 +23,108 @@ def execute_sparql_query(endpoint, query): } data = {"query": query} response = requests.post(endpoint, headers=headers, data=data) - response.raise_for_status() # Raise an exception for HTTP errors + response.raise_for_status() results = response.json() return len(results["results"]["bindings"]) -def verify_result_counts(folder_path, endpoint): - for filename in glob.iglob(folder_path + "**/**/*.rq", recursive=True): - with open(filename, "r") as file: +def format_table(headers, rows, col_widths=None): + if not col_widths: + col_widths = [ + max(len(str(row[i])) for row in [headers] + rows) + for i in range(len(headers)) + ] + + separator = "+" + "+".join("-" * (width + 2) for width in col_widths) + "+" + header = "|" + "|".join(f" {h:<{w}} " for h, w in zip(headers, col_widths)) + "|" + + formatted_rows = [] + for row in rows: + formatted_row = ( + "|" + + "|".join(f" {str(cell):<{w}} " for cell, w in zip(row, col_widths)) + + "|" + ) + formatted_rows.append(formatted_row) + + return "\n".join([separator, header, separator] + formatted_rows + [separator]) + + +def get_status_priority(status): + priority = {"FAIL": 0, "ERROR": 1, "NO COUNT": 2, "PASS": 3} + return priority.get(status, 4) + + +def 
verify_result_counts(path, endpoint): + paths = ( + [path] + if path.is_file() + else glob.iglob(str(path / "**/**/*.rq"), recursive=True) + ) + paths = list(set(paths)) + results = [] + stats = {"pass": 0, "fail": 0, "error": 0, "no_count": 0} + + headers = ["File", "Status", "Expected", "Actual", "Notes"] + + for filepath in paths: + with open(filepath, "r") as file: content = file.read() expected_count = get_expected_count(content) - if expected_count is not None: - try: - actual_count = execute_sparql_query(endpoint, content) - if actual_count == expected_count: - print( - f"{filename}: PASS (Expected: {expected_count}, Actual: {actual_count})" - ) - else: - print( - f"{filename}: FAIL (Expected: {expected_count}, Actual: {actual_count})" - ) - except requests.RequestException as e: - print(f"{filename}: ERROR - {str(e)}") - else: - print(f"{filename}: No expected count found") + + if expected_count is None: + results.append( + [filepath, "NO COUNT", "-", "-", "No expected count found"] + ) + stats["no_count"] += 1 + continue + + try: + actual_count = execute_sparql_query(endpoint, content) + if actual_count == expected_count: + results.append( + [filepath, "PASS", expected_count, actual_count, "-"] + ) + stats["pass"] += 1 + else: + results.append( + [ + filepath, + "FAIL", + expected_count, + actual_count, + "Count mismatch", + ] + ) + stats["fail"] += 1 + except requests.RequestException as e: + results.append([filepath, "ERROR", "-", "-", str(e)]) + stats["error"] += 1 + + # Sort results by status priority and then by filename + results.sort(key=lambda x: (get_status_priority(x[1]), x[0])) + + print(format_table(headers, results)) + print(f"\nSummary:") + print( + f"Total: {len(results)} | Passed: {stats['pass']} | Failed: {stats['fail']} | " + f"Errors: {stats['error']} | No Count: {stats['no_count']}" + ) if __name__ == "__main__": - folder_path = "queries/generators" - endpoint = "https://query.wikidata.org/sparql" - verify_result_counts(folder_path, 
endpoint) + parser = argparse.ArgumentParser(description="Verify SPARQL query result counts") + parser.add_argument( + "--path", + type=Path, + default=Path("queries/generators"), + help="Path to single query file or directory of queries", + ) + parser.add_argument( + "--endpoint", + default="https://query.wikidata.org/sparql", + help="SPARQL endpoint URL", + ) + + args = parser.parse_args() + verify_result_counts(args.path, args.endpoint) From 356f5b33b3051193ddcfce4a42d991c2e5fb933e Mon Sep 17 00:00:00 2001 From: Ainali Date: Fri, 15 Nov 2024 16:09:18 +0100 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=97=B9=20Add=20words=20to=20jargon?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- jargon.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/jargon.txt b/jargon.txt index 1fb840034b..6a3bb2cffb 100644 --- a/jargon.txt +++ b/jargon.txt @@ -5,6 +5,8 @@ allOrgs analytics Albin Aotearoa +argparse +ArgumentParser Arial austria authorsFile @@ -156,6 +158,7 @@ parentOrgLabel parentOrgLabel parentOrgLabel parentOrgQID +pathlib pdf PDFs pe