Skip to content

Commit

Permalink
Merge pull request #446 from govdirectory/Abbe98-patch-1
Browse files Browse the repository at this point in the history
Allow the check-expected-results script to take individual paths as params
  • Loading branch information
Ainali authored Nov 15, 2024
2 parents e3e5d1d + 356f5b3 commit 45c781c
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 22 deletions.
3 changes: 3 additions & 0 deletions jargon.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ allOrgs
analytics
Albin
Aotearoa
argparse
ArgumentParser
Arial
austria
authorsFile
Expand Down Expand Up @@ -156,6 +158,7 @@ parentOrgLabel
parentOrgLabel
parentOrgLabel
parentOrgQID
pathlib
pdf
PDFs
pe
Expand Down
120 changes: 98 additions & 22 deletions scripts/check-expected-results-count.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#!/usr/bin/env python3

import argparse
import glob
import re
from pathlib import Path

import requests

Expand All @@ -21,34 +23,108 @@ def execute_sparql_query(endpoint, query):
}
data = {"query": query}
response = requests.post(endpoint, headers=headers, data=data)
response.raise_for_status() # Raise an exception for HTTP errors
response.raise_for_status()
results = response.json()
return len(results["results"]["bindings"])


def verify_result_counts(folder_path, endpoint):
for filename in glob.iglob(folder_path + "**/**/*.rq", recursive=True):
with open(filename, "r") as file:
def format_table(headers, rows, col_widths=None):
    """Render headers and rows as a plain-text table with ASCII borders.

    Args:
        headers: Column titles, one entry per column.
        rows: Iterable of rows. Every header and cell is converted with
            ``str()`` before it is measured and printed.
        col_widths: Optional explicit width for each column. When omitted
            or empty, each column is sized to its widest header or cell.

    Returns:
        A single string whose lines are: border, header line, border,
        one line per row, closing border.
    """
    # Materialise up front so tuples and generators work, not only lists,
    # and so non-string headers can be padded with the "<" format spec.
    header_cells = [str(h) for h in headers]
    body = [[str(cell) for cell in row] for row in rows]

    if col_widths:
        col_widths = list(col_widths)
    else:
        col_widths = [
            max(len(line[i]) for line in [header_cells, *body])
            for i in range(len(header_cells))
        ]

    def render(cells):
        # Left-align each cell in its column with one space of padding.
        padded = (f" {c:<{w}} " for c, w in zip(cells, col_widths))
        return "|" + "|".join(padded) + "|"

    separator = "+" + "+".join("-" * (w + 2) for w in col_widths) + "+"
    lines = [separator, render(header_cells), separator]
    lines.extend(render(row) for row in body)
    lines.append(separator)
    return "\n".join(lines)


def get_status_priority(status):
    """Return the sort rank of a result status; lower ranks sort first.

    "FAIL" ranks 0, "ERROR" 1, "NO COUNT" 2 and "PASS" 3. Any other
    status ranks 4.
    """
    known = ("FAIL", "ERROR", "NO COUNT", "PASS")
    ranks = {name: rank for rank, name in enumerate(known)}
    try:
        return ranks[status]
    except KeyError:
        return 4


def verify_result_counts(path, endpoint):
    """Run query files against a SPARQL endpoint and print a result report.

    Each query file is read and passed to ``get_expected_count``. Files
    for which that returns ``None`` are reported as "NO COUNT". Otherwise
    the query is sent through ``execute_sparql_query`` and the returned
    count is compared with the expected one ("PASS" / "FAIL"). A request
    or response error is reported as "ERROR" for that file and the run
    continues.

    Args:
        path: A single query file, or a directory that is searched
            recursively for ``*.rq`` files. May be a ``Path`` or a string.
        endpoint: URL of the SPARQL endpoint to query.

    Returns:
        None. A table sorted by status priority and then file name is
        printed, followed by a one-line summary of the counters.
    """
    path = Path(path)  # the CLI passes a Path; plain strings are accepted too
    if path.is_file():
        candidates = [str(path)]
    else:
        candidates = glob.iglob(str(path / "**/**/*.rq"), recursive=True)

    headers = ["File", "Status", "Expected", "Actual", "Notes"]
    stats = {"pass": 0, "fail": 0, "error": 0, "no_count": 0}
    results = []

    # set() drops any file matched more than once; sorted() keeps the
    # order in which the endpoint is queried deterministic.
    for filepath in sorted(set(candidates)):
        # Explicit encoding so query text decodes the same on every platform.
        with open(filepath, "r", encoding="utf-8") as file:
            content = file.read()

        expected_count = get_expected_count(content)
        if expected_count is None:
            results.append(
                [filepath, "NO COUNT", "-", "-", "No expected count found"]
            )
            stats["no_count"] += 1
            continue

        try:
            actual_count = execute_sparql_query(endpoint, content)
        except (requests.RequestException, ValueError, KeyError) as e:
            # ValueError / KeyError cover a response body that is not JSON
            # or lacks the expected keys; record it instead of aborting
            # the whole report.
            results.append([filepath, "ERROR", "-", "-", str(e)])
            stats["error"] += 1
            continue

        if actual_count == expected_count:
            results.append([filepath, "PASS", expected_count, actual_count, "-"])
            stats["pass"] += 1
        else:
            results.append(
                [filepath, "FAIL", expected_count, actual_count, "Count mismatch"]
            )
            stats["fail"] += 1

    # Sort results by status priority and then by filename
    results.sort(key=lambda row: (get_status_priority(row[1]), row[0]))

    print(format_table(headers, results))
    print("\nSummary:")
    print(
        f"Total: {len(results)} | Passed: {stats['pass']} | Failed: {stats['fail']} | "
        f"Errors: {stats['error']} | No Count: {stats['no_count']}"
    )


if __name__ == "__main__":
    # Command-line entry point. Both options are optional and fall back
    # to the repository's generator queries and the Wikidata endpoint.
    parser = argparse.ArgumentParser(description="Verify SPARQL query result counts")
    parser.add_argument(
        "--path",
        type=Path,
        default=Path("queries/generators"),
        help="Path to single query file or directory of queries",
    )
    parser.add_argument(
        "--endpoint",
        default="https://query.wikidata.org/sparql",
        help="SPARQL endpoint URL",
    )

    args = parser.parse_args()
    verify_result_counts(args.path, args.endpoint)

0 comments on commit 45c781c

Please sign in to comment.