Patch summary (free text ahead of the first "diff --git" header):
- README.md: link the "bands" user story to a concrete snapquery example query.
- snapquery/samples/meta_query.yaml: add three query-failure-by-category reports
  and the scholia_jinja_for_loops query. In the latter, the literal percent sign
  of "{% for" is escaped with the declared ESCAPE character so it is not treated
  as a LIKE wildcard, and string literals use single quotes.
- snapquery/snapquery_core.py: add default_params and default_param_types to
  QueryDetails (both are still set to None in from_sparql, see the @TODO).
- tests/test_snapquery_core.py: import QueryPrefixMerger from the package path.

diff --git a/README.md b/README.md
index a005f9b..3d93dca 100644
--- a/README.md
+++ b/README.md
@@ -99,7 +99,7 @@ These are the planned features
 
 ## User stories
 * as a user I want to know in advance if the query is returning what I expect
-* as a user I want to find all the bands in Wikidata without having to know how it is modeled
+* as a user I want to [find all the rock bands starting with 'M'](http://snapquery.bitplan.com/query/wikidata.org/snapquery-examples/bands) in Wikidata without having to know how it is modeled
 * as a user I want pay someone to help me get the information from Wikidata that I need
 * as a user I want to know how a query performed in the past so I can trust that the underlying model is stable and I get the expected results
 
diff --git a/snapquery/samples/meta_query.yaml b/snapquery/samples/meta_query.yaml
index c2576c7..57ee43d 100644
--- a/snapquery/samples/meta_query.yaml
+++ b/snapquery/samples/meta_query.yaml
@@ -14,6 +14,60 @@
     WHERE records>0
     GROUP BY endpoint_name
     ORDER BY 1 DESC
+'query_failures_by_category':
+  sql: |
+    SELECT
+      count(*) as count,
+      nq.domain,
+      nq.namespace,
+      error_category
+    FROM QueryStats qs
+    JOIN NamedQuery nq
+      ON qs.query_id=nq.query_id
+    WHERE error_category IS NOT NULL
+
+    GROUP BY error_category,nq.namespace,nq.domain
+    ORDER BY 1 DESC
+'query_failures_by_category_grouped':
+  sql: |
+    SELECT
+      count(*) AS count,
+      GROUP_CONCAT(DISTINCT nq.domain) AS domains,
+      GROUP_CONCAT(DISTINCT nq.namespace) AS namespaces,
+      error_category
+    FROM QueryStats qs
+    JOIN NamedQuery nq ON qs.query_id = nq.query_id
+    WHERE error_category IS NOT NULL
+    GROUP BY error_category
+    ORDER BY count DESC;
+'query_failures_by_category_grouped_counted':
+  sql: |
+    SELECT
+      error_category,
+      SUM(entry_count) AS total_count,
+      GROUP_CONCAT(DISTINCT domain_counts ORDER BY domain_count DESC) AS domain_counts,
+      GROUP_CONCAT(DISTINCT namespace_counts ORDER BY namespace_count DESC) AS namespace_counts,
+      GROUP_CONCAT(DISTINCT endpoint_counts ORDER BY endpoint_count DESC) AS endpoint_counts
+    FROM (
+      SELECT
+        error_category,
+        domain,
+        namespace,
+        endpoint_name,
+        COUNT(*) AS entry_count,
+        domain || ' (' || SUM(COUNT(*)) OVER (PARTITION BY error_category, domain) || ')' AS domain_counts,
+        namespace || ' (' || SUM(COUNT(*)) OVER (PARTITION BY error_category, namespace) || ')' AS namespace_counts,
+        endpoint_name || ' (' || SUM(COUNT(*)) OVER (PARTITION BY error_category, endpoint_name) || ')' AS endpoint_counts,
+        SUM(COUNT(*)) OVER (PARTITION BY error_category, domain) AS domain_count,
+        SUM(COUNT(*)) OVER (PARTITION BY error_category, namespace) AS namespace_count,
+        SUM(COUNT(*)) OVER (PARTITION BY error_category, endpoint_name) AS endpoint_count
+      FROM QueryStats qs
+      JOIN NamedQuery nq ON qs.query_id = nq.query_id
+      WHERE error_category IS NOT NULL
+      GROUP BY error_category, domain, namespace, endpoint_name
+    ) sub
+    GROUP BY error_category
+    ORDER BY total_count DESC;
 'query_failures_by_database_count':
   sql: |
     SELECT
@@ -147,3 +201,18 @@
       database,
       method
     FROM Endpoint
+'scholia_jinja_for_loops':
+  sql: |
+    SELECT
+      count(*),
+      substr(
+        sparql,
+        instr(sparql, '{% for') + length('{% for'), -- Start position right after "{% for"
+        instr(substr(sparql, instr(sparql, '{% for')), '%}') - length('{% for') -- Length of substring
+      ) as for_loop_content
+    FROM
+      NamedQuery
+    WHERE
+      sparql LIKE '%{\% for%' ESCAPE '\' and for_loop_content like '%in%'
+    group by for_loop_content
+    order by 1 desc
diff --git a/snapquery/snapquery_core.py b/snapquery/snapquery_core.py
index bd8c744..385e8cd 100644
--- a/snapquery/snapquery_core.py
+++ b/snapquery/snapquery_core.py
@@ -387,7 +387,9 @@ class QueryDetails:
     """
 
     query_id: str
-    params: str
+    params: str  # e.g. q - q1,q2,
+    default_params: str  # e.g. Q80 - Q58631663, Q125422124
+    default_param_types: str  # e.g. Q5 - Q191067,Q43229
     param_count: int
     lines: int
     size: int
@@ -417,11 +419,15 @@ def from_sparql(cls, query_id: str, sparql: str) -> "QueryDetails":
         )  # Assuming Params is a class that can parse SPARQL queries to extract parameters
         params = ",".join(sparql_params.params) if sparql_params.params else None
         param_count = len(sparql_params.params)
-
+        # @TODO get parameters
+        default_params = None
+        default_param_types = None
         # Create and return the QueryDetails instance
         return cls(
             query_id=query_id,
             params=params,
+            default_params=default_params,
+            default_param_types=default_param_types,
             param_count=param_count,
             lines=lines,
             size=size,
@@ -435,7 +441,13 @@ def get_samples(cls) -> dict[str, "QueryDetails"]:
         samples = {
             "snapquery-examples": [
                 QueryDetails(
-                    query_id="scholia.test", params="q", param_count=1, lines=1, size=50
+                    query_id="scholia.test",
+                    params="q",
+                    default_params="Q80",
+                    default_param_types="Q5",
+                    param_count=1,
+                    lines=1,
+                    size=50,
                 )
             ]
         }
diff --git a/tests/test_snapquery_core.py b/tests/test_snapquery_core.py
index 3379919..c509252 100644
--- a/tests/test_snapquery_core.py
+++ b/tests/test_snapquery_core.py
@@ -2,9 +2,8 @@
 
 from lodstorage.query import Query
 
-from snapquery.snapquery_core import NamedQuery, NamedQueryManager
+from snapquery.snapquery_core import NamedQuery, NamedQueryManager, QueryPrefixMerger
 
-from snapquery_core import QueryPrefixMerger
 
 
 class TestQueryPrefixMerger(TestCase):