Skip to content

Commit

Permalink
Merge remote-tracking branch 'refs/remotes/origin/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
tholzheim committed Jul 10, 2024
2 parents c10c635 + a8f973f commit 7693ab8
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 6 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ These are the planned features
## User stories

* as a user I want to know in advance if the query is returning what I expect
* as a user I want to find all the bands in Wikidata without having to know how it is modeled
* as a user I want to [find all the rock bands starting with 'M'](http://snapquery.bitplan.com/query/wikidata.org/snapquery-examples/bands) in Wikidata without having to know how it is modeled
* as a user I want to pay someone to help me get the information from Wikidata that I need
* as a user I want to know how a query performed in the past so I can trust that the underlying model is stable and I
get the expected results
Expand Down
69 changes: 69 additions & 0 deletions snapquery/samples/meta_query.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,60 @@
WHERE records>0
GROUP BY endpoint_name
ORDER BY 1 DESC
# Number of failed query runs per error category, namespace and domain,
# most frequent combination first.
'query_failures_by_category':
  sql: |
    SELECT
      COUNT(*) AS count,
      nq.domain,
      nq.namespace,
      error_category
    FROM QueryStats AS qs
    INNER JOIN NamedQuery AS nq
      ON nq.query_id = qs.query_id
    WHERE error_category IS NOT NULL
    GROUP BY
      error_category,
      nq.namespace,
      nq.domain
    ORDER BY count DESC
# One row per error category: the number of failed runs together with the
# distinct domains and namespaces of the affected queries.
'query_failures_by_category_grouped':
  sql: |
    SELECT
      COUNT(*) AS count,
      GROUP_CONCAT(DISTINCT nq.domain) AS domains,
      GROUP_CONCAT(DISTINCT nq.namespace) AS namespaces,
      error_category
    FROM QueryStats AS qs
    INNER JOIN NamedQuery AS nq
      ON nq.query_id = qs.query_id
    WHERE error_category IS NOT NULL
    GROUP BY error_category
    ORDER BY count DESC;
# One row per error category: the total number of failed runs plus
# "name (count)" summaries of the domains, namespaces and endpoints involved.
'query_failures_by_category_grouped_counted':
  sql: |
    SELECT
      error_category,
      SUM(entry_count) AS total_count,
      GROUP_CONCAT(DISTINCT domain_counts ORDER BY domain_count DESC) AS domain_counts,
      GROUP_CONCAT(DISTINCT namespace_counts ORDER BY namespace_count DESC) AS namespace_counts,
      GROUP_CONCAT(DISTINCT endpoint_counts ORDER BY endpoint_count DESC) AS endpoint_counts
    FROM (
      -- One row per (error_category, domain, namespace, endpoint_name).
      -- The window sums roll the per-row counts up to a single dimension
      -- within the error category; the *_counts columns are the matching
      -- "name (count)" labels.
      SELECT
        error_category,
        domain,
        namespace,
        endpoint_name,
        COUNT(*) AS entry_count,
        SUM(COUNT(*)) OVER (PARTITION BY error_category, domain) AS domain_count,
        SUM(COUNT(*)) OVER (PARTITION BY error_category, namespace) AS namespace_count,
        SUM(COUNT(*)) OVER (PARTITION BY error_category, endpoint_name) AS endpoint_count,
        domain || ' (' || SUM(COUNT(*)) OVER (PARTITION BY error_category, domain) || ')' AS domain_counts,
        namespace || ' (' || SUM(COUNT(*)) OVER (PARTITION BY error_category, namespace) || ')' AS namespace_counts,
        endpoint_name || ' (' || SUM(COUNT(*)) OVER (PARTITION BY error_category, endpoint_name) || ')' AS endpoint_counts
      FROM QueryStats AS qs
      INNER JOIN NamedQuery AS nq
        ON nq.query_id = qs.query_id
      WHERE error_category IS NOT NULL
      GROUP BY
        error_category,
        domain,
        namespace,
        endpoint_name
    ) AS per_combination
    GROUP BY error_category
    ORDER BY total_count DESC;
'query_failures_by_database_count':
sql: |
SELECT
Expand Down Expand Up @@ -147,3 +201,18 @@
database,
method
FROM Endpoint
# Counts how often each Jinja "{% for ... in ... %}" header occurs in the
# stored SPARQL templates. Only the first for-loop of each query is inspected.
'scholia_jinja_for_loops':
  sql: |
    SELECT
      count(*),
      substr(
        sparql,
        -- start right after the opening "{% for"
        instr(sparql, '{% for') + length('{% for'),
        -- number of characters up to, but not including, the closing "%}":
        -- instr() is 1-based, hence the additional - 1
        instr(substr(sparql, instr(sparql, '{% for')), '%}') - length('{% for') - 1
      ) AS for_loop_content
    FROM
      NamedQuery
    WHERE
      -- "\%" matches a literal percent sign; an unescaped "%" is a wildcard.
      -- Requiring a closing "%}" after "{% for" keeps the length above
      -- from becoming negative.
      sparql LIKE '%{\% for%\%}%' ESCAPE '\'
      AND for_loop_content LIKE '%in%'
    GROUP BY for_loop_content
    ORDER BY 1 DESC
18 changes: 15 additions & 3 deletions snapquery/snapquery_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,9 @@ class QueryDetails:
"""

query_id: str
params: str
params: str # e.g. q - q1,q2,
default_params: str # e.g. Q80 - Q58631663, Q125422124
default_param_types: str # e.g. Q5 - Q191067,Q43229
param_count: int
lines: int
size: int
Expand Down Expand Up @@ -417,11 +419,15 @@ def from_sparql(cls, query_id: str, sparql: str) -> "QueryDetails":
) # Assuming Params is a class that can parse SPARQL queries to extract parameters
params = ",".join(sparql_params.params) if sparql_params.params else None
param_count = len(sparql_params.params)

# @TODO get parameters
default_params=None
default_param_types=None
# Create and return the QueryDetails instance
return cls(
query_id=query_id,
params=params,
default_params=default_params,
default_param_types=default_param_types,
param_count=param_count,
lines=lines,
size=size,
Expand All @@ -435,7 +441,13 @@ def get_samples(cls) -> dict[str, "QueryDetails"]:
samples = {
"snapquery-examples": [
QueryDetails(
query_id="scholia.test", params="q", param_count=1, lines=1, size=50
query_id="scholia.test",
params="q",
default_params="Q80",
default_param_types="Q5",
param_count=1,
lines=1,
size=50
)
]
}
Expand Down
3 changes: 1 addition & 2 deletions tests/test_snapquery_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from lodstorage.query import Query

from snapquery.snapquery_core import NamedQuery, NamedQueryManager
from snapquery.snapquery_core import NamedQuery, NamedQueryManager, QueryPrefixMerger

from snapquery_core import QueryPrefixMerger


class TestQueryPrefixMerger(TestCase):
Expand Down

0 comments on commit 7693ab8

Please sign in to comment.