def handle_args(self) -> bool:
    """
    Process the parsed command line arguments.

    The base class handler runs first; afterwards the SPARQL endpoints
    configured via ``--endpointPath`` are loaded. If listing of the
    endpoints was requested, every endpoint is printed and the request
    counts as handled.

    Returns:
        bool: True if endpoints were listed, otherwise whatever the
            base class handler reported.
    """
    base_handled = super().handle_args()
    # the endpoints are loaded unconditionally, exactly as before
    endpoint_map = EndpointManager.getEndpoints(
        self.args.endpointPath, lang="sparql"
    )
    if not self.args.listEndpoints:
        return base_handled
    for configured_endpoint in endpoint_map.values():
        print(configured_endpoint)
    return True
@lod_storable
class NamedQuery:
    """
    A SPARQL query that is addressed by a namespace and a name.

    Attributes:
        query_id (str): Identifier derived as "<namespace>.<name>"; it is not
            a constructor argument but is set in __post_init__.
        namespace (str): The namespace used to group related queries.
        name (str): The name of the query within its namespace.
        title (str): A one-line title for the query.
        description (str): A longer, possibly multiline, description.
        sparql (str): The SPARQL query text.
    """

    # derived key - excluded from the generated constructor
    query_id: str = field(init=False)

    # grouping namespace
    namespace: str
    # name within the namespace
    name: str
    # short title
    title: str
    # long description
    description: str
    # the SPARQL query text
    sparql: str

    def __post_init__(self):
        """
        Derive query_id from namespace and name once the fields are set.
        """
        self.query_id = "{}.{}".format(self.namespace, self.name)
@classmethod
def from_samples(
    cls, db_path: Optional[str] = None, debug: bool = False
) -> "NamedQueryManager":
    """
    Create a NamedQueryManager and seed its database with the sample queries
    when the database file is missing or empty.

    Args:
        db_path (Optional[str]): Path to the database file. If None, the
            default cache path from get_cache_path() is used.
        debug (bool): If True, the manager is created in debug mode.

    Returns:
        NamedQueryManager: A manager that works on the database at `db_path`.
    """
    if db_path is None:
        db_path = cls.get_cache_path()

    # Decide whether seeding is needed before the manager opens the database,
    # so the decision does not depend on whether opening it creates the file
    # (NOTE(review): SQLite may create an empty file on connect - confirm).
    db_file = Path(db_path)
    needs_samples = not db_file.exists() or db_file.stat().st_size == 0

    # Pass db_path on: previously the manager was always created on the
    # default cache database, so the path given by the caller was ignored.
    nqm = cls(db_path=db_path, debug=debug)
    if needs_samples:
        list_of_records = [
            {
                # the declared primary key must be part of the record
                "query_id": named_query.query_id,
                "namespace": named_query.namespace,
                "name": named_query.name,
                "title": named_query.title,
                "description": named_query.description,
                "sparql": named_query.sparql,
            }
            for named_query in cls.get_samples().values()
        ]
        entity_info = EntityInfo(
            list_of_records, name="NamedQuery", primaryKey="query_id"
        )
        nqm.sql_db.createTable(list_of_records, "NamedQuery", withDrop=True)
        nqm.sql_db.store(list_of_records, entity_info)
    return nqm
def query(
    self,
    name: str,
    namespace: str = "wikidata-examples",
    endpoint_name: str = "wikidata",
):
    """
    Execute a named SPARQL query against one of the configured endpoints.

    The SPARQL text is looked up in the NamedQuery table by name and
    namespace, then sent to the endpoint registered under `endpoint_name`.

    Args:
        name (str): The name of the named query to execute.
        namespace (str): The namespace of the named query, default is 'wikidata-examples'.
        endpoint_name (str): The name of the endpoint to send the SPARQL query to, default is 'wikidata'.

    Returns:
        The result of SPARQL.queryAsListOfDicts for the stored query
        (a list of dicts, one per result row).

    Raises:
        ValueError: If no named query or more than one named query matches
            the given name and namespace, or if `endpoint_name` is not a
            configured endpoint.
        Exception: Whatever the SPARQL query execution itself raises.
    """
    # parameterized lookup - name and namespace are never interpolated
    sql_query = """SELECT
    sparql
FROM
    NamedQuery
WHERE
    name = ? AND namespace = ?"""
    query_records = self.sql_db.query(sql_query, (name, namespace))

    if not query_records:
        msg = f"NamedQuery not found for the specified name '{name}' and namespace '{namespace}'."
        raise ValueError(msg)

    query_count = len(query_records)
    if query_count != 1:
        msg = f"multiple entries ({query_count}) for name '{name}' and namespace '{namespace}'"
        raise ValueError(msg)

    sparql_query = query_records[0]["sparql"]

    if endpoint_name not in self.endpoints:
        msg = f"Invalid endpoint {endpoint_name}"
        # the exception used to be created but never raised, so an unknown
        # endpoint fell through to an attribute access on None
        raise ValueError(msg)

    endpoint = self.endpoints[endpoint_name]
    sparql = SPARQL(endpoint.endpoint)
    lod = sparql.queryAsListOfDicts(sparql_query)
    return lod
def testNamedQueries(self):
    """
    test executing named queries via a NamedQueryManager created from the samples

    An expected count of -1 marks a query name that must be rejected with a
    ValueError; any other value is the expected number of result rows.
    """
    db_path = "/tmp/named_queries.db"
    nqm = NamedQueryManager.from_samples(db_path=db_path)
    # NOTE(review): the expected row count for "cats" presumably reflects the
    # live endpoint at the time of writing and may drift - confirm
    for name, ex_count in [("x-invalid", -1), ("cats", 223)]:
        with self.subTest(name=name):
            if ex_count < 0:
                # only the lookup failure is expected here; any other
                # exception type is a real error and must surface
                with self.assertRaises(ValueError) as ctx:
                    nqm.query(name)
                if self.debug:
                    print(f"{name}:Exception {str(ctx.exception)}")
                continue
            # no try/except: a failed assertion or a query error must not be
            # swallowed and re-reported as a misleading "-1 != count"
            lod = nqm.query(name)
            if self.debug:
                print(f"{name}:")
                print(json.dumps(lod, default=str, indent=2))
            self.assertEqual(ex_count, len(lod))