fixes #2

WolfgangFahl · May 3, 2024 · 364b883 · 364b883
1 parent 56fa2fe
commit 364b883
Show file tree

Hide file tree

Showing 4 changed files with 262 additions and 14 deletions.
diff --git a/snapquery/snapquery_cmd.py b/snapquery/snapquery_cmd.py
@@ -5,14 +5,17 @@
 """
 import sys
 from argparse import ArgumentParser
+
+from lodstorage.query import EndpointManager
 from ngwidgets.cmd import WebserverCmd
+
 from snapquery.snapquery_webserver import SnapQueryWebServer
-from lodstorage.query import EndpointManager
+
 
 class SnapQueryCmd(WebserverCmd):
     """
     Command line for diagrams server
-    """       
+    """
 
     def getArgParser(self, description: str, version_msg) -> ArgumentParser:
         """
@@ -24,7 +27,7 @@ def getArgParser(self, description: str, version_msg) -> ArgumentParser:
             "-ep",
             "--endpointPath",
             default=None,
-            help="path to yaml file to configure endpoints to use for queries",         
+            help="path to yaml file to configure endpoints to use for queries",
         )
         parser.add_argument(
             "-en",
@@ -40,22 +43,22 @@ def getArgParser(self, description: str, version_msg) -> ArgumentParser:
         )
         return parser
 
-
     def handle_args(self) -> bool:
         """
         handle the command line args
         """
         # Call the superclass handle_args to maintain base class behavior
         handled = super().handle_args()
-        endpoints = EndpointManager.getEndpoints(self.args.endpointPath,lang='sparql')
+        endpoints = EndpointManager.getEndpoints(self.args.endpointPath, lang="sparql")
         # Check if listing of endpoints is requested
         if self.args.listEndpoints:
             # List endpoints
             for endpoint in endpoints.values():
                 print(endpoint)
-            handled=True  # Operation handled
+            handled = True  # Operation handled
         return handled
-
+
+
 def main(argv: list = None):
     """
     main call

diff --git a/snapquery/snapquery_core.py b/snapquery/snapquery_core.py
@@ -3,3 +3,211 @@
 
 @author: wf
 """
+import os
+from dataclasses import field
+from pathlib import Path
+from typing import Optional
+
+from lodstorage.query import EndpointManager
+from lodstorage.sparql import SPARQL
+from lodstorage.sql import SQLDB, EntityInfo
+from lodstorage.yamlable import lod_storable
+
+
+@lod_storable
+class NamedQuery:
+    """
+    A SPARQL query that is addressed by a namespace and a name.
+
+    Attributes:
+        query_id (str): Identifier derived as "<namespace>.<name>" in __post_init__;
+            it is declared with init=False and therefore never passed by the caller.
+        namespace (str): The namespace the query belongs to.
+        name (str): The name of the query within its namespace.
+        title (str): A one-line title for the query.
+        description (str): A multiline description of the query.
+        sparql (str): The SPARQL query text (to be hidden later).
+    """
+
+    # derived from namespace and name, see __post_init__
+    query_id: str = field(init=False)
+
+    # namespace
+    namespace: str
+    # name/id
+    name: str
+    # one line title
+    title: str
+    # multiline description
+    description: str
+    # sparql query (to be hidden later)
+    sparql: str
+
+    def __post_init__(self):
+        """
+        Derive the query_id by joining namespace and name with a dot.
+        """
+        self.query_id = "{}.{}".format(self.namespace, self.name)
+
+
+class NamedQueryManager:
+    """
+    Manages the storage, retrieval, and execution of named SPARQL queries.
+
+    Named queries are kept in the "NamedQuery" table of the database opened
+    via SQLDB and are executed against one of the configured SPARQL endpoints.
+    """
+
+    def __init__(self, db_path: Optional[str] = None, debug: bool = False):
+        """
+        Initializes the NamedQueryManager with a specific database path and a debug mode.
+
+        Args:
+            db_path (Optional[str]): The file path to the SQLite database. If None, the default cache path is used.
+            debug (bool): If True, enables debug mode; the flag is stored and passed on to SQLDB.
+
+        Attributes:
+            debug (bool): Stores the debug state.
+            sql_db (SQLDB): An instance of SQLDB to manage the SQLite database interactions.
+            endpoints (dict): The SPARQL endpoints returned by EndpointManager.getEndpoints, keyed by endpoint name.
+        """
+        if db_path is None:
+            db_path = NamedQueryManager.get_cache_path()
+        self.debug = debug
+        self.sql_db = SQLDB(dbname=db_path, debug=debug)
+        self.endpoints = EndpointManager.getEndpoints(lang="sparql")
+
+    @classmethod
+    def get_cache_path(cls) -> str:
+        """
+        Get the default path of the named queries database.
+
+        The directory ~/.solutions/snapquery/storage is created if it does not exist yet.
+
+        Returns:
+            str: the path of the named_queries.db file below the user's home directory.
+        """
+        home = str(Path.home())
+        cache_dir = f"{home}/.solutions/snapquery/storage"
+        os.makedirs(cache_dir, exist_ok=True)
+        cache_path = f"{cache_dir}/named_queries.db"
+        return cache_path
+
+    @classmethod
+    def from_samples(
+        cls, db_path: Optional[str] = None, debug: bool = False
+    ) -> "NamedQueryManager":
+        """
+        Creates and returns an instance of NamedQueryManager, initializing the database
+        from the sample queries if the database file is missing or empty.
+
+        Args:
+            db_path (Optional[str]): Path to the database file. If None, the default path is used.
+            debug (bool): If True, enables debug mode which may provide additional logging.
+
+        Returns:
+            NamedQueryManager: An instance of the manager initialized with the database at `db_path`.
+        """
+        if db_path is None:
+            db_path = cls.get_cache_path()
+
+        # Decide whether samples are needed before the database is opened,
+        # so the decision does not depend on what opening does to the file.
+        path_obj = Path(db_path)
+        needs_samples = not path_obj.exists() or path_obj.stat().st_size == 0
+
+        # Open the database that was asked for; the path used to be dropped here
+        # so that the default cache database was opened regardless of db_path.
+        nqm = cls(db_path=db_path, debug=debug)
+        if needs_samples:
+            # query_id is part of each record because it is declared as the primary key
+            list_of_records = [
+                {
+                    "query_id": named_query.query_id,
+                    "namespace": named_query.namespace,
+                    "name": named_query.name,
+                    "title": named_query.title,
+                    "description": named_query.description,
+                    "sparql": named_query.sparql,
+                }
+                for named_query in cls.get_samples().values()
+            ]
+            entityInfo = EntityInfo(
+                list_of_records, name="NamedQuery", primaryKey="query_id"
+            )
+            nqm.sql_db.createTable(
+                list_of_records, "NamedQuery", primaryKey="query_id", withDrop=True
+            )
+            nqm.sql_db.store(list_of_records, entityInfo)
+        return nqm
+
+    @classmethod
+    def get_samples(cls) -> dict[str, NamedQuery]:
+        """
+        Get the sample named queries.
+
+        Returns:
+            dict[str, NamedQuery]: the sample queries keyed by "<namespace>.<name>".
+        """
+        samples = {
+            "wikidata-examples.cats": NamedQuery(
+                namespace="wikidata-examples",
+                name="cats",
+                title="Cats on Wikidata",
+                description="This query retrieves all items classified under 'house cat' (Q146) on Wikidata.",
+                sparql="""
+SELECT ?item ?itemLabel
+WHERE {
+  ?item wdt:P31 wd:Q146. # Must be a cat
+  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
+}
+""",
+            ),
+            "wikidata-examples.horses": NamedQuery(
+                namespace="wikidata-examples",
+                name="horses",
+                title="Horses on Wikidata",
+                description="This query retrieves information about horses, including parents, gender, and approximate birth and death years.",
+                sparql="""
+SELECT DISTINCT ?horse ?horseLabel ?mother ?motherLabel ?father ?fatherLabel 
+(year(?birthdate) as ?birthyear) (year(?deathdate) as ?deathyear) ?genderLabel
+WHERE {
+  ?horse wdt:P31/wdt:P279* wd:Q726 .     # Instance and subclasses of horse (Q726)
+  OPTIONAL{?horse wdt:P25 ?mother .}     # Mother
+  OPTIONAL{?horse wdt:P22 ?father .}     # Father
+  OPTIONAL{?horse wdt:P569 ?birthdate .} # Birth date
+  OPTIONAL{?horse wdt:P570 ?deathdate .} # Death date
+  OPTIONAL{?horse wdt:P21 ?gender .}     # Gender
+  SERVICE wikibase:label {
+    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],fr,ar,be,bg,bn,ca,cs,da,de,el,en,es,et,fa,fi,he,hi,hu,hy,id,it,ja,jv,ko,nb,nl,eo,pa,pl,pt,ro,ru,sh,sk,sr,sv,sw,te,th,tr,uk,yue,vec,vi,zh"
+  }
+}
+ORDER BY ?horse
+""",
+            ),
+        }
+        return samples
+
+    def query(
+        self,
+        name: str,
+        namespace: str = "wikidata-examples",
+        endpoint_name: str = "wikidata",
+    ) -> list:
+        """
+        Execute a named SPARQL query using a specified endpoint and return the results.
+
+        Args:
+            name (str): The name of the named query to execute.
+            namespace (str): The namespace of the named query, default is 'wikidata-examples'.
+            endpoint_name (str): The name of the endpoint to send the SPARQL query to, default is 'wikidata'.
+
+        Returns:
+            list: The results of the SPARQL query as a list of dicts.
+
+        Raises:
+            ValueError: If no named query or more than one named query matches the given
+                name and namespace, or if endpoint_name is not a configured endpoint.
+            Exception: If the SPARQL query execution fails or the endpoint returns an error.
+        """
+        sql_query = """SELECT 
+    sparql 
+FROM 
+    NamedQuery 
+WHERE 
+    name = ? AND namespace = ?"""
+        query_records = self.sql_db.query(sql_query, (name, namespace))
+
+        if not query_records:
+            msg = f"NamedQuery not found for the specified name '{name}' and namespace '{namespace}'."
+            raise ValueError(msg)
+
+        query_count = len(query_records)
+        if query_count != 1:
+            msg = f"multiple entries ({query_count}) for name '{name}' and namespace '{namespace}'"
+            raise ValueError(msg)
+
+        sparql_query = query_records[0]["sparql"]
+
+        if endpoint_name not in self.endpoints:
+            msg = f"Invalid endpoint {endpoint_name}"
+            # the exception used to be created but never raised, which led to
+            # an AttributeError on the missing endpoint below
+            raise ValueError(msg)
+
+        endpoint = self.endpoints[endpoint_name]
+        sparql = SPARQL(endpoint.endpoint)
+        lod = sparql.queryAsListOfDicts(sparql_query)
+        return lod
diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py
@@ -1,24 +1,25 @@
-'''
+"""
 Created on 2024-05-03
 
 @author: wf
-'''
-from ngwidgets.basetest import Basetest
+"""
 from lodstorage.query import EndpointManager
+from ngwidgets.basetest import Basetest
+
 
 class TestEndpoints(Basetest):
     """
     test endpoint handling according to https://github.com/WolfgangFahl/snapquery/issues/1
     """
-    
+
     def setUp(self, debug=False, profile=True):
         Basetest.setUp(self, debug=debug, profile=profile)
-        
+
     def testEndpoints(self):
         """
         test the endpoint handling
         """
-        em=EndpointManager()
-        ep_names=em.getEndpointNames()
+        em = EndpointManager()
+        ep_names = em.getEndpointNames()
         self.assertTrue("wikidata" in ep_names)
         pass
diff --git a/tests/test_namedqueries.py b/tests/test_namedqueries.py
@@ -0,0 +1,36 @@
+"""
+Created on 2024-05-03
+
+@author: wf
+"""
+import json
+from ngwidgets.basetest import Basetest
+
+from snapquery.snapquery_core import NamedQueryManager
+
+
+class TestNamedQueryManager(Basetest):
+    """
+    test the named query Manager
+    """
+
+    def setUp(self, debug=False, profile=True):
+        Basetest.setUp(self, debug=debug, profile=profile)
+
+    def testNamedQueries(self):
+        """
+        test getting a named query manager and executing named queries
+
+        a negative expected count marks a query name that must be rejected
+        """
+        db_path = "/tmp/named_queries.db"
+        nqm = NamedQueryManager.from_samples(db_path=db_path)
+        for name, ex_count in [("x-invalid", -1), ("cats", 223)]:
+            if ex_count < 0:
+                # an unknown name has to raise the documented ValueError;
+                # the assertion is kept outside of any broad except clause
+                # so that a failure can not be swallowed
+                with self.assertRaises(ValueError) as context:
+                    nqm.query(name)
+                if self.debug:
+                    print(f"{name}:Exception {str(context.exception)}")
+                continue
+            lod = nqm.query(name)
+            if self.debug:
+                print(f"{name}:")
+                print(json.dumps(lod, default=str, indent=2))
+            # NOTE(review): the expected count presumably reflects the state of the
+            # live wikidata endpoint at the time of writing - confirm it is stable
+            self.assertEqual(ex_count, len(lod))