-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: implement strategies for SPARQL query functionality
Currently, rdfproxy relies on SPARQLWrapper for querying triplestores; this is not always ideal, since SPARQLWrapper is occasionally blacklisted (e.g. by Wikidata) and has caused severe performance issues in the past. This change introduces SPARQL query strategies for better control over which query functionality runs in RDFProxy. The default strategy, SPARQLWrapperStrategy, implements exactly the previous SPARQLWrapper behavior. An HttpxStrategy implements the query functionality required by RDFProxy using raw httpx instead of SPARQLWrapper.
- Loading branch information
Showing
2 changed files
with
73 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
"""Strategy classes for SPARQL query functionality.""" | ||
|
||
import abc | ||
from collections.abc import Iterator | ||
|
||
from SPARQLWrapper import JSON, QueryResult, SPARQLWrapper | ||
import httpx | ||
|
||
|
||
class SPARQLStrategy(abc.ABC):
    """Abstract base for objects that run SPARQL queries against an endpoint.

    Concrete strategies implement ``query``; the endpoint handed to the
    constructor is kept on ``self.endpoint``.
    """

    def __init__(self, endpoint: str):
        self.endpoint = endpoint

    @abc.abstractmethod
    def query(self, sparql_query: str) -> Iterator[dict[str, str]]:
        """Run ``sparql_query`` and return an iterator over result bindings."""
        raise NotImplementedError

    @staticmethod
    def _get_bindings_from_bindings_dict(bindings_dict: dict) -> Iterator[dict]:
        """Flatten result bindings into plain ``{variable: value}`` dicts.

        Reads ``bindings_dict["results"]["bindings"]`` and, for every binding,
        keeps only the ``"value"`` entry of each variable. The lookup of the
        bindings list happens immediately; the per-binding flattening is lazy.
        """
        rows = bindings_dict["results"]["bindings"]
        return (
            {name: term["value"] for name, term in row.items()}
            for row in rows
        )
|
||
|
||
class SPARQLWrapperStrategy(SPARQLStrategy):
    """Strategy that runs queries through a ``SPARQLWrapper`` instance."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # A single wrapper is created for the endpoint and reused by every
        # query() call; it is configured to return JSON.
        self._sparql_wrapper = wrapper = SPARQLWrapper(self.endpoint)
        wrapper.setReturnFormat(JSON)

    def query(self, sparql_query: str) -> Iterator[dict[str, str]]:
        """Run ``sparql_query`` via SPARQLWrapper and return flattened bindings.

        The query text is set on the shared wrapper, executed, and the
        converted result is passed to ``_get_bindings_from_bindings_dict``.
        """
        wrapper = self._sparql_wrapper
        wrapper.setQuery(sparql_query)

        converted = wrapper.query().convert()
        return self._get_bindings_from_bindings_dict(converted)
|
||
|
||
class HttpxStrategy(SPARQLStrategy):
    """Strategy that queries the endpoint with plain ``httpx`` requests."""

    def query(self, sparql_query: str) -> Iterator[dict[str, str]]:
        """Run ``sparql_query`` over HTTP and return flattened bindings.

        The response body is decoded as JSON and passed to
        ``_get_bindings_from_bindings_dict``.

        Raises:
            httpx.HTTPStatusError: If the endpoint answers with a
                non-success status code.
        """
        response = self._httpx_run_sparql_query(sparql_query)
        return self._get_bindings_from_bindings_dict(response.json())

    def _httpx_run_sparql_query(
        self, query: str, headers: dict | None = None
    ) -> httpx.Response:
        """POST ``query`` to ``self.endpoint`` and return the response.

        Args:
            query: The SPARQL query text, sent as the ``query`` form field
                together with ``output=json``.
            headers: Request headers. When ``None``, an ``Accept`` header
                asking for ``application/sparql-results+json`` is sent.

        Returns:
            The successful ``httpx.Response``.

        Raises:
            httpx.HTTPStatusError: If the endpoint answers with a
                non-success status code.
        """
        if headers is None:
            headers = {"Accept": "application/sparql-results+json"}

        response = httpx.post(
            self.endpoint,
            headers=headers,
            data={"output": "json", "query": query},
        )

        # Surface HTTP-level failures here instead of letting an error page
        # reach the JSON decoding / binding extraction, where it would show
        # up as an unrelated decode error or KeyError.
        response.raise_for_status()

        return response