Skip to content

Commit

Permalink
feat: implement strategies for SPARQL query functionality
Browse files Browse the repository at this point in the history
Currently, rdfproxy relies on SPARQLWrapper for querying triplestores;
this is not always ideal, since SPARQLWrapper occasionally gets
blacklisted (e.g. by Wikidata) and has caused severe performance issues
in the past.

The change introduces SPARQLQuery strategies for better control over
what query functionality should run in RDFProxy. The default strategy,
SPARQLWrapperStrategy, implements exactly the previous SPARQLWrapper behavior.
An HttpxStrategy implements the query functionality required by
RDFProxy using raw httpx instead of SPARQLWrapper.
  • Loading branch information
lu-pl committed Dec 10, 2024
1 parent 2d0bab9 commit 1959073
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 11 deletions.
20 changes: 9 additions & 11 deletions rdfproxy/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from SPARQLWrapper import JSON, SPARQLWrapper
from rdfproxy.mapper import ModelBindingsMapper
from rdfproxy.sparql_strategies import SPARQLStrategy, SPARQLWrapperStrategy
from rdfproxy.utils._types import _TModelInstance
from rdfproxy.utils.models import Page, QueryParameters
from rdfproxy.utils.sparql_utils import (
Expand All @@ -32,15 +33,16 @@ class SPARQLModelAdapter(Generic[_TModelInstance]):
"""

def __init__(
self, target: str | SPARQLWrapper, query: str, model: type[_TModelInstance]
self,
target: str,
query: str,
model: type[_TModelInstance],
sparql_strategy: SPARQLStrategy = SPARQLWrapperStrategy,
) -> None:
self._query = query
self._model = model

self.sparql_wrapper: SPARQLWrapper = (
SPARQLWrapper(target) if isinstance(target, str) else target
)
self.sparql_wrapper.setReturnFormat(JSON)
self.sparql_strategy = sparql_strategy(target)

def query(self, query_parameters: QueryParameters) -> Page[_TModelInstance]:
"""Run a query against an endpoint and return a Page model object."""
Expand All @@ -52,9 +54,7 @@ def query(self, query_parameters: QueryParameters) -> Page[_TModelInstance]:
offset=calculate_offset(query_parameters.page, query_parameters.size),
)

items_query_bindings: Iterator[dict] = query_with_wrapper(
query=items_query, sparql_wrapper=self.sparql_wrapper
)
items_query_bindings: Iterator[dict] = self.sparql_strategy.query(items_query)

mapper = ModelBindingsMapper(self._model, *items_query_bindings)

Expand All @@ -75,7 +75,5 @@ def _get_count(self, query: str) -> int:
Helper for SPARQLModelAdapter.query.
"""
result: Iterator[dict] = query_with_wrapper(
query=query, sparql_wrapper=self.sparql_wrapper
)
result: Iterator[dict] = self.sparql_strategy.query(query)
return int(next(result)["cnt"])
64 changes: 64 additions & 0 deletions rdfproxy/sparql_strategies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Strategy classes for SPARQL query functionality."""

import abc
from collections.abc import Iterator

from SPARQLWrapper import JSON, QueryResult, SPARQLWrapper
import httpx


class SPARQLStrategy(abc.ABC):
    """Abstract base for objects that run SPARQL queries against an endpoint.

    Subclasses implement ``query``; the endpoint handed to the constructor
    is kept on the instance as ``endpoint``.
    """

    def __init__(self, endpoint: str):
        """Store the endpoint that queries are sent to."""
        self.endpoint = endpoint

    @abc.abstractmethod
    def query(self, sparql_query: str) -> Iterator[dict[str, str]]:
        """Run ``sparql_query`` and return an iterator of flat binding dicts."""
        raise NotImplementedError

    @staticmethod
    def _get_bindings_from_bindings_dict(bindings_dict: dict) -> Iterator[dict]:
        """Flatten the bindings of a results dict.

        Reads ``bindings_dict["results"]["bindings"]`` and maps each binding
        to a dict of ``name -> term["value"]``. The lookup of the bindings
        sequence happens immediately; the per-row flattening is lazy.
        """
        rows = bindings_dict["results"]["bindings"]

        def _flatten(row: dict) -> dict:
            # Keep only the "value" entry of every bound term.
            return {name: term["value"] for name, term in row.items()}

        return map(_flatten, rows)


class SPARQLWrapperStrategy(SPARQLStrategy):
    """Strategy that runs queries through a SPARQLWrapper instance."""

    def __init__(self, *args, **kwargs):
        """Initialise the base strategy and build a SPARQLWrapper for the endpoint.

        The wrapper's return format is set to JSON.
        """
        super().__init__(*args, **kwargs)

        wrapper = SPARQLWrapper(self.endpoint)
        wrapper.setReturnFormat(JSON)
        self._sparql_wrapper = wrapper

    def query(self, sparql_query: str) -> Iterator[dict[str, str]]:
        """Run ``sparql_query`` via the wrapper and return flattened bindings."""
        wrapper = self._sparql_wrapper
        wrapper.setQuery(sparql_query)

        response: QueryResult = wrapper.query()
        bindings_dict = response.convert()
        return self._get_bindings_from_bindings_dict(bindings_dict)


class HttpxStrategy(SPARQLStrategy):
    """Strategy that sends SPARQL queries to the endpoint with plain httpx."""

    def query(self, sparql_query: str) -> Iterator[dict[str, str]]:
        """Run ``sparql_query`` and return an iterator of flat binding dicts.

        Raises:
            httpx.HTTPStatusError: if the endpoint answers with a 4xx/5xx
                status code.
        """
        response: httpx.Response = self._httpx_run_sparql_query(sparql_query)
        # Surface HTTP errors as such; without this check an error page would
        # only show up later as a JSON decoding error or a KeyError on
        # "results", hiding the actual cause.
        response.raise_for_status()
        return self._get_bindings_from_bindings_dict(response.json())

    def _httpx_run_sparql_query(
        self, query: str, headers: dict | None = None
    ) -> httpx.Response:
        """POST ``query`` to the endpoint and return the raw response.

        Args:
            query: The SPARQL query string, sent as the ``query`` form field.
            headers: Request headers; when ``None``, an ``Accept`` header
                asking for ``application/sparql-results+json`` is used.

        Returns:
            The httpx response, whatever its status code.
        """
        if headers is None:
            headers = {"Accept": "application/sparql-results+json"}

        # NOTE(review): "output" is presumably a vendor-specific hint for
        # endpoints that ignore the Accept header; confirm it is still needed.
        form_data = {"output": "json", "query": query}

        return httpx.post(self.endpoint, headers=headers, data=form_data)

0 comments on commit 1959073

Please sign in to comment.