From 7110e270dd755196918b20018ff9908cadea4b3e Mon Sep 17 00:00:00 2001 From: Lukas Plank Date: Mon, 2 Dec 2024 12:06:10 +0100 Subject: [PATCH 1/2] test: implement basic tests for query injections with prefixes --- tests/unit/test_inject_subquery.py | 122 +++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 tests/unit/test_inject_subquery.py diff --git a/tests/unit/test_inject_subquery.py b/tests/unit/test_inject_subquery.py new file mode 100644 index 0000000..e95494d --- /dev/null +++ b/tests/unit/test_inject_subquery.py @@ -0,0 +1,122 @@ +"""Unit tests for inject_subquery.""" + +from typing import NamedTuple + +import pytest +from rdfproxy.utils.sparql_utils import inject_subquery + + +class InjectSubqueryParameter(NamedTuple): + query: str + subquery: str + expected: str + + +inject_subquery_parameters = [ + InjectSubqueryParameter( + query="select * where {?s ?p ?o .}", + subquery="select * where {?s ?p ?o .}", + expected="select * where {?s ?p ?o . {select * where {?s ?p ?o .}} }", + ), + InjectSubqueryParameter( + query="select * where {?s ?p ?o .}", + subquery="prefix : select * where {?s ?p ?o .}", + expected="select * where {?s ?p ?o . {select * where {?s ?p ?o .}} }", + ), + InjectSubqueryParameter( + query="prefix : select * where {?s ?p ?o .}", + subquery="prefix : select * where {?s ?p ?o .}", + expected="prefix : select * where {?s ?p ?o . {select * where {?s ?p ?o .}} }", + ), + InjectSubqueryParameter( + query="PREFIX crm: select * where {?s ?p ?o .}", + subquery="select * where {?s ?p ?o .}", + expected="PREFIX crm: select * where {?s ?p ?o . {select * where {?s ?p ?o .}} }", + ), + InjectSubqueryParameter( + query=""" + PREFIX crm: + PREFIX lrmoo: + PREFIX star: + PREFIX skos: + PREFIX r11: + PREFIX r11pros: + + SELECT + ?location + ?location__location_descriptive_name + + WHERE { + ?location a crm:E53_Place. + + ?location crm:P3_has_note ?location__location_descriptive_name. 
+ } + """, + subquery="select * where {?s ?p ?o .}", + expected=""" + PREFIX crm: + PREFIX lrmoo: + PREFIX star: + PREFIX skos: + PREFIX r11: + PREFIX r11pros: + + SELECT + ?location + ?location__location_descriptive_name + + WHERE { + ?location a crm:E53_Place. + + ?location crm:P3_has_note ?location__location_descriptive_name. + {select * where {?s ?p ?o .}} } + """, + ), + InjectSubqueryParameter( + query=""" + PREFIX : + PREFIX crm: + PREFIX lrmoo: + PREFIX star: + PREFIX skos: + PREFIX r11: + PREFIX r11pros: + + SELECT + ?location + ?location__location_descriptive_name + + WHERE { + ?location a crm:E53_Place. + + ?location crm:P3_has_note ?location__location_descriptive_name. + } + """, + subquery="select * where {?s ?p ?o .}", + expected=""" + PREFIX : + PREFIX crm: + PREFIX lrmoo: + PREFIX star: + PREFIX skos: + PREFIX r11: + PREFIX r11pros: + + SELECT + ?location + ?location__location_descriptive_name + + WHERE { + ?location a crm:E53_Place. + + ?location crm:P3_has_note ?location__location_descriptive_name. + {select * where {?s ?p ?o .}} } + """, + ), +] + + +@pytest.mark.parametrize(["query", "subquery", "expected"], inject_subquery_parameters) +def test_inject_subquery(query, subquery, expected): + injected = inject_subquery(query=query, subquery=subquery) + assert injected == expected From 16c2970f27b56c5f0f07df478e4b1b0483726ed2 Mon Sep 17 00:00:00 2001 From: Lukas Plank Date: Mon, 2 Dec 2024 12:07:37 +0100 Subject: [PATCH 2/2] fix: remove SPARQL prefixes from subqueries before query injections Subquery injections did not remove SPARQL prefixes before injection, resulting in invalid SPARQL. The change defines a regex for removing PREFIX definitions and strips the subquery before injection. Fixes #154. 
def _remove_sparql_prefixes(query: str) -> str:
    """Remove SPARQL PREFIX declarations from a query.

    This is needed for subquery injection, because subqueries cannot have
    prefixes. Note that this is not generic: all prefixes are simply cut from
    the subquery and do not get appended to the outer query prefixes.

    The removal is regex based and does not parse the query, so text that
    looks like a PREFIX declaration inside a string literal or a comment is
    removed as well.

    Args:
        query: The SPARQL query string to strip.

    Returns:
        The query without PREFIX declarations and without leading or
        trailing whitespace.
    """
    prefix_pattern = re.compile(
        # Do not match inside a variable name (?prefix / $prefix) or a longer word.
        r"(?<![?$\w])PREFIX\s+"
        # Prefix label: may be empty and may contain '-' and '.' besides word chars.
        r"[\w.-]*:"
        # Any amount of whitespace (including newlines) may precede the IRI,
        # and the IRI itself may be empty ('<>').
        r"\s*<[^>]*>\s*",
        flags=re.I,
    )
    return prefix_pattern.sub("", query).strip()


def inject_subquery(query: str, subquery: str) -> str:
    """Inject a SPARQL query with a subquery.

    The subquery is wrapped in curly braces and inserted right before the
    last closing brace of the outer query. PREFIX declarations are removed
    from the subquery first, because a subquery must not carry a prologue.

    Args:
        query: The outer SPARQL query.
        subquery: The SPARQL query to inject as a subquery.

    Returns:
        The outer query with the brace-wrapped subquery injected.

    Raises:
        QueryConstructionException: If the outer query contains no closing
            brace to inject before.
    """
    # Locate the last '}' of the outer query: a brace followed only by
    # non-brace characters up to the end of the string.
    if (tail := re.search(r"}[^}]*\Z", query)) is None:
        raise QueryConstructionException("Unable to inject subquery.")

    tail_index: int = tail.start()
    cleaned_subquery: str = _remove_sparql_prefixes(subquery)
    injected: str = (
        f"{query[:tail_index]} {{{cleaned_subquery}}} {query[tail_index:]}"
    )
    return injected