Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse complete SPARQL queries using ANTLR #790

Merged
merged 28 commits into from
Sep 16, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
b5823f5
Parse `ConstructQuery` and `Query` rules in ANTLR
Qup42 Sep 15, 2022
f6d0ca5
Add actual location of EXPECT failures for `ParseExceptionTest.cpp`
Qup42 Sep 15, 2022
0692f13
Generalize code
Qup42 Sep 15, 2022
780b6de
Code Review
Qup42 Sep 15, 2022
bf2ae75
Make Parsing of Sparql Query a static function of `SparqlParser`
Qup42 Sep 15, 2022
7a2e2b8
Inline `parseWithAntlr` in `SparqlParser`
Qup42 Sep 15, 2022
1f3c17f
Purge obsolete Code
Qup42 Sep 15, 2022
7940e12
Hopefully fix build for gcc12
Qup42 Sep 15, 2022
e62dbfa
Fix tests
Qup42 Sep 16, 2022
5520f34
Extract visible Variables into common base Class
Qup42 Sep 16, 2022
2b3b92c
Shorten test
Qup42 Sep 16, 2022
5d3166e
Generalize ParsedQuery for VisibleVariables being in all Clauses
Qup42 Sep 16, 2022
950b258
Enable more Checks for `ParsedQuery`
Qup42 Sep 16, 2022
ebd7ebc
Extract code
Qup42 Sep 16, 2022
6604b17
Code Review
Qup42 Sep 16, 2022
8ceca1e
Code Review
Qup42 Sep 16, 2022
7e31e9b
Add tests for `Query`
Qup42 Sep 16, 2022
ccde5e4
Code Review
Qup42 Sep 16, 2022
cacb100
Fix AD_CHECK
Qup42 Sep 16, 2022
886c1ef
Fix e2e tests
Qup42 Sep 16, 2022
7d52db2
Finally fix e2e tests
Qup42 Sep 16, 2022
d559fae
Apply suggestions from code review
Qup42 Sep 16, 2022
4d455b0
Code Review
Qup42 Sep 16, 2022
d77e5b7
Really fix e2e tests
Qup42 Sep 16, 2022
5ac04ef
Add Tests for `ConstructQuery` and complete Tests for `Query`
Qup42 Sep 16, 2022
d79707f
Re-add expression descriptor to Exception message in ParsedQuery checks
Qup42 Sep 16, 2022
a5629d8
Resolve introduced TODO
Qup42 Sep 16, 2022
c348f1e
Add extremely basic tests for visible Variables
Qup42 Sep 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion e2e/scientists_queries.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1120,7 +1120,7 @@ queries:
checks:
- num_cols: 2
- num_rows: 11
- selected: ["?x", "?t"]
- selected: [ "?x", "?t", "?ql_textscore_t" ]
- contains_row: ["<Grete_Hermann>","Hermann's algorithm for primary decomposition is still in use now."]


Expand Down
8 changes: 4 additions & 4 deletions src/parser/ConstructClause.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@ struct ConstructClause : ClauseBase {
explicit ConstructClause(ad_utility::sparql_types::Triples triples)
: triples_(std::move(triples)) {}

vector<const Variable*> containedVariables() {
vector<const Variable*> out;
// Yields all variables that appear in this `ConstructClause`. Variables that
// appear multiple times are also yielded multiple times.
cppcoro::generator<const Variable> containedVariables() const {
for (const auto& triple : triples_) {
for (const auto& varOrTerm : triple) {
if (auto variable = std::get_if<Variable>(&varOrTerm)) {
out.emplace_back(variable);
co_yield *variable;
}
}
}
return out;
}
};
} // namespace parsedQuery
60 changes: 32 additions & 28 deletions src/parser/ParsedQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,26 +120,27 @@ Variable ParsedQuery::addInternalBind(

// ________________________________________________________________________
void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
auto checkVariableIsVisible = [this](const Variable& var,
const std::string& locationDescription) {
if (!ad_utility::contains(getVisibleVariables(), var)) {
throw ParseException("Variable " + var.name() + " was used in " +
locationDescription +
" is not visible in the Query Body.");
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
}
};
auto checkUsedVariablesAreVisible =
[this](const sparqlExpression::SparqlExpressionPimpl& expression,
const std::string& locationDescription) {
[&checkVariableIsVisible](
const sparqlExpression::SparqlExpressionPimpl& expression,
const std::string& locationDescription) {
for (const auto* var : expression.containedVariables()) {
if (!ad_utility::contains(getVisibleVariables(), *var)) {
throw ParseException("Variable " + var->name() + " used in " +
locationDescription +
expression.getDescriptor() +
" is not visible in the Query Body.");
}
checkVariableIsVisible(*var, locationDescription);
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
}
};

// Process groupClause
auto processVariable = [this](const Variable& groupKey) {
if (!ad_utility::contains(getVisibleVariables(), groupKey)) {
throw ParseException(
"Variable " + groupKey.name() +
" was used in an GROUP BY but is not visible in the query body.");
}
auto processVariable = [this,
&checkVariableIsVisible](const Variable& groupKey) {
checkVariableIsVisible(groupKey, "GROUP BY");

_groupByVariables.emplace_back(groupKey.name());
};
Expand Down Expand Up @@ -169,19 +170,22 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
_havingClauses = std::move(modifiers.havingClauses_);
Qup42 marked this conversation as resolved.
Show resolved Hide resolved

// Process orderClause
// TODO<qup42, joka921> Check that all variables that are part of an
// expression that is ordered on are visible in the Query Body.
auto processVariableOrderKey = [this](VariableOrderKey orderKey) {
auto processVariableOrderKey = [this, &checkVariableIsVisible](
VariableOrderKey orderKey) {
// Check whether grouping is done. The variable being ordered by
// must then be either grouped or the result of an alias in the select.
const vector<Variable>& groupByVariables = _groupByVariables;
if (!groupByVariables.empty() &&
!ad_utility::contains(groupByVariables, orderKey.variable_) &&
(hasConstructClause() ||
!ad_utility::contains_if(selectClause().getAliases(),
[&orderKey](const Alias& alias) {
return alias._target == orderKey.variable_;
}))) {
if (groupByVariables.empty()) {
checkVariableIsVisible(orderKey.variable_, "ORDERY BY");
} else if (!ad_utility::contains(groupByVariables, orderKey.variable_) &&
// `ConstructClause` has no Aliases. So the variable can never be
// the result of an Alias.
(hasConstructClause() ||
!ad_utility::contains_if(selectClause().getAliases(),
[&orderKey](const Alias& alias) {
return alias._target ==
orderKey.variable_;
}))) {
throw ParseException(
"Variable " + orderKey.variable_.name() +
" was used in an ORDER BY "
Expand Down Expand Up @@ -298,13 +302,13 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
return;
}

for (const auto* variable : constructClause().containedVariables()) {
if (!ad_utility::contains(_groupByVariables, *variable)) {
throw ParseException("Variable " + variable->name() +
for (const auto& variable : constructClause().containedVariables()) {
if (!ad_utility::contains(_groupByVariables, variable)) {
throw ParseException("Variable " + variable.name() +
" is used but not "
"aggregated despite the query not being "
"grouped by " +
variable->name() + ".");
variable.name() + ".");
}
}
}
Expand Down
11 changes: 2 additions & 9 deletions src/parser/ParsedQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ class ParsedQuery {
auto addVariable = [&variable](auto& clause) {
clause.addVisibleVariable(variable);
};
std::visit(ad_utility::OverloadCallOperator{addVariable}, _clause);
std::visit(addVariable, _clause);
}

// Add variables, that were found in the SubQuery body.
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -149,14 +149,7 @@ class ParsedQuery {

// Returns all variables that are visible in the Query Body.
const std::vector<Variable>& getVisibleVariables() {
// TODO: clang-tidy was complaining with visit.
if (hasSelectClause()) {
return selectClause().getVisibleVariables();
} else if (hasConstructClause()) {
return constructClause().getVisibleVariables();
} else {
AD_FAIL();
}
return std::visit(&parsedQuery::ClauseBase::getVisibleVariables, _clause);
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
}

auto& children() { return _rootGraphPattern._graphPatterns; }
Expand Down
11 changes: 4 additions & 7 deletions src/parser/SparqlParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,10 @@ ParsedQuery SparqlParser::parseQuery(std::string query) {
std::move(query),
{{INTERNAL_PREDICATE_PREFIX_NAME, INTERNAL_PREDICATE_PREFIX_IRI}}};
auto resultOfParseAndRemainingText = p.parseTypesafe(&AntlrParser::query);
if (!resultOfParseAndRemainingText.remainingText_.empty()) {
// TODO: add Exception Metadata
throw ParseException(
"A query was successfully parsed, but the following remainder of the "
"input was ignored by the parsed: " +
resultOfParseAndRemainingText.remainingText_);
}
// The query rule ends with <EOF> so the parse always has to consume the whole
// input. If this is not the case a ParseException should have been thrown at
// an earlier point.
AD_CHECK(resultOfParseAndRemainingText.remainingText_.empty());
return std::move(resultOfParseAndRemainingText.resultOfParse_);
}

Expand Down
8 changes: 6 additions & 2 deletions src/parser/SparqlParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@

using std::string;

// A simple parser of SPARQL.
// No supposed to feature the complete query language.
// The SPARQL parser used by QLever. The actual parsing is delegated to a parser
// that is based on ANTLR4, which recognises the complete SPARQL 1.1 QL grammar.
// For valid SPARQL queries that are not supported by QLever a reasonable error
// message is given. The only thing that is still parsed manually by this class
// are FILTER clauses, because QLever doesn't support arbitrary expressions in
// filters yet.
class SparqlParser {
public:
explicit SparqlParser(const string& query);
Expand Down
18 changes: 15 additions & 3 deletions src/parser/sparqlParser/SparqlQleverVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,7 @@ PathTuples joinPredicateAndObject(VarOrPath predicate, ObjectList objectList) {
// TODO The fulltext index should perform the splitting of its keywords,
// and not the SparqlParser.
if (PropertyPath* path = std::get_if<PropertyPath>(&predicate)) {
if (path->asString() == CONTAINS_WORD_PREDICATE ||
// TODO _NS no longer needed?
path->asString() == CONTAINS_WORD_PREDICATE_NS) {
if (path->asString() == CONTAINS_WORD_PREDICATE) {
if (const Literal* literal =
unwrapVariant<VarOrTerm, GraphTerm, Literal>(object)) {
object = Literal{stripAndLowercaseKeywordLiteral(literal->literal())};
Expand Down Expand Up @@ -947,13 +945,27 @@ Node Visitor::visitTypesafe(Parser::ObjectRContext* ctx) {
// ___________________________________________________________________________
vector<TripleWithPropertyPath> SparqlQleverVisitor::visitTypesafe(
SparqlAutomaticParser::TriplesSameSubjectPathContext* ctx) {
auto setTextscoreVisibleIfPresent = [this](VarOrTerm& subject,
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
VarOrPath& predicate) {
if (auto* var = std::get_if<Variable>(&subject)) {
if (auto* propertyPath = std::get_if<PropertyPath>(&predicate)) {
if (propertyPath->asString() == CONTAINS_ENTITY_PREDICATE ||
propertyPath->asString() == CONTAINS_WORD_PREDICATE) {
addVisibleVariable(
absl::StrCat(TEXTSCORE_VARIABLE_PREFIX, var->name().substr(1)));
}
}
}
};

if (ctx->varOrTerm()) {
vector<TripleWithPropertyPath> triples;
auto subject = visitTypesafe(ctx->varOrTerm());
auto tuples = visitTypesafe(ctx->propertyListPathNotEmpty());
for (auto& [predicate, object] : tuples) {
// TODO<clang,c++20> clang does not yet support emplace_back for
// aggregates.
setTextscoreVisibleIfPresent(subject, predicate);
triples.push_back(TripleWithPropertyPath{subject, std::move(predicate),
std::move(object)});
}
Expand Down
11 changes: 7 additions & 4 deletions test/SparqlAntlrParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -905,8 +905,6 @@ TEST(SparqlParser, SelectQuery) {
m::SelectQuery(m::Select({Variable{"?x"}}), DummyGraphPatternMatcher),
m::pq::Having({{SparqlFilter::FilterType::GT, "?x", "5"}}),
m::pq::OrderKeys({{Variable{"?y"}, false}})));
// TODO<qup42, joka921> enable Tests once the corresponding checks are
// implemented.

// Grouping by a variable or expression which contains a variable
// that is not visible in the query body is not allowed.
Expand All @@ -915,7 +913,7 @@ TEST(SparqlParser, SelectQuery) {
"SELECT (COUNT(?a) as ?d) WHERE { ?a ?b ?c } GROUP BY (?x - 10)");
// Ordering by a variable or expression which contains a variable that is not
// visible in the query body is not allowed.
// expectSelectQueryFails("SELECT ?a WHERE { ?a ?b ?c } ORDER BY ?x");
expectSelectQueryFails("SELECT ?a WHERE { ?a ?b ?c } ORDER BY ?x");
expectSelectQueryFails("SELECT ?a WHERE { ?a ?b ?c } ORDER BY (?x - 10)");
// All variables used in an aggregate must be visible in the query body.
expectSelectQueryFails(
Expand All @@ -936,9 +934,14 @@ TEST(SparqlParser, Query) {
auto expectQuery = ExpectCompleteParse<&Parser::query>{
{{INTERNAL_PREDICATE_PREFIX_NAME, INTERNAL_PREDICATE_PREFIX_IRI}}};
auto expectQueryFails = ExpectParseFails<&Parser::query>{};
// TODO: re-add simple tests that also assert the ParsedQuery.
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
// Test that `_originalString` is correctly set.
expectQuery("SELECT * WHERE { ?a <bar> ?foo }",
m::pq::OriginalString("SELECT * WHERE { ?a <bar> ?foo }"));
testing::AllOf(
m::SelectQuery(
m::AsteriskSelect(),
m::GraphPattern(m::Triples({{"?a", "<bar>", "?foo"}}))),
m::pq::OriginalString("SELECT * WHERE { ?a <bar> ?foo }")));
expectQuery("SELECT * WHERE { ?x ?y ?z }",
m::pq::OriginalString("SELECT * WHERE { ?x ?y ?z }"));
expectQuery(
Expand Down
2 changes: 1 addition & 1 deletion test/SparqlParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -856,7 +856,7 @@ TEST(ParserTest, testSolutionModifiers) {
{
auto pq = SparqlParser::parseQuery(
"SELECT DISTINCT ?x ?ql_textscore_x ?y WHERE \t {?x "
"<test:myrel> ?y}\n"
"ql:contains-entity ?y}\n"
"ORDER BY ASC(?y) DESC(?ql_textscore_x) LIMIT 10 OFFSET 15");
ASSERT_TRUE(pq.hasSelectClause());
const auto& selectClause = pq.selectClause();
Expand Down