Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse complete SPARQL queries using ANTLR #790

Merged
merged 28 commits into from
Sep 16, 2022
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
b5823f5
Parse `ConstructQuery` and `Query` rules in ANTLR
Qup42 Sep 15, 2022
f6d0ca5
Add actual location of EXPECT failures for `ParseExceptionTest.cpp`
Qup42 Sep 15, 2022
0692f13
Generalize code
Qup42 Sep 15, 2022
780b6de
Code Review
Qup42 Sep 15, 2022
bf2ae75
Make Parsing of Sparql Query a static function of `SparqlParser`
Qup42 Sep 15, 2022
7a2e2b8
Inline `parseWithAntlr` in `SparqlParser`
Qup42 Sep 15, 2022
1f3c17f
Purge obsolete Code
Qup42 Sep 15, 2022
7940e12
Hopefully fix build for gcc12
Qup42 Sep 15, 2022
e62dbfa
Fix tests
Qup42 Sep 16, 2022
5520f34
Extract visible Variables into common base Class
Qup42 Sep 16, 2022
2b3b92c
Shorten test
Qup42 Sep 16, 2022
5d3166e
Generalize ParsedQuery for VisibleVariables being in all Clauses
Qup42 Sep 16, 2022
950b258
Enable more Checks for `ParsedQuery`
Qup42 Sep 16, 2022
ebd7ebc
Extract code
Qup42 Sep 16, 2022
6604b17
Code Review
Qup42 Sep 16, 2022
8ceca1e
Code Review
Qup42 Sep 16, 2022
7e31e9b
Add tests for `Query`
Qup42 Sep 16, 2022
ccde5e4
Code Review
Qup42 Sep 16, 2022
cacb100
Fix AD_CHECK
Qup42 Sep 16, 2022
886c1ef
Fix e2e tests
Qup42 Sep 16, 2022
7d52db2
Finally fix e2e tests
Qup42 Sep 16, 2022
d559fae
Apply suggestions from code review
Qup42 Sep 16, 2022
4d455b0
Code Review
Qup42 Sep 16, 2022
d77e5b7
Really fix e2e tests
Qup42 Sep 16, 2022
5ac04ef
Add Tests for `ConstructQuery` and complete Tests for `Query`
Qup42 Sep 16, 2022
d79707f
Re-add expression descriptor to Exception message in ParsedQuery checks
Qup42 Sep 16, 2022
a5629d8
Resolve introduced TODO
Qup42 Sep 16, 2022
c348f1e
Add extremely basic tests for visible Variables
Qup42 Sep 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions src/engine/Server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,8 +437,8 @@ Awaitable<json> Server::composeResponseQleverJson(
query.hasSelectClause()
? qet.writeResultAsQLeverJson(query.selectClause(), limit, offset,
std::move(resultTable))
: qet.writeRdfGraphJson(query.constructClause(), limit, offset,
std::move(resultTable));
: qet.writeRdfGraphJson(query.constructClause().triples_, limit,
offset, std::move(resultTable));
requestTimer.stop();
}
j["resultsize"] = query.hasSelectClause() ? resultSize : j["res"].size();
Expand Down Expand Up @@ -510,11 +510,11 @@ Server::composeResponseSepValues(const ParsedQuery& query,
auto compute = [&] {
size_t limit = query._limitOffset._limit;
size_t offset = query._limitOffset._offset;
return query.hasSelectClause()
? qet.generateResults<format>(query.selectClause(), limit,
offset)
: qet.writeRdfGraphSeparatedValues<format>(
query.constructClause(), limit, offset, qet.getResult());
return query.hasSelectClause() ? qet.generateResults<format>(
query.selectClause(), limit, offset)
: qet.writeRdfGraphSeparatedValues<format>(
query.constructClause().triples_,
limit, offset, qet.getResult());
};
return computeInNewThread(compute);
}
Expand All @@ -530,8 +530,8 @@ ad_utility::streams::stream_generator Server::composeTurtleResponse(
}
size_t limit = query._limitOffset._limit;
size_t offset = query._limitOffset._offset;
return qet.writeRdfGraphTurtle(query.constructClause(), limit, offset,
qet.getResult());
return qet.writeRdfGraphTurtle(query.constructClause().triples_, limit,
offset, qet.getResult());
}

// _____________________________________________________________________________
Expand Down
6 changes: 5 additions & 1 deletion src/parser/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ add_library(parser
SparqlParserHelpers.h SparqlParserHelpers.cpp
TripleComponent.h
GraphPatternOperation.cpp
PropertyPath.h PropertyPath.cpp Alias.h data/SolutionModifiers.h data/LimitOffsetClause.h data/SparqlFilter.h data/SparqlFilter.cpp data/OrderKey.h data/GroupKey.h ParseException.cpp SelectClause.cpp SelectClause.h GraphPatternOperation.cpp GraphPatternOperation.h GraphPattern.cpp GraphPattern.h)
PropertyPath.h PropertyPath.cpp Alias.h data/SolutionModifiers.h
data/LimitOffsetClause.h data/SparqlFilter.h data/SparqlFilter.cpp
data/OrderKey.h data/GroupKey.h ParseException.cpp SelectClause.cpp
SelectClause.h GraphPatternOperation.cpp GraphPatternOperation.h
GraphPattern.cpp GraphPattern.h ConstructClause.h)
target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2 absl::flat_hash_map util)

30 changes: 30 additions & 0 deletions src/parser/ConstructClause.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright 2022, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Julian Mundhahs ([email protected])

#pragma once

#include "parser/SelectClause.h"
#include "parser/data/Types.h"

namespace parsedQuery {
// The clause of a CONSTRUCT query. It stores the triples of the CONSTRUCT
// template in `triples_`; everything else is inherited from `ClauseBase`.
struct ConstructClause : ClauseBase {
  // The triples that make up the template of the CONSTRUCT query.
  ad_utility::sparql_types::Triples triples_;

  // Creates a clause with default-initialized `triples_`.
  ConstructClause() = default;

  // Creates a clause that takes ownership of the given `triples`.
  explicit ConstructClause(ad_utility::sparql_types::Triples triples)
      : triples_(std::move(triples)) {}

  // Lazily yields every `Variable` that occurs in `triples_`, visiting the
  // triples in order and the components of each triple in order. No
  // deduplication is performed: a variable that occurs several times is
  // yielded once per occurrence.
  cppcoro::generator<const Variable> containedVariables() const {
    for (const auto& currentTriple : triples_) {
      for (const auto& component : currentTriple) {
        const auto* maybeVariable = std::get_if<Variable>(&component);
        // Components that do not hold a `Variable` are skipped.
        if (maybeVariable == nullptr) {
          continue;
        }
        co_yield *maybeVariable;
      }
    }
  }
};
} // namespace parsedQuery
91 changes: 44 additions & 47 deletions src/parser/ParsedQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ string ParsedQuery::asString() const {
os << "{";
}
} else if (hasConstructClause()) {
const auto& constructClause = this->constructClause();
const auto& constructClause = this->constructClause().triples_;
os << "\n CONSTRUCT {\n\t";
for (const auto& triple : constructClause) {
os << triple[0].toSparql();
Expand Down Expand Up @@ -120,40 +120,34 @@ Variable ParsedQuery::addInternalBind(

// ________________________________________________________________________
void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
auto checkVariableIsVisible = [this](const Variable& var,
const std::string& locationDescription) {
if (!ad_utility::contains(getVisibleVariables(), var)) {
throw ParseException("Variable " + var.name() + " was used in " +
locationDescription +
" is not visible in the Query Body.");
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
}
};
auto checkUsedVariablesAreVisible =
[this](const sparqlExpression::SparqlExpressionPimpl& expression,
const std::string& locationDescription) {
[&checkVariableIsVisible](
const sparqlExpression::SparqlExpressionPimpl& expression,
const std::string& locationDescription) {
for (const auto* var : expression.containedVariables()) {
// TODO: think of a solution to make this work with ConstructClause as
// well.
if (!ad_utility::contains(selectClause().getVisibleVariables(),
*var)) {
throw ParseException("Variable " + var->name() + " used in " +
locationDescription +
expression.getDescriptor() +
" is not visible in the Query Body.");
}
checkVariableIsVisible(*var, locationDescription);
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
}
};

// Process groupClause
// TODO<qup42, joka921> Check that all variables that are part of an
// expression that is grouped on are visible in the Query Body.
auto processVariable = [this](const Variable& groupKey) {
// TODO: implement for `ConstructClause`
if (hasSelectClause()) {
if (!ad_utility::contains(selectClause().getVisibleVariables(),
groupKey)) {
throw ParseException(
"Variable " + groupKey.name() +
" was used in an GROUP BY but is not visible in the query body.");
}
}
auto processVariable = [this,
&checkVariableIsVisible](const Variable& groupKey) {
checkVariableIsVisible(groupKey, "GROUP BY");

_groupByVariables.emplace_back(groupKey.name());
};
auto processExpression =
[this](sparqlExpression::SparqlExpressionPimpl groupKey) {
[this, &checkUsedVariablesAreVisible](
sparqlExpression::SparqlExpressionPimpl groupKey) {
checkUsedVariablesAreVisible(groupKey, "Group Key");
auto helperTarget = addInternalBind(std::move(groupKey));
_groupByVariables.emplace_back(helperTarget.name());
};
Expand All @@ -176,18 +170,22 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
_havingClauses = std::move(modifiers.havingClauses_);
Qup42 marked this conversation as resolved.
Show resolved Hide resolved

// Process orderClause
// TODO<qup42, joka921> Check that all variables that are part of an
// expression that is ordered on are visible in the Query Body.
auto processVariableOrderKey = [this](VariableOrderKey orderKey) {
auto processVariableOrderKey = [this, &checkVariableIsVisible](
VariableOrderKey orderKey) {
// Check whether grouping is done. The variable being ordered by
// must then be either grouped or the result of an alias in the select.
const vector<Variable>& groupByVariables = _groupByVariables;
if (!groupByVariables.empty() &&
!ad_utility::contains(groupByVariables, orderKey.variable_) &&
!ad_utility::contains_if(selectClause().getAliases(),
[&orderKey](const Alias& alias) {
return alias._target == orderKey.variable_;
})) {
if (groupByVariables.empty()) {
checkVariableIsVisible(orderKey.variable_, "ORDERY BY");
} else if (!ad_utility::contains(groupByVariables, orderKey.variable_) &&
// `ConstructClause` has no Aliases. So the variable can never be
// the result of an Alias.
(hasConstructClause() ||
!ad_utility::contains_if(selectClause().getAliases(),
[&orderKey](const Alias& alias) {
return alias._target ==
orderKey.variable_;
}))) {
throw ParseException(
"Variable " + orderKey.variable_.name() +
" was used in an ORDER BY "
Expand All @@ -201,8 +199,10 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
// QLever currently only supports ordering by variables. To allow
// all `orderConditions`, the corresponding expression is bound to a new
// internal variable. Ordering is then done by this variable.
auto processExpressionOrderKey = [this](ExpressionOrderKey orderKey) {
if (!_groupByVariables.empty())
auto processExpressionOrderKey = [this, &checkUsedVariablesAreVisible](
ExpressionOrderKey orderKey) {
checkUsedVariablesAreVisible(orderKey.expression_, "Order Key");
if (!_groupByVariables.empty()) {
// TODO<qup42> Implement this by adding a hidden alias in the
// SELECT clause.
throw ParseException(
Expand All @@ -212,6 +212,7 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
"\"). Please assign this expression to a "
"new variable in the SELECT clause and then order by this "
"variable.");
}
auto additionalVariable = addInternalBind(std::move(orderKey.expression_));
_orderBy.emplace_back(additionalVariable, orderKey.isDescending_);
};
Expand Down Expand Up @@ -301,17 +302,13 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
return;
}

for (const auto& triple : constructClause()) {
for (const auto& varOrTerm : triple) {
if (auto variable = std::get_if<Variable>(&varOrTerm)) {
if (!ad_utility::contains(_groupByVariables, *variable)) {
throw ParseException("Variable " + variable->name() +
" is used but not "
"aggregated despite the query not being "
"grouped by " +
variable->name() + ".");
}
}
for (const auto& variable : constructClause().containedVariables()) {
if (!ad_utility::contains(_groupByVariables, variable)) {
throw ParseException("Variable " + variable.name() +
" is used but not "
"aggregated despite the query not being "
"grouped by " +
variable.name() + ".");
}
}
}
Expand Down
65 changes: 34 additions & 31 deletions src/parser/ParsedQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,28 @@
#include <variant>
#include <vector>

#include "../engine/ResultType.h"
#include "../engine/sparqlExpressions/SparqlExpressionPimpl.h"
#include "../util/Algorithm.h"
#include "../util/Exception.h"
#include "../util/HashMap.h"
#include "../util/OverloadCallOperator.h"
#include "../util/StringUtils.h"
#include "./TripleComponent.h"
#include "Alias.h"
#include "ParseException.h"
#include "PropertyPath.h"
#include "data/GroupKey.h"
#include "data/LimitOffsetClause.h"
#include "data/OrderKey.h"
#include "data/SolutionModifiers.h"
#include "data/SparqlFilter.h"
#include "data/Types.h"
#include "data/VarOrTerm.h"
#include "engine/ResultType.h"
#include "engine/sparqlExpressions/SparqlExpressionPimpl.h"
#include "parser/Alias.h"
#include "parser/ConstructClause.h"
#include "parser/GraphPattern.h"
#include "parser/GraphPatternOperation.h"
#include "parser/ParseException.h"
#include "parser/PropertyPath.h"
#include "parser/SelectClause.h"
#include "parser/TripleComponent.h"
#include "parser/data/GroupKey.h"
#include "parser/data/LimitOffsetClause.h"
#include "parser/data/OrderKey.h"
#include "parser/data/SolutionModifiers.h"
#include "parser/data/SparqlFilter.h"
#include "parser/data/Types.h"
#include "parser/data/VarOrTerm.h"
#include "util/Algorithm.h"
#include "util/Exception.h"
#include "util/HashMap.h"
#include "util/OverloadCallOperator.h"
#include "util/StringUtils.h"

using std::string;
using std::vector;
Expand Down Expand Up @@ -88,7 +89,7 @@ class ParsedQuery {

using SelectClause = parsedQuery::SelectClause;

using ConstructClause = ad_utility::sparql_types::Triples;
using ConstructClause = parsedQuery::ConstructClause;

ParsedQuery() = default;

Expand Down Expand Up @@ -131,22 +132,24 @@ class ParsedQuery {
return std::get<ConstructClause>(_clause);
}

// Add a variable, that was found in the SubQuery body, when query has a
// Select Clause.
bool registerVariableVisibleInQueryBody(const Variable& variable) {
if (!hasSelectClause()) return false;
selectClause().addVisibleVariable(variable);
return true;
// Add a variable that was found in the SubQuery body.
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
void registerVariableVisibleInQueryBody(const Variable& variable) {
auto addVariable = [&variable](auto& clause) {
clause.addVisibleVariable(variable);
};
std::visit(addVariable, _clause);
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
}

// Add variables, that were found in the SubQuery body, when query has a
// Select Clause.
bool registerVariablesVisibleInQueryBody(const vector<Variable>& variables) {
if (!hasSelectClause()) return false;
// Add variables that were found in the SubQuery body.
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
void registerVariablesVisibleInQueryBody(const vector<Variable>& variables) {
for (const auto& var : variables) {
selectClause().addVisibleVariable(var);
registerVariableVisibleInQueryBody(var);
}
return true;
}

// Returns all variables that are visible in the Query Body.
const std::vector<Variable>& getVisibleVariables() {
return std::visit(&parsedQuery::ClauseBase::getVisibleVariables, _clause);
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
}

auto& children() { return _rootGraphPattern._graphPatterns; }
Expand Down
23 changes: 12 additions & 11 deletions src/parser/SelectClause.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,18 @@
#include "util/OverloadCallOperator.h"

using namespace parsedQuery;

// ____________________________________________________________________
void ClauseBase::addVisibleVariable(const Variable& variable) {
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
if (!ad_utility::contains(visibleVariables_, variable)) {
visibleVariables_.emplace_back(variable);
}
}

// Read-only access to `visibleVariables_`.
const vector<Variable>& ClauseBase::getVisibleVariables() {
  return this->visibleVariables_;
}

// ____________________________________________________________________
[[nodiscard]] bool SelectClause::isAsterisk() const {
return std::holds_alternative<Asterisk>(varsAndAliasesOrAsterisk_);
Expand Down Expand Up @@ -42,17 +54,6 @@ void SelectClause::setSelected(std::vector<Variable> variables) {
setSelected(v);
}

// ____________________________________________________________________
void SelectClause::addVisibleVariable(const Variable& variable) {
if (!ad_utility::contains(visibleVariables_, variable)) {
visibleVariables_.emplace_back(variable);
}
}

const vector<Variable>& SelectClause::getVisibleVariables() {
return visibleVariables_;
}

// ____________________________________________________________________
[[nodiscard]] const std::vector<Variable>& SelectClause::getSelectedVariables()
const {
Expand Down
Loading