Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse complete SPARQL queries using ANTLR #790

Merged
merged 28 commits into from
Sep 16, 2022
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
b5823f5
Parse `ConstructQuery` and `Query` rules in ANTLR
Qup42 Sep 15, 2022
f6d0ca5
Add actual location of EXPECT failures for `ParseExceptionTest.cpp`
Qup42 Sep 15, 2022
0692f13
Generalize code
Qup42 Sep 15, 2022
780b6de
Code Review
Qup42 Sep 15, 2022
bf2ae75
Make Parsing of Sparql Query a static function of `SparqlParser`
Qup42 Sep 15, 2022
7a2e2b8
Inline `parseWithAntlr` in `SparqlParser`
Qup42 Sep 15, 2022
1f3c17f
Purge obsolete Code
Qup42 Sep 15, 2022
7940e12
Hopefully fix build for gcc12
Qup42 Sep 15, 2022
e62dbfa
Fix tests
Qup42 Sep 16, 2022
5520f34
Extract visible Variables into common base Class
Qup42 Sep 16, 2022
2b3b92c
Shorten test
Qup42 Sep 16, 2022
5d3166e
Generalize ParsedQuery for VisibleVariables being in all Clauses
Qup42 Sep 16, 2022
950b258
Enable more Checks for `ParsedQuery`
Qup42 Sep 16, 2022
ebd7ebc
Extract code
Qup42 Sep 16, 2022
6604b17
Code Review
Qup42 Sep 16, 2022
8ceca1e
Code Review
Qup42 Sep 16, 2022
7e31e9b
Add tests for `Query`
Qup42 Sep 16, 2022
ccde5e4
Code Review
Qup42 Sep 16, 2022
cacb100
Fix AD_CHECK
Qup42 Sep 16, 2022
886c1ef
Fix e2e tests
Qup42 Sep 16, 2022
7d52db2
Finally fix e2e tests
Qup42 Sep 16, 2022
d559fae
Apply suggestions from code review
Qup42 Sep 16, 2022
4d455b0
Code Review
Qup42 Sep 16, 2022
d77e5b7
Really fix e2e tests
Qup42 Sep 16, 2022
5ac04ef
Add Tests for `ConstructQuery` and complete Tests for `Query`
Qup42 Sep 16, 2022
d79707f
Re-add expression descriptor to Exception message in ParsedQuery checks
Qup42 Sep 16, 2022
a5629d8
Resolve introduced TODO
Qup42 Sep 16, 2022
c348f1e
Add extremely basic tests for visible Variables
Qup42 Sep 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ if ("${CMAKE_GENERATOR}" STREQUAL "Ninja")
endif ()

if (("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") AND
(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "12") AND
(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "12.1"))
(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "12") AND
(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "12.2"))
message(STATUS "Adding -Wno-restrict for g++12.0 because of false positives")
add_compile_options(-Wno-restrict)
else()
Expand Down
2 changes: 1 addition & 1 deletion src/engine/Server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -646,7 +646,7 @@ boost::asio::awaitable<void> Server::processQuery(
<< (pinResult ? " [pin result]" : "")
<< (pinSubtrees ? " [pin subresults]" : "") << "\n"
<< query << std::endl;
ParsedQuery pq = SparqlParser(query).parse();
ParsedQuery pq = SparqlParser::parseQuery(query);

// The following code block determines the media type to be used for the
// result. The media type is either determined by the "Accept:" header of
Expand Down
54 changes: 38 additions & 16 deletions src/parser/ParsedQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,6 @@ using std::vector;
string ParsedQuery::asString() const {
std::ostringstream os;

// PREFIX
os << "PREFIX: {";
for (size_t i = 0; i < _prefixes.size(); ++i) {
os << "\n\t" << _prefixes[i].asString();
if (i + 1 < _prefixes.size()) {
os << ',';
}
}
os << "\n}";

bool usesSelect = hasSelectClause();
bool usesAsterisk = usesSelect && selectClause().isAsterisk();

Expand Down Expand Up @@ -130,6 +120,22 @@ Variable ParsedQuery::addInternalBind(

// ________________________________________________________________________
void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
auto checkUsedVariablesAreVisible =
[this](const sparqlExpression::SparqlExpressionPimpl& expression,
const std::string& locationDescription) {
for (const auto* var : expression.containedVariables()) {
// TODO: think of a solution to make this work with ConstructClause as
// well.
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
if (!ad_utility::contains(selectClause().getVisibleVariables(),
*var)) {
throw ParseException("Variable " + var->name() + " used in " +
locationDescription +
expression.getDescriptor() +
" is not visible in the Query Body.");
}
}
};

// Process groupClause
// TODO<qup42, joka921> Check that all variables that are part of an
// expression that is grouped on are visible in the Query Body.
Expand Down Expand Up @@ -219,6 +225,8 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
// Process limitOffsetClause
_limitOffset = modifiers.limitOffset_;

// Check that the query is valid

auto checkAliasOutNamesHaveNoOverlapWith =
[this](const auto& container, const std::string& message) {
for (const auto& alias : selectClause().getAliases()) {
Expand All @@ -228,8 +236,6 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
}
};

// Check that the query is valid

if (hasSelectClause()) {
if (!_groupByVariables.empty()) {
ad_utility::HashSet<string> groupVariables{};
Expand Down Expand Up @@ -287,15 +293,31 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
throw ParseException("The variable name " + a._target.name() +
" used in an alias was already selected on.");
}
// TODO<qup42, joka921> Check that all variables used in the expression of
// Aliases are visible in the QueryBody.

checkUsedVariablesAreVisible(a._expression, "Alias");
}
} else if (hasConstructClause()) {
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
if (_groupByVariables.empty()) {
return;
}

for (const auto& triple : constructClause()) {
for (const auto& varOrTerm : triple) {
if (auto variable = std::get_if<Variable>(&varOrTerm)) {
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
if (!ad_utility::contains(_groupByVariables, *variable)) {
throw ParseException("Variable " + variable->name() +
" is used but not "
"aggregated despite the query not being "
"grouped by " +
variable->name() + ".");
}
}
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
}

void ParsedQuery::merge(const ParsedQuery& p) {
_prefixes.insert(_prefixes.begin(), p._prefixes.begin(), p._prefixes.end());

auto& children = _rootGraphPattern._graphPatterns;
auto& otherChildren = p._rootGraphPattern._graphPatterns;
children.insert(children.end(), otherChildren.begin(), otherChildren.end());
Expand Down
3 changes: 0 additions & 3 deletions src/parser/ParsedQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,6 @@ class ParsedQuery {

ParsedQuery() = default;

// The ql prefix for QLever specific additions is always defined.
vector<SparqlPrefix> _prefixes = {SparqlPrefix(
INTERNAL_PREDICATE_PREFIX_NAME, INTERNAL_PREDICATE_PREFIX_IRI)};
GraphPattern _rootGraphPattern;
vector<SparqlFilter> _havingClauses;
size_t _numGraphPatterns = 1;
Expand Down
127 changes: 9 additions & 118 deletions src/parser/SparqlParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,99 +19,16 @@ SparqlParser::SparqlParser(const string& query) : lexer_(query), query_(query) {
LOG(DEBUG) << "Parsing " << query << std::endl;
}

namespace {
// Converts the PrefixMap to the legacy data format used by ParsedQuery
vector<SparqlPrefix> convertPrefixMap(
const SparqlQleverVisitor::PrefixMap& map) {
vector<SparqlPrefix> prefixes;
for (auto const& [label, iri] : map) {
prefixes.emplace_back(label, iri);
}
return prefixes;
}
} // namespace

// _____________________________________________________________________________
ParsedQuery SparqlParser::parse() {
ParsedQuery result;
std::string originalString = query_;
// parsePrologue parses all the prefixes which are stored in a member
// PrefixMap. This member is returned on parse.
SparqlQleverVisitor::PrefixMap prefixes = parseWithAntlr(
&AntlrParser::prologue,
{{INTERNAL_PREDICATE_PREFIX_NAME, INTERNAL_PREDICATE_PREFIX_IRI}});

if (lexer_.accept("construct")) {
result._originalString = std::move(originalString);
result._prefixes = convertPrefixMap(prefixes);
parseQuery(&result, CONSTRUCT_QUERY);
} else if (lexer_.peek("select")) {
result = parseWithAntlr(&AntlrParser::selectQuery, prefixes);
result._originalString = std::move(originalString);
result._prefixes = convertPrefixMap(prefixes);
} else {
throw ParseException("Query must either be a SELECT or CONSTRUCT.");
ParsedQuery SparqlParser::parseQuery(std::string_view query) {
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
sparqlParserHelpers::ParserAndVisitor p{
query, {{INTERNAL_PREDICATE_PREFIX_NAME, INTERNAL_PREDICATE_PREFIX_IRI}}};
auto resultOfParseAndRemainingText = p.parseTypesafe(&AntlrParser::query);
if (!resultOfParseAndRemainingText.remainingText_.empty()) {
// TODO: add Exception Metadata
throw ParseException("Query couldn't be parsed completely. Trailing: " +
resultOfParseAndRemainingText.remainingText_);
}
lexer_.expectEmpty();

return result;
}

// _____________________________________________________________________________
void SparqlParser::parseQuery(ParsedQuery* query, QueryType queryType) {
AD_CHECK(queryType == CONSTRUCT_QUERY);
query->_clause = parseWithAntlr(&AntlrParser::constructTemplate, *query)
.value_or(ad_utility::sparql_types::Triples{});

parseWhere(query);

parseSolutionModifiers(query);

if (query->_groupByVariables.empty()) {
return;
}

AD_CHECK(query->hasConstructClause());
auto& constructClause = query->constructClause();
for (const auto& triple : constructClause) {
for (const auto& varOrTerm : triple) {
if (auto variable = std::get_if<Variable>(&varOrTerm)) {
if (!ad_utility::contains(query->_groupByVariables, *variable)) {
throw ParseException("Variable " + variable->name() +
" is used but not "
"aggregated despite the query not being "
"grouped by " +
variable->name() + ".\n" + lexer_.input());
}
}
}
}
}

// _____________________________________________________________________________
SparqlQleverVisitor::PrefixMap SparqlParser::getPrefixMap(
const ParsedQuery& parsedQuery) {
SparqlQleverVisitor::PrefixMap prefixMap;
for (const auto& prefixDef : parsedQuery._prefixes) {
prefixMap[prefixDef._prefix] = prefixDef._uri;
}
return prefixMap;
}

// _____________________________________________________________________________
void SparqlParser::parseWhere(ParsedQuery* query) {
auto [pattern, visibleVariables] =
parseWithAntlr(&AntlrParser::whereClause, *query);
query->_rootGraphPattern = std::move(pattern);
query->registerVariablesVisibleInQueryBody(visibleVariables);
}

// _____________________________________________________________________________
void SparqlParser::parseSolutionModifiers(ParsedQuery* query) {
query->addSolutionModifiers(
parseWithAntlr(&AntlrParser::solutionModifier, *query));

lexer_.accept("}");
return std::move(resultOfParseAndRemainingText.resultOfParse_);
}

// _____________________________________________________________________________
Expand Down Expand Up @@ -324,32 +241,6 @@ SparqlFilter SparqlParser::parseRegexFilter(bool expectKeyword) {
return f;
}

// ________________________________________________________________________
template <typename ContextType>
auto SparqlParser::parseWithAntlr(
ContextType* (SparqlAutomaticParser::*F)(void),
const ParsedQuery& parsedQuery)
-> decltype((std::declval<sparqlParserHelpers::ParserAndVisitor>())
.parseTypesafe(F)
.resultOfParse_) {
return parseWithAntlr(F, getPrefixMap(parsedQuery));
}

// ________________________________________________________________________
template <typename ContextType>
auto SparqlParser::parseWithAntlr(
ContextType* (SparqlAutomaticParser::*F)(void),
SparqlQleverVisitor::PrefixMap prefixMap)
-> decltype((std::declval<sparqlParserHelpers::ParserAndVisitor>())
.parseTypesafe(F)
.resultOfParse_) {
sparqlParserHelpers::ParserAndVisitor p{lexer_.getUnconsumedInput(),
std::move(prefixMap)};
auto resultOfParseAndRemainingText = p.parseTypesafe(F);
lexer_.reset(std::move(resultOfParseAndRemainingText.remainingText_));
return std::move(resultOfParseAndRemainingText.resultOfParse_);
}

namespace {
// The legacy way of expanding prefixes in an IRI. Currently used only by
// `parserFilterExpression` below.
Expand Down
28 changes: 1 addition & 27 deletions src/parser/SparqlParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,12 @@

using std::string;

enum QueryType { CONSTRUCT_QUERY, SELECT_QUERY };

// A simple parser of SPARQL.
// Not supposed to feature the complete query language.
class SparqlParser {
public:
explicit SparqlParser(const string& query);
ParsedQuery parse();
static ParsedQuery parseQuery(std::string_view query);
Qup42 marked this conversation as resolved.
Show resolved Hide resolved

/// Parse the expression of a filter statement (without the `FILTER` keyword).
/// This helper method is needed as long as the set of expressions supported
Expand All @@ -32,34 +30,10 @@ class SparqlParser {
const SparqlQleverVisitor::PrefixMap& prefixMap);

private:
void parseQuery(ParsedQuery* query, QueryType queryType);
void parseWhere(ParsedQuery* query);
void parseSolutionModifiers(ParsedQuery* query);
// Returns the parsed filter if it found one
std::optional<SparqlFilter> parseFilter(bool failOnNoFilter = true);

SparqlLexer lexer_;
string query_;
SparqlFilter parseRegexFilter(bool expectKeyword);

// Helper function that converts the prefix map from `parsedQuery` (a vector
// of pairs of prefix and IRI) to the prefix map we need for the
// `SparqlQleverVisitor` (a hash map from prefixes to IRIs).
static SparqlQleverVisitor::PrefixMap getPrefixMap(
const ParsedQuery& parsedQuery);
// Parse the clause with the prefixes of the given ParsedQuery.
template <typename ContextType>
auto parseWithAntlr(ContextType* (SparqlAutomaticParser::*F)(void),
const ParsedQuery& parsedQuery)
-> decltype((std::declval<sparqlParserHelpers::ParserAndVisitor>())
.parseTypesafe(F)
.resultOfParse_);

// Parse the clause with the given explicitly specified prefixes.
template <typename ContextType>
auto parseWithAntlr(ContextType* (SparqlAutomaticParser::*F)(void),
SparqlQleverVisitor::PrefixMap prefixMap)
-> decltype((std::declval<sparqlParserHelpers::ParserAndVisitor>())
.parseTypesafe(F)
.resultOfParse_);
};
8 changes: 4 additions & 4 deletions src/parser/SparqlParserHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ namespace sparqlParserHelpers {
using std::string;

// _____________________________________________________________________________
ParserAndVisitor::ParserAndVisitor(string input)
: input_{std::move(input)}, visitor_{} {
ParserAndVisitor::ParserAndVisitor(string_view input)
: input_{input}, visitor_{} {
// The default in ANTLR is to log all errors to the console and to continue
// the parsing. We need to turn parse errors into exceptions instead to
// propagate them to the user.
Expand All @@ -22,9 +22,9 @@ ParserAndVisitor::ParserAndVisitor(string input)
}

// _____________________________________________________________________________
ParserAndVisitor::ParserAndVisitor(string input,
ParserAndVisitor::ParserAndVisitor(string_view input,
SparqlQleverVisitor::PrefixMap prefixes)
: ParserAndVisitor{std::move(input)} {
: ParserAndVisitor{input} {
visitor_.setPrefixMapManually(std::move(prefixes));
}
} // namespace sparqlParserHelpers
6 changes: 3 additions & 3 deletions src/parser/SparqlParserHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ struct ResultOfParseAndRemainingText {

struct ParserAndVisitor {
private:
string input_;
string_view input_;
antlr4::ANTLRInputStream stream_{input_};
Qup42 marked this conversation as resolved.
Show resolved Hide resolved
SparqlAutomaticLexer lexer_{&stream_};
antlr4::CommonTokenStream tokens_{&lexer_};
Expand All @@ -37,8 +37,8 @@ struct ParserAndVisitor {
public:
SparqlAutomaticParser parser_{&tokens_};
SparqlQleverVisitor visitor_;
explicit ParserAndVisitor(string input);
ParserAndVisitor(string input, SparqlQleverVisitor::PrefixMap prefixes);
explicit ParserAndVisitor(string_view input);
ParserAndVisitor(string_view input, SparqlQleverVisitor::PrefixMap prefixes);

template <typename ContextType>
auto parseTypesafe(ContextType* (SparqlAutomaticParser::*F)(void)) {
Expand Down
Loading