Skip to content

Commit

Permalink
Fix segmentation fault in KQL parser when the input query exceeds the…
Browse files Browse the repository at this point in the history
… `max_query_size` (#59626)

* Fix_kql_issue_found_by_wingfuzz

This commit fixes the following issues:
 #59036
 #59037

Both issues have the same cause: the input query exceeds max_query_size,
so the token's isEnd() condition is never met, which causes the assertion failure.

* fix_kql_issue_found_by_wingfuzz: use isValid instead of TokenType::EndOfStream

* fix_kql_issue_found_by_wingfuzz: make functional test results consistent

* fix_kql_issue_found_by_wingfuzz: update test case for makeseries

* fix_kql_issue_found_by_wingfuzz: disable makeseries

* fix_kql_issue_found_by_wingfuzz:
 use the isValid() function instead of the isEnd() function of TokenIterator to check for the end of the stream

* fix_kql_issue_found_by_wingfuzz: add test case for max_query_size

* fix_kql_issue_found_by_wingfuzz: fix AST structure

* fix_kql_issue_found_by_wingfuzz: make sure the max query size test runs in the Kusto dialect

* fix_kql_issue_found_by_wingfuzz : restore max query size after test

* fix_kql_issue_found_by_wingfuzz : fix typo

---------

Co-authored-by: János Benjamin Antal <[email protected]>
  • Loading branch information
kashwy and antaljanosbenjamin authored Feb 26, 2024
1 parent d98fbcb commit 795c1a9
Show file tree
Hide file tree
Showing 47 changed files with 2,623 additions and 51 deletions.
5 changes: 3 additions & 2 deletions src/Interpreters/executeQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int QUERY_WAS_CANCELLED;
extern const int INCORRECT_DATA;
extern const int SUPPORT_IS_DISABLED;
}

namespace FailPoints
Expand Down Expand Up @@ -709,7 +708,9 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
{
if (settings.dialect == Dialect::kusto && !internal)
{
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Kusto dialect is disabled until these two bugs will be fixed: https://github.com/ClickHouse/ClickHouse/issues/59037 and https://github.com/ClickHouse/ClickHouse/issues/59036");
ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert);
/// TODO: parser should fail early when max_query_size limit is reached.
ast = parseKQLQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth);
}
else if (settings.dialect == Dialect::prql && !internal)
{
Expand Down
12 changes: 6 additions & 6 deletions src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ bool IParserKQLFunction::directMapping(

int argument_count = 0;
const auto begin = pos;
while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
if (pos != begin)
out.append(", ");
Expand Down Expand Up @@ -148,11 +148,11 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser:
if (pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket)
return {};

if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
if (!isValidKQLPos(pos) || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Need more argument(s) in function: {}", fn_name);

std::vector<String> tokens;
while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
if (pos->type == TokenType::OpeningRoundBracket)
++round_bracket_count;
Expand Down Expand Up @@ -191,7 +191,7 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser:
{
++pos;
String array_index;
while (!pos->isEnd() && pos->type != TokenType::ClosingSquareBracket)
while (isValidKQLPos(pos) && pos->type != TokenType::ClosingSquareBracket)
{
array_index += getExpression(pos);
++pos;
Expand Down Expand Up @@ -246,7 +246,7 @@ IParserKQLFunction::getOptionalArgument(const String & function_name, DB::IParse

const auto * begin = pos->begin;
std::stack<DB::TokenType> scopes;
while (!pos->isEnd() && (!scopes.empty() || (pos->type != DB::TokenType::Comma && pos->type != DB::TokenType::ClosingRoundBracket)))
while (isValidKQLPos(pos) && (!scopes.empty() || (pos->type != DB::TokenType::Comma && pos->type != DB::TokenType::ClosingRoundBracket)))
{
const auto token_type = pos->type;
if (isOpeningBracket(token_type))
Expand Down Expand Up @@ -357,7 +357,7 @@ String IParserKQLFunction::getExpression(IParser::Pos & pos)
{
++pos;
String array_index;
while (!pos->isEnd() && pos->type != TokenType::ClosingSquareBracket)
while (isValidKQLPos(pos) && pos->type != TokenType::ClosingSquareBracket)
{
array_index += getExpression(pos);
++pos;
Expand Down
7 changes: 4 additions & 3 deletions src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <Parsers/Kusto/ParserKQLDateTypeTimespan.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/Kusto/Utilities.h>
#include <Parsers/ParserSetQuery.h>
#include "Poco/String.h"
#include <format>
Expand Down Expand Up @@ -51,7 +52,7 @@ bool DatatypeDatetime::convertImpl(String & out, IParser::Pos & pos)
else
{
auto start = pos;
while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
++pos;
if (pos->type == TokenType::ClosingRoundBracket)
Expand All @@ -77,7 +78,7 @@ bool DatatypeDynamic::convertImpl(String & out, IParser::Pos & pos)
if (pos->type == TokenType::OpeningCurlyBrace)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Property bags are not supported for now in {}", function_name);

while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket)
while (isValidKQLPos(pos) && pos->type != TokenType::ClosingRoundBracket)
{
if (const auto token_type = pos->type; token_type == TokenType::BareWord || token_type == TokenType::Number
|| token_type == TokenType::QuotedIdentifier || token_type == TokenType::StringLiteral)
Expand Down Expand Up @@ -117,7 +118,7 @@ bool DatatypeGuid::convertImpl(String & out, IParser::Pos & pos)
else
{
auto start = pos;
while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
++pos;
if (pos->type == TokenType::ClosingRoundBracket)
Expand Down
5 changes: 3 additions & 2 deletions src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/Kusto/Utilities.h>
#include <Parsers/ParserSetQuery.h>
#include "Poco/String.h"
namespace DB::ErrorCodes
Expand Down Expand Up @@ -521,7 +522,7 @@ bool MakeTimeSpan::convertImpl(String & out, IParser::Pos & pos)
String second;
int arg_count = 0;
std::vector<String> args;
while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket)
while (isValidKQLPos(pos) && pos->type != TokenType::ClosingRoundBracket)
{
String arg = getConvertedArgument(fn_name, pos);
args.insert(args.begin(), arg);
Expand Down Expand Up @@ -588,7 +589,7 @@ bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos)
String arguments;
int arg_count = 0;

while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket)
while (isValidKQLPos(pos) && pos->type != TokenType::ClosingRoundBracket)
{
String arg = getConvertedArgument(fn_name, pos);
if (pos->type == TokenType::Comma)
Expand Down
4 changes: 2 additions & 2 deletions src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include <Parsers/Kusto/KustoFunctions/IParserKQLFunction.h>
#include <Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h>
#include <Parsers/Kusto/KustoFunctions/KQLStringFunctions.h>

#include <Parsers/Kusto/Utilities.h>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <Poco/String.h>
Expand Down Expand Up @@ -590,7 +590,7 @@ bool StrCatDelim::convertImpl(String & out, IParser::Pos & pos)
int arg_count = 0;
String args;

while (!pos->isEnd() && pos->type != TokenType::Semicolon && pos->type != TokenType::ClosingRoundBracket)
while (isValidKQLPos(pos) && pos->type != TokenType::Semicolon && pos->type != TokenType::ClosingRoundBracket)
{
++pos;
String arg = getConvertedArgument(fn_name, pos);
Expand Down
3 changes: 2 additions & 1 deletion src/Parsers/Kusto/ParserKQLExtend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <Parsers/Kusto/ParserKQLOperators.h>
#include <Parsers/Kusto/ParserKQLProject.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/Utilities.h>
#include <Parsers/ParserSelectQuery.h>
#include <Parsers/ParserTablesInSelectQuery.h>

Expand Down Expand Up @@ -44,7 +45,7 @@ bool ParserKQLExtend ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)

int32_t round_bracket_count = 0;
int32_t square_bracket_count = 0;
while (!npos->isEnd())
while (isValidKQLPos(npos))
{
if (npos->type == TokenType::OpeningRoundBracket)
++round_bracket_count;
Expand Down
7 changes: 4 additions & 3 deletions src/Parsers/Kusto/ParserKQLMVExpand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <Parsers/Kusto/ParserKQLMakeSeries.h>
#include <Parsers/Kusto/ParserKQLOperators.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/Utilities.h>
#include <Parsers/ParserSelectQuery.h>
#include <Parsers/ParserSetQuery.h>
#include <Parsers/ParserTablesInSelectQuery.h>
Expand Down Expand Up @@ -49,7 +50,7 @@ bool ParserKQLMVExpand::parseColumnArrayExprs(ColumnArrayExprs & column_array_ex
String to_type;
--expr_end_pos;

while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
if (pos->type == TokenType::OpeningRoundBracket)
++bracket_count;
Expand Down Expand Up @@ -125,9 +126,9 @@ bool ParserKQLMVExpand::parseColumnArrayExprs(ColumnArrayExprs & column_array_ex

if (String(pos->begin, pos->end) == "limit")
break;
if (!pos->isEnd())
if (isValidKQLPos(pos))
++pos;
if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
if (!isValidKQLPos(pos) || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
{
if (expr_end_pos < expr_begin_pos)
{
Expand Down
9 changes: 5 additions & 4 deletions src/Parsers/Kusto/ParserKQLMakeSeries.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <Parsers/Kusto/ParserKQLMakeSeries.h>
#include <Parsers/Kusto/ParserKQLOperators.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/Utilities.h>
#include <Parsers/ParserSelectQuery.h>
#include <Parsers/ParserTablesInSelectQuery.h>

Expand Down Expand Up @@ -39,7 +40,7 @@ bool ParserKQLMakeSeries ::parseAggregationColumns(AggregationColumns & aggregat
ParserToken close_bracket(TokenType::ClosingRoundBracket);
ParserToken comma(TokenType::Comma);

while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
String alias;
String aggregation_fun;
Expand Down Expand Up @@ -96,7 +97,7 @@ bool ParserKQLMakeSeries ::parseFromToStepClause(FromToStepClause & from_to_step
auto step_pos = begin;
auto end_pos = begin;

while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
while (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
if (String(pos->begin, pos->end) == "from")
from_pos = pos;
Expand Down Expand Up @@ -175,7 +176,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr &
Tokens tokens(src.c_str(), src.c_str() + src.size());
IParser::Pos pos(tokens, max_depth);
String res;
while (!pos->isEnd())
while (isValidKQLPos(pos))
{
String tmp = String(pos->begin, pos->end);
if (tmp == "parseDateTime64BestEffortOrNull")
Expand All @@ -201,7 +202,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr &
std::vector<String> group_expression_tokens;
Tokens tokens(group_expression.c_str(), group_expression.c_str() + group_expression.size());
IParser::Pos pos(tokens, max_depth);
while (!pos->isEnd())
while (isValidKQLPos(pos))
{
if (String(pos->begin, pos->end) == "AS")
{
Expand Down
13 changes: 7 additions & 6 deletions src/Parsers/Kusto/ParserKQLOperators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <Parsers/Kusto/ParserKQLOperators.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/Kusto/Utilities.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/formatAST.h>
Expand Down Expand Up @@ -148,7 +149,7 @@ String KQLOperators::genHasAnyAllOpExpr(std::vector<String> & tokens, IParser::P

String logic_op = (kql_op == "has_all") ? " and " : " or ";

while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon)
while (isValidKQLPos(token_pos) && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon)
{
auto tmp_arg = IParserKQLFunction::getExpression(token_pos);
if (token_pos->type == TokenType::Comma)
Expand Down Expand Up @@ -217,7 +218,7 @@ String genInOpExprCis(std::vector<String> & tokens, DB::IParser::Pos & token_pos
--token_pos;

new_expr += ch_op;
while (!token_pos->isEnd() && token_pos->type != DB::TokenType::PipeMark && token_pos->type != DB::TokenType::Semicolon)
while (isValidKQLPos(token_pos) && token_pos->type != DB::TokenType::PipeMark && token_pos->type != DB::TokenType::Semicolon)
{
auto tmp_arg = String(token_pos->begin, token_pos->end);
if (token_pos->type != DB::TokenType::Comma && token_pos->type != DB::TokenType::ClosingRoundBracket
Expand Down Expand Up @@ -329,7 +330,7 @@ bool KQLOperators::convert(std::vector<String> & tokens, IParser::Pos & pos)
{
auto begin = pos;

if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
if (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
KQLOperatorValue op_value = KQLOperatorValue::none;

Expand All @@ -339,14 +340,14 @@ bool KQLOperators::convert(std::vector<String> & tokens, IParser::Pos & pos)
if (token == "!")
{
++pos;
if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
if (!isValidKQLPos(pos) || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid negative operator");
op = "!" + String(pos->begin, pos->end);
}
else if (token == "matches")
{
++pos;
if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
if (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
if (String(pos->begin, pos->end) == "regex")
op += " regex";
Expand All @@ -360,7 +361,7 @@ bool KQLOperators::convert(std::vector<String> & tokens, IParser::Pos & pos)
}

++pos;
if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
if (isValidKQLPos(pos) && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
if (String(pos->begin, pos->end) == "~")
op += "~";
Expand Down
Loading

0 comments on commit 795c1a9

Please sign in to comment.