diff --git a/zetasql/parser/bison_parser.h b/zetasql/parser/bison_parser.h
index 45679e5f2..c20277f0a 100644
--- a/zetasql/parser/bison_parser.h
+++ b/zetasql/parser/bison_parser.h
@@ -252,6 +252,11 @@ class BisonParser {
     *ast_nodes = std::move(*allocated_ast_nodes_);
   }
 
+  // Invokes InitFields() on every node currently in allocated_ast_nodes_.
+  void InitAllocatedASTNodes() {
+    for (const auto& node : *allocated_ast_nodes_) node->InitFields();
+  }
+
  private:
   // Identifiers and literal values are allocated from this arena. Not owned.
   // Only valid during Parse().
diff --git a/zetasql/parser/bison_parser.y b/zetasql/parser/bison_parser.y
index ede18f474..729702887 100644
--- a/zetasql/parser/bison_parser.y
+++ b/zetasql/parser/bison_parser.y
@@ -507,6 +507,7 @@ class DashedIdentifierTmpNode final : public zetasql::ASTNode {
 %token '=' "="
 %token KW_NOT_EQUALS_C_STYLE "!="
 %token KW_NOT_EQUALS_SQL_STYLE "<>"
+%token KW_EQUALS_C_STYLE "=="
 %token '<' "<"
 %token KW_LESS_EQUALS "<="
 %token '>' ">"
@@ -553,8 +554,9 @@ class DashedIdentifierTmpNode final : public zetasql::ASTNode {
 %left ".*"
 %left "OR"
 %left "AND"
+%left "XOR"
 %left UNARY_NOT_PRECEDENCE
-%nonassoc "=" "<>" ">" "<" ">=" "<=" "!=" "LIKE" "IN" "DISTINCT" "BETWEEN" "IS" "NOT_SPECIAL"
+%nonassoc "=" "==" "<>" ">" "<" ">=" "<=" "!=" "LIKE" "IN" "DISTINCT" "BETWEEN" "IS" "NOT_SPECIAL"
 %left "|"
 %left "^"
 %left "&"
@@ -750,6 +752,7 @@ using zetasql::ASTDropStatement;
 %token KW_WINDOW "WINDOW"
 %token KW_WITH "WITH"
 %token KW_UNNEST "UNNEST"
+%token KW_XOR "XOR"
 
 // These keywords may not be used in the grammar currently but are reserved
 // for future use.
@@ -5213,6 +5216,7 @@ unnest_expression_with_opt_alias_and_offset:
 // This rule returns the JavaCC operator id for the operator.
comparative_operator: "=" { $$ = zetasql::ASTBinaryExpression::EQ; } + | "==" { $$ = zetasql::ASTBinaryExpression::EQ;} | "!=" { $$ = zetasql::ASTBinaryExpression::NE; } | "<>" { $$ = zetasql::ASTBinaryExpression::NE2; } | "<" { $$ = zetasql::ASTBinaryExpression::LT; } @@ -5393,6 +5397,21 @@ expression: $$ = MAKE_NODE(ASTDotIdentifier, @2, @3, {$1, $3}); } } + | expression "XOR" expression %prec "XOR" + { + // NOT has lower precedence but can be parsed unparenthesized in the + // rhs because it is not ambiguous. This is not allowed. Other + // expressions with lower precedence wouldn't be parsed as children, so + // we don't have to check for those. + if (IsUnparenthesizedNotExpression($3)) { + YYERROR_UNEXPECTED_AND_ABORT_AT(@3); + } + auto* binary_expression = + MAKE_NODE(ASTBinaryExpression, @1, @3, {$1, $3}); + binary_expression->set_op( + zetasql::ASTBinaryExpression::XOR); + $$ = binary_expression; + } | expression "OR" expression %prec "OR" { if ($1->node_kind() == zetasql::AST_OR_EXPR && @@ -7166,6 +7185,7 @@ reserved_keyword_rule: | "SOME" | "TREAT" | "WITHIN" + | "XOR" // END_RESERVED_KEYWORD_RULE -- Do not remove this! ; diff --git a/zetasql/parser/flex_tokenizer.l b/zetasql/parser/flex_tokenizer.l index 8231fc346..ad696c4bc 100644 --- a/zetasql/parser/flex_tokenizer.l +++ b/zetasql/parser/flex_tokenizer.l @@ -647,6 +647,7 @@ window { return BisonParserImpl::token::KW_WINDOW; } with { return BisonParserImpl::token::KW_WITH; } within { return BisonParserImpl::token::KW_WITHIN; } write { return BisonParserImpl::token::KW_WRITE; } +xor { return BisonParserImpl::token::KW_XOR; } zone { return BisonParserImpl::token::KW_ZONE; } /* END_KEYWORDS -- Do not remove! 
*/
 
@@ -833,6 +834,7 @@ zone { return BisonParserImpl::token::KW_ZONE; }
 "!=" {
   return BisonParserImpl::token::KW_NOT_EQUALS_C_STYLE;
 }
+"==" { return BisonParserImpl::token::KW_EQUALS_C_STYLE; }
 "<=" { return BisonParserImpl::token::KW_LESS_EQUALS; }
 
  /* Don't recognize these in ARRAY<> or STRUCT<> context. */
diff --git a/zetasql/parser/keywords.cc b/zetasql/parser/keywords.cc
index a36ca1258..8155ab8d5 100644
--- a/zetasql/parser/keywords.cc
+++ b/zetasql/parser/keywords.cc
@@ -302,6 +302,7 @@ constexpr KeywordInfoPOD kAllKeywords[] = {
     {"with", KW_WITH, KeywordInfo::kReserved},
     {"within", KW_WITHIN, KeywordInfo::kReserved},
     {"write", KW_WRITE},
+    {"xor", KW_XOR, KeywordInfo::kReserved},
     {"zone", KW_ZONE},
 };
 
diff --git a/zetasql/parser/keywords_test.cc b/zetasql/parser/keywords_test.cc
index 3bb0235d8..3e75daf18 100644
--- a/zetasql/parser/keywords_test.cc
+++ b/zetasql/parser/keywords_test.cc
@@ -237,7 +237,7 @@ TEST(ParserTest, DontAddNewReservedKeywords) {
   // allows new queries to work that will not work on older code.
   // Before changing this, co-ordinate with all engines to make sure the change
   // is done safely.
-  EXPECT_EQ(102 /* CAUTION */, num_reserved);
+  EXPECT_EQ(103 /* CAUTION */, num_reserved);
 }
 
 }  // namespace
diff --git a/zetasql/parser/parse_tree.cc b/zetasql/parser/parse_tree.cc
index 6fda1b7da..333725a1f 100644
--- a/zetasql/parser/parse_tree.cc
+++ b/zetasql/parser/parse_tree.cc
@@ -750,6 +750,8 @@ std::string ASTBinaryExpression::GetSQLForOperator() const {
       return "%";
     case CONCAT_OP:
       return "||";
+    case XOR:
+      return "XOR";
     case DISTINCT:
       return is_not_ ? "IS NOT DISTINCT FROM" : "IS DISTINCT FROM";
   }
diff --git a/zetasql/parser/parse_tree_manual.h b/zetasql/parser/parse_tree_manual.h
index 193586635..5111690a6 100644
--- a/zetasql/parser/parse_tree_manual.h
+++ b/zetasql/parser/parse_tree_manual.h
@@ -1841,6 +1841,7 @@ class ASTBinaryExpression final : public ASTExpression {
     IDIVIDE,      // "div", integer division
     MOD,          // "%"
     CONCAT_OP,    // "||"
+    XOR,          // "XOR" keyword
     DISTINCT,     // "IS DISTINCT FROM"
   };
 
diff --git a/zetasql/parser/testdata/operator_precedence.test b/zetasql/parser/testdata/operator_precedence.test
index 93ff2bba9..8dacfa84a 100644
--- a/zetasql/parser/testdata/operator_precedence.test
+++ b/zetasql/parser/testdata/operator_precedence.test
@@ -29,6 +29,26 @@ SELECT
   a AND b AND c OR d
 ==
 
+select a XOR b OR c
+--
+QueryStatement [0-19]
+  Query [0-19]
+    Select [0-19]
+      SelectList [7-19]
+        SelectColumn [7-19]
+          OrExpr [7-19]
+            BinaryExpression(XOR) [7-14]
+              PathExpression [7-8]
+                Identifier(a) [7-8]
+              PathExpression [13-14]
+                Identifier(b) [13-14]
+            PathExpression [18-19]
+              Identifier(c) [18-19]
+--
+SELECT
+  a XOR b OR c
+==
+
 select a AND b AND (c OR d)
 --
 QueryStatement [0-27]
@@ -293,6 +313,32 @@ SELECT
   (x + y) > z OR a < b
 ==
 
+select (x + y) == z OR a = b
+--
+QueryStatement [0-28]
+  Query [0-28]
+    Select [0-28]
+      SelectList [7-28]
+        SelectColumn [7-28]
+          OrExpr [7-28]
+            BinaryExpression(=) [7-19]
+              BinaryExpression(+) [8-13]
+                PathExpression [8-9]
+                  Identifier(x) [8-9]
+                PathExpression [12-13]
+                  Identifier(y) [12-13]
+              PathExpression [18-19]
+                Identifier(z) [18-19]
+            BinaryExpression(=) [23-28]
+              PathExpression [23-24]
+                Identifier(a) [23-24]
+              PathExpression [27-28]
+                Identifier(b) [27-28]
+--
+SELECT
+  (x + y) = z OR a = b
+==
+
 select NOT a OR f(x)
 --
 QueryStatement [0-20]
@@ -389,7 +435,7 @@ SELECT
 # operator precedence parsing setup. The JavaCC parser uses hierarchical
 # productions to implement operator precedence, and it is ambiguous there.
The # Bison parser reproduces the JavaCC error behavior with a special case error. -select a {{like|=|<|>|<>|!=|>=|<=|+|-|*|/|^|&}} not b; +select a {{like|=|==|<|>|<>|!=|>=|<=|+|-|*|/|^|&}} not b; -- ALTERNATION GROUP: like -- @@ -403,6 +449,12 @@ ERROR: Syntax error: Unexpected keyword NOT [at 1:12] select a = not b; ^ -- +ALTERNATION GROUP: == +-- +ERROR: Syntax error: Unexpected keyword NOT [at 1:13] +select a == not b; + ^ +-- ALTERNATION GROUP: < -- ERROR: Syntax error: Unexpected keyword NOT [at 1:12] @@ -651,14 +703,15 @@ select a LIKE b = NOT b LIKE c; # all of these cases as unambiguous and therefore accepts some of them in the # grammar. These cases are caught using special case error handling in the # rules. -# "=" "<>" ">" "<" ">=" "<=" "!=" "LIKE" "IN" "BETWEEN" "IS" "NOT for BETWEEN/IN/LIKE" +# "=" "==" "<>" ">" "<" ">=" "<=" "!=" "LIKE" "IN" "BETWEEN" "IS" "NOT for BETWEEN/IN/LIKE" SELECT 1 - {{= 2|<> 2|!= 2|> 2|< 2|>= 2|LIKE 2|NOT LIKE 2|IN (1,2)|NOT IN (1,2)|IN (SELECT 1)|NOT IN (SELECT 1)|BETWEEN 2 AND 3|NOT BETWEEN 2 AND 3|IS TRUE|IS NOT TRUE|IS NULL|IS NOT NULL}} - {{= 2|<> 2|!= 2|> 2|< 2|>= 2|LIKE 2|NOT LIKE 2|IN (1,2)|NOT IN (1,2)|IN (SELECT 1)|NOT IN (SELECT 1)|BETWEEN 2 AND 3|NOT BETWEEN 2 AND 3|IS TRUE|IS NOT TRUE|IS NULL|IS NOT NULL}} + {{= 2|== 2|<> 2|!= 2|> 2|< 2|>= 2|LIKE 2|NOT LIKE 2|IN (1,2)|NOT IN (1,2)|IN (SELECT 1)|NOT IN (SELECT 1)|BETWEEN 2 AND 3|NOT BETWEEN 2 AND 3|IS TRUE|IS NOT TRUE|IS NULL|IS NOT NULL}} + {{= 2|== 2|<> 2|!= 2|> 2|< 2|>= 2|LIKE 2|NOT LIKE 2|IN (1,2)|NOT IN (1,2)|IN (SELECT 1)|NOT IN (SELECT 1)|BETWEEN 2 AND 3|NOT BETWEEN 2 AND 3|IS TRUE|IS NOT TRUE|IS NULL|IS NOT NULL}} -- ALTERNATION GROUPS: = 2,= 2 + == 2,= 2 <> 2,= 2 != 2,= 2 > 2,= 2 @@ -673,8 +726,26 @@ ERROR: Syntax error: Unexpected "=" [at 4:5] = 2 ^ -- +ALTERNATION GROUPS: + = 2,== 2 + == 2,== 2 + <> 2,== 2 + != 2,== 2 + > 2,== 2 + < 2,== 2 + >= 2,== 2 + LIKE 2,== 2 + NOT LIKE 2,== 2 + BETWEEN 2 AND 3,== 2 + NOT BETWEEN 2 AND 3,== 2 +-- +ERROR: Syntax 
error: Unexpected "==" [at 4:5] + == 2 + ^ +-- ALTERNATION GROUPS: = 2,<> 2 + == 2,<> 2 <> 2,<> 2 != 2,<> 2 > 2,<> 2 @@ -691,6 +762,7 @@ ERROR: Syntax error: Unexpected "<>" [at 4:5] -- ALTERNATION GROUPS: = 2,!= 2 + == 2,!= 2 <> 2,!= 2 != 2,!= 2 > 2,!= 2 @@ -707,6 +779,7 @@ ERROR: Syntax error: Unexpected "!=" [at 4:5] -- ALTERNATION GROUPS: = 2,> 2 + == 2,> 2 <> 2,> 2 != 2,> 2 > 2,> 2 @@ -723,6 +796,7 @@ ERROR: Syntax error: Unexpected ">" [at 4:5] -- ALTERNATION GROUPS: = 2,< 2 + == 2,< 2 <> 2,< 2 != 2,< 2 > 2,< 2 @@ -739,6 +813,7 @@ ERROR: Syntax error: Unexpected "<" [at 4:5] -- ALTERNATION GROUPS: = 2,>= 2 + == 2,>= 2 <> 2,>= 2 != 2,>= 2 > 2,>= 2 @@ -755,6 +830,7 @@ ERROR: Syntax error: Unexpected ">=" [at 4:5] -- ALTERNATION GROUPS: = 2,LIKE 2 + == 2,LIKE 2 <> 2,LIKE 2 != 2,LIKE 2 > 2,LIKE 2 @@ -771,6 +847,7 @@ ERROR: Syntax error: Unexpected keyword LIKE [at 4:5] -- ALTERNATION GROUPS: = 2,NOT LIKE 2 + == 2,NOT LIKE 2 <> 2,NOT LIKE 2 != 2,NOT LIKE 2 > 2,NOT LIKE 2 @@ -787,6 +864,7 @@ ERROR: Syntax error: Unexpected keyword NOT [at 4:5] -- ALTERNATION GROUPS: = 2,IN (1,2) + == 2,IN (1,2) <> 2,IN (1,2) != 2,IN (1,2) > 2,IN (1,2) @@ -803,6 +881,7 @@ ERROR: Syntax error: Unexpected keyword IN [at 4:5] -- ALTERNATION GROUPS: = 2,NOT IN (1,2) + == 2,NOT IN (1,2) <> 2,NOT IN (1,2) != 2,NOT IN (1,2) > 2,NOT IN (1,2) @@ -819,6 +898,7 @@ ERROR: Syntax error: Unexpected keyword NOT [at 4:5] -- ALTERNATION GROUPS: = 2,IN (SELECT 1) + == 2,IN (SELECT 1) <> 2,IN (SELECT 1) != 2,IN (SELECT 1) > 2,IN (SELECT 1) @@ -835,6 +915,7 @@ ERROR: Syntax error: Unexpected keyword IN [at 4:5] -- ALTERNATION GROUPS: = 2,NOT IN (SELECT 1) + == 2,NOT IN (SELECT 1) <> 2,NOT IN (SELECT 1) != 2,NOT IN (SELECT 1) > 2,NOT IN (SELECT 1) @@ -851,6 +932,7 @@ ERROR: Syntax error: Unexpected keyword NOT [at 4:5] -- ALTERNATION GROUPS: = 2,BETWEEN 2 AND 3 + == 2,BETWEEN 2 AND 3 <> 2,BETWEEN 2 AND 3 != 2,BETWEEN 2 AND 3 > 2,BETWEEN 2 AND 3 @@ -867,6 +949,7 @@ ERROR: Syntax error: Unexpected keyword 
BETWEEN [at 4:5] -- ALTERNATION GROUPS: = 2,NOT BETWEEN 2 AND 3 + == 2,NOT BETWEEN 2 AND 3 <> 2,NOT BETWEEN 2 AND 3 != 2,NOT BETWEEN 2 AND 3 > 2,NOT BETWEEN 2 AND 3 @@ -883,6 +966,7 @@ ERROR: Syntax error: Unexpected keyword NOT [at 4:5] -- ALTERNATION GROUPS: = 2,IS TRUE + == 2,IS TRUE <> 2,IS TRUE != 2,IS TRUE > 2,IS TRUE @@ -899,6 +983,7 @@ ERROR: Syntax error: Unexpected keyword IS [at 4:5] -- ALTERNATION GROUPS: = 2,IS NOT TRUE + == 2,IS NOT TRUE <> 2,IS NOT TRUE != 2,IS NOT TRUE > 2,IS NOT TRUE @@ -915,6 +1000,7 @@ ERROR: Syntax error: Unexpected keyword IS [at 4:5] -- ALTERNATION GROUPS: = 2,IS NULL + == 2,IS NULL <> 2,IS NULL != 2,IS NULL > 2,IS NULL @@ -931,6 +1017,7 @@ ERROR: Syntax error: Unexpected keyword IS [at 4:5] -- ALTERNATION GROUPS: = 2,IS NOT NULL + == 2,IS NOT NULL <> 2,IS NOT NULL != 2,IS NOT NULL > 2,IS NOT NULL @@ -959,6 +1046,20 @@ ERROR: Syntax error: Expression to the left of comparison must be parenthesized = 2 ^ -- +ALTERNATION GROUPS: + IN (1,2),== 2 + NOT IN (1,2),== 2 + IN (SELECT 1),== 2 + NOT IN (SELECT 1),== 2 + IS TRUE,== 2 + IS NOT TRUE,== 2 + IS NULL,== 2 + IS NOT NULL,== 2 +-- +ERROR: Syntax error: Expression to the left of comparison must be parenthesized [at 4:5] + == 2 + ^ +-- ALTERNATION GROUPS: IN (1,2),<> 2 NOT IN (1,2),<> 2