Skip to content

Commit

Permalink
Sync lexer punctuations with ZetaSQL (#182)
Browse files Browse the repository at this point in the history
* Sync lexer symbols with ZetaSQL

* Revert illegal input character test case
  • Loading branch information
apstndb authored Oct 29, 2024
1 parent 560e46c commit e6a5352
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 24 deletions.
42 changes: 41 additions & 1 deletion lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,9 @@ func (l *Lexer) consumeToken() {
}

switch l.peek(0) {
case '(', ')', '{', '}', ';', ',', '[', ']', '~', '*', '/', '&', '^', '+', '-':
case '(', ')', '{', '}', ';', ',', '[', ']', '~', '*', '/', '&', '^', '%', ':',
	// The following are not yet used in Spanner.
'?', '\\', '$':
l.Token.Kind = token.TokenKind([]byte{l.skip()})
return
case '.':
Expand Down Expand Up @@ -151,6 +153,32 @@ func (l *Lexer) consumeToken() {
l.Token.Kind = ">"
}
return
case '+':
switch {
// KW_ADD_ASSIGN in ZetaSQL
case l.peekIs(1, '='):
l.skipN(2)
l.Token.Kind = "+="
default:
l.skip()
l.Token.Kind = "+"
}
return
case '-':
switch {
// KW_SUB_ASSIGN in ZetaSQL
case l.peekIs(1, '='):
l.skipN(2)
l.Token.Kind = "-="
// KW_LAMBDA_ARROW in ZetaSQL
case l.peekIs(1, '>'):
l.skipN(2)
l.Token.Kind = "->"
default:
l.skip()
l.Token.Kind = "-"
}
return
case '=':
switch {
case l.peekIs(1, '>'):
Expand All @@ -163,6 +191,9 @@ func (l *Lexer) consumeToken() {
return
case '|':
switch {
case l.peekIs(1, '>'):
l.skipN(2)
l.Token.Kind = "|>"
case l.peekIs(1, '|'):
l.skipN(2)
l.Token.Kind = "||"
Expand All @@ -177,7 +208,16 @@ func (l *Lexer) consumeToken() {
l.Token.Kind = "!="
return
}
l.skip()
l.Token.Kind = "!"
return
case '@':
// KW_DOUBLE_AT is not yet used in Cloud Spanner, but used in BigQuery.
if l.peekIs(1, '@') {
l.skipN(2)
l.Token.Kind = "@@"
return
}
if l.peekOk(1) && char.IsIdentStart(l.peek(1)) {
i := 1
for l.peekOk(i) && char.IsIdentPart(l.peek(i)) {
Expand Down
55 changes: 34 additions & 21 deletions lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,35 +11,48 @@ import (
. "github.com/cloudspannerecosystem/memefish/token"
)

// Keep same order https://github.com/google/zetasql/blob/master/zetasql/parser/flex_tokenizer.l
var symbols = []string{
".",
",",
";",
"(",
")",
"{",
"}",
"[",
"{",
")",
"]",
"@",
"~",
"+",
"-",
"}",
"*",
"/",
"&",
"^",
"|",
"||",
",",
"=",
"<",
"<<",
"+=",
"-=",
"!=",
"<=",
"<>",
"<<",
"=>",
"->",
"<",
">",
">>",
">=",
"!=",
"||",
"|",
"^",
"&",
"+",
"-",
"/",
"~",
"?",
"!",
"%",
"|>",
"@",
"@@",
".",
":",
"\\",
";",
"$",
"<>", // <> is not a valid token in ZetaSQL, but it is a token in memefish
">>", // >> is not a valid token in ZetaSQL, but it is a token in memefish.
}

var lexerTestCases = []struct {
Expand Down Expand Up @@ -132,7 +145,7 @@ var lexerWrongTestCase = []struct {
pos Pos
message string
}{
{"?", 0, "illegal input character: '?'"},
{"\b", 0, "illegal input character: '\\b'"},
{`"foo`, 0, "unclosed string literal"},
{`R"foo`, 1, "unclosed raw string literal"},
{"'foo\n", 0, "unclosed string literal: newline appears in non triple-quoted"},
Expand Down
2 changes: 0 additions & 2 deletions split_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,6 @@ func TestSplitRawStatements(t *testing.T) {
want: []*memefish.RawStatement{
{Statement: "SELECT `1;2;3`", End: token.Pos(14)},
}},
// $` may become a valid token in the future, but it's reasonable to check its current behavior.
{desc: "unknown token", input: "SELECT $;", errRe: regexp.MustCompile(`illegal input character: '\$'`)},
} {
t.Run(test.desc, func(t *testing.T) {
stmts, err := memefish.SplitRawStatements("", test.input)
Expand Down

0 comments on commit e6a5352

Please sign in to comment.