Skip to content

Commit

Permalink
FIX: SimpleSQLGrammar quote parsing regression (#5700)
Browse files Browse the repository at this point in the history
* Update grammar and associated tests

* Grammar cleanup and commenting

* Add test output to help with future debugging

* Two more test scenarios added, plus a formatting change.

---------

Co-authored-by: Jason Lyle <[email protected]>
Co-authored-by: Daniel Mallorga <[email protected]>
Co-authored-by: rberezen <[email protected]>
  • Loading branch information
4 people authored Mar 21, 2024
1 parent 574cf58 commit 6f50035
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -57,36 +57,39 @@ TOKEN:
TOKEN : /* Numeric Constants */
{
< S_DOUBLE: ((<S_LONG>)? "." <S_LONG> ( ["e","E"] (["+", "-"])? <S_LONG>)?
|
<S_LONG> "." (["e","E"] (["+", "-"])? <S_LONG>)?
|
<S_LONG> ["e","E"] (["+", "-"])? <S_LONG>
)>
| < S_LONG: ( <DIGIT> )+ >
| < #DIGIT: ["0" - "9"] >
|
<S_LONG> "." (["e","E"] (["+", "-"])? <S_LONG>)?
|
<S_LONG> ["e","E"] (["+", "-"])? <S_LONG>
)>
| < S_LONG: ( <DIGIT> )+ >
| < #DIGIT: ["0" - "9"] >
}

TOKEN:
{
< COMPLEX_IDENTIFIER: (<S_IDENTIFIER> | <S_QUOTED_IDENTIFIER>) ((["\r","\n"," "])* "." (["\r","\n"," "])* (<S_IDENTIFIER> | <S_QUOTED_IDENTIFIER>))+ >
| < S_IDENTIFIER: ( <LETTER> | <UNICODE_LETTERS> )+ ( <DIGIT> | <LETTER> | <UNICODE_LETTERS> | <SPECIAL_CHARS> )* >
| < #LETTER: ["a"-"z", "A"-"Z", "_", "$"] >
| < #SPECIAL_CHARS: "$" | "_" | "#" | "@" >
| < S_IDENTIFIER: ( <LETTER> | <UNICODE_LETTERS> | <FIRST_CHAR_SPECIAL_CHARS> ) ( <LETTER> | <UNICODE_LETTERS> | <SPECIAL_CHARS> | <DIGIT> )* >
| < #LETTER: ["a"-"z", "A"-"Z"] >
| < #FIRST_CHAR_SPECIAL_CHARS: "$" | "_" >
| < #SPECIAL_CHARS: <FIRST_CHAR_SPECIAL_CHARS> | "#" | "@" >

| < #ESC_S_QUOTE_A: ( "''" ) > /* probably the closest to a universal standard */
| < #ESC_S_QUOTE_B: ( "\\'" ) > /* Valid in Postgres and MySQL (if NO_BACKSLASH_ESCAPES not enabled), NOT valid in Oracle or MSSQL */
| < #ESC_D_QUOTE_A: ( "\"\"" ) > /* probably the 2nd closest to a universal standard */
| < #ESC_D_QUOTE_B: ( "\\\"" ) > /* Valid in Postgres and MySQL (if NO_BACKSLASH_ESCAPES not enabled), NOT valid in Oracle or MSSQL */
| < #ESC_NON_QUOTE: "\\" ["n","t","b","r","f","\\","0"] >
| < #ESC_S_QUOTE: ( "''" ) > /* probably the closest to a universal standard */
| < #ESC_D_QUOTE: ( "\"\"" ) > /* probably the 2nd closest to a universal standard */
| < #ESC_ANY_CHAR: "\\" ~[] > // Matches any character following '\'
/* SQL-standard is that string literals are delimited only by single-quote, and double-quotes are only for identifiers... */
| < #S_QUOTED_STRING_HYBRID: ( "'" ( <ESC_S_QUOTE_A> | <ESC_S_QUOTE_B> | <ESC_D_QUOTE_B> | <ESC_NON_QUOTE> | ~["\\","'"] )* ("'" | "\\'")) >
/*
Negative match in hybrid string tokens excludes a single backslash (i.e. "\\") so that backslashes are matched
with the ESC_ANY_CHAR token rather than prematurely ending a match
*/
| < #S_QUOTED_STRING_HYBRID: ( "'" ( <ESC_S_QUOTE> | <ESC_ANY_CHAR> | ~["\\","'"] )* ("'" | "\\'")) >
/* ... but many DBs tolerate double-quotes around string literals, including MySQL (unless you enable ANSI SQL mode), and MSSQL (if you disable SET QUOTED_IDENTIFIER) */
| < #D_QUOTED_STRING_HYBRID: ( "\"" ( <ESC_S_QUOTE_B> | <ESC_D_QUOTE_A> | <ESC_D_QUOTE_B> | <ESC_NON_QUOTE> | ~["\\","\""] )* ("\"" | "\\\"")) >
| < #D_QUOTED_STRING_HYBRID: ( "\"" ( <ESC_D_QUOTE> | <ESC_ANY_CHAR> | ~["\\","\""] )* ("\"" | "\\\"")) >
/* Finally... */
| < S_CHAR_LITERAL: (["U","E","N","R","B"]|"RB"|"_utf8")? (<S_QUOTED_STRING_HYBRID> | <D_QUOTED_STRING_HYBRID>) >

| < S_QUOTED_IDENTIFIER: "\"" (~["\n","\r","\""])+ "\"" | ("`" (~["\n","\r","`"])+ "`") | ( "[" ~["0"-"9","]"] (~["\n","\r","]"])* "]" ) >
| < EMPTY_QUOTE: "\"" "\"">
| < EMPTY_QUOTE: "\"" "\"">

/*
Built list from http://stackoverflow.com/a/37668315/45756
Expand Down Expand Up @@ -514,4 +517,3 @@ TOKEN: /* symbols */
"\u00A1"-"\uFF65" /* everything else */
] >
}

Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@ class SimpleSqlGrammarTest extends Specification {
@Unroll
def test() {
when:
def tokenManager = new SimpleSqlGrammarTokenManager(new SimpleCharStream(new StringReader(input)));
def tokenManager = new SimpleSqlGrammarTokenManager(new SimpleCharStream(new StringReader(input)))
def grammar = new SimpleSqlGrammar(tokenManager)

def tokens = new ArrayList<String>()
Token token
System.out.println("----------------------------------------------------------------")
System.out.println("'" + input + "'")
while ((token = grammar.getNextToken()).kind != SimpleSqlGrammarConstants.EOF) {
System.out.println(" " + String.format('%1$-32s', SimpleSqlGrammarConstants.tokenImage[token.kind]) + ": '" + token.toString() + "'")
tokens.add(token.toString())
}

Expand All @@ -37,7 +40,7 @@ class SimpleSqlGrammarTest extends Specification {
"mysql escaped quotes '\\''" | ["mysql", " ", "escaped", " ", "quotes", " ", "'\\''"]
"invalid ' sql" | ["invalid", " ", "'", " ", "sql"]
"'invalid' ' sql" | ["'invalid'", " ", "'", " ", "sql"]
"utf8-〠@chars works" | ["utf8", "-", "〠@chars", " ", "works"]
"utf8-〠@chars works" | ["utf8", "-", "〠@chars", " ", "works"]
"single '\\' works" | ["single", " ", "'\\'", " ", "works"]
"double '\\\\' works" | ["double", " ", "'\\\\'", " ", "works"]
"unquoted \\\\ works" | ["unquoted", " ", "\\", "\\", " ", "works"]
Expand All @@ -52,5 +55,13 @@ class SimpleSqlGrammarTest extends Specification {
"This has a \\ and symbol ≤ (u2264)" | ["This", " ", "has", " ", "a", " ", "\\", " ", "and", " ", "symbol", " ", "", " ", "(", "u2264", ")"]
"This ≤ (u2264) is before the \\" | ["This", " ", "", " ", "(", "u2264", ")", " ", "is", " ", "before", " ", "the", " ", "\\"]
"This has an unicode char ÀÀÀÀÀÀ+++ãããioú≤₢" | ["This", " ", "has", " ", "an", " ", "unicode"," ", "char", " ", "ÀÀÀÀÀÀ", "+", "+", "+", "ãããioú", "", ""]
"select 'foo\\_bar' from sys.dual;" | ["select", " ", "'foo\\_bar'", " ", "from", " ", "sys.dual", ";"]
"select \"foo\\_bar\" from sys.dual;" | ["select", " ", "\"foo\\_bar\"", " ", "from", " ", "sys.dual", ";"]
"select 'foo\\sbar' from sys.dual;" | ["select", " ", "'foo\\sbar'", " ", "from", " ", "sys.dual", ";"]
"select \"foo\\sbar\" from sys.dual;" | ["select", " ", "\"foo\\sbar\"", " ", "from", " ", "sys.dual", ";"]
"select '' from sys.dual;" | ["select", " ", "''", " ", "from", " ", "sys.dual", ";"]
"select \"\" from sys.dual;" | ["select", " ", "\"\"", " ", "from", " ", "sys.dual", ";"]
"select q'~;\\~' from sys.dual;" | ["select", " ", "q", "'~;\\~'", " ", "from", " ", "sys.dual", ";"]
"select q'{\\\n;\n\\}' from sys.dual;" | ["select", " ", "q", "'{\\\n;\n\\}'", " ", "from", " ", "sys.dual", ";"]
}
}

0 comments on commit 6f50035

Please sign in to comment.