Skip to content

Commit

Permalink
Add support for quoted string escaping
Browse files Browse the repository at this point in the history
Adds support for escaped characters in strings.
Caveat: the tokenizer cannot differentiate between
the single-character literal `'\'` and a string that begins
with an escaped quote, such as `'\'abc'`.
  • Loading branch information
iffyio committed Mar 14, 2024
1 parent 6b03a25 commit 1b9ff2a
Showing 1 changed file with 36 additions and 1 deletion.
37 changes: 36 additions & 1 deletion src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1217,9 +1217,20 @@ impl<'a> Tokenizer<'a> {

chars.next(); // consume the opening quote

let mut escape_on = false;
while let Some(&ch) = chars.peek() {
// Reading a quoted string can be ambiguous.
// The following two literals are allowed in different contexts
// 'abc\'xyz' -- as in any string literal with an escaped quote.
// '\' -- single character, the backslash is not escaped
// -- as in SELECT 'abc' LIKE 'xyz' ESCAPE '\'
// This means the tokenizer is expected to accept
// both '\' and '\''. However there is no context while
// reading to know which mode to enforce.
// We always enforce '\' only because it is the more common scenario.
let is_char_literal = s.as_str() == "\\";
match ch {
char if char == quote_style => {
char if (!escape_on || is_char_literal) && char == quote_style => {
chars.next(); // consume
if chars.peek().map(|c| *c == quote_style).unwrap_or(false) {
s.push(ch);
Expand All @@ -1235,6 +1246,9 @@ impl<'a> Tokenizer<'a> {
'\\' => {
// consume
chars.next();

escape_on = !escape_on;

// slash escaping is specific to MySQL dialect.
if dialect_of!(self is MySqlDialect) {
if let Some(next) = chars.peek() {
Expand All @@ -1258,6 +1272,7 @@ impl<'a> Tokenizer<'a> {
s.push(n);
chars.next(); // consume next
}
escape_on = false;
}
} else {
s.push(ch);
Expand All @@ -1266,6 +1281,7 @@ impl<'a> Tokenizer<'a> {
_ => {
chars.next(); // consume
s.push(ch);
escape_on = false;
}
}
}
Expand Down Expand Up @@ -2306,4 +2322,23 @@ mod tests {
check_unescape(r"Hello\0", None);
check_unescape(r"Hello\xCADRust", None);
}

#[test]
fn tokenize_quoted_string_escape() {
    // Each case pairs a single-quoted SQL literal with the string content the
    // tokenizer is expected to produce for it under `GenericDialect`.
    // In the backslash cases the expected content keeps the backslashes exactly
    // as written; the last case expects a doubled quote to yield one quote.
    let cases = [
        (r#"'%a\'%b'"#, r#"%a\'%b"#),
        (r#"'a\'\'b\'c\'d'"#, r#"a\'\'b\'c\'d"#),
        (r#"'\\'"#, r#"\\"#),
        (r#"'\\a\\b\'c'"#, r#"\\a\\b\'c"#),
        (r#"'\'"#, r#"\"#),
        (r#"''''"#, r#"'"#),
    ];

    let dialect = GenericDialect {};
    for (input, content) in cases {
        let actual = Tokenizer::new(&dialect, input).tokenize().unwrap();

        // Every input must tokenize to exactly one single-quoted string token.
        compare(vec![Token::SingleQuotedString(content.to_string())], actual);
    }
}
}

0 comments on commit 1b9ff2a

Please sign in to comment.