Skip to content

Commit

Permalink
National strings: check if dialect supports backslash escape
Browse files Browse the repository at this point in the history
  • Loading branch information
hansott committed Jan 23, 2025
1 parent 3fb8f4b commit e13ac19
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 2 deletions.
14 changes: 13 additions & 1 deletion src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use core::fmt::Debug;

use crate::dialect::*;
use crate::parser::{Parser, ParserError};
use crate::tokenizer::Tokenizer;
use crate::tokenizer::{Token, Tokenizer};
use crate::{ast::*, parser::ParserOptions};

#[cfg(test)]
Expand Down Expand Up @@ -215,6 +215,18 @@ impl TestedDialects {
pub fn verified_expr(&self, sql: &str) -> Expr {
// The same string is passed as both arguments to `expr_parses_to`.
self.expr_parses_to(sql, sql)
}

/// Check that the tokenizer returns the expected tokens for the given SQL.
///
/// `sql` is tokenized once per dialect in `self.dialects`. When
/// `self.options` is set, its `unescape` flag is forwarded to the tokenizer
/// before tokenizing.
///
/// # Panics
///
/// Panics if there are no dialects to test, if tokenization fails for any
/// dialect, or if the produced tokens differ from `expected`.
pub fn tokenizes_to(&self, sql: &str, expected: Vec<Token>) {
    // An empty dialect list would make the loop below a no-op and let the
    // calling test pass without checking anything.
    assert!(!self.dialects.is_empty(), "No dialects to test");

    for dialect in &self.dialects {
        let mut tokenizer = Tokenizer::new(&**dialect, sql);
        if let Some(options) = &self.options {
            tokenizer = tokenizer.with_unescape(options.unescape);
        }
        // Name the dialect in every failure so a mismatch can be traced back
        // to the dialect that produced it.
        let tokens = tokenizer
            .tokenize()
            .unwrap_or_else(|e| panic!("Failed to tokenize {:?} with {:?}: {:?}", sql, dialect, e));
        assert_eq!(expected, tokens, "Tokenized differently for {:?}", dialect);
    }
}
}

/// Returns all available dialects.
Expand Down
32 changes: 31 additions & 1 deletion src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,10 @@ impl<'a> Tokenizer<'a> {
match chars.peek() {
Some('\'') => {
// N'...' - a <national character string literal>
let s = self.tokenize_single_quoted_string(chars, '\'', true)?;
let backslash_escape =
self.dialect.supports_string_literal_backslash_escape();
let s =
self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?;
Ok(Some(Token::NationalStringLiteral(s)))
}
_ => {
Expand Down Expand Up @@ -1890,6 +1893,7 @@ mod tests {
use crate::dialect::{
BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect,
};
use crate::test_utils::all_dialects_where;
use core::fmt::Debug;

#[test]
Expand Down Expand Up @@ -3214,4 +3218,30 @@ mod tests {
let expected = vec![Token::SingleQuotedString("''".to_string())];
compare(expected, tokens);
}

#[test]
fn test_national_strings_backslash_escape_not_supported() {
    // Only dialects that do NOT treat backslash as an escape character.
    let dialects = all_dialects_where(|d| !d.supports_string_literal_backslash_escape());

    // In Rust source `\\` is one backslash, so the SQL text is: select n'''''\'
    let sql = "select n'''''\\'";

    // Each doubled quote collapses to one quote; the backslash is kept
    // literally and the quote after it terminates the literal.
    let expected = vec![
        Token::make_keyword("select"),
        Token::Whitespace(Whitespace::Space),
        Token::NationalStringLiteral("''\\".to_string()),
    ];

    dialects.tokenizes_to(sql, expected);
}

#[test]
fn test_national_strings_backslash_escape_supported() {
    // Only dialects that treat backslash as an escape character.
    let dialects = all_dialects_where(|d| d.supports_string_literal_backslash_escape());

    // In Rust source `\\` is one backslash, so the SQL text is: select n'''''\''
    let sql = "select n'''''\\''";

    // Two doubled quotes plus the backslash-escaped quote yield three quotes;
    // the final quote terminates the literal.
    let expected = vec![
        Token::make_keyword("select"),
        Token::Whitespace(Whitespace::Space),
        Token::NationalStringLiteral("'''".to_string()),
    ];

    dialects.tokenizes_to(sql, expected);
}
}

0 comments on commit e13ac19

Please sign in to comment.