diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs
index f852152a0..e2a73de8a 100644
--- a/src/dialect/generic.rs
+++ b/src/dialect/generic.rs
@@ -131,4 +131,8 @@ impl Dialect for GenericDialect {
     fn supports_empty_projections(&self) -> bool {
         true
     }
+
+    fn supports_nested_comments(&self) -> bool {
+        true
+    }
 }
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index 1343efca6..9ffbd8ed8 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -682,6 +682,12 @@ pub trait Dialect: Debug + Any {
         false
     }
 
+    /// Returns true if the dialect supports nested comments
+    /// e.g. `/* /* nested */ */`
+    fn supports_nested_comments(&self) -> bool {
+        false
+    }
+
     /// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem`
     /// as an alias assignment operator, rather than a boolean expression.
     /// For example: the following statements are equivalent for such a dialect:
diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs
index 6a13a386a..170b0a7c9 100644
--- a/src/dialect/postgresql.rs
+++ b/src/dialect/postgresql.rs
@@ -241,6 +241,10 @@ impl Dialect for PostgreSqlDialect {
     fn supports_empty_projections(&self) -> bool {
         true
     }
+
+    fn supports_nested_comments(&self) -> bool {
+        true
+    }
 }
 
 pub fn parse_create(parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index da61303b4..38bd33d60 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1855,28 +1855,33 @@ impl<'a> Tokenizer<'a> {
     ) -> Result<Option<Token>, TokenizerError> {
         let mut s = String::new();
         let mut nested = 1;
-        let mut last_ch = ' ';
+        let supports_nested_comments = self.dialect.supports_nested_comments();
 
         loop {
             match chars.next() {
-                Some(ch) => {
-                    if last_ch == '/' && ch == '*' {
-                        nested += 1;
-                    } else if last_ch == '*' && ch == '/' {
-                        nested -= 1;
-                        if nested == 0 {
-                            s.pop();
-                            break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
-                        }
+                Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => {
+                    chars.next(); // consume the '*'
+                    s.push('/');
+                    s.push('*');
+                    nested += 1;
+                }
+                Some('*') if matches!(chars.peek(), Some('/')) => {
+                    chars.next(); // consume the '/'
+                    nested -= 1;
+                    if nested == 0 {
+                        break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
                     }
+                    s.push('*');
+                    s.push('/');
+                }
+                Some(ch) => {
                     s.push(ch);
-                    last_ch = ch;
                 }
                 None => {
                     break self.tokenizer_error(
                         chars.location(),
                         "Unexpected EOF while in a multi-line comment",
-                    )
+                    );
                 }
             }
         }
@@ -2718,18 +2723,90 @@ mod tests {
 
     #[test]
     fn tokenize_nested_multiline_comment() {
-        let sql = String::from("0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1");
+        let dialect = GenericDialect {};
+        let test_cases = vec![
+            (
+                "0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1",
+                vec![
+                    Token::Number("0".to_string(), false),
+                    Token::Whitespace(Whitespace::MultiLineComment(
+                        "multi-line\n* \n/* comment \n /*comment*/*/ ".into(),
+                    )),
+                    Token::Whitespace(Whitespace::Space),
+                    Token::Div,
+                    Token::Word(Word {
+                        value: "comment".to_string(),
+                        quote_style: None,
+                        keyword: Keyword::COMMENT,
+                    }),
+                    Token::Mul,
+                    Token::Div,
+                    Token::Number("1".to_string(), false),
+                ],
+            ),
+            (
+                "0/*multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/*/1",
+                vec![
+                    Token::Number("0".to_string(), false),
+                    Token::Whitespace(Whitespace::MultiLineComment(
+                        "multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/".into(),
+                    )),
+                    Token::Number("1".to_string(), false),
+                ],
+            ),
+            (
+                "SELECT 1/* a /* b */ c */0",
+                vec![
+                    Token::make_keyword("SELECT"),
+                    Token::Whitespace(Whitespace::Space),
+                    Token::Number("1".to_string(), false),
+                    Token::Whitespace(Whitespace::MultiLineComment(" a /* b */ c ".to_string())),
+                    Token::Number("0".to_string(), false),
+                ],
+            ),
+        ];
+
+        for (sql, expected) in test_cases {
+            let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
+            compare(expected, tokens);
+        }
+    }
+
+    #[test]
+    fn tokenize_nested_multiline_comment_empty() {
+        let sql = "select 1/*/**/*/0";
         let dialect = GenericDialect {};
-        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
         let expected = vec![
+            Token::make_keyword("select"),
+            Token::Whitespace(Whitespace::Space),
+            Token::Number("1".to_string(), false),
+            Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string())),
             Token::Number("0".to_string(), false),
+        ];
+
+        compare(expected, tokens);
+    }
+
+    #[test]
+    fn tokenize_nested_comments_if_not_supported() {
+        let dialect = SQLiteDialect {};
+        let sql = "SELECT 1/*/* nested comment */*/0";
+        let tokens = Tokenizer::new(&dialect, sql).tokenize();
+        let expected = vec![
+            Token::make_keyword("SELECT"),
+            Token::Whitespace(Whitespace::Space),
+            Token::Number("1".to_string(), false),
             Token::Whitespace(Whitespace::MultiLineComment(
-                "multi-line\n* \n/* comment \n /*comment*/*/ */ /comment".to_string(),
+                "/* nested comment ".to_string(),
             )),
-            Token::Number("1".to_string(), false),
+            Token::Mul,
+            Token::Div,
+            Token::Number("0".to_string(), false),
         ];
-        compare(expected, tokens);
+
+        compare(expected, tokens.unwrap());
     }
 
     #[test]
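
Not part of the patch: a minimal usage sketch of the behavior the change enables, assuming the crate is built with this diff applied. The query string is an arbitrary example; `GenericDialect` and `MySqlDialect` are existing dialects, and in this patch only `GenericDialect` and `PostgreSqlDialect` override `supports_nested_comments()` to return true.

```rust
use sqlparser::dialect::{GenericDialect, MySqlDialect};
use sqlparser::tokenizer::Tokenizer;

fn main() {
    // Arbitrary example input, not taken from the test suite above.
    let sql = "SELECT 1 /* outer /* inner */ still a comment */ 2";

    // GenericDialect now returns true from supports_nested_comments(), so the
    // tokenizer tracks nesting depth and emits the whole span as a single
    // Whitespace::MultiLineComment token.
    let nested = Tokenizer::new(&GenericDialect {}, sql).tokenize().unwrap();
    println!("{nested:?}");

    // MySqlDialect keeps the default (false): the comment closes at the first `*/`,
    // `still`, `a`, and `comment` become word tokens, and the trailing `*/` is
    // tokenized as the `*` and `/` operators.
    let flat = Tokenizer::new(&MySqlDialect {}, sql).tokenize().unwrap();
    println!("{flat:?}");
}
```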