From 36db1766577c3ec63a90d6c057fa76abe4a0397d Mon Sep 17 00:00:00 2001 From: bar sela Date: Tue, 14 Jan 2025 16:38:24 +0200 Subject: [PATCH] Support trailing commas in `FROM` clause (#1645) Co-authored-by: Ifeanyi Ubah --- src/dialect/mod.rs | 12 +++++++++++ src/dialect/snowflake.rs | 4 ++++ src/keywords.rs | 10 ++++++++++ src/parser/mod.rs | 42 +++++++++++++++++++++++++++++---------- tests/sqlparser_common.rs | 32 ++++++++++++++++++++++++++++- 5 files changed, 88 insertions(+), 12 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index c66982d1f..64dbc4b1b 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -404,6 +404,12 @@ pub trait Dialect: Debug + Any { self.supports_trailing_commas() } + /// Returns true if the dialect supports trailing commas in the `FROM` clause of a `SELECT` statement. + /// /// Example: `SELECT 1 FROM T, U, LIMIT 1` + fn supports_from_trailing_commas(&self) -> bool { + false + } + /// Returns true if the dialect supports double dot notation for object names /// /// Example @@ -775,6 +781,12 @@ pub trait Dialect: Debug + Any { keywords::RESERVED_FOR_IDENTIFIER.contains(&kw) } + // Returns reserved keywords when looking to parse a [TableFactor]. + /// See [Self::supports_from_trailing_commas] + fn get_reserved_keywords_for_table_factor(&self) -> &[Keyword] { + keywords::RESERVED_FOR_TABLE_FACTOR + } + /// Returns true if this dialect supports the `TABLESAMPLE` option /// before the table alias option. For example: /// diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 55343da18..6b8380d63 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -54,6 +54,10 @@ impl Dialect for SnowflakeDialect { true } + fn supports_from_trailing_commas(&self) -> bool { + true + } + // Snowflake supports double-dot notation when the schema name is not specified // In this case the default PUBLIC schema is used // diff --git a/src/keywords.rs b/src/keywords.rs index 8c8077f51..eb9e3ea6f 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -999,6 +999,16 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::END, ]; +// Global list of reserved keywords alloweed after FROM. +// Parser should call Dialect::get_reserved_keyword_after_from +// to allow for each dialect to customize the list. +pub const RESERVED_FOR_TABLE_FACTOR: &[Keyword] = &[ + Keyword::INTO, + Keyword::LIMIT, + Keyword::HAVING, + Keyword::WHERE, +]; + /// Global list of reserved keywords that cannot be parsed as identifiers /// without special handling like quoting. Parser should call `Dialect::is_reserved_for_identifier` /// to allow for each dialect to customize the list. diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3cf3c585e..ac764a535 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3940,7 +3940,11 @@ impl<'a> Parser<'a> { let trailing_commas = self.options.trailing_commas | self.dialect.supports_projection_trailing_commas(); - self.parse_comma_separated_with_trailing_commas(|p| p.parse_select_item(), trailing_commas) + self.parse_comma_separated_with_trailing_commas( + |p| p.parse_select_item(), + trailing_commas, + None, + ) } pub fn parse_actions_list(&mut self) -> Result, ParserError> { @@ -3966,20 +3970,32 @@ impl<'a> Parser<'a> { Ok(values) } + /// Parse a list of [TableWithJoins] + fn parse_table_with_joins(&mut self) -> Result, ParserError> { + let trailing_commas = self.dialect.supports_from_trailing_commas(); + + self.parse_comma_separated_with_trailing_commas( + Parser::parse_table_and_joins, + trailing_commas, + Some(self.dialect.get_reserved_keywords_for_table_factor()), + ) + } + /// Parse the comma of a comma-separated syntax element. /// Allows for control over trailing commas /// Returns true if there is a next element - fn is_parse_comma_separated_end_with_trailing_commas(&mut self, trailing_commas: bool) -> bool { + fn is_parse_comma_separated_end_with_trailing_commas( + &mut self, + trailing_commas: bool, + reserved_keywords: Option<&[Keyword]>, + ) -> bool { + let reserved_keywords = reserved_keywords.unwrap_or(keywords::RESERVED_FOR_COLUMN_ALIAS); if !self.consume_token(&Token::Comma) { true } else if trailing_commas { let token = self.peek_token().token; match token { - Token::Word(ref kw) - if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) => - { - true - } + Token::Word(ref kw) if reserved_keywords.contains(&kw.keyword) => true, Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => { true } @@ -3993,7 +4009,7 @@ impl<'a> Parser<'a> { /// Parse the comma of a comma-separated syntax element. /// Returns true if there is a next element fn is_parse_comma_separated_end(&mut self) -> bool { - self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas) + self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas, None) } /// Parse a comma-separated list of 1+ items accepted by `F` @@ -4001,7 +4017,7 @@ impl<'a> Parser<'a> { where F: FnMut(&mut Parser<'a>) -> Result, { - self.parse_comma_separated_with_trailing_commas(f, self.options.trailing_commas) + self.parse_comma_separated_with_trailing_commas(f, self.options.trailing_commas, None) } /// Parse a comma-separated list of 1+ items accepted by `F` @@ -4010,6 +4026,7 @@ impl<'a> Parser<'a> { &mut self, mut f: F, trailing_commas: bool, + reserved_keywords: Option<&[Keyword]>, ) -> Result, ParserError> where F: FnMut(&mut Parser<'a>) -> Result, @@ -4017,7 +4034,10 @@ impl<'a> Parser<'a> { let mut values = vec![]; loop { values.push(f(self)?); - if self.is_parse_comma_separated_end_with_trailing_commas(trailing_commas) { + if self.is_parse_comma_separated_end_with_trailing_commas( + trailing_commas, + reserved_keywords, + ) { break; } } @@ -10073,7 +10093,7 @@ impl<'a> Parser<'a> { // or `from`. let from = if self.parse_keyword(Keyword::FROM) { - self.parse_comma_separated(Parser::parse_table_and_joins)? + self.parse_table_with_joins()? } else { vec![] }; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index b5b12891f..07a30bc08 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -12957,8 +12957,38 @@ fn parse_update_from_before_select() { parse_sql_statements(query).unwrap_err() ); } - #[test] fn parse_overlaps() { verified_stmt("SELECT (DATE '2016-01-10', DATE '2016-02-01') OVERLAPS (DATE '2016-01-20', DATE '2016-02-10')"); } + +#[test] +fn test_trailing_commas_in_from() { + let dialects = all_dialects_where(|d| d.supports_from_trailing_commas()); + dialects.verified_only_select_with_canonical("SELECT 1, 2 FROM t,", "SELECT 1, 2 FROM t"); + + dialects + .verified_only_select_with_canonical("SELECT 1, 2 FROM t1, t2,", "SELECT 1, 2 FROM t1, t2"); + + let sql = "SELECT a, FROM b, LIMIT 1"; + let _ = dialects.parse_sql_statements(sql).unwrap(); + + let sql = "INSERT INTO a SELECT b FROM c,"; + let _ = dialects.parse_sql_statements(sql).unwrap(); + + let sql = "SELECT a FROM b, HAVING COUNT(*) > 1"; + let _ = dialects.parse_sql_statements(sql).unwrap(); + + let sql = "SELECT a FROM b, WHERE c = 1"; + let _ = dialects.parse_sql_statements(sql).unwrap(); + + // nasted + let sql = "SELECT 1, 2 FROM (SELECT * FROM t,),"; + let _ = dialects.parse_sql_statements(sql).unwrap(); + + // multiple_subqueries + dialects.verified_only_select_with_canonical( + "SELECT 1, 2 FROM (SELECT * FROM t1), (SELECT * FROM t2),", + "SELECT 1, 2 FROM (SELECT * FROM t1), (SELECT * FROM t2)", + ); +}