From 2296de2bc432282a814d083f67dcb6503dd4ecc0 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 15 Jun 2023 09:10:56 -0400 Subject: [PATCH 001/109] Add fn support_group_by_expr to Dialect trait (#896) --- src/dialect/duckdb.rs | 4 ++++ src/dialect/generic.rs | 4 ++++ src/dialect/mod.rs | 4 ++++ src/dialect/postgresql.rs | 4 ++++ src/parser.rs | 2 +- 5 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index 55f258e5..4e6e9d9a 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -28,4 +28,8 @@ impl Dialect for DuckDbDialect { fn supports_filter_during_aggregation(&self) -> bool { true } + + fn supports_group_by_expr(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 8d6ccb5e..8310954c 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -28,4 +28,8 @@ impl Dialect for GenericDialect { || ch == '#' || ch == '_' } + + fn supports_group_by_expr(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 48357501..8b3a5888 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -113,6 +113,10 @@ pub trait Dialect: Debug + Any { fn supports_within_after_array_aggregation(&self) -> bool { false } + /// Returns true if the dialects supports `group sets, roll up, or cube` expressions. 
+ fn supports_group_by_expr(&self) -> bool { + false + } /// Dialect-specific prefix parser override fn parse_prefix(&self, _parser: &mut Parser) -> Option> { // return None to fall back to the default behavior diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 4ba6229e..d131ff9c 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -42,6 +42,10 @@ impl Dialect for PostgreSqlDialect { fn supports_filter_during_aggregation(&self) -> bool { true } + + fn supports_group_by_expr(&self) -> bool { + true + } } pub fn parse_comment(parser: &mut Parser) -> Result { diff --git a/src/parser.rs b/src/parser.rs index b89077fb..2ba95ad6 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -975,7 +975,7 @@ impl<'a> Parser<'a> { /// parse a group by expr. a group by expr can be one of group sets, roll up, cube, or simple /// expr. fn parse_group_by_expr(&mut self) -> Result { - if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { + if self.dialect.supports_group_by_expr() { if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) { self.expect_token(&Token::LParen)?; let result = self.parse_comma_separated(|p| p.parse_tuple(false, true))?; From 75f18ecfda9e345e2940ea50d2c15e207baf0e29 Mon Sep 17 00:00:00 2001 From: dawg Date: Wed, 21 Jun 2023 21:12:58 +0200 Subject: [PATCH 002/109] Add support for DuckDB's CREATE MACRO statements (#897) --- src/ast/mod.rs | 85 +++++++++++++++++++++++++++++++++++++++ src/keywords.rs | 1 + src/parser.rs | 51 +++++++++++++++++++++++ src/tokenizer.rs | 4 ++ tests/sqlparser_duckdb.rs | 67 ++++++++++++++++++++++++++++++ 5 files changed, 208 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 7f3b4742..7afb576b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1580,6 +1580,19 @@ pub enum Statement { params: CreateFunctionBody, }, /// ```sql + /// CREATE MACRO + /// ``` + /// + /// Supported variants: + /// 1. 
[DuckDB](https://duckdb.org/docs/sql/statements/create_macro) + CreateMacro { + or_replace: bool, + temporary: bool, + name: ObjectName, + args: Option>, + definition: MacroDefinition, + }, + /// ```sql /// CREATE STAGE /// ``` /// See @@ -2098,6 +2111,28 @@ impl fmt::Display for Statement { write!(f, "{params}")?; Ok(()) } + Statement::CreateMacro { + or_replace, + temporary, + name, + args, + definition, + } => { + write!( + f, + "CREATE {or_replace}{temp}MACRO {name}", + temp = if *temporary { "TEMPORARY " } else { "" }, + or_replace = if *or_replace { "OR REPLACE " } else { "" }, + )?; + if let Some(args) = args { + write!(f, "({})", display_comma_separated(args))?; + } + match definition { + MacroDefinition::Expr(expr) => write!(f, " AS {expr}")?, + MacroDefinition::Table(query) => write!(f, " AS TABLE {query}")?, + } + Ok(()) + } Statement::CreateView { name, or_replace, @@ -4304,6 +4339,56 @@ impl fmt::Display for CreateFunctionUsing { } } +/// `NAME = ` arguments for DuckDB macros +/// +/// See [Create Macro - DuckDB](https://duckdb.org/docs/sql/statements/create_macro) +/// for more details +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MacroArg { + pub name: Ident, + pub default_expr: Option, +} + +impl MacroArg { + /// Returns an argument with name. 
+ pub fn new(name: &str) -> Self { + Self { + name: name.into(), + default_expr: None, + } + } +} + +impl fmt::Display for MacroArg { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.name)?; + if let Some(default_expr) = &self.default_expr { + write!(f, " := {default_expr}")?; + } + Ok(()) + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MacroDefinition { + Expr(Expr), + Table(Query), +} + +impl fmt::Display for MacroDefinition { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + MacroDefinition::Expr(expr) => write!(f, "{expr}")?, + MacroDefinition::Table(query) => write!(f, "{query}")?, + } + Ok(()) + } +} + /// Schema possible naming variants ([1]). /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#schema-definition diff --git a/src/keywords.rs b/src/keywords.rs index e73b89a9..663818c7 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -347,6 +347,7 @@ define_keywords!( LOCKED, LOGIN, LOWER, + MACRO, MANAGEDLOCATION, MATCH, MATCHED, diff --git a/src/parser.rs b/src/parser.rs index 2ba95ad6..bdf9fda3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2346,6 +2346,8 @@ impl<'a> Parser<'a> { self.parse_create_external_table(or_replace) } else if self.parse_keyword(Keyword::FUNCTION) { self.parse_create_function(or_replace, temporary) + } else if self.parse_keyword(Keyword::MACRO) { + self.parse_create_macro(or_replace, temporary) } else if or_replace { self.expected( "[EXTERNAL] TABLE or [MATERIALIZED] VIEW or FUNCTION after CREATE OR REPLACE", @@ -2624,6 +2626,8 @@ impl<'a> Parser<'a> { return_type, params, }) + } else if dialect_of!(self is DuckDbDialect) { + self.parse_create_macro(or_replace, temporary) } else { self.prev_token(); self.expected("an object type after CREATE", self.peek_token()) @@ -2699,6 +2703,53 
@@ impl<'a> Parser<'a> { } } + pub fn parse_create_macro( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + if dialect_of!(self is DuckDbDialect | GenericDialect) { + let name = self.parse_object_name()?; + self.expect_token(&Token::LParen)?; + let args = if self.consume_token(&Token::RParen) { + self.prev_token(); + None + } else { + Some(self.parse_comma_separated(Parser::parse_macro_arg)?) + }; + + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::AS)?; + + Ok(Statement::CreateMacro { + or_replace, + temporary, + name, + args, + definition: if self.parse_keyword(Keyword::TABLE) { + MacroDefinition::Table(self.parse_query()?) + } else { + MacroDefinition::Expr(self.parse_expr()?) + }, + }) + } else { + self.prev_token(); + self.expected("an object type after CREATE", self.peek_token()) + } + } + + fn parse_macro_arg(&mut self) -> Result { + let name = self.parse_identifier()?; + + let default_expr = + if self.consume_token(&Token::DuckAssignment) || self.consume_token(&Token::RArrow) { + Some(self.parse_expr()?) 
+ } else { + None + }; + Ok(MacroArg { name, default_expr }) + } + pub fn parse_create_external_table( &mut self, or_replace: bool, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index ffa1a96f..257a3517 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -114,6 +114,8 @@ pub enum Token { Colon, /// DoubleColon `::` (used for casting in postgresql) DoubleColon, + /// Assignment `:=` (used for keyword argument in DuckDB macros) + DuckAssignment, /// SemiColon `;` used as separator for COPY and payload SemiColon, /// Backslash `\` used in terminating the COPY payload with `\.` @@ -222,6 +224,7 @@ impl fmt::Display for Token { Token::Period => f.write_str("."), Token::Colon => f.write_str(":"), Token::DoubleColon => f.write_str("::"), + Token::DuckAssignment => f.write_str(":="), Token::SemiColon => f.write_str(";"), Token::Backslash => f.write_str("\\"), Token::LBracket => f.write_str("["), @@ -847,6 +850,7 @@ impl<'a> Tokenizer<'a> { chars.next(); match chars.peek() { Some(':') => self.consume_and_return(chars, Token::DoubleColon), + Some('=') => self.consume_and_return(chars, Token::DuckAssignment), _ => Ok(Some(Token::Colon)), } } diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 1a4f04c3..bb60235a 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -68,3 +68,70 @@ fn test_select_wildcard_with_exclude() { fn parse_div_infix() { duckdb_and_generic().verified_stmt(r#"SELECT 5 // 2"#); } + +#[test] +fn test_create_macro() { + let macro_ = duckdb().verified_stmt("CREATE MACRO schema.add(a, b) AS a + b"); + let expected = Statement::CreateMacro { + or_replace: false, + temporary: false, + name: ObjectName(vec![Ident::new("schema"), Ident::new("add")]), + args: Some(vec![MacroArg::new("a"), MacroArg::new("b")]), + definition: MacroDefinition::Expr(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("a"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Identifier(Ident::new("b"))), + }), + }; + 
assert_eq!(expected, macro_); +} + +#[test] +fn test_create_macro_default_args() { + let macro_ = duckdb().verified_stmt("CREATE MACRO add_default(a, b := 5) AS a + b"); + let expected = Statement::CreateMacro { + or_replace: false, + temporary: false, + name: ObjectName(vec![Ident::new("add_default")]), + args: Some(vec![ + MacroArg::new("a"), + MacroArg { + name: Ident::new("b"), + default_expr: Some(Expr::Value(Value::Number( + #[cfg(not(feature = "bigdecimal"))] + 5.to_string(), + #[cfg(feature = "bigdecimal")] + bigdecimal::BigDecimal::from(5), + false, + ))), + }, + ]), + definition: MacroDefinition::Expr(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("a"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Identifier(Ident::new("b"))), + }), + }; + assert_eq!(expected, macro_); +} + +#[test] +fn test_create_table_macro() { + let query = "SELECT col1_value AS column1, col2_value AS column2 UNION ALL SELECT 'Hello' AS col1_value, 456 AS col2_value"; + let macro_ = duckdb().verified_stmt( + &("CREATE OR REPLACE TEMPORARY MACRO dynamic_table(col1_value, col2_value) AS TABLE " + .to_string() + + query), + ); + let expected = Statement::CreateMacro { + or_replace: true, + temporary: true, + name: ObjectName(vec![Ident::new("dynamic_table")]), + args: Some(vec![ + MacroArg::new("col1_value"), + MacroArg::new("col2_value"), + ]), + definition: MacroDefinition::Table(duckdb().verified_query(query)), + }; + assert_eq!(expected, macro_); +} From f72b5a5d9b9b6ae1650051f2d8fee91876279693 Mon Sep 17 00:00:00 2001 From: delsehi Date: Thu, 22 Jun 2023 17:09:14 +0200 Subject: [PATCH 003/109] Support basic CREATE PROCEDURE of MSSQL (#900) Co-authored-by: Andrew Lamb --- src/ast/ddl.rs | 13 +++++++ src/ast/mod.rs | 37 +++++++++++++++++- src/keywords.rs | 2 + src/parser.rs | 55 ++++++++++++++++++++++++++- tests/sqlparser_mssql.rs | 82 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 185 insertions(+), 4 deletions(-) diff --git a/src/ast/ddl.rs 
b/src/ast/ddl.rs index 3e428e21..a4640d55 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -489,6 +489,19 @@ impl fmt::Display for IndexType { } } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct ProcedureParam { + pub name: Ident, + pub data_type: DataType, +} + +impl fmt::Display for ProcedureParam { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} {}", self.name, self.data_type) + } +} /// SQL column definition #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 7afb576b..9c55c5d6 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -30,8 +30,8 @@ pub use self::data_type::{ }; pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption, - ColumnOptionDef, GeneratedAs, IndexType, KeyOrIndexDisplay, ReferentialAction, TableConstraint, - UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, + ColumnOptionDef, GeneratedAs, IndexType, KeyOrIndexDisplay, ProcedureParam, ReferentialAction, + TableConstraint, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ @@ -1580,6 +1580,15 @@ pub enum Statement { params: CreateFunctionBody, }, /// ```sql + /// CREATE PROCEDURE + /// ``` + CreateProcedure { + or_alter: bool, + name: ObjectName, + params: Option>, + body: Vec, + }, + /// ```sql /// CREATE MACRO /// ``` /// @@ -2111,6 +2120,30 @@ impl fmt::Display for Statement { write!(f, "{params}")?; Ok(()) } + Statement::CreateProcedure { + name, + or_alter, + params, + body, + } => { + write!( + f, + "CREATE {or_alter}PROCEDURE {name}", + or_alter = if *or_alter { "OR ALTER " } else { "" }, + name = name + )?; + + if let Some(p) = params { + if !p.is_empty() { + write!(f, " ({})", 
display_comma_separated(p))?; + } + } + write!( + f, + " AS BEGIN {body} END", + body = display_separated(body, "; ") + ) + } Statement::CreateMacro { or_replace, temporary, diff --git a/src/keywords.rs b/src/keywords.rs index 663818c7..a76a9c95 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -690,6 +690,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::SET, Keyword::QUALIFY, Keyword::WINDOW, + Keyword::END, ]; /// Can't be used as a column alias, so that `SELECT alias` @@ -719,4 +720,5 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ // Reserved only as a column alias in the `SELECT` clause Keyword::FROM, Keyword::INTO, + Keyword::END, ]; diff --git a/src/parser.rs b/src/parser.rs index bdf9fda3..89e47b15 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -346,9 +346,14 @@ impl<'a> Parser<'a> { expecting_statement_delimiter = false; } - if self.peek_token() == Token::EOF { - break; + match self.peek_token().token { + Token::EOF => break, + + // end of statement + Token::Word(word) if word.keyword == Keyword::END => break, + _ => {} } + if expecting_statement_delimiter { return self.expected("end of statement", self.peek_token()); } @@ -2324,6 +2329,7 @@ impl<'a> Parser<'a> { /// Parse a SQL CREATE statement pub fn parse_create(&mut self) -> Result { let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); + let or_alter = self.parse_keywords(&[Keyword::OR, Keyword::ALTER]); let local = self.parse_one_of_keywords(&[Keyword::LOCAL]).is_some(); let global = self.parse_one_of_keywords(&[Keyword::GLOBAL]).is_some(); let transient = self.parse_one_of_keywords(&[Keyword::TRANSIENT]).is_some(); @@ -2369,6 +2375,8 @@ impl<'a> Parser<'a> { self.parse_create_sequence(temporary) } else if self.parse_keyword(Keyword::TYPE) { self.parse_create_type() + } else if self.parse_keyword(Keyword::PROCEDURE) { + self.parse_create_procedure(or_alter) } else { self.expected("an object type after CREATE", self.peek_token()) } @@ -3503,6 +3511,28 
@@ impl<'a> Parser<'a> { .build()) } + pub fn parse_optional_procedure_parameters( + &mut self, + ) -> Result>, ParserError> { + let mut params = vec![]; + if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { + return Ok(Some(params)); + } + loop { + if let Token::Word(_) = self.peek_token().token { + params.push(self.parse_procedure_param()?) + } + let comma = self.consume_token(&Token::Comma); + if self.consume_token(&Token::RParen) { + // allow a trailing comma, even though it's not in standard + break; + } else if !comma { + return self.expected("',' or ')' after parameter definition", self.peek_token()); + } + } + Ok(Some(params)) + } + pub fn parse_columns(&mut self) -> Result<(Vec, Vec), ParserError> { let mut columns = vec![]; let mut constraints = vec![]; @@ -3530,6 +3560,12 @@ impl<'a> Parser<'a> { Ok((columns, constraints)) } + pub fn parse_procedure_param(&mut self) -> Result { + let name = self.parse_identifier()?; + let data_type = self.parse_data_type()?; + Ok(ProcedureParam { name, data_type }) + } + pub fn parse_column_def(&mut self) -> Result { let name = self.parse_identifier()?; let data_type = self.parse_data_type()?; @@ -7082,6 +7118,21 @@ impl<'a> Parser<'a> { Ok(NamedWindowDefinition(ident, window_spec)) } + pub fn parse_create_procedure(&mut self, or_alter: bool) -> Result { + let name = self.parse_object_name()?; + let params = self.parse_optional_procedure_parameters()?; + self.expect_keyword(Keyword::AS)?; + self.expect_keyword(Keyword::BEGIN)?; + let statements = self.parse_statements()?; + self.expect_keyword(Keyword::END)?; + Ok(Statement::CreateProcedure { + name, + or_alter, + params, + body: statements, + }) + } + pub fn parse_window_spec(&mut self) -> Result { let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { self.parse_comma_separated(Parser::parse_expr)? 
diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 53db3323..8fc3dcd5 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -55,6 +55,88 @@ fn parse_mssql_delimited_identifiers() { ); } +#[test] +fn parse_create_procedure() { + let sql = "CREATE OR ALTER PROCEDURE test (@foo INT, @bar VARCHAR(256)) AS BEGIN SELECT 1 END"; + + #[cfg(feature = "bigdecimal")] + let one = Value::Number(bigdecimal::BigDecimal::from(1), false); + + #[cfg(not(feature = "bigdecimal"))] + let one = Value::Number("1".to_string(), false); + + assert_eq!( + ms().verified_stmt(sql), + Statement::CreateProcedure { + or_alter: true, + body: vec![Statement::Query(Box::new(Query { + with: None, + limit: None, + offset: None, + fetch: None, + locks: vec![], + order_by: vec![], + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::Value(one))], + into: None, + from: vec![], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None + }))) + }))], + params: Some(vec![ + ProcedureParam { + name: Ident { + value: "@foo".into(), + quote_style: None + }, + data_type: DataType::Int(None) + }, + ProcedureParam { + name: Ident { + value: "@bar".into(), + quote_style: None + }, + data_type: DataType::Varchar(Some(CharacterLength { + length: 256, + unit: None + })) + } + ]), + name: ObjectName(vec![Ident { + value: "test".into(), + quote_style: None + }]) + } + ) +} + +#[test] +fn parse_mssql_create_procedure() { + let _ = ms_and_generic().verified_stmt("CREATE OR ALTER PROCEDURE foo AS BEGIN SELECT 1 END"); + let _ = ms_and_generic().verified_stmt("CREATE PROCEDURE foo AS BEGIN SELECT 1 END"); + let _ = ms().verified_stmt( + "CREATE PROCEDURE foo AS BEGIN SELECT [myColumn] FROM [myschema].[mytable] END", + ); + let _ = ms_and_generic().verified_stmt( + "CREATE PROCEDURE 
foo (@CustomerName NVARCHAR(50)) AS BEGIN SELECT * FROM DEV END", + ); + let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN UPDATE bar SET col = 'test' END"); + // Test a statement with END in it + let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN SELECT [foo], CASE WHEN [foo] IS NULL THEN 'empty' ELSE 'notempty' END AS [foo] END"); + // Multiple statements + let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN UPDATE bar SET col = 'test'; SELECT [foo] FROM BAR WHERE [FOO] > 10 END"); +} + #[test] fn parse_mssql_apply_join() { let _ = ms_and_generic().verified_only_select( From 8877cbafa6d928dffd7677b45e3ff4a500c45f15 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Thu, 22 Jun 2023 18:15:31 +0300 Subject: [PATCH 004/109] fix: unary negation operator with operators: `Mul`, `Div` and `Mod` (#902) --- src/parser.rs | 9 ++++----- tests/sqlparser_common.rs | 22 +++++++++++++++++++++- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 89e47b15..a3f06047 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -799,7 +799,7 @@ impl<'a> Parser<'a> { }; Ok(Expr::UnaryOp { op, - expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), + expr: Box::new(self.parse_subexpr(Self::MUL_DIV_MOD_OP_PREC)?), }) } tok @ Token::DoubleExclamationMark @@ -1964,6 +1964,7 @@ impl<'a> Parser<'a> { } // use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference + const MUL_DIV_MOD_OP_PREC: u8 = 40; const PLUS_MINUS_PREC: u8 = 30; const XOR_PREC: u8 = 24; const TIME_ZONE_PREC: u8 = 20; @@ -1974,8 +1975,6 @@ impl<'a> Parser<'a> { const AND_PREC: u8 = 10; const OR_PREC: u8 = 5; - const DIV_OP_PREC: u8 = 40; - /// Get the precedence of the next token pub fn get_next_precedence(&self) -> Result { // allow the dialect to override precedence logic @@ -2025,7 +2024,7 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::SIMILAR => 
Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::DIV_OP_PREC), + Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::MUL_DIV_MOD_OP_PREC), Token::Eq | Token::Lt | Token::LtEq @@ -2043,7 +2042,7 @@ impl<'a> Parser<'a> { Token::Ampersand => Ok(23), Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC), Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => { - Ok(40) + Ok(Self::MUL_DIV_MOD_OP_PREC) } Token::DoubleColon => Ok(50), Token::Colon => Ok(50), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ad350705..917a89d8 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1074,7 +1074,7 @@ fn parse_compound_expr_2() { } #[test] -fn parse_unary_math() { +fn parse_unary_math_with_plus() { use self::Expr::*; let sql = "-a + -b"; assert_eq!( @@ -1093,6 +1093,26 @@ fn parse_unary_math() { ); } +#[test] +fn parse_unary_math_with_multiply() { + use self::Expr::*; + let sql = "-a * -b"; + assert_eq!( + BinaryOp { + left: Box::new(UnaryOp { + op: UnaryOperator::Minus, + expr: Box::new(Identifier(Ident::new("a"))), + }), + op: BinaryOperator::Multiply, + right: Box::new(UnaryOp { + op: UnaryOperator::Minus, + expr: Box::new(Identifier(Ident::new("b"))), + }), + }, + verified_expr(sql) + ); +} + #[test] fn parse_is_null() { use self::Expr::*; From 04c9fbaead8773409e27ed89f9198bcbe97abd07 Mon Sep 17 00:00:00 2001 From: parkma99 <84610851+parkma99@users.noreply.github.com> Date: Fri, 23 Jun 2023 22:48:04 +0800 Subject: [PATCH 005/109] update parse STRICT tables (#903) Co-authored-by: Andrew Lamb --- src/ast/helpers/stmt_create_table.rs | 10 ++++++++++ src/ast/mod.rs | 9 ++++++++- src/keywords.rs | 1 + src/parser.rs | 2 ++ tests/sqlparser_sqlite.rs | 9 +++++++++ 5 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs 
index cb1ad45e..2998935d 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -70,6 +70,7 @@ pub struct CreateTableBuilder { pub on_commit: Option, pub on_cluster: Option, pub order_by: Option>, + pub strict: bool, } impl CreateTableBuilder { @@ -100,6 +101,7 @@ impl CreateTableBuilder { on_commit: None, on_cluster: None, order_by: None, + strict: false, } } pub fn or_replace(mut self, or_replace: bool) -> Self { @@ -220,6 +222,11 @@ impl CreateTableBuilder { self } + pub fn strict(mut self, strict: bool) -> Self { + self.strict = strict; + self + } + pub fn build(self) -> Statement { Statement::CreateTable { or_replace: self.or_replace, @@ -247,6 +254,7 @@ impl CreateTableBuilder { on_commit: self.on_commit, on_cluster: self.on_cluster, order_by: self.order_by, + strict: self.strict, } } } @@ -284,6 +292,7 @@ impl TryFrom for CreateTableBuilder { on_commit, on_cluster, order_by, + strict, } => Ok(Self { or_replace, temporary, @@ -310,6 +319,7 @@ impl TryFrom for CreateTableBuilder { on_commit, on_cluster, order_by, + strict, }), _ => Err(ParserError::ParserError(format!( "Expected create table statement, but received: {stmt}" diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9c55c5d6..5652cce1 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1316,6 +1316,10 @@ pub enum Statement { /// than empty (represented as ()), the latter meaning "no sorting". /// order_by: Option>, + /// SQLite "STRICT" clause. + /// if the "STRICT" table-option keyword is added to the end, after the closing ")", + /// then strict typing rules apply to that table. + strict: bool, }, /// SQLite's `CREATE VIRTUAL TABLE .. 
USING ()` CreateVirtualTable { @@ -2219,6 +2223,7 @@ impl fmt::Display for Statement { on_commit, on_cluster, order_by, + strict, } => { // We want to allow the following options // Empty column list, allowed by PostgreSQL: @@ -2387,7 +2392,9 @@ impl fmt::Display for Statement { }; write!(f, " {on_commit}")?; } - + if *strict { + write!(f, " STRICT")?; + } Ok(()) } Statement::CreateVirtualTable { diff --git a/src/keywords.rs b/src/keywords.rs index a76a9c95..46389980 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -551,6 +551,7 @@ define_keywords!( STDOUT, STORAGE_INTEGRATION, STORED, + STRICT, STRING, SUBMULTISET, SUBSTRING, diff --git a/src/parser.rs b/src/parser.rs index a3f06047..0b142146 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3485,6 +3485,7 @@ impl<'a> Parser<'a> { None }; + let strict = self.parse_keyword(Keyword::STRICT); Ok(CreateTableBuilder::new(table_name) .temporary(temporary) .columns(columns) @@ -3507,6 +3508,7 @@ impl<'a> Parser<'a> { .collation(collation) .on_commit(on_commit) .on_cluster(on_cluster) + .strict(strict) .build()) } diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 31d2dd97..8f6cc757 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -242,6 +242,15 @@ fn parse_similar_to() { chk(true); } +#[test] +fn parse_create_table_with_strict() { + let sql = "CREATE TABLE Fruits (id TEXT NOT NULL PRIMARY KEY) STRICT"; + if let Statement::CreateTable { name, strict, .. 
} = sqlite().verified_stmt(sql) { + assert_eq!(name.to_string(), "Fruits"); + assert!(strict); + } +} + fn sqlite() -> TestedDialects { TestedDialects { dialects: vec![Box::new(SQLiteDialect {})], From 631eddad78d102da21e7dfc690974137398e031f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 23 Jun 2023 10:53:18 -0400 Subject: [PATCH 006/109] Update CHANGELOG.md for version `0.35.0` (#904) --- CHANGELOG.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4bd16e8..7f52f7fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,22 @@ Given that the parser produces a typed AST, any changes to the AST will technica ## [Unreleased] Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. + +## [0.35.0] 2023-06-23 + +### Added +* Support `CREATE PROCEDURE` of MSSQL (#900) - Thanks @delsehi +* Support DuckDB's `CREATE MACRO` statements (#897) - Thanks @MartinNowak +* Support for `CREATE TYPE (AS)` statements (#888) - Thanks @srijs +* Support `STRICT` tables of sqlite (#903) - Thanks @parkma99 + +### Fixed +* Fixed precedence of unary negation operator with operators: Mul, Div and Mod (#902) - Thanks @izveigor + +### Changed +* Add `support_group_by_expr` to `Dialect` trait (#896) - Thanks @jdye64 +* Update criterion requirement from `0.4` to `0.5` in `/sqlparser_bench` (#890) - Thanks @dependabot (!!) 
+ ## [0.34.0] 2023-05-19 ### Added From efd8cb7fd1c4e6654b101868f7a76d80f66da74b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 23 Jun 2023 10:54:52 -0400 Subject: [PATCH 007/109] chore: Release sqlparser version 0.35.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4a4c8ce4..eea1fe78 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.34.0" +version = "0.35.0" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 9effeba0d836e9659425d8de026e15cca97e3cd0 Mon Sep 17 00:00:00 2001 From: Robert Pack <42610831+roeap@users.noreply.github.com> Date: Thu, 29 Jun 2023 19:30:21 +0200 Subject: [PATCH 008/109] feat: add deltalake keywords (#906) --- src/keywords.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/keywords.rs b/src/keywords.rs index 46389980..32556c6d 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -110,6 +110,7 @@ define_keywords!( BIGNUMERIC, BINARY, BLOB, + BLOOMFILTER, BOOLEAN, BOTH, BTREE, @@ -201,10 +202,12 @@ define_keywords!( DELETE, DELIMITED, DELIMITER, + DELTA, DENSE_RANK, DEREF, DESC, DESCRIBE, + DETAIL, DETERMINISTIC, DIRECTORY, DISCARD, @@ -217,6 +220,7 @@ define_keywords!( DOW, DOY, DROP, + DRY, DUPLICATE, DYNAMIC, EACH, @@ -273,11 +277,13 @@ define_keywords!( FREE, FREEZE, FROM, + FSCK, FULL, FULLTEXT, FUNCTION, FUNCTIONS, FUSION, + GENERATE, GENERATED, GET, GLOBAL, @@ -290,9 +296,11 @@ define_keywords!( HASH, HAVING, HEADER, + HISTORY, HIVEVAR, HOLD, HOUR, + HOURS, IDENTITY, IF, IGNORE, @@ -416,6 +424,7 @@ define_keywords!( ONLY, OPEN, OPERATOR, + OPTIMIZE, OPTION, OPTIONS, OR, @@ -487,12 +496,14 @@ define_keywords!( RELATIVE, RELEASE, RENAME, + REORG, REPAIR, REPEATABLE, REPLACE, REPLICATION, RESTRICT, RESULT, + 
RETAIN, RETURN, RETURNING, RETURNS, @@ -505,6 +516,7 @@ define_keywords!( ROWID, ROWS, ROW_NUMBER, + RUN, SAFE_CAST, SAVEPOINT, SCHEMA, @@ -619,6 +631,7 @@ define_keywords!( USER, USING, UUID, + VACUUM, VALID, VALIDATION_MODE, VALUE, @@ -648,7 +661,8 @@ define_keywords!( WRITE, XOR, YEAR, - ZONE + ZONE, + ZORDER ); /// These keywords can't be used as a table alias, so that `FROM table_name alias` From f05f71e20dc54f14cac80a638e2ed252b6fca91c Mon Sep 17 00:00:00 2001 From: liadgiladi <51291468+liadgiladi@users.noreply.github.com> Date: Thu, 29 Jun 2023 20:33:51 +0300 Subject: [PATCH 009/109] Support `ALTER VIEW`, MySQL syntax (#907) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 24 +++++++++++++++ src/parser.rs | 21 +++++++++++++- tests/sqlparser_common.rs | 61 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 5652cce1..1cf5a768 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1375,6 +1375,15 @@ pub enum Statement { name: ObjectName, operation: AlterIndexOperation, }, + /// ALTER VIEW + AlterView { + /// View name + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + name: ObjectName, + columns: Vec, + query: Box, + with_options: Vec, + }, /// DROP Drop { /// The type of the object to drop: TABLE, VIEW, etc. 
@@ -2534,6 +2543,21 @@ impl fmt::Display for Statement { Statement::AlterIndex { name, operation } => { write!(f, "ALTER INDEX {name} {operation}") } + Statement::AlterView { + name, + columns, + query, + with_options, + } => { + write!(f, "ALTER VIEW {name}")?; + if !with_options.is_empty() { + write!(f, " WITH ({})", display_comma_separated(with_options))?; + } + if !columns.is_empty() { + write!(f, " ({})", display_comma_separated(columns))?; + } + write!(f, " AS {query}") + } Statement::Drop { object_type, if_exists, diff --git a/src/parser.rs b/src/parser.rs index 0b142146..ff6c5c54 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3898,8 +3898,10 @@ impl<'a> Parser<'a> { } pub fn parse_alter(&mut self) -> Result { - let object_type = self.expect_one_of_keywords(&[Keyword::TABLE, Keyword::INDEX])?; + let object_type = + self.expect_one_of_keywords(&[Keyword::VIEW, Keyword::TABLE, Keyword::INDEX])?; match object_type { + Keyword::VIEW => self.parse_alter_view(), Keyword::TABLE => { let _ = self.parse_keyword(Keyword::ONLY); // [ ONLY ] let table_name = self.parse_object_name()?; @@ -4097,6 +4099,23 @@ impl<'a> Parser<'a> { } } + pub fn parse_alter_view(&mut self) -> Result { + let name = self.parse_object_name()?; + let columns = self.parse_parenthesized_column_list(Optional, false)?; + + let with_options = self.parse_options(Keyword::WITH)?; + + self.expect_keyword(Keyword::AS)?; + let query = Box::new(self.parse_query()?); + + Ok(Statement::AlterView { + name, + columns, + query, + with_options, + }) + } + /// Parse a copy statement pub fn parse_copy(&mut self) -> Result { let source; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 917a89d8..fa2574ab 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2869,6 +2869,67 @@ fn parse_alter_index() { }; } +#[test] +fn parse_alter_view() { + let sql = "ALTER VIEW myschema.myview AS SELECT foo FROM bar"; + match verified_stmt(sql) { + Statement::AlterView { + name, + 
columns, + query, + with_options, + } => { + assert_eq!("myschema.myview", name.to_string()); + assert_eq!(Vec::::new(), columns); + assert_eq!("SELECT foo FROM bar", query.to_string()); + assert_eq!(with_options, vec![]); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_alter_view_with_options() { + let sql = "ALTER VIEW v WITH (foo = 'bar', a = 123) AS SELECT 1"; + match verified_stmt(sql) { + Statement::AlterView { with_options, .. } => { + assert_eq!( + vec![ + SqlOption { + name: "foo".into(), + value: Value::SingleQuotedString("bar".into()), + }, + SqlOption { + name: "a".into(), + value: number("123"), + }, + ], + with_options + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_alter_view_with_columns() { + let sql = "ALTER VIEW v (has, cols) AS SELECT 1, 2"; + match verified_stmt(sql) { + Statement::AlterView { + name, + columns, + query, + with_options, + } => { + assert_eq!("v", name.to_string()); + assert_eq!(columns, vec![Ident::new("has"), Ident::new("cols")]); + assert_eq!("SELECT 1, 2", query.to_string()); + assert_eq!(with_options, vec![]); + } + _ => unreachable!(), + } +} + #[test] fn parse_alter_table_add_column() { match verified_stmt("ALTER TABLE tab ADD foo TEXT") { From 20ac38b4da23d53249caf438c87f826bb58ff487 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Sat, 1 Jul 2023 04:50:46 +0800 Subject: [PATCH 010/109] Support multi args for unnest (#909) Signed-off-by: jayzhan211 Co-authored-by: Andrew Lamb --- src/ast/query.rs | 7 +-- src/parser.rs | 4 +- src/test_utils.rs | 1 + tests/sqlparser_bigquery.rs | 4 +- tests/sqlparser_common.rs | 93 +++++++++++++++++++++++++++++++++++-- tests/sqlparser_postgres.rs | 3 +- 6 files changed, 98 insertions(+), 14 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index a709e101..fe650e6b 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -677,7 +677,7 @@ pub enum TableFactor { /// ``` UNNEST { alias: Option, - array_expr: Box, + array_exprs: Vec, with_offset: bool, 
with_offset_alias: Option, }, @@ -749,11 +749,12 @@ impl fmt::Display for TableFactor { } TableFactor::UNNEST { alias, - array_expr, + array_exprs, with_offset, with_offset_alias, } => { - write!(f, "UNNEST({array_expr})")?; + write!(f, "UNNEST({})", display_comma_separated(array_exprs))?; + if let Some(alias) = alias { write!(f, " AS {alias}")?; } diff --git a/src/parser.rs b/src/parser.rs index ff6c5c54..d907a611 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -5999,7 +5999,7 @@ impl<'a> Parser<'a> { && self.parse_keyword(Keyword::UNNEST) { self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; + let array_exprs = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) { @@ -6025,7 +6025,7 @@ impl<'a> Parser<'a> { Ok(TableFactor::UNNEST { alias, - array_expr: Box::new(expr), + array_exprs, with_offset, with_offset_alias, }) diff --git a/src/test_utils.rs b/src/test_utils.rs index 57b21e1c..e825db97 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -197,6 +197,7 @@ pub fn expr_from_projection(item: &SelectItem) -> &Expr { } } +/// Creates a `Value::Number`, panic'ing if n is not a number pub fn number(n: &'static str) -> Value { Value::Number(n.parse().unwrap(), false) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 11998ae6..4eb38113 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -160,10 +160,10 @@ fn parse_join_constraint_unnest_alias() { vec![Join { relation: TableFactor::UNNEST { alias: table_alias("f"), - array_expr: Box::new(Expr::CompoundIdentifier(vec![ + array_exprs: vec![Expr::CompoundIdentifier(vec![ Ident::new("t1"), Ident::new("a") - ])), + ])], with_offset: false, with_offset_alias: None }, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index fa2574ab..f025216a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ 
-4130,7 +4130,16 @@ fn parse_table_function() { #[test] fn parse_unnest() { + let sql = "SELECT UNNEST(make_array(1, 2, 3))"; + one_statement_parses_to(sql, sql); + let sql = "SELECT UNNEST(make_array(1, 2, 3), make_array(4, 5))"; + one_statement_parses_to(sql, sql); +} + +#[test] +fn parse_unnest_in_from_clause() { fn chk( + array_exprs: &str, alias: bool, with_offset: bool, with_offset_alias: bool, @@ -4138,7 +4147,8 @@ fn parse_unnest() { want: Vec, ) { let sql = &format!( - "SELECT * FROM UNNEST(expr){}{}{}", + "SELECT * FROM UNNEST({}){}{}{}", + array_exprs, if alias { " AS numbers" } else { "" }, if with_offset { " WITH OFFSET" } else { "" }, if with_offset_alias { @@ -4156,6 +4166,7 @@ fn parse_unnest() { }; // 1. both Alias and WITH OFFSET clauses. chk( + "expr", true, true, false, @@ -4166,7 +4177,7 @@ fn parse_unnest() { name: Ident::new("numbers"), columns: vec![], }), - array_expr: Box::new(Expr::Identifier(Ident::new("expr"))), + array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: true, with_offset_alias: None, }, @@ -4175,6 +4186,7 @@ fn parse_unnest() { ); // 2. neither Alias nor WITH OFFSET clause. chk( + "expr", false, false, false, @@ -4182,7 +4194,7 @@ fn parse_unnest() { vec![TableWithJoins { relation: TableFactor::UNNEST { alias: None, - array_expr: Box::new(Expr::Identifier(Ident::new("expr"))), + array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: false, with_offset_alias: None, }, @@ -4191,6 +4203,7 @@ fn parse_unnest() { ); // 3. Alias but no WITH OFFSET clause. chk( + "expr", false, true, false, @@ -4198,7 +4211,7 @@ fn parse_unnest() { vec![TableWithJoins { relation: TableFactor::UNNEST { alias: None, - array_expr: Box::new(Expr::Identifier(Ident::new("expr"))), + array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: true, with_offset_alias: None, }, @@ -4207,6 +4220,7 @@ fn parse_unnest() { ); // 4. WITH OFFSET but no Alias. 
chk( + "expr", true, false, false, @@ -4217,13 +4231,82 @@ fn parse_unnest() { name: Ident::new("numbers"), columns: vec![], }), - array_expr: Box::new(Expr::Identifier(Ident::new("expr"))), + array_exprs: vec![Expr::Identifier(Ident::new("expr"))], + with_offset: false, + with_offset_alias: None, + }, + joins: vec![], + }], + ); + // 5. Simple array + chk( + "make_array(1, 2, 3)", + false, + false, + false, + &dialects, + vec![TableWithJoins { + relation: TableFactor::UNNEST { + alias: None, + array_exprs: vec![Expr::Function(Function { + name: ObjectName(vec![Ident::new("make_array")]), + args: vec![ + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("1")))), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("2")))), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("3")))), + ], + over: None, + distinct: false, + special: false, + order_by: vec![], + })], with_offset: false, with_offset_alias: None, }, joins: vec![], }], ); + // 6. Multiple arrays + chk( + "make_array(1, 2, 3), make_array(5, 6)", + false, + false, + false, + &dialects, + vec![TableWithJoins { + relation: TableFactor::UNNEST { + alias: None, + array_exprs: vec![ + Expr::Function(Function { + name: ObjectName(vec![Ident::new("make_array")]), + args: vec![ + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("1")))), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("2")))), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("3")))), + ], + over: None, + distinct: false, + special: false, + order_by: vec![], + }), + Expr::Function(Function { + name: ObjectName(vec![Ident::new("make_array")]), + args: vec![ + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("5")))), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("6")))), + ], + over: None, + distinct: false, + special: false, + order_by: vec![], + }), + ], + with_offset: false, + with_offset_alias: None, + }, + joins: vec![], + }], + ) } 
#[test] diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 80a4261e..6b8bc64d 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2407,7 +2407,7 @@ fn parse_create_role() { in_role, in_group, role, - user, + user: _, admin, authorization_owner, }], @@ -2435,7 +2435,6 @@ fn parse_create_role() { assert_eq_vec(&["role1", "role2"], in_role); assert!(in_group.is_empty()); assert_eq_vec(&["role3"], role); - assert!(user.is_empty()); assert_eq_vec(&["role4", "role5"], admin); assert_eq!(*authorization_owner, None); } From a50671d95d34688529fbc3d4452bec164ff21497 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Thu, 6 Jul 2023 16:27:18 +0300 Subject: [PATCH 011/109] feat: support PGOverlap operator (#912) --- src/ast/operator.rs | 2 ++ src/parser.rs | 4 ++++ src/tokenizer.rs | 14 ++++++++++++-- tests/sqlparser_postgres.rs | 1 + 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index b988265b..b60b5135 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -95,6 +95,7 @@ pub enum BinaryOperator { PGBitwiseShiftLeft, PGBitwiseShiftRight, PGExp, + PGOverlap, PGRegexMatch, PGRegexIMatch, PGRegexNotMatch, @@ -135,6 +136,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::PGBitwiseShiftLeft => f.write_str("<<"), BinaryOperator::PGBitwiseShiftRight => f.write_str(">>"), BinaryOperator::PGExp => f.write_str("^"), + BinaryOperator::PGOverlap => f.write_str("&&"), BinaryOperator::PGRegexMatch => f.write_str("~"), BinaryOperator::PGRegexIMatch => f.write_str("~*"), BinaryOperator::PGRegexNotMatch => f.write_str("!~"), diff --git a/src/parser.rs b/src/parser.rs index d907a611..d9b85fcc 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1662,6 +1662,9 @@ impl<'a> Parser<'a> { Token::Sharp if dialect_of!(self is PostgreSqlDialect) => { Some(BinaryOperator::PGBitwiseXor) } + Token::Overlap if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + 
Some(BinaryOperator::PGOverlap) + } Token::Tilde => Some(BinaryOperator::PGRegexMatch), Token::TildeAsterisk => Some(BinaryOperator::PGRegexIMatch), Token::ExclamationMarkTilde => Some(BinaryOperator::PGRegexNotMatch), @@ -2050,6 +2053,7 @@ impl<'a> Parser<'a> { Token::LBracket | Token::LongArrow | Token::Arrow + | Token::Overlap | Token::HashArrow | Token::HashLongArrow | Token::AtArrow diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 257a3517..f8e6793f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -150,6 +150,8 @@ pub enum Token { ShiftLeft, /// `>>`, a bitwise shift right operator in PostgreSQL ShiftRight, + /// '&&', an overlap operator in PostgreSQL + Overlap, /// Exclamation Mark `!` used for PostgreSQL factorial operator ExclamationMark, /// Double Exclamation Mark `!!` used for PostgreSQL prefix factorial operator @@ -158,7 +160,7 @@ pub enum Token { AtSign, /// `|/`, a square root math operator in PostgreSQL PGSquareRoot, - /// `||/` , a cube root math operator in PostgreSQL + /// `||/`, a cube root math operator in PostgreSQL PGCubeRoot, /// `?` or `$` , a prepared statement arg placeholder Placeholder(String), @@ -245,6 +247,7 @@ impl fmt::Display for Token { Token::AtSign => f.write_str("@"), Token::ShiftLeft => f.write_str("<<"), Token::ShiftRight => f.write_str(">>"), + Token::Overlap => f.write_str("&&"), Token::PGSquareRoot => f.write_str("|/"), Token::PGCubeRoot => f.write_str("||/"), Token::Placeholder(ref s) => write!(f, "{s}"), @@ -858,7 +861,14 @@ impl<'a> Tokenizer<'a> { '\\' => self.consume_and_return(chars, Token::Backslash), '[' => self.consume_and_return(chars, Token::LBracket), ']' => self.consume_and_return(chars, Token::RBracket), - '&' => self.consume_and_return(chars, Token::Ampersand), + '&' => { + chars.next(); // consume the '&' + match chars.peek() { + Some('&') => self.consume_and_return(chars, Token::Overlap), + // Bitshift '&' operator + _ => Ok(Some(Token::Ampersand)), + } + } '^' => 
self.consume_and_return(chars, Token::Caret), '{' => self.consume_and_return(chars, Token::LBrace), '}' => self.consume_and_return(chars, Token::RBrace), diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 6b8bc64d..b29081ab 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1613,6 +1613,7 @@ fn parse_pg_binary_ops() { ("^", BinaryOperator::PGExp, pg()), (">>", BinaryOperator::PGBitwiseShiftRight, pg_and_generic()), ("<<", BinaryOperator::PGBitwiseShiftLeft, pg_and_generic()), + ("&&", BinaryOperator::PGOverlap, pg()), ]; for (str_op, op, dialects) in binary_ops { From 4efe55dd8ac440ac370752e0e7095b3e62d73d2c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 17 Jul 2023 14:19:51 -0400 Subject: [PATCH 012/109] Remove most instances of `#[cfg(feature(bigdecimal))]` in tests (#910) --- src/ast/value.rs | 3 +++ src/test_utils.rs | 2 +- tests/sqlparser_bigquery.rs | 21 +++----------------- tests/sqlparser_common.rs | 1 - tests/sqlparser_duckdb.rs | 8 +------- tests/sqlparser_mssql.rs | 8 +------- tests/sqlparser_mysql.rs | 38 ++++++++++++++----------------------- tests/sqlparser_postgres.rs | 37 +++++------------------------------- 8 files changed, 28 insertions(+), 90 deletions(-) diff --git a/src/ast/value.rs b/src/ast/value.rs index 95ea978d..491553ca 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -32,6 +32,9 @@ pub enum Value { #[cfg(not(feature = "bigdecimal"))] Number(String, bool), #[cfg(feature = "bigdecimal")] + // HINT: use `test_utils::number` to make an instance of + // Value::Number This might help if you your tests pass locally + // but fail on CI with the `--all-features` flag enabled Number(BigDecimal, bool), /// 'string value' SingleQuotedString(String), diff --git a/src/test_utils.rs b/src/test_utils.rs index e825db97..47fb00d5 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -198,7 +198,7 @@ pub fn expr_from_projection(item: &SelectItem) -> &Expr { } /// Creates a 
`Value::Number`, panic'ing if n is not a number -pub fn number(n: &'static str) -> Value { +pub fn number(n: &str) -> Value { Value::Number(n.parse().unwrap(), false) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 4eb38113..bbe1a6e9 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -17,11 +17,6 @@ use sqlparser::ast::*; use sqlparser::dialect::{BigQueryDialect, GenericDialect}; use test_utils::*; -#[cfg(feature = "bigdecimal")] -use bigdecimal::*; -#[cfg(feature = "bigdecimal")] -use std::str::FromStr; - #[test] fn parse_literal_string() { let sql = r#"SELECT 'single', "double""#; @@ -377,22 +372,13 @@ fn test_select_wildcard_with_replace() { expr: Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("quantity"))), op: BinaryOperator::Divide, - #[cfg(not(feature = "bigdecimal"))] - right: Box::new(Expr::Value(Value::Number("2".to_string(), false))), - #[cfg(feature = "bigdecimal")] - right: Box::new(Expr::Value(Value::Number( - BigDecimal::from_str("2").unwrap(), - false, - ))), + right: Box::new(Expr::Value(number("2"))), }, column_name: Ident::new("quantity"), as_keyword: true, }), Box::new(ReplaceSelectElement { - #[cfg(not(feature = "bigdecimal"))] - expr: Expr::Value(Value::Number("3".to_string(), false)), - #[cfg(feature = "bigdecimal")] - expr: Expr::Value(Value::Number(BigDecimal::from_str("3").unwrap(), false)), + expr: Expr::Value(number("3")), column_name: Ident::new("order_id"), as_keyword: true, }), @@ -421,7 +407,6 @@ fn bigquery_and_generic() -> TestedDialects { fn parse_map_access_offset() { let sql = "SELECT d[offset(0)]"; let _select = bigquery().verified_only_select(sql); - #[cfg(not(feature = "bigdecimal"))] assert_eq!( _select.projection[0], SelectItem::UnnamedExpr(Expr::MapAccess { @@ -432,7 +417,7 @@ fn parse_map_access_offset() { keys: vec![Expr::Function(Function { name: ObjectName(vec!["offset".into()]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - 
Value::Number("0".into(), false) + number("0") ))),], over: None, distinct: false, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f025216a..4fe1a57b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1731,7 +1731,6 @@ fn parse_select_having() { assert!(select.having.is_some()); } -#[cfg(feature = "bigdecimal")] #[test] fn parse_select_qualify() { let sql = "SELECT i, p, o FROM qt QUALIFY ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) = 1"; diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index bb60235a..fabb9790 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -97,13 +97,7 @@ fn test_create_macro_default_args() { MacroArg::new("a"), MacroArg { name: Ident::new("b"), - default_expr: Some(Expr::Value(Value::Number( - #[cfg(not(feature = "bigdecimal"))] - 5.to_string(), - #[cfg(feature = "bigdecimal")] - bigdecimal::BigDecimal::from(5), - false, - ))), + default_expr: Some(Expr::Value(number("5"))), }, ]), definition: MacroDefinition::Expr(Expr::BinaryOp { diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 8fc3dcd5..b46a0c6c 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -59,12 +59,6 @@ fn parse_mssql_delimited_identifiers() { fn parse_create_procedure() { let sql = "CREATE OR ALTER PROCEDURE test (@foo INT, @bar VARCHAR(256)) AS BEGIN SELECT 1 END"; - #[cfg(feature = "bigdecimal")] - let one = Value::Number(bigdecimal::BigDecimal::from(1), false); - - #[cfg(not(feature = "bigdecimal"))] - let one = Value::Number("1".to_string(), false); - assert_eq!( ms().verified_stmt(sql), Statement::CreateProcedure { @@ -79,7 +73,7 @@ fn parse_create_procedure() { body: Box::new(SetExpr::Select(Box::new(Select { distinct: None, top: None, - projection: vec![SelectItem::UnnamedExpr(Expr::Value(one))], + projection: vec![SelectItem::UnnamedExpr(Expr::Value(number("1")))], into: None, from: vec![], lateral_views: vec![], diff --git 
a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 3e5c810e..12d2cc73 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -259,13 +259,7 @@ fn parse_set_variables() { local: true, hivevar: false, variable: ObjectName(vec!["autocommit".into()]), - value: vec![Expr::Value(Value::Number( - #[cfg(not(feature = "bigdecimal"))] - "1".to_string(), - #[cfg(feature = "bigdecimal")] - bigdecimal::BigDecimal::from(1), - false - ))], + value: vec![Expr::Value(number("1"))], } ); } @@ -643,7 +637,6 @@ fn parse_create_table_unsigned() { } #[test] -#[cfg(not(feature = "bigdecimal"))] fn parse_simple_insert() { let sql = r"INSERT INTO tasks (title, priority) VALUES ('Test Some Inserts', 1), ('Test Entry 2', 2), ('Test Entry 3', 3)"; @@ -668,15 +661,15 @@ fn parse_simple_insert() { Expr::Value(Value::SingleQuotedString( "Test Some Inserts".to_string() )), - Expr::Value(Value::Number("1".to_string(), false)) + Expr::Value(number("1")) ], vec![ Expr::Value(Value::SingleQuotedString("Test Entry 2".to_string())), - Expr::Value(Value::Number("2".to_string(), false)) + Expr::Value(number("2")) ], vec![ Expr::Value(Value::SingleQuotedString("Test Entry 3".to_string())), - Expr::Value(Value::Number("3".to_string(), false)) + Expr::Value(number("3")) ] ] })), @@ -895,6 +888,13 @@ fn parse_select_with_numeric_prefix_column_name() { } } +// Don't run with bigdecimal as it fails like this on rust beta: +// +// 'parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column' +// panicked at 'assertion failed: `(left == right)` +// +// left: `"SELECT 123e4, 123col_$@123abc FROM \"table\""`, +// right: `"SELECT 1230000, 123col_$@123abc FROM \"table\""`', src/test_utils.rs:114:13 #[cfg(not(feature = "bigdecimal"))] #[test] fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { @@ -907,10 +907,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { distinct: None, top: None, projection: vec![ - 
SelectItem::UnnamedExpr(Expr::Value(Value::Number( - "123e4".to_string(), - false - ))), + SelectItem::UnnamedExpr(Expr::Value(number("123e4"))), SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("123col_$@123abc"))) ], into: None, @@ -1072,7 +1069,6 @@ fn parse_alter_table_change_column() { } #[test] -#[cfg(not(feature = "bigdecimal"))] fn parse_substring_in_select() { let sql = "SELECT DISTINCT SUBSTRING(description, 0, 1) FROM test"; match mysql().one_statement_parses_to( @@ -1091,14 +1087,8 @@ fn parse_substring_in_select() { value: "description".to_string(), quote_style: None })), - substring_from: Some(Box::new(Expr::Value(Value::Number( - "0".to_string(), - false - )))), - substring_for: Some(Box::new(Expr::Value(Value::Number( - "1".to_string(), - false - )))) + substring_from: Some(Box::new(Expr::Value(number("0")))), + substring_for: Some(Box::new(Expr::Value(number("1")))) })], into: None, from: vec![TableWithJoins { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index b29081ab..024ead6a 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1099,13 +1099,7 @@ fn parse_set() { local: false, hivevar: false, variable: ObjectName(vec![Ident::new("a")]), - value: vec![Expr::Value(Value::Number( - #[cfg(not(feature = "bigdecimal"))] - "0".to_string(), - #[cfg(feature = "bigdecimal")] - bigdecimal::BigDecimal::from(0), - false, - ))], + value: vec![Expr::Value(number("0"))], } ); @@ -1691,13 +1685,8 @@ fn parse_pg_regex_match_ops() { #[test] fn parse_array_index_expr() { - #[cfg(feature = "bigdecimal")] let num: Vec = (0..=10) - .map(|s| Expr::Value(Value::Number(bigdecimal::BigDecimal::from(s), false))) - .collect(); - #[cfg(not(feature = "bigdecimal"))] - let num: Vec = (0..=10) - .map(|s| Expr::Value(Value::Number(s.to_string(), false))) + .map(|s| Expr::Value(number(&s.to_string()))) .collect(); let sql = "SELECT foo[0] FROM foos"; @@ -1785,13 +1774,7 @@ fn parse_array_subquery_expr() { left: 
Box::new(SetExpr::Select(Box::new(Select { distinct: None, top: None, - projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number( - #[cfg(not(feature = "bigdecimal"))] - "1".to_string(), - #[cfg(feature = "bigdecimal")] - bigdecimal::BigDecimal::from(1), - false, - )))], + projection: vec![SelectItem::UnnamedExpr(Expr::Value(number("1")))], into: None, from: vec![], lateral_views: vec![], @@ -1807,13 +1790,7 @@ fn parse_array_subquery_expr() { right: Box::new(SetExpr::Select(Box::new(Select { distinct: None, top: None, - projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number( - #[cfg(not(feature = "bigdecimal"))] - "2".to_string(), - #[cfg(feature = "bigdecimal")] - bigdecimal::BigDecimal::from(2), - false, - )))], + projection: vec![SelectItem::UnnamedExpr(Expr::Value(number("2")))], into: None, from: vec![], lateral_views: vec![], @@ -2021,10 +1998,6 @@ fn test_composite_value() { select.projection[0] ); - #[cfg(feature = "bigdecimal")] - let num: Expr = Expr::Value(Value::Number(bigdecimal::BigDecimal::from(9), false)); - #[cfg(not(feature = "bigdecimal"))] - let num: Expr = Expr::Value(Value::Number("9".to_string(), false)); assert_eq!( select.selection, Some(Expr::BinaryOp { @@ -2036,7 +2009,7 @@ fn test_composite_value() { ])))) }), op: BinaryOperator::Gt, - right: Box::new(num) + right: Box::new(Expr::Value(number("9"))) }) ); From 653346c4d6e67001e492a41920f3c8903267a547 Mon Sep 17 00:00:00 2001 From: JIN-YONG LEE <74762475+jinlee0@users.noreply.github.com> Date: Tue, 18 Jul 2023 03:21:45 +0900 Subject: [PATCH 013/109] Upgrade bigdecimal to 0.4.1 (#921) --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index eea1fe78..03a08f66 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,7 @@ json_example = ["serde_json", "serde"] visitor = ["sqlparser_derive"] [dependencies] -bigdecimal = { version = "0.3", features = ["serde"], optional = true } +bigdecimal = { version = "0.4.1", 
features = ["serde"], optional = true } log = "0.4" serde = { version = "1.0", features = ["derive"], optional = true } # serde_json is only used in examples/cli, but we have to put it outside From d6ebb58b969e0eb62fd34ed228ff3d51013e5ea5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 17 Jul 2023 14:34:54 -0400 Subject: [PATCH 014/109] Fix dependabot by removing rust-toolchain toml (#922) --- rust-toolchain | 1 - 1 file changed, 1 deletion(-) delete mode 100644 rust-toolchain diff --git a/rust-toolchain b/rust-toolchain deleted file mode 100644 index 870bbe4e..00000000 --- a/rust-toolchain +++ /dev/null @@ -1 +0,0 @@ -stable \ No newline at end of file From c8b6e7f2c7b10698d7d6fb6704ed7933e7ab9d7d Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Mon, 17 Jul 2023 21:42:28 +0300 Subject: [PATCH 015/109] feat: comments for all operators (#917) --- src/ast/operator.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index b60b5135..9fb1bf02 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -28,8 +28,11 @@ use super::display_separated; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum UnaryOperator { + /// Plus, e.g. `+9` Plus, + /// Minus, e.g. `-9` Minus, + /// Not, e.g. `NOT(true)` Not, /// Bitwise Not, e.g. `~9` (PostgreSQL-specific) PGBitwiseNot, @@ -66,24 +69,43 @@ impl fmt::Display for UnaryOperator { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum BinaryOperator { + /// Plus, e.g. `a + b` Plus, + /// Minus, e.g. `a - b` Minus, + /// Multiply, e.g. `a * b` Multiply, + /// Divide, e.g. `a / b` Divide, + /// Modulo, e.g. `a % b` Modulo, + /// String/Array Concat operator, e.g. `a || b` StringConcat, + /// Greater than, e.g. `a > b` Gt, + /// Less than, e.g. `a < b` Lt, + /// Greater equal, e.g. 
`a >= b` GtEq, + /// Less equal, e.g. `a <= b` LtEq, + /// Spaceship, e.g. `a <=> b` Spaceship, + /// Equal, e.g. `a = b` Eq, + /// Not equal, e.g. `a <> b` NotEq, + /// And, e.g. `a AND b` And, + /// Or, e.g. `a OR b` Or, + /// XOR, e.g. `a XOR b` Xor, + /// Bitwise or, e.g. `a | b` BitwiseOr, + /// Bitwise and, e.g. `a & b` BitwiseAnd, + /// Bitwise XOR, e.g. `a ^ b` BitwiseXor, /// Integer division operator `//` in DuckDB DuckIntegerDivide, @@ -91,14 +113,23 @@ pub enum BinaryOperator { MyIntegerDivide, /// Support for custom operators (built by parsers outside this crate) Custom(String), + /// Bitwise XOR, e.g. `a # b` (PostgreSQL-specific) PGBitwiseXor, + /// Bitwise shift left, e.g. `a << b` (PostgreSQL-specific) PGBitwiseShiftLeft, + /// Bitwise shift right, e.g. `a >> b` (PostgreSQL-specific) PGBitwiseShiftRight, + /// Exponent, e.g. `a ^ b` (PostgreSQL-specific) PGExp, + /// Overlap operator, e.g. `a && b` (PostgreSQL-specific) PGOverlap, + /// String matches regular expression (case sensitively), e.g. `a ~ b` (PostgreSQL-specific) PGRegexMatch, + /// String matches regular expression (case insensitively), e.g. `a ~* b` (PostgreSQL-specific) PGRegexIMatch, + /// String does not match regular expression (case sensitively), e.g. `a !~ b` (PostgreSQL-specific) PGRegexNotMatch, + /// String does not match regular expression (case insensitively), e.g. `a !~* b` (PostgreSQL-specific) PGRegexNotIMatch, /// PostgreSQL-specific custom operator. 
/// From df45db13754311a597777cf3bf53d923cd7e3869 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Mon, 17 Jul 2023 22:03:48 +0300 Subject: [PATCH 016/109] fix: parsing `JsonOperator` (#913) --- src/tokenizer.rs | 111 ++++++++++++++++++++++++++------------ tests/sqlparser_common.rs | 35 ++++++++++++ 2 files changed, 112 insertions(+), 34 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f8e6793f..7a181354 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -35,7 +35,9 @@ use serde::{Deserialize, Serialize}; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::DollarQuotedString; -use crate::dialect::{BigQueryDialect, DuckDbDialect, GenericDialect, SnowflakeDialect}; +use crate::dialect::{ + BigQueryDialect, DuckDbDialect, GenericDialect, HiveDialect, SnowflakeDialect, +}; use crate::dialect::{Dialect, MySqlDialect}; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; @@ -495,9 +497,32 @@ impl<'a> Tokenizer<'a> { Ok(tokens) } + fn tokenize_identifier_or_keyword( + &self, + ch: String, + chars: &mut State, + ) -> Result, TokenizerError> { + chars.next(); // consume the first char + let word = self.tokenize_word(ch, chars); + + // TODO: implement parsing of exponent here + if word.chars().all(|x| x.is_ascii_digit() || x == '.') { + let mut inner_state = State { + peekable: word.chars().peekable(), + line: 0, + col: 0, + }; + let mut s = peeking_take_while(&mut inner_state, |ch| matches!(ch, '0'..='9' | '.')); + let s2 = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.')); + s += s2.as_str(); + return Ok(Some(Token::Number(s, false))); + } + + Ok(Some(Token::make_word(&word, None))) + } + /// Get the next token or return None fn next_token(&self, chars: &mut State) -> Result, TokenizerError> { - //println!("next_token: {:?}", chars.peek()); match chars.peek() { Some(&ch) => match ch { ' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)), @@ -525,7 +550,7 @@ impl<'a> Tokenizer<'a> { } _ => { // 
regular identifier starting with an "b" or "B" - let s = self.tokenize_word(b, chars); + let s = self.tokenize_word(b.to_string(), chars); Ok(Some(Token::make_word(&s, None))) } } @@ -544,7 +569,7 @@ impl<'a> Tokenizer<'a> { } _ => { // regular identifier starting with an "r" or "R" - let s = self.tokenize_word(b, chars); + let s = self.tokenize_word(b.to_string(), chars); Ok(Some(Token::make_word(&s, None))) } } @@ -560,7 +585,7 @@ impl<'a> Tokenizer<'a> { } _ => { // regular identifier starting with an "N" - let s = self.tokenize_word(n, chars); + let s = self.tokenize_word(n.to_string(), chars); Ok(Some(Token::make_word(&s, None))) } } @@ -577,7 +602,7 @@ impl<'a> Tokenizer<'a> { } _ => { // regular identifier starting with an "E" or "e" - let s = self.tokenize_word(x, chars); + let s = self.tokenize_word(x.to_string(), chars); Ok(Some(Token::make_word(&s, None))) } } @@ -594,33 +619,11 @@ impl<'a> Tokenizer<'a> { } _ => { // regular identifier starting with an "X" - let s = self.tokenize_word(x, chars); + let s = self.tokenize_word(x.to_string(), chars); Ok(Some(Token::make_word(&s, None))) } } } - // identifier or keyword - ch if self.dialect.is_identifier_start(ch) => { - chars.next(); // consume the first char - let word = self.tokenize_word(ch, chars); - - // TODO: implement parsing of exponent here - if word.chars().all(|x| x.is_ascii_digit() || x == '.') { - let mut inner_state = State { - peekable: word.chars().peekable(), - line: 0, - col: 0, - }; - let mut s = peeking_take_while(&mut inner_state, |ch| { - matches!(ch, '0'..='9' | '.') - }); - let s2 = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.')); - s += s2.as_str(); - return Ok(Some(Token::Number(s, false))); - } - - Ok(Some(Token::make_word(&word, None))) - } // single quoted string '\'' => { let s = self.tokenize_quoted_string(chars, '\'')?; @@ -714,7 +717,7 @@ impl<'a> Tokenizer<'a> { // mysql dialect supports identifiers that start with a numeric prefix, // as long as they aren't 
an exponent number. - if dialect_of!(self is MySqlDialect) && exponent_part.is_empty() { + if dialect_of!(self is MySqlDialect | HiveDialect) && exponent_part.is_empty() { let word = peeking_take_while(chars, |ch| self.dialect.is_identifier_part(ch)); @@ -786,7 +789,18 @@ impl<'a> Tokenizer<'a> { } '+' => self.consume_and_return(chars, Token::Plus), '*' => self.consume_and_return(chars, Token::Mul), - '%' => self.consume_and_return(chars, Token::Mod), + '%' => { + chars.next(); + match chars.peek() { + Some(' ') => self.consume_and_return(chars, Token::Mod), + Some(sch) if self.dialect.is_identifier_start('%') => { + let mut s = ch.to_string(); + s.push_str(&sch.to_string()); + self.tokenize_identifier_or_keyword(s, chars) + } + _ => self.consume_and_return(chars, Token::Mod), + } + } '|' => { chars.next(); // consume the '|' match chars.peek() { @@ -901,6 +915,12 @@ impl<'a> Tokenizer<'a> { _ => Ok(Some(Token::HashArrow)), } } + Some(' ') => Ok(Some(Token::Sharp)), + Some(sch) if self.dialect.is_identifier_start('#') => { + let mut s = ch.to_string(); + s.push_str(&sch.to_string()); + self.tokenize_identifier_or_keyword(s, chars) + } _ => Ok(Some(Token::Sharp)), } } @@ -909,7 +929,25 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some('>') => self.consume_and_return(chars, Token::AtArrow), Some('?') => self.consume_and_return(chars, Token::AtQuestion), - Some('@') => self.consume_and_return(chars, Token::AtAt), + Some('@') => { + chars.next(); + match chars.peek() { + Some(' ') => Ok(Some(Token::AtAt)), + Some(tch) if self.dialect.is_identifier_start('@') => { + let mut s = ch.to_string(); + s.push('@'); + s.push_str(&tch.to_string()); + self.tokenize_identifier_or_keyword(s, chars) + } + _ => Ok(Some(Token::AtAt)), + } + } + Some(' ') => Ok(Some(Token::AtSign)), + Some(sch) if self.dialect.is_identifier_start('@') => { + let mut s = ch.to_string(); + s.push_str(&sch.to_string()); + self.tokenize_identifier_or_keyword(s, chars) + } _ => Ok(Some(Token::AtSign)), 
} } @@ -918,6 +956,11 @@ impl<'a> Tokenizer<'a> { let s = peeking_take_while(chars, |ch| ch.is_numeric()); Ok(Some(Token::Placeholder(String::from("?") + &s))) } + + // identifier or keyword + ch if self.dialect.is_identifier_start(ch) => { + self.tokenize_identifier_or_keyword(ch.to_string(), chars) + } '$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)), //whitespace check (including unicode chars) should be last as it covers some of the chars above @@ -1043,8 +1086,8 @@ impl<'a> Tokenizer<'a> { } /// Tokenize an identifier or keyword, after the first char is already consumed. - fn tokenize_word(&self, first_char: char, chars: &mut State) -> String { - let mut s = first_char.to_string(); + fn tokenize_word(&self, first_chars: String, chars: &mut State) -> String { + let mut s = first_chars; s.push_str(&peeking_take_while(chars, |ch| { self.dialect.is_identifier_part(ch) })); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 4fe1a57b..08451ac2 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1113,6 +1113,41 @@ fn parse_unary_math_with_multiply() { ); } +fn pg_and_generic() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(PostgreSqlDialect {}), Box::new(GenericDialect {})], + options: None, + } +} + +#[test] +fn parse_json_ops_without_colon() { + use self::JsonOperator; + let binary_ops = &[ + ("->", JsonOperator::Arrow, all_dialects()), + ("->>", JsonOperator::LongArrow, all_dialects()), + ("#>", JsonOperator::HashArrow, pg_and_generic()), + ("#>>", JsonOperator::HashLongArrow, pg_and_generic()), + ("@>", JsonOperator::AtArrow, all_dialects()), + ("<@", JsonOperator::ArrowAt, all_dialects()), + ("#-", JsonOperator::HashMinus, pg_and_generic()), + ("@?", JsonOperator::AtQuestion, all_dialects()), + ("@@", JsonOperator::AtAt, all_dialects()), + ]; + + for (str_op, op, dialects) in binary_ops { + let select = dialects.verified_only_select(&format!("SELECT a {} b", &str_op)); + assert_eq!( + 
SelectItem::UnnamedExpr(Expr::JsonAccess { + left: Box::new(Expr::Identifier(Ident::new("a"))), + operator: *op, + right: Box::new(Expr::Identifier(Ident::new("b"))), + }), + select.projection[0] + ); + } +} + #[test] fn parse_is_null() { use self::Expr::*; From c45451850c94a0d7d05a90035463799a301e7d13 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 17 Jul 2023 16:09:55 -0400 Subject: [PATCH 017/109] Clean up JSON operator tokenizing code (#923) --- src/tokenizer.rs | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 7a181354..6a135b43 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -497,12 +497,14 @@ impl<'a> Tokenizer<'a> { Ok(tokens) } + // Tokenize the identifer or keywords in `ch` fn tokenize_identifier_or_keyword( &self, - ch: String, + ch: impl IntoIterator, chars: &mut State, ) -> Result, TokenizerError> { chars.next(); // consume the first char + let ch: String = ch.into_iter().collect(); let word = self.tokenize_word(ch, chars); // TODO: implement parsing of exponent here @@ -550,7 +552,7 @@ impl<'a> Tokenizer<'a> { } _ => { // regular identifier starting with an "b" or "B" - let s = self.tokenize_word(b.to_string(), chars); + let s = self.tokenize_word(b, chars); Ok(Some(Token::make_word(&s, None))) } } @@ -569,7 +571,7 @@ impl<'a> Tokenizer<'a> { } _ => { // regular identifier starting with an "r" or "R" - let s = self.tokenize_word(b.to_string(), chars); + let s = self.tokenize_word(b, chars); Ok(Some(Token::make_word(&s, None))) } } @@ -585,7 +587,7 @@ impl<'a> Tokenizer<'a> { } _ => { // regular identifier starting with an "N" - let s = self.tokenize_word(n.to_string(), chars); + let s = self.tokenize_word(n, chars); Ok(Some(Token::make_word(&s, None))) } } @@ -602,7 +604,7 @@ impl<'a> Tokenizer<'a> { } _ => { // regular identifier starting with an "E" or "e" - let s = self.tokenize_word(x.to_string(), chars); + let s = self.tokenize_word(x, 
chars); Ok(Some(Token::make_word(&s, None))) } } @@ -619,7 +621,7 @@ impl<'a> Tokenizer<'a> { } _ => { // regular identifier starting with an "X" - let s = self.tokenize_word(x.to_string(), chars); + let s = self.tokenize_word(x, chars); Ok(Some(Token::make_word(&s, None))) } } @@ -794,9 +796,7 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some(' ') => self.consume_and_return(chars, Token::Mod), Some(sch) if self.dialect.is_identifier_start('%') => { - let mut s = ch.to_string(); - s.push_str(&sch.to_string()); - self.tokenize_identifier_or_keyword(s, chars) + self.tokenize_identifier_or_keyword([ch, *sch], chars) } _ => self.consume_and_return(chars, Token::Mod), } @@ -917,9 +917,7 @@ impl<'a> Tokenizer<'a> { } Some(' ') => Ok(Some(Token::Sharp)), Some(sch) if self.dialect.is_identifier_start('#') => { - let mut s = ch.to_string(); - s.push_str(&sch.to_string()); - self.tokenize_identifier_or_keyword(s, chars) + self.tokenize_identifier_or_keyword([ch, *sch], chars) } _ => Ok(Some(Token::Sharp)), } @@ -934,19 +932,14 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some(' ') => Ok(Some(Token::AtAt)), Some(tch) if self.dialect.is_identifier_start('@') => { - let mut s = ch.to_string(); - s.push('@'); - s.push_str(&tch.to_string()); - self.tokenize_identifier_or_keyword(s, chars) + self.tokenize_identifier_or_keyword([ch, '@', *tch], chars) } _ => Ok(Some(Token::AtAt)), } } Some(' ') => Ok(Some(Token::AtSign)), Some(sch) if self.dialect.is_identifier_start('@') => { - let mut s = ch.to_string(); - s.push_str(&sch.to_string()); - self.tokenize_identifier_or_keyword(s, chars) + self.tokenize_identifier_or_keyword([ch, *sch], chars) } _ => Ok(Some(Token::AtSign)), } @@ -959,7 +952,7 @@ impl<'a> Tokenizer<'a> { // identifier or keyword ch if self.dialect.is_identifier_start(ch) => { - self.tokenize_identifier_or_keyword(ch.to_string(), chars) + self.tokenize_identifier_or_keyword([ch], chars) } '$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)), @@ -1086,8 
+1079,8 @@ impl<'a> Tokenizer<'a> { } /// Tokenize an identifier or keyword, after the first char is already consumed. - fn tokenize_word(&self, first_chars: String, chars: &mut State) -> String { - let mut s = first_chars; + fn tokenize_word(&self, first_chars: impl Into, chars: &mut State) -> String { + let mut s = first_chars.into(); s.push_str(&peeking_take_while(chars, |ch| { self.dialect.is_identifier_part(ch) })); From eb288487a6a938b45004deda850e550a74fcf935 Mon Sep 17 00:00:00 2001 From: parkma99 <84610851+parkma99@users.noreply.github.com> Date: Wed, 19 Jul 2023 05:15:05 +0800 Subject: [PATCH 018/109] Support UNION (ALL) BY NAME syntax (#915) --- src/ast/query.rs | 9 ++- src/keywords.rs | 1 + src/parser.rs | 10 ++- tests/sqlparser_duckdb.rs | 147 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 164 insertions(+), 3 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index fe650e6b..5f4c289d 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -110,7 +110,10 @@ impl fmt::Display for SetExpr { } => { write!(f, "{left} {op}")?; match set_quantifier { - SetQuantifier::All | SetQuantifier::Distinct => write!(f, " {set_quantifier}")?, + SetQuantifier::All + | SetQuantifier::Distinct + | SetQuantifier::ByName + | SetQuantifier::AllByName => write!(f, " {set_quantifier}")?, SetQuantifier::None => write!(f, "{set_quantifier}")?, } write!(f, " {right}")?; @@ -148,6 +151,8 @@ impl fmt::Display for SetOperator { pub enum SetQuantifier { All, Distinct, + ByName, + AllByName, None, } @@ -156,6 +161,8 @@ impl fmt::Display for SetQuantifier { match self { SetQuantifier::All => write!(f, "ALL"), SetQuantifier::Distinct => write!(f, "DISTINCT"), + SetQuantifier::ByName => write!(f, "BY NAME"), + SetQuantifier::AllByName => write!(f, "ALL BY NAME"), SetQuantifier::None => write!(f, ""), } } diff --git a/src/keywords.rs b/src/keywords.rs index 32556c6d..80c605dd 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -384,6 +384,7 @@ define_keywords!( MSCK, 
MULTISET, MUTATION, + NAME, NANOSECOND, NANOSECONDS, NATIONAL, diff --git a/src/parser.rs b/src/parser.rs index d9b85fcc..4d331ce0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -5331,8 +5331,14 @@ impl<'a> Parser<'a> { pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { match op { Some(SetOperator::Union) => { - if self.parse_keyword(Keyword::ALL) { - SetQuantifier::All + if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { + SetQuantifier::ByName + } else if self.parse_keyword(Keyword::ALL) { + if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { + SetQuantifier::AllByName + } else { + SetQuantifier::All + } } else if self.parse_keyword(Keyword::DISTINCT) { SetQuantifier::Distinct } else { diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index fabb9790..83b1e537 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -129,3 +129,150 @@ fn test_create_table_macro() { }; assert_eq!(expected, macro_); } + +#[test] +fn test_select_union_by_name() { + let ast = duckdb().verified_query("SELECT * FROM capitals UNION BY NAME SELECT * FROM weather"); + let expected = Box::::new(SetExpr::SetOperation { + op: SetOperator::Union, + set_quantifier: SetQuantifier::ByName, + left: Box::::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: None, + opt_except: None, + opt_rename: None, + opt_replace: None, + })], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "capitals".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + }))), + right: Box::::new(SetExpr::Select(Box::new(Select { + distinct: None, 
+ top: None, + projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: None, + opt_except: None, + opt_rename: None, + opt_replace: None, + })], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "weather".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + }))), + }); + + assert_eq!(ast.body, expected); + + let ast = + duckdb().verified_query("SELECT * FROM capitals UNION ALL BY NAME SELECT * FROM weather"); + let expected = Box::::new(SetExpr::SetOperation { + op: SetOperator::Union, + set_quantifier: SetQuantifier::AllByName, + left: Box::::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: None, + opt_except: None, + opt_rename: None, + opt_replace: None, + })], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "capitals".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + }))), + right: Box::::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: None, + opt_except: None, + opt_rename: None, + opt_replace: None, + })], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "weather".to_string(), + quote_style: None, + }]), + alias: None, + 
args: None, + with_hints: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + }))), + }); + assert_eq!(ast.body, expected); +} From f98a2f9dca1c3b6c54f1c4db367c669648ab9678 Mon Sep 17 00:00:00 2001 From: canalun Date: Thu, 20 Jul 2023 05:36:52 +0900 Subject: [PATCH 019/109] feat: mysql no-escape mode (#870) Co-authored-by: Andrew Lamb --- src/ast/value.rs | 53 +++++++- src/ast/visitor.rs | 3 +- src/parser.rs | 68 ++++++++-- src/tokenizer.rs | 245 ++++++++++++++++++++++------------- tests/sqlparser_common.rs | 36 ++++- tests/sqlparser_mysql.rs | 195 +++++++++++++++++++++++++++- tests/sqlparser_snowflake.rs | 6 +- 7 files changed, 485 insertions(+), 121 deletions(-) diff --git a/src/ast/value.rs b/src/ast/value.rs index 491553ca..9c18a325 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -71,7 +71,7 @@ impl fmt::Display for Value { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }), - Value::DoubleQuotedString(v) => write!(f, "\"{v}\""), + Value::DoubleQuotedString(v) => write!(f, "\"{}\"", escape_double_quote_string(v)), Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)), Value::DollarQuotedString(v) => write!(f, "{v}"), Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), @@ -187,12 +187,49 @@ pub struct EscapeQuotedString<'a> { impl<'a> fmt::Display for EscapeQuotedString<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for c in self.string.chars() { - if c == self.quote { - write!(f, "{q}{q}", q = self.quote)?; - } else { - write!(f, "{c}")?; + // EscapeQuotedString doesn't know which mode of escape was + // chosen by the user. 
So this code must to correctly display + // strings without knowing if the strings are already escaped + // or not. + // + // If the quote symbol in the string is repeated twice, OR, if + // the quote symbol is after backslash, display all the chars + // without any escape. However, if the quote symbol is used + // just between usual chars, `fmt()` should display it twice." + // + // The following table has examples + // + // | original query | mode | AST Node | serialized | + // | ------------- | --------- | -------------------------------------------------- | ------------ | + // | `"A""B""A"` | no-escape | `DoubleQuotedString(String::from("A\"\"B\"\"A"))` | `"A""B""A"` | + // | `"A""B""A"` | default | `DoubleQuotedString(String::from("A\"B\"A"))` | `"A""B""A"` | + // | `"A\"B\"A"` | no-escape | `DoubleQuotedString(String::from("A\\\"B\\\"A"))` | `"A\"B\"A"` | + // | `"A\"B\"A"` | default | `DoubleQuotedString(String::from("A\"B\"A"))` | `"A""B""A"` | + let quote = self.quote; + let mut previous_char = char::default(); + let mut peekable_chars = self.string.chars().peekable(); + while let Some(&ch) = peekable_chars.peek() { + match ch { + char if char == quote => { + if previous_char == '\\' { + write!(f, "{char}")?; + peekable_chars.next(); + continue; + } + peekable_chars.next(); + if peekable_chars.peek().map(|c| *c == quote).unwrap_or(false) { + write!(f, "{char}{char}")?; + peekable_chars.next(); + } else { + write!(f, "{char}{char}")?; + } + } + _ => { + write!(f, "{ch}")?; + peekable_chars.next(); + } } + previous_char = ch; } Ok(()) } @@ -206,6 +243,10 @@ pub fn escape_single_quote_string(s: &str) -> EscapeQuotedString<'_> { escape_quoted_string(s, '\'') } +pub fn escape_double_quote_string(s: &str) -> EscapeQuotedString<'_> { + escape_quoted_string(s, '\"') +} + pub struct EscapeEscapedStringLiteral<'a>(&'a str); impl<'a> fmt::Display for EscapeEscapedStringLiteral<'a> { diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 81343220..8aa038db 100644 
--- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -632,8 +632,7 @@ mod tests { fn do_visit(sql: &str) -> Vec { let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let s = Parser::new(&dialect) .with_tokens(tokens) .parse_statement() diff --git a/src/parser.rs b/src/parser.rs index 4d331ce0..790ba8fb 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -195,9 +195,52 @@ impl std::error::Error for ParserError {} // By default, allow expressions up to this deep before erroring const DEFAULT_REMAINING_DEPTH: usize = 50; -#[derive(Debug, Default, Clone, PartialEq, Eq)] +/// Options that control how the [`Parser`] parses SQL text +#[derive(Debug, Clone, PartialEq, Eq)] pub struct ParserOptions { pub trailing_commas: bool, + /// Controls how literal values are unescaped. See + /// [`Tokenizer::with_unescape`] for more details. + pub unescape: bool, +} + +impl Default for ParserOptions { + fn default() -> Self { + Self { + trailing_commas: false, + unescape: true, + } + } +} + +impl ParserOptions { + /// Create a new [`ParserOptions`] + pub fn new() -> Self { + Default::default() + } + + /// Set if trailing commas are allowed. + /// + /// If this option is `false` (the default), the following SQL will + /// not parse. If the option is `true`, the SQL will parse. + /// + /// ```sql + /// SELECT + /// foo, + /// bar, + /// FROM baz + /// ``` + pub fn with_trailing_commas(mut self, trailing_commas: bool) -> Self { + self.trailing_commas = trailing_commas; + self + } + + /// Set if literal values are unescaped. Defaults to true. See + /// [`Tokenizer::with_unescape`] for more details. 
+ pub fn with_unescape(mut self, unescape: bool) -> Self { + self.unescape = unescape; + self + } } pub struct Parser<'a> { @@ -206,8 +249,9 @@ pub struct Parser<'a> { index: usize, /// The current dialect to use dialect: &'a dyn Dialect, - /// Additional options that allow you to mix & match behavior otherwise - /// constrained to certain dialects (e.g. trailing commas) + /// Additional options that allow you to mix & match behavior + /// otherwise constrained to certain dialects (e.g. trailing + /// commas) and/or format of parse (e.g. unescaping) options: ParserOptions, /// ensure the stack does not overflow by limiting recursion depth recursion_counter: RecursionCounter, @@ -267,17 +311,20 @@ impl<'a> Parser<'a> { /// Specify additional parser options /// /// - /// [`Parser`] supports additional options ([`ParserOptions`]) that allow you to - /// mix & match behavior otherwise constrained to certain dialects (e.g. trailing - /// commas). + /// [`Parser`] supports additional options ([`ParserOptions`]) + /// that allow you to mix & match behavior otherwise constrained + /// to certain dialects (e.g. trailing commas). /// /// Example: /// ``` /// # use sqlparser::{parser::{Parser, ParserError, ParserOptions}, dialect::GenericDialect}; /// # fn main() -> Result<(), ParserError> { /// let dialect = GenericDialect{}; + /// let options = ParserOptions::new() + /// .with_trailing_commas(true) + /// .with_unescape(false); /// let result = Parser::new(&dialect) - /// .with_options(ParserOptions { trailing_commas: true }) + /// .with_options(options) /// .try_with_sql("SELECT a, b, COUNT(*), FROM foo GROUP BY a, b,")? 
/// .parse_statements(); /// assert!(matches!(result, Ok(_))); @@ -317,8 +364,9 @@ impl<'a> Parser<'a> { /// See example on [`Parser::new()`] for an example pub fn try_with_sql(self, sql: &str) -> Result { debug!("Parsing sql '{}'...", sql); - let mut tokenizer = Tokenizer::new(self.dialect, sql); - let tokens = tokenizer.tokenize()?; + let tokens = Tokenizer::new(self.dialect, sql) + .with_unescape(self.options.unescape) + .tokenize()?; Ok(self.with_tokens(tokens)) } @@ -3654,7 +3702,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; Ok(Some(ColumnOption::Check(expr))) } else if self.parse_keyword(Keyword::AUTO_INCREMENT) - && dialect_of!(self is MySqlDialect | GenericDialect) + && dialect_of!(self is MySqlDialect | GenericDialect) { // Support AUTO_INCREMENT for MySQL Ok(Some(ColumnOption::DialectSpecific(vec![ diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 6a135b43..83e9f317 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -455,12 +455,69 @@ impl<'a> State<'a> { pub struct Tokenizer<'a> { dialect: &'a dyn Dialect, query: &'a str, + /// If true (the default), the tokenizer will un-escape literal + /// SQL strings See [`Tokenizer::with_unescape`] for more details. 
+ unescape: bool, } impl<'a> Tokenizer<'a> { /// Create a new SQL tokenizer for the specified SQL statement + /// + /// ``` + /// # use sqlparser::tokenizer::{Token, Whitespace, Tokenizer}; + /// # use sqlparser::dialect::GenericDialect; + /// # let dialect = GenericDialect{}; + /// let query = r#"SELECT 'foo'"#; + /// + /// // Parsing the query + /// let tokens = Tokenizer::new(&dialect, &query).tokenize().unwrap(); + /// + /// assert_eq!(tokens, vec![ + /// Token::make_word("SELECT", None), + /// Token::Whitespace(Whitespace::Space), + /// Token::SingleQuotedString("foo".to_string()), + /// ]); pub fn new(dialect: &'a dyn Dialect, query: &'a str) -> Self { - Self { dialect, query } + Self { + dialect, + query, + unescape: true, + } + } + + /// Set unescape mode + /// + /// When true (default) the tokenizer unescapes literal values + /// (for example, `""` in SQL is unescaped to the literal `"`). + /// + /// When false, the tokenizer provides the raw strings as provided + /// in the query. 
This can be helpful for programs that wish to + /// recover the *exact* original query text without normalizing + /// the escaping + /// + /// # Example + /// + /// ``` + /// # use sqlparser::tokenizer::{Token, Tokenizer}; + /// # use sqlparser::dialect::GenericDialect; + /// # let dialect = GenericDialect{}; + /// let query = r#""Foo "" Bar""#; + /// let unescaped = Token::make_word(r#"Foo " Bar"#, Some('"')); + /// let original = Token::make_word(r#"Foo "" Bar"#, Some('"')); + /// + /// // Parsing with unescaping (default) + /// let tokens = Tokenizer::new(&dialect, &query).tokenize().unwrap(); + /// assert_eq!(tokens, vec![unescaped]); + /// + /// // Parsing with unescape = false + /// let tokens = Tokenizer::new(&dialect, &query) + /// .with_unescape(false) + /// .tokenize().unwrap(); + /// assert_eq!(tokens, vec![original]); + /// ``` + pub fn with_unescape(mut self, unescape: bool) -> Self { + self.unescape = unescape; + self } /// Tokenize the statement and produce a vector of tokens @@ -650,7 +707,7 @@ impl<'a> Tokenizer<'a> { let error_loc = chars.location(); chars.next(); // consume the opening quote let quote_end = Word::matching_end_quote(quote_start); - let (s, last_char) = parse_quoted_ident(chars, quote_end); + let (s, last_char) = self.parse_quoted_ident(chars, quote_end); if last_char == Some(quote_end) { Ok(Some(Token::make_word(&s, Some(quote_start)))) @@ -1168,6 +1225,10 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume if chars.peek().map(|c| *c == quote_style).unwrap_or(false) { s.push(ch); + if !self.unescape { + // In no-escape mode, the given query has to be saved completely + s.push(ch); + } chars.next(); } else { return Ok(s); @@ -1176,22 +1237,29 @@ impl<'a> Tokenizer<'a> { '\\' => { // consume chars.next(); - // slash escaping is specific to MySQL dialect + // slash escaping is specific to MySQL dialect. 
if dialect_of!(self is MySqlDialect) { if let Some(next) = chars.peek() { - // See https://dev.mysql.com/doc/refman/8.0/en/string-literals.html#character-escape-sequences - let n = match next { - '\'' | '\"' | '\\' | '%' | '_' => *next, - '0' => '\0', - 'b' => '\u{8}', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - 'Z' => '\u{1a}', - _ => *next, - }; - s.push(n); - chars.next(); // consume next + if !self.unescape { + // In no-escape mode, the given query has to be saved completely including backslashes. + s.push(ch); + s.push(*next); + chars.next(); // consume next + } else { + // See https://dev.mysql.com/doc/refman/8.0/en/string-literals.html#character-escape-sequences + let n = match next { + '\'' | '\"' | '\\' | '%' | '_' => *next, + '0' => '\0', + 'b' => '\u{8}', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'Z' => '\u{1a}', + _ => *next, + }; + s.push(n); + chars.next(); // consume next + } } } else { s.push(ch); @@ -1239,6 +1307,29 @@ impl<'a> Tokenizer<'a> { } } + fn parse_quoted_ident(&self, chars: &mut State, quote_end: char) -> (String, Option) { + let mut last_char = None; + let mut s = String::new(); + while let Some(ch) = chars.next() { + if ch == quote_end { + if chars.peek() == Some("e_end) { + chars.next(); + s.push(ch); + if !self.unescape { + // In no-escape mode, the given query has to be saved completely + s.push(ch); + } + } else { + last_char = Some(quote_end); + break; + } + } else { + s.push(ch); + } + } + (s, last_char) + } + #[allow(clippy::unnecessary_wraps)] fn consume_and_return( &self, @@ -1266,25 +1357,6 @@ fn peeking_take_while(chars: &mut State, mut predicate: impl FnMut(char) -> bool s } -fn parse_quoted_ident(chars: &mut State, quote_end: char) -> (String, Option) { - let mut last_char = None; - let mut s = String::new(); - while let Some(ch) = chars.next() { - if ch == quote_end { - if chars.peek() == Some("e_end) { - chars.next(); - s.push(ch); - } else { - last_char = Some(quote_end); - break; - } - } else { - s.push(ch); - 
} - } - (s, last_char) -} - #[cfg(test)] mod tests { use super::*; @@ -1309,8 +1381,7 @@ mod tests { fn tokenize_select_1() { let sql = String::from("SELECT 1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1325,8 +1396,7 @@ mod tests { fn tokenize_select_float() { let sql = String::from("SELECT .1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1341,8 +1411,7 @@ mod tests { fn tokenize_select_exponent() { let sql = String::from("SELECT 1e10, 1e-10, 1e+10, 1ea, 1e-10a, 1e-10-10"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1376,8 +1445,7 @@ mod tests { fn tokenize_scalar_function() { let sql = String::from("SELECT sqrt(1)"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1395,8 +1463,7 @@ mod tests { fn tokenize_string_string_concat() { let sql = String::from("SELECT 'a' || 'b'"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1414,8 +1481,7 @@ mod tests { fn tokenize_bitwise_op() { let sql = String::from("SELECT one | two ^ 
three"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1438,8 +1504,7 @@ mod tests { let sql = String::from("SELECT true XOR true, false XOR false, true XOR false, false XOR true"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1478,8 +1543,7 @@ mod tests { fn tokenize_simple_select() { let sql = String::from("SELECT * FROM customer WHERE id = 1 LIMIT 5"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1510,8 +1574,7 @@ mod tests { fn tokenize_explain_select() { let sql = String::from("EXPLAIN SELECT * FROM customer WHERE id = 1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("EXPLAIN"), @@ -1540,8 +1603,7 @@ mod tests { fn tokenize_explain_analyze_select() { let sql = String::from("EXPLAIN ANALYZE SELECT * FROM customer WHERE id = 1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("EXPLAIN"), @@ -1572,8 +1634,7 @@ mod tests { fn tokenize_string_predicate() { let sql = String::from("SELECT * FROM customer WHERE salary != 'Not Provided'"); let dialect = 
GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1601,8 +1662,7 @@ mod tests { let sql = String::from("\n💝مصطفىh"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); // println!("tokens: {:#?}", tokens); let expected = vec![ Token::Whitespace(Whitespace::Newline), @@ -1617,8 +1677,7 @@ mod tests { let sql = String::from("'foo\r\nbar\nbaz'"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())]; compare(expected, tokens); } @@ -1660,8 +1719,7 @@ mod tests { let sql = String::from("\n\nSELECT * FROM table\t💝مصطفىh"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); // println!("tokens: {:#?}", tokens); let expected = vec![ Token::Whitespace(Whitespace::Newline), @@ -1684,8 +1742,7 @@ mod tests { fn tokenize_right_arrow() { let sql = String::from("FUNCTION(key=>value)"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_word("FUNCTION", None), Token::LParen, @@ -1701,8 +1758,7 @@ mod tests { fn tokenize_is_null() { let sql = String::from("a IS NULL"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + 
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_word("a", None), @@ -1720,8 +1776,7 @@ mod tests { let sql = String::from("0--this is a comment\n1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::SingleLineComment { @@ -1738,8 +1793,7 @@ mod tests { let sql = String::from("--this is a comment"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![Token::Whitespace(Whitespace::SingleLineComment { prefix: "--".to_string(), comment: "this is a comment".to_string(), @@ -1752,8 +1806,7 @@ mod tests { let sql = String::from("0/*multi-line\n* /comment*/1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::MultiLineComment( @@ -1769,8 +1822,7 @@ mod tests { let sql = String::from("0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::MultiLineComment( @@ -1786,8 +1838,7 @@ mod tests { let sql = String::from("\n/** Comment **/\n"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = 
Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Newline), Token::Whitespace(Whitespace::MultiLineComment("* Comment *".to_string())), @@ -1801,8 +1852,7 @@ mod tests { let sql = String::from(" \u{2003}\n"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space), @@ -1832,8 +1882,7 @@ mod tests { let sql = String::from("line1\nline2\rline3\r\nline4\r"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_word("line1", None), Token::Whitespace(Whitespace::Newline), @@ -1851,8 +1900,7 @@ mod tests { fn tokenize_mssql_top() { let sql = "SELECT TOP 5 [bar] FROM foo"; let dialect = MsSqlDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), @@ -1873,8 +1921,7 @@ mod tests { fn tokenize_pg_regex_match() { let sql = "SELECT col ~ '^a', col ~* '^a', col !~ '^a', col !~* '^a'"; let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), @@ -1912,8 +1959,7 @@ mod tests { fn tokenize_quoted_identifier() { let sql = r#" "a "" b" "a """ "c """"" "#; let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); - let tokens = tokenizer.tokenize().unwrap(); 
+ let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Space), Token::make_word(r#"a " b"#, Some('"')), @@ -1926,12 +1972,33 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_quoted_identifier_with_no_escape() { + let sql = r#" "a "" b" "a """ "c """"" "#; + let dialect = GenericDialect {}; + let tokens = Tokenizer::new(&dialect, sql) + .with_unescape(false) + .tokenize() + .unwrap(); + let expected = vec![ + Token::Whitespace(Whitespace::Space), + Token::make_word(r#"a "" b"#, Some('"')), + Token::Whitespace(Whitespace::Space), + Token::make_word(r#"a """#, Some('"')), + Token::Whitespace(Whitespace::Space), + Token::make_word(r#"c """""#, Some('"')), + Token::Whitespace(Whitespace::Space), + ]; + compare(expected, tokens); + } + #[test] fn tokenize_with_location() { let sql = "SELECT a,\n b"; let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); - let tokens = tokenizer.tokenize_with_location().unwrap(); + let tokens = Tokenizer::new(&dialect, sql) + .tokenize_with_location() + .unwrap(); let expected = vec![ TokenWithLocation::new(Token::make_keyword("SELECT"), 1, 1), TokenWithLocation::new(Token::Whitespace(Whitespace::Space), 1, 7), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 08451ac2..356926e1 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1004,11 +1004,13 @@ fn parse_select_with_date_column_name() { } #[test] -fn parse_escaped_single_quote_string_predicate() { +fn parse_escaped_single_quote_string_predicate_with_escape() { use self::BinaryOperator::*; let sql = "SELECT id, fname, lname FROM customer \ WHERE salary <> 'Jim''s salary'"; + let ast = verified_only_select(sql); + assert_eq!( Some(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("salary"))), @@ -1021,6 +1023,34 @@ fn parse_escaped_single_quote_string_predicate() { ); } +#[test] +fn 
parse_escaped_single_quote_string_predicate_with_no_escape() { + use self::BinaryOperator::*; + let sql = "SELECT id, fname, lname FROM customer \ + WHERE salary <> 'Jim''s salary'"; + + let ast = TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: Some( + ParserOptions::new() + .with_trailing_commas(true) + .with_unescape(false), + ), + } + .verified_only_select(sql); + + assert_eq!( + Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("salary"))), + op: NotEq, + right: Box::new(Expr::Value(Value::SingleQuotedString( + "Jim''s salary".to_string() + ))), + }), + ast.selection, + ); +} + #[test] fn parse_number() { let expr = verified_expr("1.0"); @@ -7264,9 +7294,7 @@ fn parse_non_latin_identifiers() { fn parse_trailing_comma() { let trailing_commas = TestedDialects { dialects: vec![Box::new(GenericDialect {})], - options: Some(ParserOptions { - trailing_commas: true, - }), + options: Some(ParserOptions::new().with_trailing_commas(true)), }; trailing_commas.one_statement_parses_to( diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 12d2cc73..ae95de2e 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -18,6 +18,7 @@ use sqlparser::ast::Expr; use sqlparser::ast::Value; use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, MySqlDialect}; +use sqlparser::parser::ParserOptions; use sqlparser::tokenizer::Token; use test_utils::*; @@ -432,10 +433,14 @@ fn parse_quote_identifiers() { } #[test] -fn parse_quote_identifiers_2() { +fn parse_escaped_quote_identifiers_with_escape() { let sql = "SELECT `quoted `` identifier`"; assert_eq!( - mysql().verified_stmt(sql), + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: None, + } + .verified_stmt(sql), Statement::Query(Box::new(Query { with: None, body: Box::new(SetExpr::Select(Box::new(Select { @@ -467,10 +472,56 @@ fn parse_quote_identifiers_2() { } #[test] -fn parse_quote_identifiers_3() { +fn 
parse_escaped_quote_identifiers_with_no_escape() { + let sql = "SELECT `quoted `` identifier`"; + assert_eq!( + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: false, + unescape: false, + }), + } + .verified_stmt(sql), + Statement::Query(Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { + value: "quoted `` identifier".into(), + quote_style: Some('`'), + }))], + into: None, + from: vec![], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None + }))), + order_by: vec![], + limit: None, + offset: None, + fetch: None, + locks: vec![], + })) + ); +} + +#[test] +fn parse_escaped_backticks_with_escape() { let sql = "SELECT ```quoted identifier```"; assert_eq!( - mysql().verified_stmt(sql), + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: None, + } + .verified_stmt(sql), Statement::Query(Box::new(Query { with: None, body: Box::new(SetExpr::Select(Box::new(Select { @@ -501,6 +552,45 @@ fn parse_quote_identifiers_3() { ); } +#[test] +fn parse_escaped_backticks_with_no_escape() { + let sql = "SELECT ```quoted identifier```"; + assert_eq!( + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions::new().with_unescape(false)), + } + .verified_stmt(sql), + Statement::Query(Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { + value: "``quoted identifier``".into(), + quote_style: Some('`'), + }))], + into: None, + from: vec![], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, 
+ named_window: vec![], + qualify: None + }))), + order_by: vec![], + limit: None, + offset: None, + fetch: None, + locks: vec![], + })) + ); +} + #[test] fn parse_unterminated_escape() { let sql = r#"SELECT 'I\'m not fine\'"#; @@ -513,9 +603,13 @@ fn parse_unterminated_escape() { } #[test] -fn parse_escaped_string() { +fn parse_escaped_string_with_escape() { fn assert_mysql_query_value(sql: &str, quoted: &str) { - let stmt = mysql().one_statement_parses_to(sql, ""); + let stmt = TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: None, + } + .one_statement_parses_to(sql, ""); match stmt { Statement::Query(query) => match *query.body { @@ -544,6 +638,95 @@ fn parse_escaped_string() { assert_mysql_query_value(sql, "Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} a "); } +#[test] +fn parse_escaped_string_with_no_escape() { + fn assert_mysql_query_value(sql: &str, quoted: &str) { + let stmt = TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions::new().with_unescape(false)), + } + .one_statement_parses_to(sql, ""); + + match stmt { + Statement::Query(query) => match *query.body { + SetExpr::Select(value) => { + let expr = expr_from_projection(only(&value.projection)); + assert_eq!( + *expr, + Expr::Value(Value::SingleQuotedString(quoted.to_string())) + ); + } + _ => unreachable!(), + }, + _ => unreachable!(), + }; + } + let sql = r#"SELECT 'I\'m fine'"#; + assert_mysql_query_value(sql, r#"I\'m fine"#); + + let sql = r#"SELECT 'I''m fine'"#; + assert_mysql_query_value(sql, r#"I''m fine"#); + + let sql = r#"SELECT 'I\"m fine'"#; + assert_mysql_query_value(sql, r#"I\"m fine"#); + + let sql = r#"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"#; + assert_mysql_query_value(sql, r#"Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ "#); +} + +#[test] +fn check_roundtrip_of_escaped_string() { + let options = Some(ParserOptions::new().with_unescape(false)); + + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + 
options: options.clone(), + } + .verified_stmt(r#"SELECT 'I\'m fine'"#); + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: options.clone(), + } + .verified_stmt(r#"SELECT 'I''m fine'"#); + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: options.clone(), + } + .verified_stmt(r#"SELECT 'I\\\'m fine'"#); + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: options.clone(), + } + .verified_stmt(r#"SELECT 'I\\\'m fine'"#); + + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: options.clone(), + } + .verified_stmt(r#"SELECT "I\"m fine""#); + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: options.clone(), + } + .verified_stmt(r#"SELECT "I""m fine""#); + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: options.clone(), + } + .verified_stmt(r#"SELECT "I\\\"m fine""#); + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: options.clone(), + } + .verified_stmt(r#"SELECT "I\\\"m fine""#); + + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options, + } + .verified_stmt(r#"SELECT "I'm ''fine''""#); +} + #[test] fn parse_create_table_with_minimum_display_width() { let sql = "CREATE TABLE foo (bar_tinyint TINYINT(3), bar_smallint SMALLINT(5), bar_mediumint MEDIUMINT(6), bar_int INT(11), bar_bigint BIGINT(20))"; diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 9a54c89c..43ebb8b1 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -55,8 +55,7 @@ fn test_snowflake_create_transient_table() { fn test_snowflake_single_line_tokenize() { let sql = "CREATE TABLE# this is a comment \ntable_1"; let dialect = SnowflakeDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("CREATE"), @@ -72,8 
+71,7 @@ fn test_snowflake_single_line_tokenize() { assert_eq!(expected, tokens); let sql = "CREATE TABLE // this is a comment \ntable_1"; - let mut tokenizer = Tokenizer::new(&dialect, sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("CREATE"), From a4520541116a8dc9fe21b6a4b3fbd947504263c7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 19 Jul 2023 17:29:21 -0400 Subject: [PATCH 020/109] CHANGELOG for 0.36.0 (#924) --- CHANGELOG.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f52f7fa..b5b713e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,25 @@ Given that the parser produces a typed AST, any changes to the AST will technica ## [Unreleased] Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. +## [0.36.0] 2023-07-19 + +### Added +* Support toggling "unescape" mode to retain original escaping (#870) - Thanks @canalun +* Support UNION (ALL) BY NAME syntax (#915) - Thanks @parkma99 +* Add doc comment for all operators (#917) - Thanks @izveigor +* Support `PGOverlap` operator (#912) - Thanks @izveigor +* Support multi args for unnest (#909) - Thanks @jayzhan211 +* Support `ALTER VIEW`, MySQL syntax (#907) - Thanks @liadgiladi +* Add DeltaLake keywords (#906) - Thanks @roeap + +### Fixed +* Parse JsonOperators correctly (#913) - Thanks @izveigor +* Fix dependabot by removing rust-toolchain toml (#922) - Thanks @alamb + +### Changed +* Clean up JSON operator tokenizing code (#923) - Thanks @alamb +* Upgrade bigdecimal to 0.4.1 (#921) - Thanks @jinlee0 +* Remove most instances of #[cfg(feature(bigdecimal))] in tests (#910) - Thanks @alamb ## [0.35.0] 2023-06-23 From e36b34d8ccc63bb1a8d5061dab8baa4af6e82da7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 19 Jul 2023 17:31:43 -0400 Subject: [PATCH 021/109] chore: Release sqlparser version 
0.36.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 03a08f66..6a286954 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.35.0" +version = "0.36.0" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 3a412152b97678406729d4e8134dd79a78dc3a43 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 21 Jul 2023 05:55:41 -0400 Subject: [PATCH 022/109] fix parsing of identifiers after `%` symbol (#927) --- src/test_utils.rs | 16 +++++++++++----- src/tokenizer.rs | 8 +++++--- tests/sqlparser_common.rs | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/src/test_utils.rs b/src/test_utils.rs index 47fb00d5..0ec59509 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -116,6 +116,16 @@ impl TestedDialects { only_statement } + /// Ensures that `sql` parses as an [`Expr`], and that + /// re-serializing the parse result produces canonical + pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr { + let ast = self + .run_parser_method(sql, |parser| parser.parse_expr()) + .unwrap(); + assert_eq!(canonical, &ast.to_string()); + ast + } + /// Ensures that `sql` parses as a single [Statement], and that /// re-serializing the parse result produces the same `sql` /// string (is not modified after a serialization round-trip). @@ -147,11 +157,7 @@ impl TestedDialects { /// re-serializing the parse result produces the same `sql` /// string (is not modified after a serialization round-trip). 
pub fn verified_expr(&self, sql: &str) -> Expr { - let ast = self - .run_parser_method(sql, |parser| parser.parse_expr()) - .unwrap(); - assert_eq!(sql, &ast.to_string(), "round-tripping without changes"); - ast + self.expr_parses_to(sql, sql) } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 83e9f317..f20e01b7 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -424,6 +424,7 @@ struct State<'a> { } impl<'a> State<'a> { + /// return the next character and advance the stream pub fn next(&mut self) -> Option { match self.peekable.next() { None => None, @@ -439,6 +440,7 @@ impl<'a> State<'a> { } } + /// return the next character but do not advance the stream pub fn peek(&mut self) -> Option<&char> { self.peekable.peek() } @@ -849,13 +851,13 @@ impl<'a> Tokenizer<'a> { '+' => self.consume_and_return(chars, Token::Plus), '*' => self.consume_and_return(chars, Token::Mul), '%' => { - chars.next(); + chars.next(); // advance past '%' match chars.peek() { - Some(' ') => self.consume_and_return(chars, Token::Mod), + Some(' ') => Ok(Some(Token::Mod)), Some(sch) if self.dialect.is_identifier_start('%') => { self.tokenize_identifier_or_keyword([ch, *sch], chars) } - _ => self.consume_and_return(chars, Token::Mod), + _ => Ok(Some(Token::Mod)), } } '|' => { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 356926e1..a9fd419e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1143,6 +1143,20 @@ fn parse_unary_math_with_multiply() { ); } +#[test] +fn parse_mod() { + use self::Expr::*; + let sql = "a % b"; + assert_eq!( + BinaryOp { + left: Box::new(Identifier(Ident::new("a"))), + op: BinaryOperator::Modulo, + right: Box::new(Identifier(Ident::new("b"))), + }, + verified_expr(sql) + ); +} + fn pg_and_generic() -> TestedDialects { TestedDialects { dialects: vec![Box::new(PostgreSqlDialect {}), Box::new(GenericDialect {})], @@ -1178,6 +1192,24 @@ fn parse_json_ops_without_colon() { } } +#[test] +fn parse_mod_no_spaces() { + 
use self::Expr::*; + let canonical = "a1 % b1"; + let sqls = ["a1 % b1", "a1% b1", "a1 %b1", "a1%b1"]; + for sql in sqls { + println!("Parsing {sql}"); + assert_eq!( + BinaryOp { + left: Box::new(Identifier(Ident::new("a1"))), + op: BinaryOperator::Modulo, + right: Box::new(Identifier(Ident::new("b1"))), + }, + pg_and_generic().expr_parses_to(sql, canonical) + ); + } +} + #[test] fn parse_is_null() { use self::Expr::*; From 91ef061254b9b962ae9b07d7bff365a729bd9ab2 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 21 Jul 2023 06:00:48 -0400 Subject: [PATCH 023/109] Changelog for 0.36.1 (#928) --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b5b713e6..8669f058 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ Given that the parser produces a typed AST, any changes to the AST will technica ## [Unreleased] Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. +## [0.36.1] 2023-07-19 + +### Fixed +* Fix parsing of identifiers after '%' symbol (#927) - Thanks @alamb + ## [0.36.0] 2023-07-19 ### Added From f60a6f758ce12aa84fa3801a38c9501f55f245ef Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 21 Jul 2023 05:16:48 -0500 Subject: [PATCH 024/109] chore: Release sqlparser version 0.36.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6a286954..777e9f4a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.36.0" +version = "0.36.1" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 53593f1982b343f04943c2be85a947a1c041b786 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Wed, 26 Jul 2023 22:47:02 +0300 Subject: [PATCH 025/109] Fix parsing of datetime functions without parenthesis 
(#930) --- src/parser.rs | 9 ++-- tests/sqlparser_common.rs | 101 +++++++++----------------------------- 2 files changed, 29 insertions(+), 81 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 790ba8fb..e0c82c5e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -957,17 +957,18 @@ impl<'a> Parser<'a> { } pub fn parse_time_functions(&mut self, name: ObjectName) -> Result { - let (args, order_by) = if self.consume_token(&Token::LParen) { - self.parse_optional_args_with_orderby()? + let (args, order_by, special) = if self.consume_token(&Token::LParen) { + let (args, order_by) = self.parse_optional_args_with_orderby()?; + (args, order_by, false) } else { - (vec![], vec![]) + (vec![], vec![], true) }; Ok(Expr::Function(Function { name, args, over: None, distinct: false, - special: false, + special, order_by, })) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a9fd419e..31f25220 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -6703,90 +6703,37 @@ fn parse_offset_and_limit() { #[test] fn parse_time_functions() { - let sql = "SELECT CURRENT_TIMESTAMP()"; - let select = verified_only_select(sql); - assert_eq!( - &Expr::Function(Function { - name: ObjectName(vec![Ident::new("CURRENT_TIMESTAMP")]), - args: vec![], - over: None, - distinct: false, - special: false, - order_by: vec![], - }), - expr_from_projection(&select.projection[0]) - ); - - // Validating Parenthesis - one_statement_parses_to("SELECT CURRENT_TIMESTAMP", sql); - - let sql = "SELECT CURRENT_TIME()"; - let select = verified_only_select(sql); - assert_eq!( - &Expr::Function(Function { - name: ObjectName(vec![Ident::new("CURRENT_TIME")]), - args: vec![], - over: None, - distinct: false, - special: false, - order_by: vec![], - }), - expr_from_projection(&select.projection[0]) - ); - - // Validating Parenthesis - one_statement_parses_to("SELECT CURRENT_TIME", sql); - - let sql = "SELECT CURRENT_DATE()"; - let select = verified_only_select(sql); - 
assert_eq!( - &Expr::Function(Function { - name: ObjectName(vec![Ident::new("CURRENT_DATE")]), - args: vec![], - over: None, - distinct: false, - special: false, - order_by: vec![], - }), - expr_from_projection(&select.projection[0]) - ); - - // Validating Parenthesis - one_statement_parses_to("SELECT CURRENT_DATE", sql); - - let sql = "SELECT LOCALTIME()"; - let select = verified_only_select(sql); - assert_eq!( - &Expr::Function(Function { - name: ObjectName(vec![Ident::new("LOCALTIME")]), + fn test_time_function(func_name: &'static str) { + let sql = format!("SELECT {}()", func_name); + let select = verified_only_select(&sql); + let select_localtime_func_call_ast = Function { + name: ObjectName(vec![Ident::new(func_name)]), args: vec![], over: None, distinct: false, special: false, order_by: vec![], - }), - expr_from_projection(&select.projection[0]) - ); - - // Validating Parenthesis - one_statement_parses_to("SELECT LOCALTIME", sql); + }; + assert_eq!( + &Expr::Function(select_localtime_func_call_ast.clone()), + expr_from_projection(&select.projection[0]) + ); - let sql = "SELECT LOCALTIMESTAMP()"; - let select = verified_only_select(sql); - assert_eq!( - &Expr::Function(Function { - name: ObjectName(vec![Ident::new("LOCALTIMESTAMP")]), - args: vec![], - over: None, - distinct: false, - special: false, - order_by: vec![], - }), - expr_from_projection(&select.projection[0]) - ); + // Validating Parenthesis + let sql_without_parens = format!("SELECT {}", func_name); + let mut ast_without_parens = select_localtime_func_call_ast.clone(); + ast_without_parens.special = true; + assert_eq!( + &Expr::Function(ast_without_parens.clone()), + expr_from_projection(&verified_only_select(&sql_without_parens).projection[0]) + ); + } - // Validating Parenthesis - one_statement_parses_to("SELECT LOCALTIMESTAMP", sql); + test_time_function("CURRENT_TIMESTAMP"); + test_time_function("CURRENT_TIME"); + test_time_function("CURRENT_DATE"); + test_time_function("LOCALTIME"); + 
test_time_function("LOCALTIMESTAMP"); } #[test] From 0ddb85341034f95eb8dff22b385b7d0c21006de2 Mon Sep 17 00:00:00 2001 From: Kikkon <19528375+Kikkon@users.noreply.github.com> Date: Thu, 27 Jul 2023 18:20:24 +0800 Subject: [PATCH 026/109] feat: support pg type alias (#933) --- src/ast/data_type.rs | 61 +++++++++++++++++++++++++++++-- src/keywords.rs | 6 ++++ src/parser.rs | 27 ++++++++++++++ tests/sqlparser_postgres.rs | 71 +++++++++++++++++++++++++++++++++++++ 4 files changed, 163 insertions(+), 2 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 647e7b35..bc4ecd4f 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -97,6 +97,14 @@ pub enum DataType { TinyInt(Option), /// Unsigned tiny integer with optional display width e.g. TINYINT UNSIGNED or TINYINT(3) UNSIGNED UnsignedTinyInt(Option), + /// Int2 as alias for SmallInt in [postgresql] + /// Note: Int2 mean 2 bytes in postgres (not 2 bits) + /// Int2 with optional display width e.g. INT2 or INT2(5) + /// + /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html + Int2(Option), + /// Unsigned Int2 with optional display width e.g. INT2 Unsigned or INT2(5) Unsigned + UnsignedInt2(Option), /// Small integer with optional display width e.g. SMALLINT or SMALLINT(5) SmallInt(Option), /// Unsigned small integer with optional display width e.g. SMALLINT UNSIGNED or SMALLINT(5) UNSIGNED @@ -109,20 +117,44 @@ pub enum DataType { /// /// [1]: https://dev.mysql.com/doc/refman/8.0/en/integer-types.html UnsignedMediumInt(Option), - /// Integer with optional display width e.g. INT or INT(11) + /// Int with optional display width e.g. INT or INT(11) Int(Option), + /// Int4 as alias for Integer in [postgresql] + /// Note: Int4 mean 4 bytes in postgres (not 4 bits) + /// Int4 with optional display width e.g. Int4 or Int4(11) + /// + /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html + Int4(Option), /// Integer with optional display width e.g. 
INTEGER or INTEGER(11) Integer(Option), - /// Unsigned integer with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED + /// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED UnsignedInt(Option), + /// Unsigned int4 with optional display width e.g. INT4 UNSIGNED or INT4(11) UNSIGNED + UnsignedInt4(Option), /// Unsigned integer with optional display width e.g. INTGER UNSIGNED or INTEGER(11) UNSIGNED UnsignedInteger(Option), /// Big integer with optional display width e.g. BIGINT or BIGINT(20) BigInt(Option), /// Unsigned big integer with optional display width e.g. BIGINT UNSIGNED or BIGINT(20) UNSIGNED UnsignedBigInt(Option), + /// Int8 as alias for Bigint in [postgresql] + /// Note: Int8 mean 8 bytes in postgres (not 8 bits) + /// Int8 with optional display width e.g. INT8 or INT8(11) + /// + /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html + Int8(Option), + /// Unsigned Int8 with optional display width e.g. INT8 UNSIGNED or INT8(11) UNSIGNED + UnsignedInt8(Option), + /// FLOAT4 as alias for Real in [postgresql] + /// + /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html + FLOAT4, /// Floating point e.g. REAL Real, + /// FLOAT8 as alias for Double in [postgresql] + /// + /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html + FLOAT8, /// Double Double, /// Double PRECISION e.g. 
[standard], [postgresql] @@ -130,6 +162,10 @@ pub enum DataType { /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#approximate-numeric-type /// [postgresql]: https://www.postgresql.org/docs/current/datatype-numeric.html DoublePrecision, + /// Bool as alias for Boolean in [postgresql] + /// + /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html + Bool, /// Boolean Boolean, /// Date @@ -213,6 +249,12 @@ impl fmt::Display for DataType { DataType::UnsignedTinyInt(zerofill) => { format_type_with_optional_length(f, "TINYINT", zerofill, true) } + DataType::Int2(zerofill) => { + format_type_with_optional_length(f, "INT2", zerofill, false) + } + DataType::UnsignedInt2(zerofill) => { + format_type_with_optional_length(f, "INT2", zerofill, true) + } DataType::SmallInt(zerofill) => { format_type_with_optional_length(f, "SMALLINT", zerofill, false) } @@ -229,6 +271,12 @@ impl fmt::Display for DataType { DataType::UnsignedInt(zerofill) => { format_type_with_optional_length(f, "INT", zerofill, true) } + DataType::Int4(zerofill) => { + format_type_with_optional_length(f, "INT4", zerofill, false) + } + DataType::UnsignedInt4(zerofill) => { + format_type_with_optional_length(f, "INT4", zerofill, true) + } DataType::Integer(zerofill) => { format_type_with_optional_length(f, "INTEGER", zerofill, false) } @@ -241,9 +289,18 @@ impl fmt::Display for DataType { DataType::UnsignedBigInt(zerofill) => { format_type_with_optional_length(f, "BIGINT", zerofill, true) } + DataType::Int8(zerofill) => { + format_type_with_optional_length(f, "INT8", zerofill, false) + } + DataType::UnsignedInt8(zerofill) => { + format_type_with_optional_length(f, "INT8", zerofill, true) + } DataType::Real => write!(f, "REAL"), + DataType::FLOAT4 => write!(f, "FLOAT4"), DataType::Double => write!(f, "DOUBLE"), + DataType::FLOAT8 => write!(f, "FLOAT8"), DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"), + DataType::Bool => write!(f, "BOOL"), 
DataType::Boolean => write!(f, "BOOLEAN"), DataType::Date => write!(f, "DATE"), DataType::Time(precision, timezone_info) => { diff --git a/src/keywords.rs b/src/keywords.rs index 80c605dd..98e03941 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -111,6 +111,7 @@ define_keywords!( BINARY, BLOB, BLOOMFILTER, + BOOL, BOOLEAN, BOTH, BTREE, @@ -263,6 +264,8 @@ define_keywords!( FIRST, FIRST_VALUE, FLOAT, + FLOAT4, + FLOAT8, FLOOR, FOLLOWING, FOR, @@ -317,6 +320,9 @@ define_keywords!( INSENSITIVE, INSERT, INT, + INT2, + INT4, + INT8, INTEGER, INTERSECT, INTERSECTION, diff --git a/src/parser.rs b/src/parser.rs index e0c82c5e..b8f36332 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4560,8 +4560,11 @@ impl<'a> Parser<'a> { let mut data = match next_token.token { Token::Word(w) => match w.keyword { Keyword::BOOLEAN => Ok(DataType::Boolean), + Keyword::BOOL => Ok(DataType::Bool), Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)), Keyword::REAL => Ok(DataType::Real), + Keyword::FLOAT4 => Ok(DataType::FLOAT4), + Keyword::FLOAT8 => Ok(DataType::FLOAT8), Keyword::DOUBLE => { if self.parse_keyword(Keyword::PRECISION) { Ok(DataType::DoublePrecision) @@ -4577,6 +4580,14 @@ impl<'a> Parser<'a> { Ok(DataType::TinyInt(optional_precision?)) } } + Keyword::INT2 => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt2(optional_precision?)) + } else { + Ok(DataType::Int2(optional_precision?)) + } + } Keyword::SMALLINT => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { @@ -4601,6 +4612,14 @@ impl<'a> Parser<'a> { Ok(DataType::Int(optional_precision?)) } } + Keyword::INT4 => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt4(optional_precision?)) + } else { + Ok(DataType::Int4(optional_precision?)) + } + } Keyword::INTEGER => { let 
optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { @@ -4617,6 +4636,14 @@ impl<'a> Parser<'a> { Ok(DataType::BigInt(optional_precision?)) } } + Keyword::INT8 => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt8(optional_precision?)) + } else { + Ok(DataType::Int8(optional_precision?)) + } + } Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)), Keyword::NVARCHAR => Ok(DataType::Nvarchar(self.parse_optional_precision()?)), Keyword::CHARACTER => { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 024ead6a..fe55b741 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2945,3 +2945,74 @@ fn parse_truncate() { truncate ); } + +#[test] +fn parse_create_table_with_alias() { + let sql = "CREATE TABLE public.datatype_aliases + ( + int8_col INT8, + int4_col INT4, + int2_col INT2, + float8_col FLOAT8, + float4_col FLOAT4, + bool_col BOOL, + );"; + match pg_and_generic().one_statement_parses_to(sql, "") { + Statement::CreateTable { + name, + columns, + constraints, + with_options: _with_options, + if_not_exists: false, + external: false, + file_format: None, + location: None, + .. 
+ } => { + assert_eq!("public.datatype_aliases", name.to_string()); + assert_eq!( + columns, + vec![ + ColumnDef { + name: "int8_col".into(), + data_type: DataType::Int8(None), + collation: None, + options: vec![] + }, + ColumnDef { + name: "int4_col".into(), + data_type: DataType::Int4(None), + collation: None, + options: vec![] + }, + ColumnDef { + name: "int2_col".into(), + data_type: DataType::Int2(None), + collation: None, + options: vec![] + }, + ColumnDef { + name: "float8_col".into(), + data_type: DataType::FLOAT8, + collation: None, + options: vec![] + }, + ColumnDef { + name: "float4_col".into(), + data_type: DataType::FLOAT4, + collation: None, + options: vec![] + }, + ColumnDef { + name: "bool_col".into(), + data_type: DataType::Bool, + collation: None, + options: vec![] + }, + ] + ); + assert!(constraints.is_empty()); + } + _ => unreachable!(), + } +} From 10a6ec56371e32f5fa145074e797f8ec74f6d812 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Thu, 27 Jul 2023 14:32:55 +0300 Subject: [PATCH 027/109] Fix "BEGIN TRANSACTION" being serialized as "START TRANSACTION" (#935) --- src/ast/mod.rs | 22 ++++++++++++++++++---- src/parser.rs | 2 ++ tests/sqlparser_common.rs | 10 +++++----- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 1cf5a768..d08ef0e2 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1540,8 +1540,15 @@ pub enum Statement { /// /// Note: This is a MySQL-specific statement. Use { db_name: Ident }, - /// `{ BEGIN [ TRANSACTION | WORK ] | START TRANSACTION } ...` - StartTransaction { modes: Vec }, + /// `START [ TRANSACTION | WORK ] | START TRANSACTION } ...` + /// If `begin` is false. 
+ /// + /// `BEGIN [ TRANSACTION | WORK ] | START TRANSACTION } ...` + /// If `begin` is true + StartTransaction { + modes: Vec, + begin: bool, + }, /// `SET TRANSACTION ...` SetTransaction { modes: Vec, @@ -2720,8 +2727,15 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::StartTransaction { modes } => { - write!(f, "START TRANSACTION")?; + Statement::StartTransaction { + modes, + begin: syntax_begin, + } => { + if *syntax_begin { + write!(f, "BEGIN TRANSACTION")?; + } else { + write!(f, "START TRANSACTION")?; + } if !modes.is_empty() { write!(f, " {}", display_comma_separated(modes))?; } diff --git a/src/parser.rs b/src/parser.rs index b8f36332..7dd07d05 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -6905,6 +6905,7 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::TRANSACTION)?; Ok(Statement::StartTransaction { modes: self.parse_transaction_modes()?, + begin: false, }) } @@ -6912,6 +6913,7 @@ impl<'a> Parser<'a> { let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); Ok(Statement::StartTransaction { modes: self.parse_transaction_modes()?, + begin: true, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 31f25220..444e6d34 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5732,7 +5732,7 @@ fn lateral_derived() { #[test] fn parse_start_transaction() { match verified_stmt("START TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE") { - Statement::StartTransaction { modes } => assert_eq!( + Statement::StartTransaction { modes, .. } => assert_eq!( modes, vec![ TransactionMode::AccessMode(TransactionAccessMode::ReadOnly), @@ -5749,7 +5749,7 @@ fn parse_start_transaction() { "START TRANSACTION READ ONLY READ WRITE ISOLATION LEVEL SERIALIZABLE", "START TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE", ) { - Statement::StartTransaction { modes } => assert_eq!( + Statement::StartTransaction { modes, .. 
} => assert_eq!( modes, vec![ TransactionMode::AccessMode(TransactionAccessMode::ReadOnly), @@ -5761,9 +5761,9 @@ fn parse_start_transaction() { } verified_stmt("START TRANSACTION"); - one_statement_parses_to("BEGIN", "START TRANSACTION"); - one_statement_parses_to("BEGIN WORK", "START TRANSACTION"); - one_statement_parses_to("BEGIN TRANSACTION", "START TRANSACTION"); + one_statement_parses_to("BEGIN", "BEGIN TRANSACTION"); + one_statement_parses_to("BEGIN WORK", "BEGIN TRANSACTION"); + one_statement_parses_to("BEGIN TRANSACTION", "BEGIN TRANSACTION"); verified_stmt("START TRANSACTION ISOLATION LEVEL READ UNCOMMITTED"); verified_stmt("START TRANSACTION ISOLATION LEVEL READ COMMITTED"); From eb4be989806f3f325ffadcdf5821b81f325ec22d Mon Sep 17 00:00:00 2001 From: liadgiladi <51291468+liadgiladi@users.noreply.github.com> Date: Mon, 7 Aug 2023 17:54:24 +0300 Subject: [PATCH 028/109] Support `DROP TEMPORARY TABLE`, MySQL syntax (#916) --- src/ast/mod.rs | 6 +++++- src/parser.rs | 5 +++++ tests/sqlparser_common.rs | 4 ++++ tests/sqlparser_mysql.rs | 26 ++++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d08ef0e2..ad8c7629 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1401,6 +1401,8 @@ pub enum Statement { /// Hive allows you specify whether the table's stored data will be /// deleted along with the dropped table purge: bool, + /// MySQL-specific "TEMPORARY" keyword + temporary: bool, }, /// DROP Function DropFunction { @@ -2572,9 +2574,11 @@ impl fmt::Display for Statement { cascade, restrict, purge, + temporary, } => write!( f, - "DROP {}{} {}{}{}{}", + "DROP {}{}{} {}{}{}{}", + if *temporary { "TEMPORARY " } else { "" }, object_type, if *if_exists { " IF EXISTS" } else { "" }, display_comma_separated(names), diff --git a/src/parser.rs b/src/parser.rs index 7dd07d05..91e21ec3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3127,6 +3127,10 @@ impl<'a> Parser<'a> { } pub fn 
parse_drop(&mut self) -> Result { + // MySQL dialect supports `TEMPORARY` + let temporary = dialect_of!(self is MySqlDialect | GenericDialect) + && self.parse_keyword(Keyword::TEMPORARY); + let object_type = if self.parse_keyword(Keyword::TABLE) { ObjectType::Table } else if self.parse_keyword(Keyword::VIEW) { @@ -3169,6 +3173,7 @@ impl<'a> Parser<'a> { cascade, restrict, purge, + temporary, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 444e6d34..9ec182f2 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5432,6 +5432,7 @@ fn parse_drop_table() { names, cascade, purge: _, + temporary, .. } => { assert!(!if_exists); @@ -5441,6 +5442,7 @@ fn parse_drop_table() { names.iter().map(ToString::to_string).collect::>() ); assert!(!cascade); + assert!(!temporary); } _ => unreachable!(), } @@ -5453,6 +5455,7 @@ fn parse_drop_table() { names, cascade, purge: _, + temporary, .. } => { assert!(if_exists); @@ -5462,6 +5465,7 @@ fn parse_drop_table() { names.iter().map(ToString::to_string).collect::>() ); assert!(cascade); + assert!(!temporary); } _ => unreachable!(), } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index ae95de2e..07337ec7 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1597,3 +1597,29 @@ fn parse_string_introducers() { fn parse_div_infix() { mysql().verified_stmt(r#"SELECT 5 DIV 2"#); } + +#[test] +fn parse_drop_temporary_table() { + let sql = "DROP TEMPORARY TABLE foo"; + match mysql().verified_stmt(sql) { + Statement::Drop { + object_type, + if_exists, + names, + cascade, + purge: _, + temporary, + .. 
+ } => { + assert!(!if_exists); + assert_eq!(ObjectType::Table, object_type); + assert_eq!( + vec!["foo"], + names.iter().map(ToString::to_string).collect::>() + ); + assert!(!cascade); + assert!(temporary); + } + _ => unreachable!(), + } +} From 173a6db8189a0f1bb9896117c86374c4e691f0f3 Mon Sep 17 00:00:00 2001 From: Kikkon <19528375+Kikkon@users.noreply.github.com> Date: Mon, 7 Aug 2023 22:55:42 +0800 Subject: [PATCH 029/109] Fix: use Rust idiomatic capitalization for newly added `DataType` enums (#939) --- src/ast/data_type.rs | 12 ++++++------ src/parser.rs | 4 ++-- tests/sqlparser_postgres.rs | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index bc4ecd4f..2a6a004f 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -145,16 +145,16 @@ pub enum DataType { Int8(Option), /// Unsigned Int8 with optional display width e.g. INT8 UNSIGNED or INT8(11) UNSIGNED UnsignedInt8(Option), - /// FLOAT4 as alias for Real in [postgresql] + /// Float4 as alias for Real in [postgresql] /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html - FLOAT4, + Float4, /// Floating point e.g. REAL Real, - /// FLOAT8 as alias for Double in [postgresql] + /// Float8 as alias for Double in [postgresql] /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html - FLOAT8, + Float8, /// Double Double, /// Double PRECISION e.g. 
[standard], [postgresql] @@ -296,9 +296,9 @@ impl fmt::Display for DataType { format_type_with_optional_length(f, "INT8", zerofill, true) } DataType::Real => write!(f, "REAL"), - DataType::FLOAT4 => write!(f, "FLOAT4"), + DataType::Float4 => write!(f, "FLOAT4"), DataType::Double => write!(f, "DOUBLE"), - DataType::FLOAT8 => write!(f, "FLOAT8"), + DataType::Float8 => write!(f, "FLOAT8"), DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"), DataType::Bool => write!(f, "BOOL"), DataType::Boolean => write!(f, "BOOLEAN"), diff --git a/src/parser.rs b/src/parser.rs index 91e21ec3..20e587c7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4568,8 +4568,8 @@ impl<'a> Parser<'a> { Keyword::BOOL => Ok(DataType::Bool), Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)), Keyword::REAL => Ok(DataType::Real), - Keyword::FLOAT4 => Ok(DataType::FLOAT4), - Keyword::FLOAT8 => Ok(DataType::FLOAT8), + Keyword::FLOAT4 => Ok(DataType::Float4), + Keyword::FLOAT8 => Ok(DataType::Float8), Keyword::DOUBLE => { if self.parse_keyword(Keyword::PRECISION) { Ok(DataType::DoublePrecision) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index fe55b741..f6bb80d7 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2993,13 +2993,13 @@ fn parse_create_table_with_alias() { }, ColumnDef { name: "float8_col".into(), - data_type: DataType::FLOAT8, + data_type: DataType::Float8, collation: None, options: vec![] }, ColumnDef { name: "float4_col".into(), - data_type: DataType::FLOAT4, + data_type: DataType::Float4, collation: None, options: vec![] }, From 8bbb85356ceb50e2d45545da0e8a15da3680353e Mon Sep 17 00:00:00 2001 From: Jeremy Maness Date: Thu, 17 Aug 2023 06:17:57 -0400 Subject: [PATCH 030/109] Fix SUBSTRING from/to argument construction for mssql (#947) --- src/ast/mod.rs | 17 +++++++++-- src/dialect/mod.rs | 4 +++ src/dialect/mssql.rs | 4 +++ src/parser.rs | 56 ++++++++++++++++++++++++++----------- 
tests/sqlparser_common.rs | 34 +++++++++++++++++++--- tests/sqlparser_mssql.rs | 59 +++++++++++++++++++++++++++++++++++++++ tests/sqlparser_mysql.rs | 3 +- 7 files changed, 153 insertions(+), 24 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ad8c7629..0a366f63 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -477,6 +477,10 @@ pub enum Expr { expr: Box, substring_from: Option>, substring_for: Option>, + + // Some dialects use `SUBSTRING(expr [FROM start] [FOR len])` syntax while others omit FROM, + // FOR keywords (e.g. Microsoft SQL Server). This flags is used for formatting. + special: bool, }, /// ```sql /// TRIM([BOTH | LEADING | TRAILING] [ FROM] ) @@ -830,13 +834,22 @@ impl fmt::Display for Expr { expr, substring_from, substring_for, + special, } => { write!(f, "SUBSTRING({expr}")?; if let Some(from_part) = substring_from { - write!(f, " FROM {from_part}")?; + if *special { + write!(f, ", {from_part}")?; + } else { + write!(f, " FROM {from_part}")?; + } } if let Some(for_part) = substring_for { - write!(f, " FOR {for_part}")?; + if *special { + write!(f, ", {for_part}")?; + } else { + write!(f, " FOR {for_part}")?; + } } write!(f, ")") diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 8b3a5888..e174528b 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -117,6 +117,10 @@ pub trait Dialect: Debug + Any { fn supports_group_by_expr(&self) -> bool { false } + /// Returns true if the dialect supports `SUBSTRING(expr [FROM start] [FOR len])` expressions + fn supports_substring_from_for_expr(&self) -> bool { + true + } /// Dialect-specific prefix parser override fn parse_prefix(&self, _parser: &mut Parser) -> Option> { // return None to fall back to the default behavior diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index 6d1f49cd..f0439810 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -34,4 +34,8 @@ impl Dialect for MsSqlDialect { || ch == '#' || ch == '_' } + + fn 
supports_substring_from_for_expr(&self) -> bool { + false + } } diff --git a/src/parser.rs b/src/parser.rs index 20e587c7..5d44ce9c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1223,25 +1223,47 @@ impl<'a> Parser<'a> { } pub fn parse_substring_expr(&mut self) -> Result { - // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3]) - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - let mut from_expr = None; - if self.parse_keyword(Keyword::FROM) || self.consume_token(&Token::Comma) { - from_expr = Some(self.parse_expr()?); - } + if self.dialect.supports_substring_from_for_expr() { + // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3]) + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + let mut from_expr = None; + if self.parse_keyword(Keyword::FROM) || self.consume_token(&Token::Comma) { + from_expr = Some(self.parse_expr()?); + } - let mut to_expr = None; - if self.parse_keyword(Keyword::FOR) || self.consume_token(&Token::Comma) { - to_expr = Some(self.parse_expr()?); - } - self.expect_token(&Token::RParen)?; + let mut to_expr = None; + if self.parse_keyword(Keyword::FOR) || self.consume_token(&Token::Comma) { + to_expr = Some(self.parse_expr()?); + } + self.expect_token(&Token::RParen)?; - Ok(Expr::Substring { - expr: Box::new(expr), - substring_from: from_expr.map(Box::new), - substring_for: to_expr.map(Box::new), - }) + Ok(Expr::Substring { + expr: Box::new(expr), + substring_from: from_expr.map(Box::new), + substring_for: to_expr.map(Box::new), + special: !self.dialect.supports_substring_from_for_expr(), + }) + } else { + // PARSE SUBSTRING(EXPR, start, length) + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + + self.expect_token(&Token::Comma)?; + let from_expr = Some(self.parse_expr()?); + + self.expect_token(&Token::Comma)?; + let to_expr = Some(self.parse_expr()?); + + self.expect_token(&Token::RParen)?; + + Ok(Expr::Substring { + expr: Box::new(expr), + substring_from: from_expr.map(Box::new), + substring_for: 
to_expr.map(Box::new), + special: !self.dialect.supports_substring_from_for_expr(), + }) + } } pub fn parse_overlay_expr(&mut self) -> Result { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9ec182f2..ca30e951 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5084,19 +5084,45 @@ fn parse_scalar_subqueries() { #[test] fn parse_substring() { - one_statement_parses_to("SELECT SUBSTRING('1')", "SELECT SUBSTRING('1')"); + let from_for_supported_dialects = TestedDialects { + dialects: vec![ + Box::new(GenericDialect {}), + Box::new(PostgreSqlDialect {}), + Box::new(AnsiDialect {}), + Box::new(SnowflakeDialect {}), + Box::new(HiveDialect {}), + Box::new(RedshiftSqlDialect {}), + Box::new(MySqlDialect {}), + Box::new(BigQueryDialect {}), + Box::new(SQLiteDialect {}), + Box::new(DuckDbDialect {}), + ], + options: None, + }; - one_statement_parses_to( + let from_for_unsupported_dialects = TestedDialects { + dialects: vec![Box::new(MsSqlDialect {})], + options: None, + }; + + from_for_supported_dialects + .one_statement_parses_to("SELECT SUBSTRING('1')", "SELECT SUBSTRING('1')"); + + from_for_supported_dialects.one_statement_parses_to( "SELECT SUBSTRING('1' FROM 1)", "SELECT SUBSTRING('1' FROM 1)", ); - one_statement_parses_to( + from_for_supported_dialects.one_statement_parses_to( "SELECT SUBSTRING('1' FROM 1 FOR 3)", "SELECT SUBSTRING('1' FROM 1 FOR 3)", ); - one_statement_parses_to("SELECT SUBSTRING('1' FOR 3)", "SELECT SUBSTRING('1' FOR 3)"); + from_for_unsupported_dialects + .one_statement_parses_to("SELECT SUBSTRING('1', 1, 3)", "SELECT SUBSTRING('1', 1, 3)"); + + from_for_supported_dialects + .one_statement_parses_to("SELECT SUBSTRING('1' FOR 3)", "SELECT SUBSTRING('1' FOR 3)"); } #[test] diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index b46a0c6c..d4e093ad 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -379,6 +379,65 @@ fn parse_similar_to() { chk(true); } +#[test] +fn 
parse_substring_in_select() { + let sql = "SELECT DISTINCT SUBSTRING(description, 0, 1) FROM test"; + match ms().one_statement_parses_to( + sql, + "SELECT DISTINCT SUBSTRING(description, 0, 1) FROM test", + ) { + Statement::Query(query) => { + assert_eq!( + Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: Some(Distinct::Distinct), + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::Substring { + expr: Box::new(Expr::Identifier(Ident { + value: "description".to_string(), + quote_style: None + })), + substring_from: Some(Box::new(Expr::Value(number("0")))), + substring_for: Some(Box::new(Expr::Value(number("1")))), + special: true, + })], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "test".to_string(), + quote_style: None + }]), + alias: None, + args: None, + with_hints: vec![] + }, + joins: vec![] + }], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None + }))), + order_by: vec![], + limit: None, + offset: None, + fetch: None, + locks: vec![], + }), + query + ); + } + _ => unreachable!(), + } +} + fn ms() -> TestedDialects { TestedDialects { dialects: vec![Box::new(MsSqlDialect {})], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 07337ec7..9618786d 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1271,7 +1271,8 @@ fn parse_substring_in_select() { quote_style: None })), substring_from: Some(Box::new(Expr::Value(number("0")))), - substring_for: Some(Box::new(Expr::Value(number("1")))) + substring_for: Some(Box::new(Expr::Value(number("1")))), + special: false, })], into: None, from: vec![TableWithJoins { From 83e30677b071e7dd8b595c414b98f90995a2a7cf Mon Sep 17 00:00:00 2001 From: ehoeve Date: Thu, 17 Aug 2023 12:44:55 +0200 Subject: [PATCH 031/109] Add support for 
table-level comments (#946) Co-authored-by: Andrew Lamb --- src/ast/helpers/stmt_create_table.rs | 10 ++++++++++ src/ast/mod.rs | 5 +++++ src/parser.rs | 12 ++++++++++++ tests/sqlparser_mysql.rs | 16 ++++++++++++++++ 4 files changed, 43 insertions(+) diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 2998935d..a00555fd 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -65,6 +65,7 @@ pub struct CreateTableBuilder { pub like: Option, pub clone: Option, pub engine: Option, + pub comment: Option, pub default_charset: Option, pub collation: Option, pub on_commit: Option, @@ -96,6 +97,7 @@ impl CreateTableBuilder { like: None, clone: None, engine: None, + comment: None, default_charset: None, collation: None, on_commit: None, @@ -197,6 +199,11 @@ impl CreateTableBuilder { self } + pub fn comment(mut self, comment: Option) -> Self { + self.comment = comment; + self + } + pub fn default_charset(mut self, default_charset: Option) -> Self { self.default_charset = default_charset; self @@ -249,6 +256,7 @@ impl CreateTableBuilder { like: self.like, clone: self.clone, engine: self.engine, + comment: self.comment, default_charset: self.default_charset, collation: self.collation, on_commit: self.on_commit, @@ -287,6 +295,7 @@ impl TryFrom for CreateTableBuilder { like, clone, engine, + comment, default_charset, collation, on_commit, @@ -314,6 +323,7 @@ impl TryFrom for CreateTableBuilder { like, clone, engine, + comment, default_charset, collation, on_commit, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 0a366f63..41a5530a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1319,6 +1319,7 @@ pub enum Statement { like: Option, clone: Option, engine: Option, + comment: Option, default_charset: Option, collation: Option, on_commit: Option, @@ -2250,6 +2251,7 @@ impl fmt::Display for Statement { clone, default_charset, engine, + comment, collation, on_commit, on_cluster, @@ -2401,6 +2403,9 
@@ impl fmt::Display for Statement { if let Some(engine) = engine { write!(f, " ENGINE={engine}")?; } + if let Some(comment) = comment { + write!(f, " COMMENT '{comment}'")?; + } if let Some(order_by) = order_by { write!(f, " ORDER BY ({})", display_comma_separated(order_by))?; } diff --git a/src/parser.rs b/src/parser.rs index 5d44ce9c..34456e09 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3503,6 +3503,17 @@ impl<'a> Parser<'a> { None }; + let comment = if self.parse_keyword(Keyword::COMMENT) { + let _ = self.consume_token(&Token::Eq); + let next_token = self.next_token(); + match next_token.token { + Token::SingleQuotedString(str) => Some(str), + _ => self.expected("comment", next_token)?, + } + } else { + None + }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { if self.consume_token(&Token::LParen) { let columns = if self.peek_token() != Token::RParen { @@ -3583,6 +3594,7 @@ impl<'a> Parser<'a> { .like(like) .clone_clause(clone) .engine(engine) + .comment(comment) .order_by(order_by) .default_charset(default_charset) .collation(collation) diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 9618786d..c0a51eda 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -296,6 +296,22 @@ fn parse_create_table_auto_increment() { } } +#[test] +fn parse_create_table_comment() { + let canonical = "CREATE TABLE foo (bar INT) COMMENT 'baz'"; + let with_equal = "CREATE TABLE foo (bar INT) COMMENT = 'baz'"; + + for sql in [canonical, with_equal] { + match mysql().one_statement_parses_to(sql, canonical) { + Statement::CreateTable { name, comment, .. 
} => { + assert_eq!(name.to_string(), "foo"); + assert_eq!(comment.expect("Should exist").to_string(), "baz"); + } + _ => unreachable!(), + } + } +} + #[test] fn parse_create_table_set_enum() { let sql = "CREATE TABLE foo (bar SET('a', 'b'), baz ENUM('a', 'b'))"; From a7d28582e557b6d7c177d5e98809de1090581fc6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 17 Aug 2023 06:45:18 -0400 Subject: [PATCH 032/109] Minor: clarify the value of the special flag (#948) --- src/parser.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 34456e09..fcac8f23 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1242,7 +1242,7 @@ impl<'a> Parser<'a> { expr: Box::new(expr), substring_from: from_expr.map(Box::new), substring_for: to_expr.map(Box::new), - special: !self.dialect.supports_substring_from_for_expr(), + special: false, }) } else { // PARSE SUBSTRING(EXPR, start, length) @@ -1261,7 +1261,7 @@ impl<'a> Parser<'a> { expr: Box::new(expr), substring_from: from_expr.map(Box::new), substring_for: to_expr.map(Box::new), - special: !self.dialect.supports_substring_from_for_expr(), + special: true, }) } } From a49ea1908d3862f3471e3e1304c15926314eda48 Mon Sep 17 00:00:00 2001 From: "r.4ntix" Date: Thu, 17 Aug 2023 20:05:54 +0800 Subject: [PATCH 033/109] feat: add `ALTER ROLE` syntax of PostgreSQL and MS SQL Server (#942) --- src/ast/dcl.rs | 195 +++++++++++++++++++++++++++++ src/ast/mod.rs | 10 ++ src/keywords.rs | 1 + src/parser/alter.rs | 204 +++++++++++++++++++++++++++++++ src/{parser.rs => parser/mod.rs} | 11 +- tests/sqlparser_mssql.rs | 54 ++++++++ tests/sqlparser_postgres.rs | 193 +++++++++++++++++++++++++++++ 7 files changed, 666 insertions(+), 2 deletions(-) create mode 100644 src/ast/dcl.rs create mode 100644 src/parser/alter.rs rename src/{parser.rs => parser/mod.rs} (99%) diff --git a/src/ast/dcl.rs b/src/ast/dcl.rs new file mode 100644 index 00000000..f90de34d --- /dev/null +++ b/src/ast/dcl.rs @@ -0,0 +1,195 @@ 
+// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! AST types specific to GRANT/REVOKE/ROLE variants of [`Statement`](crate::ast::Statement) +//! (commonly referred to as Data Control Language, or DCL) + +#[cfg(not(feature = "std"))] +use alloc::vec::Vec; +use core::fmt; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +#[cfg(feature = "visitor")] +use sqlparser_derive::{Visit, VisitMut}; + +use super::{Expr, Ident, Password}; +use crate::ast::{display_separated, ObjectName}; + +/// An option in `ROLE` statement. 
+/// +/// +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum RoleOption { + BypassRLS(bool), + ConnectionLimit(Expr), + CreateDB(bool), + CreateRole(bool), + Inherit(bool), + Login(bool), + Password(Password), + Replication(bool), + SuperUser(bool), + ValidUntil(Expr), +} + +impl fmt::Display for RoleOption { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + RoleOption::BypassRLS(value) => { + write!(f, "{}", if *value { "BYPASSRLS" } else { "NOBYPASSRLS" }) + } + RoleOption::ConnectionLimit(expr) => { + write!(f, "CONNECTION LIMIT {expr}") + } + RoleOption::CreateDB(value) => { + write!(f, "{}", if *value { "CREATEDB" } else { "NOCREATEDB" }) + } + RoleOption::CreateRole(value) => { + write!(f, "{}", if *value { "CREATEROLE" } else { "NOCREATEROLE" }) + } + RoleOption::Inherit(value) => { + write!(f, "{}", if *value { "INHERIT" } else { "NOINHERIT" }) + } + RoleOption::Login(value) => { + write!(f, "{}", if *value { "LOGIN" } else { "NOLOGIN" }) + } + RoleOption::Password(password) => match password { + Password::Password(expr) => write!(f, "PASSWORD {expr}"), + Password::NullPassword => write!(f, "PASSWORD NULL"), + }, + RoleOption::Replication(value) => { + write!( + f, + "{}", + if *value { + "REPLICATION" + } else { + "NOREPLICATION" + } + ) + } + RoleOption::SuperUser(value) => { + write!(f, "{}", if *value { "SUPERUSER" } else { "NOSUPERUSER" }) + } + RoleOption::ValidUntil(expr) => { + write!(f, "VALID UNTIL {expr}") + } + } + } +} + +/// SET config value option: +/// * SET `configuration_parameter` { TO | = } { `value` | DEFAULT } +/// * SET `configuration_parameter` FROM CURRENT +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum SetConfigValue { + 
Default, + FromCurrent, + Value(Expr), +} + +/// RESET config option: +/// * RESET `configuration_parameter` +/// * RESET ALL +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum ResetConfig { + ALL, + ConfigName(ObjectName), +} + +/// An `ALTER ROLE` (`Statement::AlterRole`) operation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AlterRoleOperation { + /// Generic + RenameRole { + role_name: Ident, + }, + /// MS SQL Server + /// + AddMember { + member_name: Ident, + }, + DropMember { + member_name: Ident, + }, + /// PostgreSQL + /// + WithOptions { + options: Vec, + }, + Set { + config_name: ObjectName, + config_value: SetConfigValue, + in_database: Option, + }, + Reset { + config_name: ResetConfig, + in_database: Option, + }, +} + +impl fmt::Display for AlterRoleOperation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + AlterRoleOperation::RenameRole { role_name } => { + write!(f, "RENAME TO {role_name}") + } + AlterRoleOperation::AddMember { member_name } => { + write!(f, "ADD MEMBER {member_name}") + } + AlterRoleOperation::DropMember { member_name } => { + write!(f, "DROP MEMBER {member_name}") + } + AlterRoleOperation::WithOptions { options } => { + write!(f, "WITH {}", display_separated(options, " ")) + } + AlterRoleOperation::Set { + config_name, + config_value, + in_database, + } => { + if let Some(database_name) = in_database { + write!(f, "IN DATABASE {} ", database_name)?; + } + + match config_value { + SetConfigValue::Default => write!(f, "SET {config_name} TO DEFAULT"), + SetConfigValue::FromCurrent => write!(f, "SET {config_name} FROM CURRENT"), + SetConfigValue::Value(expr) => write!(f, "SET {config_name} TO {expr}"), + } + } + 
AlterRoleOperation::Reset { + config_name, + in_database, + } => { + if let Some(database_name) = in_database { + write!(f, "IN DATABASE {} ", database_name)?; + } + + match config_name { + ResetConfig::ALL => write!(f, "RESET ALL"), + ResetConfig::ConfigName(name) => write!(f, "RESET {name}"), + } + } + } + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 41a5530a..edab3c63 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -28,6 +28,7 @@ use sqlparser_derive::{Visit, VisitMut}; pub use self::data_type::{ CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo, }; +pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue}; pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef, GeneratedAs, IndexType, KeyOrIndexDisplay, ProcedureParam, ReferentialAction, @@ -52,6 +53,7 @@ use crate::ast::helpers::stmt_data_loading::{ pub use visitor::*; mod data_type; +mod dcl; mod ddl; pub mod helpers; mod operator; @@ -1398,6 +1400,11 @@ pub enum Statement { query: Box, with_options: Vec, }, + /// ALTER ROLE + AlterRole { + name: Ident, + operation: AlterRoleOperation, + }, /// DROP Drop { /// The type of the object to drop: TABLE, VIEW, etc. 
@@ -2585,6 +2592,9 @@ impl fmt::Display for Statement { } write!(f, " AS {query}") } + Statement::AlterRole { name, operation } => { + write!(f, "ALTER ROLE {name} {operation}") + } Statement::Drop { object_type, if_exists, diff --git a/src/keywords.rs b/src/keywords.rs index 98e03941..5f3e4402 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -508,6 +508,7 @@ define_keywords!( REPEATABLE, REPLACE, REPLICATION, + RESET, RESTRICT, RESULT, RETAIN, diff --git a/src/parser/alter.rs b/src/parser/alter.rs new file mode 100644 index 00000000..838b6489 --- /dev/null +++ b/src/parser/alter.rs @@ -0,0 +1,204 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
SQL Parser for ALTER + +#[cfg(not(feature = "std"))] +use alloc::vec; + +use super::{Parser, ParserError}; +use crate::{ + ast::{AlterRoleOperation, Expr, Password, ResetConfig, RoleOption, SetConfigValue, Statement}, + dialect::{MsSqlDialect, PostgreSqlDialect}, + keywords::Keyword, + tokenizer::Token, +}; + +impl<'a> Parser<'a> { + pub fn parse_alter_role(&mut self) -> Result { + if dialect_of!(self is PostgreSqlDialect) { + return self.parse_pg_alter_role(); + } else if dialect_of!(self is MsSqlDialect) { + return self.parse_mssql_alter_role(); + } + + Err(ParserError::ParserError( + "ALTER ROLE is only support for PostgreSqlDialect, MsSqlDialect".into(), + )) + } + + fn parse_mssql_alter_role(&mut self) -> Result { + let role_name = self.parse_identifier()?; + + let operation = if self.parse_keywords(&[Keyword::ADD, Keyword::MEMBER]) { + let member_name = self.parse_identifier()?; + AlterRoleOperation::AddMember { member_name } + } else if self.parse_keywords(&[Keyword::DROP, Keyword::MEMBER]) { + let member_name = self.parse_identifier()?; + AlterRoleOperation::DropMember { member_name } + } else if self.parse_keywords(&[Keyword::WITH, Keyword::NAME]) { + if self.consume_token(&Token::Eq) { + let role_name = self.parse_identifier()?; + AlterRoleOperation::RenameRole { role_name } + } else { + return self.expected("= after WITH NAME ", self.peek_token()); + } + } else { + return self.expected("'ADD' or 'DROP' or 'WITH NAME'", self.peek_token()); + }; + + Ok(Statement::AlterRole { + name: role_name, + operation, + }) + } + + fn parse_pg_alter_role(&mut self) -> Result { + let role_name = self.parse_identifier()?; + + // [ IN DATABASE _`database_name`_ ] + let in_database = if self.parse_keywords(&[Keyword::IN, Keyword::DATABASE]) { + self.parse_object_name().ok() + } else { + None + }; + + let operation = if self.parse_keyword(Keyword::RENAME) { + if self.parse_keyword(Keyword::TO) { + let role_name = self.parse_identifier()?; + AlterRoleOperation::RenameRole { 
role_name } + } else { + return self.expected("TO after RENAME", self.peek_token()); + } + // SET + } else if self.parse_keyword(Keyword::SET) { + let config_name = self.parse_object_name()?; + // FROM CURRENT + if self.parse_keywords(&[Keyword::FROM, Keyword::CURRENT]) { + AlterRoleOperation::Set { + config_name, + config_value: SetConfigValue::FromCurrent, + in_database, + } + // { TO | = } { value | DEFAULT } + } else if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { + if self.parse_keyword(Keyword::DEFAULT) { + AlterRoleOperation::Set { + config_name, + config_value: SetConfigValue::Default, + in_database, + } + } else if let Ok(expr) = self.parse_expr() { + AlterRoleOperation::Set { + config_name, + config_value: SetConfigValue::Value(expr), + in_database, + } + } else { + self.expected("config value", self.peek_token())? + } + } else { + self.expected("'TO' or '=' or 'FROM CURRENT'", self.peek_token())? + } + // RESET + } else if self.parse_keyword(Keyword::RESET) { + if self.parse_keyword(Keyword::ALL) { + AlterRoleOperation::Reset { + config_name: ResetConfig::ALL, + in_database, + } + } else { + let config_name = self.parse_object_name()?; + AlterRoleOperation::Reset { + config_name: ResetConfig::ConfigName(config_name), + in_database, + } + } + // option + } else { + // [ WITH ] + let _ = self.parse_keyword(Keyword::WITH); + // option + let mut options = vec![]; + while let Some(opt) = self.maybe_parse(|parser| parser.parse_pg_role_option()) { + options.push(opt); + } + // check option + if options.is_empty() { + return self.expected("option", self.peek_token())?; + } + + AlterRoleOperation::WithOptions { options } + }; + + Ok(Statement::AlterRole { + name: role_name, + operation, + }) + } + + fn parse_pg_role_option(&mut self) -> Result { + let option = match self.parse_one_of_keywords(&[ + Keyword::BYPASSRLS, + Keyword::NOBYPASSRLS, + Keyword::CONNECTION, + Keyword::CREATEDB, + Keyword::NOCREATEDB, + Keyword::CREATEROLE, + 
Keyword::NOCREATEROLE, + Keyword::INHERIT, + Keyword::NOINHERIT, + Keyword::LOGIN, + Keyword::NOLOGIN, + Keyword::PASSWORD, + Keyword::REPLICATION, + Keyword::NOREPLICATION, + Keyword::SUPERUSER, + Keyword::NOSUPERUSER, + Keyword::VALID, + ]) { + Some(Keyword::BYPASSRLS) => RoleOption::BypassRLS(true), + Some(Keyword::NOBYPASSRLS) => RoleOption::BypassRLS(false), + Some(Keyword::CONNECTION) => { + self.expect_keyword(Keyword::LIMIT)?; + RoleOption::ConnectionLimit(Expr::Value(self.parse_number_value()?)) + } + Some(Keyword::CREATEDB) => RoleOption::CreateDB(true), + Some(Keyword::NOCREATEDB) => RoleOption::CreateDB(false), + Some(Keyword::CREATEROLE) => RoleOption::CreateRole(true), + Some(Keyword::NOCREATEROLE) => RoleOption::CreateRole(false), + Some(Keyword::INHERIT) => RoleOption::Inherit(true), + Some(Keyword::NOINHERIT) => RoleOption::Inherit(false), + Some(Keyword::LOGIN) => RoleOption::Login(true), + Some(Keyword::NOLOGIN) => RoleOption::Login(false), + Some(Keyword::PASSWORD) => { + let password = if self.parse_keyword(Keyword::NULL) { + Password::NullPassword + } else { + Password::Password(Expr::Value(self.parse_value()?)) + }; + RoleOption::Password(password) + } + Some(Keyword::REPLICATION) => RoleOption::Replication(true), + Some(Keyword::NOREPLICATION) => RoleOption::Replication(false), + Some(Keyword::SUPERUSER) => RoleOption::SuperUser(true), + Some(Keyword::NOSUPERUSER) => RoleOption::SuperUser(false), + Some(Keyword::VALID) => { + self.expect_keyword(Keyword::UNTIL)?; + RoleOption::ValidUntil(Expr::Value(self.parse_value()?)) + } + _ => self.expected("option", self.peek_token())?, + }; + + Ok(option) + } +} diff --git a/src/parser.rs b/src/parser/mod.rs similarity index 99% rename from src/parser.rs rename to src/parser/mod.rs index fcac8f23..d6f45359 100644 --- a/src/parser.rs +++ b/src/parser/mod.rs @@ -33,6 +33,8 @@ use crate::dialect::*; use crate::keywords::{self, Keyword}; use crate::tokenizer::*; +mod alter; + #[derive(Debug, Clone, 
PartialEq, Eq)] pub enum ParserError { TokenizerError(String), @@ -3990,8 +3992,12 @@ impl<'a> Parser<'a> { } pub fn parse_alter(&mut self) -> Result { - let object_type = - self.expect_one_of_keywords(&[Keyword::VIEW, Keyword::TABLE, Keyword::INDEX])?; + let object_type = self.expect_one_of_keywords(&[ + Keyword::VIEW, + Keyword::TABLE, + Keyword::INDEX, + Keyword::ROLE, + ])?; match object_type { Keyword::VIEW => self.parse_alter_view(), Keyword::TABLE => { @@ -4186,6 +4192,7 @@ impl<'a> Parser<'a> { operation, }) } + Keyword::ROLE => self.parse_alter_role(), // unreachable because expect_one_of_keywords used above _ => unreachable!(), } diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index d4e093ad..56fbd576 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -216,6 +216,60 @@ fn parse_mssql_create_role() { } } +#[test] +fn parse_alter_role() { + let sql = "ALTER ROLE old_name WITH NAME = new_name"; + assert_eq!( + ms().parse_sql_statements(sql).unwrap(), + [Statement::AlterRole { + name: Ident { + value: "old_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::RenameRole { + role_name: Ident { + value: "new_name".into(), + quote_style: None + } + }, + }] + ); + + let sql = "ALTER ROLE role_name ADD MEMBER new_member"; + assert_eq!( + ms().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::AddMember { + member_name: Ident { + value: "new_member".into(), + quote_style: None + } + }, + } + ); + + let sql = "ALTER ROLE role_name DROP MEMBER old_member"; + assert_eq!( + ms().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::DropMember { + member_name: Ident { + value: "old_member".into(), + quote_style: None + } + }, + } + ); +} + #[test] fn parse_delimited_identifiers() { // check that quoted identifiers in any 
position remain quoted after serialization diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index f6bb80d7..09196db4 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2447,6 +2447,199 @@ fn parse_create_role() { } } +#[test] +fn parse_alter_role() { + let sql = "ALTER ROLE old_name RENAME TO new_name"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "old_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::RenameRole { + role_name: Ident { + value: "new_name".into(), + quote_style: None + } + }, + } + ); + + let sql = "ALTER ROLE role_name WITH SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 100 PASSWORD 'abcdef' VALID UNTIL '2025-01-01'"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::WithOptions { + options: vec![ + RoleOption::SuperUser(true), + RoleOption::CreateDB(true), + RoleOption::CreateRole(true), + RoleOption::Inherit(true), + RoleOption::Login(true), + RoleOption::Replication(true), + RoleOption::BypassRLS(true), + RoleOption::ConnectionLimit(Expr::Value(number("100"))), + RoleOption::Password({ + Password::Password(Expr::Value(Value::SingleQuotedString("abcdef".into()))) + }), + RoleOption::ValidUntil(Expr::Value(Value::SingleQuotedString( + "2025-01-01".into(), + ))) + ] + }, + } + ); + + let sql = "ALTER ROLE role_name WITH NOSUPERUSER NOCREATEDB NOCREATEROLE NOINHERIT NOLOGIN NOREPLICATION NOBYPASSRLS PASSWORD NULL"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::WithOptions { + options: vec![ + RoleOption::SuperUser(false), + RoleOption::CreateDB(false), + RoleOption::CreateRole(false), + RoleOption::Inherit(false), + RoleOption::Login(false), + 
RoleOption::Replication(false), + RoleOption::BypassRLS(false), + RoleOption::Password(Password::NullPassword), + ] + }, + } + ); + + let sql = "ALTER ROLE role_name SET maintenance_work_mem FROM CURRENT"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::Set { + config_name: ObjectName(vec![Ident { + value: "maintenance_work_mem".into(), + quote_style: None + }]), + config_value: SetConfigValue::FromCurrent, + in_database: None + }, + } + ); + + let sql = "ALTER ROLE role_name IN DATABASE database_name SET maintenance_work_mem = 100000"; + assert_eq!( + pg().parse_sql_statements(sql).unwrap(), + [Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::Set { + config_name: ObjectName(vec![Ident { + value: "maintenance_work_mem".into(), + quote_style: None + }]), + config_value: SetConfigValue::Value(Expr::Value(number("100000"))), + in_database: Some(ObjectName(vec![Ident { + value: "database_name".into(), + quote_style: None + }])) + }, + }] + ); + + let sql = "ALTER ROLE role_name IN DATABASE database_name SET maintenance_work_mem TO 100000"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::Set { + config_name: ObjectName(vec![Ident { + value: "maintenance_work_mem".into(), + quote_style: None + }]), + config_value: SetConfigValue::Value(Expr::Value(number("100000"))), + in_database: Some(ObjectName(vec![Ident { + value: "database_name".into(), + quote_style: None + }])) + }, + } + ); + + let sql = "ALTER ROLE role_name IN DATABASE database_name SET maintenance_work_mem TO DEFAULT"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::Set { + 
config_name: ObjectName(vec![Ident { + value: "maintenance_work_mem".into(), + quote_style: None + }]), + config_value: SetConfigValue::Default, + in_database: Some(ObjectName(vec![Ident { + value: "database_name".into(), + quote_style: None + }])) + }, + } + ); + + let sql = "ALTER ROLE role_name RESET ALL"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::Reset { + config_name: ResetConfig::ALL, + in_database: None + }, + } + ); + + let sql = "ALTER ROLE role_name IN DATABASE database_name RESET maintenance_work_mem"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::Reset { + config_name: ResetConfig::ConfigName(ObjectName(vec![Ident { + value: "maintenance_work_mem".into(), + quote_style: None + }])), + in_database: Some(ObjectName(vec![Ident { + value: "database_name".into(), + quote_style: None + }])) + }, + } + ); +} + #[test] fn parse_delimited_identifiers() { // check that quoted identifiers in any position remain quoted after serialization From 9a39afbe07b6eecd8819fe1f7d9af2b4aad3a6d7 Mon Sep 17 00:00:00 2001 From: Forbes Lindesay Date: Thu, 17 Aug 2023 16:47:11 +0100 Subject: [PATCH 034/109] feat: support more Postgres index syntax (#943) --- src/ast/mod.rs | 35 ++++++- src/keywords.rs | 2 + src/parser/mod.rs | 38 ++++++- tests/sqlparser_common.rs | 12 ++- tests/sqlparser_postgres.rs | 202 ++++++++++++++++++++++++++++++++++++ 5 files changed, 279 insertions(+), 10 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index edab3c63..47419a89 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1348,13 +1348,17 @@ pub enum Statement { /// CREATE INDEX CreateIndex { /// index name - name: ObjectName, + name: Option, #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] table_name: ObjectName, using: Option, 
columns: Vec, unique: bool, + concurrently: bool, if_not_exists: bool, + include: Vec, + nulls_distinct: Option, + predicate: Option, }, /// CREATE ROLE /// See [postgres](https://www.postgresql.org/docs/current/sql-createrole.html) @@ -2464,20 +2468,41 @@ impl fmt::Display for Statement { using, columns, unique, + concurrently, if_not_exists, + include, + nulls_distinct, + predicate, } => { write!( f, - "CREATE {unique}INDEX {if_not_exists}{name} ON {table_name}", + "CREATE {unique}INDEX {concurrently}{if_not_exists}", unique = if *unique { "UNIQUE " } else { "" }, + concurrently = if *concurrently { "CONCURRENTLY " } else { "" }, if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, - name = name, - table_name = table_name )?; + if let Some(value) = name { + write!(f, "{value} ")?; + } + write!(f, "ON {table_name}")?; if let Some(value) = using { write!(f, " USING {value} ")?; } - write!(f, "({})", display_separated(columns, ",")) + write!(f, "({})", display_separated(columns, ","))?; + if !include.is_empty() { + write!(f, " INCLUDE ({})", display_separated(include, ","))?; + } + if let Some(value) = nulls_distinct { + if *value { + write!(f, " NULLS DISTINCT")?; + } else { + write!(f, " NULLS NOT DISTINCT")?; + } + } + if let Some(predicate) = predicate { + write!(f, " WHERE {predicate}")?; + } + Ok(()) } Statement::CreateRole { names, diff --git a/src/keywords.rs b/src/keywords.rs index 5f3e4402..c73535fc 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -153,6 +153,7 @@ define_keywords!( COMMITTED, COMPRESSION, COMPUTE, + CONCURRENTLY, CONDITION, CONFLICT, CONNECT, @@ -310,6 +311,7 @@ define_keywords!( ILIKE, IMMUTABLE, IN, + INCLUDE, INCREMENT, INDEX, INDICATOR, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d6f45359..94814627 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3370,9 +3370,15 @@ impl<'a> Parser<'a> { } pub fn parse_create_index(&mut self, unique: bool) -> Result { + let concurrently = 
self.parse_keyword(Keyword::CONCURRENTLY); let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let index_name = self.parse_object_name()?; - self.expect_keyword(Keyword::ON)?; + let index_name = if if_not_exists || !self.parse_keyword(Keyword::ON) { + let index_name = self.parse_object_name()?; + self.expect_keyword(Keyword::ON)?; + Some(index_name) + } else { + None + }; let table_name = self.parse_object_name()?; let using = if self.parse_keyword(Keyword::USING) { Some(self.parse_identifier()?) @@ -3382,13 +3388,41 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let columns = self.parse_comma_separated(Parser::parse_order_by_expr)?; self.expect_token(&Token::RParen)?; + + let include = if self.parse_keyword(Keyword::INCLUDE) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_identifier)?; + self.expect_token(&Token::RParen)?; + columns + } else { + vec![] + }; + + let nulls_distinct = if self.parse_keyword(Keyword::NULLS) { + let not = self.parse_keyword(Keyword::NOT); + self.expect_keyword(Keyword::DISTINCT)?; + Some(!not) + } else { + None + }; + + let predicate = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) 
+ } else { + None + }; + Ok(Statement::CreateIndex { name: index_name, table_name, using, columns, unique, + concurrently, if_not_exists, + include, + nulls_distinct, + predicate, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ca30e951..8dfcc6e7 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5981,7 +5981,7 @@ fn parse_create_index() { ]; match verified_stmt(sql) { Statement::CreateIndex { - name, + name: Some(name), table_name, columns, unique, @@ -6015,19 +6015,25 @@ fn test_create_index_with_using_function() { ]; match verified_stmt(sql) { Statement::CreateIndex { - name, + name: Some(name), table_name, using, columns, unique, + concurrently, if_not_exists, + include, + nulls_distinct: None, + predicate: None, } => { assert_eq!("idx_name", name.to_string()); assert_eq!("test", table_name.to_string()); assert_eq!("btree", using.unwrap().to_string()); assert_eq!(indexed_columns, columns); assert!(unique); - assert!(if_not_exists) + assert!(!concurrently); + assert!(if_not_exists); + assert!(include.is_empty()); } _ => unreachable!(), } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 09196db4..a62c41e4 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1761,6 +1761,208 @@ fn parse_array_index_expr() { ); } +#[test] +fn parse_create_index() { + let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2)"; + match pg().verified_stmt(sql) { + Statement::CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using, + columns, + unique, + concurrently, + if_not_exists, + nulls_distinct: None, + include, + predicate: None, + } => { + assert_eq_vec(&["my_index"], &name); + assert_eq_vec(&["my_table"], &table_name); + assert_eq!(None, using); + assert!(!unique); + assert!(!concurrently); + assert!(if_not_exists); + assert_eq_vec(&["col1", "col2"], &columns); + assert!(include.is_empty()); + } + _ => unreachable!(), + } 
+} + +#[test] +fn parse_create_anonymous_index() { + let sql = "CREATE INDEX ON my_table(col1,col2)"; + match pg().verified_stmt(sql) { + Statement::CreateIndex { + name, + table_name: ObjectName(table_name), + using, + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct: None, + predicate: None, + } => { + assert_eq!(None, name); + assert_eq_vec(&["my_table"], &table_name); + assert_eq!(None, using); + assert!(!unique); + assert!(!concurrently); + assert!(!if_not_exists); + assert_eq_vec(&["col1", "col2"], &columns); + assert!(include.is_empty()); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_index_concurrently() { + let sql = "CREATE INDEX CONCURRENTLY IF NOT EXISTS my_index ON my_table(col1,col2)"; + match pg().verified_stmt(sql) { + Statement::CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using, + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct: None, + predicate: None, + } => { + assert_eq_vec(&["my_index"], &name); + assert_eq_vec(&["my_table"], &table_name); + assert_eq!(None, using); + assert!(!unique); + assert!(concurrently); + assert!(if_not_exists); + assert_eq_vec(&["col1", "col2"], &columns); + assert!(include.is_empty()); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_index_with_predicate() { + let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) WHERE col3 IS NULL"; + match pg().verified_stmt(sql) { + Statement::CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using, + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct: None, + predicate: Some(_), + } => { + assert_eq_vec(&["my_index"], &name); + assert_eq_vec(&["my_table"], &table_name); + assert_eq!(None, using); + assert!(!unique); + assert!(!concurrently); + assert!(if_not_exists); + assert_eq_vec(&["col1", "col2"], &columns); + assert!(include.is_empty()); + } + _ => unreachable!(), + 
} +} + +#[test] +fn parse_create_index_with_include() { + let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) INCLUDE (col3)"; + match pg().verified_stmt(sql) { + Statement::CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using, + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct: None, + predicate: None, + } => { + assert_eq_vec(&["my_index"], &name); + assert_eq_vec(&["my_table"], &table_name); + assert_eq!(None, using); + assert!(!unique); + assert!(!concurrently); + assert!(if_not_exists); + assert_eq_vec(&["col1", "col2"], &columns); + assert_eq_vec(&["col3"], &include); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_index_with_nulls_distinct() { + let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) NULLS NOT DISTINCT"; + match pg().verified_stmt(sql) { + Statement::CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using, + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct: Some(nulls_distinct), + predicate: None, + } => { + assert_eq_vec(&["my_index"], &name); + assert_eq_vec(&["my_table"], &table_name); + assert_eq!(None, using); + assert!(!unique); + assert!(!concurrently); + assert!(if_not_exists); + assert_eq_vec(&["col1", "col2"], &columns); + assert!(include.is_empty()); + assert!(!nulls_distinct); + } + _ => unreachable!(), + } + + let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) NULLS DISTINCT"; + match pg().verified_stmt(sql) { + Statement::CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using, + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct: Some(nulls_distinct), + predicate: None, + } => { + assert_eq_vec(&["my_index"], &name); + assert_eq_vec(&["my_table"], &table_name); + assert_eq!(None, using); + assert!(!unique); + assert!(!concurrently); + assert!(if_not_exists); + 
assert_eq_vec(&["col1", "col2"], &columns); + assert!(include.is_empty()); + assert!(nulls_distinct); + } + _ => unreachable!(), + } +} + #[test] fn parse_array_subquery_expr() { let sql = "SELECT ARRAY(SELECT 1 UNION SELECT 2)"; From 41e47cc0136c705a3335b1504d50ae8b22711b86 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 21 Aug 2023 19:21:45 +0200 Subject: [PATCH 035/109] add a test for mssql table name in square brackets (#952) --- tests/sqlparser_mssql.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 56fbd576..0bb2ba3d 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -324,6 +324,26 @@ fn parse_delimited_identifiers() { //TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#); } +#[test] +fn parse_table_name_in_square_brackets() { + let select = ms().verified_only_select(r#"SELECT [a column] FROM [a schema].[a table]"#); + if let TableFactor::Table { name, .. } = only(select.from).relation { + assert_eq!( + vec![ + Ident::with_quote('[', "a schema"), + Ident::with_quote('[', "a table") + ], + name.0 + ); + } else { + panic!("Expecting TableFactor::Table"); + } + assert_eq!( + &Expr::Identifier(Ident::with_quote('[', "a column")), + expr_from_projection(&select.projection[0]), + ); +} + #[test] fn parse_like() { fn chk(negated: bool) { From 9500649c3519f638ba54c407a4a3ed834606abe0 Mon Sep 17 00:00:00 2001 From: ehoeve Date: Mon, 21 Aug 2023 22:25:32 +0200 Subject: [PATCH 036/109] Add support for MySQL auto_increment offset (#950) --- src/ast/helpers/stmt_create_table.rs | 10 ++++++++++ src/ast/mod.rs | 5 +++++ src/parser/mod.rs | 12 ++++++++++++ tests/sqlparser_mysql.rs | 25 +++++++++++++++++++++++++ 4 files changed, 52 insertions(+) diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index a00555fd..17327e7f 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -66,6 +66,7 @@ 
pub struct CreateTableBuilder { pub clone: Option, pub engine: Option, pub comment: Option, + pub auto_increment_offset: Option, pub default_charset: Option, pub collation: Option, pub on_commit: Option, @@ -98,6 +99,7 @@ impl CreateTableBuilder { clone: None, engine: None, comment: None, + auto_increment_offset: None, default_charset: None, collation: None, on_commit: None, @@ -204,6 +206,11 @@ impl CreateTableBuilder { self } + pub fn auto_increment_offset(mut self, offset: Option) -> Self { + self.auto_increment_offset = offset; + self + } + pub fn default_charset(mut self, default_charset: Option) -> Self { self.default_charset = default_charset; self @@ -257,6 +264,7 @@ impl CreateTableBuilder { clone: self.clone, engine: self.engine, comment: self.comment, + auto_increment_offset: self.auto_increment_offset, default_charset: self.default_charset, collation: self.collation, on_commit: self.on_commit, @@ -296,6 +304,7 @@ impl TryFrom for CreateTableBuilder { clone, engine, comment, + auto_increment_offset, default_charset, collation, on_commit, @@ -324,6 +333,7 @@ impl TryFrom for CreateTableBuilder { clone, engine, comment, + auto_increment_offset, default_charset, collation, on_commit, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 47419a89..50c11ba5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1322,6 +1322,7 @@ pub enum Statement { clone: Option, engine: Option, comment: Option, + auto_increment_offset: Option, default_charset: Option, collation: Option, on_commit: Option, @@ -2263,6 +2264,7 @@ impl fmt::Display for Statement { default_charset, engine, comment, + auto_increment_offset, collation, on_commit, on_cluster, @@ -2417,6 +2419,9 @@ impl fmt::Display for Statement { if let Some(comment) = comment { write!(f, " COMMENT '{comment}'")?; } + if let Some(auto_increment_offset) = auto_increment_offset { + write!(f, " AUTO_INCREMENT {auto_increment_offset}")?; + } if let Some(order_by) = order_by { write!(f, " ORDER BY ({})", 
display_comma_separated(order_by))?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 94814627..1b11237f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3550,6 +3550,17 @@ impl<'a> Parser<'a> { None }; + let auto_increment_offset = if self.parse_keyword(Keyword::AUTO_INCREMENT) { + let _ = self.consume_token(&Token::Eq); + let next_token = self.next_token(); + match next_token.token { + Token::Number(s, _) => Some(s.parse::().expect("literal int")), + _ => self.expected("literal int", next_token)?, + } + } else { + None + }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { if self.consume_token(&Token::LParen) { let columns = if self.peek_token() != Token::RParen { @@ -3631,6 +3642,7 @@ impl<'a> Parser<'a> { .clone_clause(clone) .engine(engine) .comment(comment) + .auto_increment_offset(auto_increment_offset) .order_by(order_by) .default_charset(default_charset) .collation(collation) diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index c0a51eda..209143b1 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -312,6 +312,31 @@ fn parse_create_table_comment() { } } +#[test] +fn parse_create_table_auto_increment_offset() { + let canonical = + "CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) ENGINE=InnoDB AUTO_INCREMENT 123"; + let with_equal = + "CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) ENGINE=InnoDB AUTO_INCREMENT=123"; + + for sql in [canonical, with_equal] { + match mysql().one_statement_parses_to(sql, canonical) { + Statement::CreateTable { + name, + auto_increment_offset, + .. 
+ } => { + assert_eq!(name.to_string(), "foo"); + assert_eq!( + auto_increment_offset.expect("Should exist").to_string(), + "123" + ); + } + _ => unreachable!(), + } + } +} + #[test] fn parse_create_table_set_enum() { let sql = "CREATE TABLE foo (bar SET('a', 'b'), baz ENUM('a', 'b'))"; From 1ea88585759f5610a8665bfc280400dce9e8be3c Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Tue, 22 Aug 2023 12:06:32 +0200 Subject: [PATCH 037/109] Table time travel clause support, add `visit_table_factor` to Visitor (#951) --- src/ast/mod.rs | 3 +- src/ast/query.rs | 24 ++++++++++ src/ast/visitor.rs | 82 +++++++++++++++++++++++++++++++---- src/parser/mod.rs | 18 ++++++++ src/test_utils.rs | 1 + tests/sqlparser_bigquery.rs | 26 +++++++++++ tests/sqlparser_clickhouse.rs | 3 ++ tests/sqlparser_common.rs | 29 +++++++++++++ tests/sqlparser_duckdb.rs | 4 ++ tests/sqlparser_hive.rs | 2 + tests/sqlparser_mssql.rs | 30 ++++++++++++- tests/sqlparser_mysql.rs | 7 ++- tests/sqlparser_postgres.rs | 2 + tests/sqlparser_redshift.rs | 4 ++ tests/sqlparser_snowflake.rs | 2 + 15 files changed, 225 insertions(+), 12 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 50c11ba5..a241f950 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -40,7 +40,8 @@ pub use self::query::{ JoinConstraint, JoinOperator, LateralView, LockClause, LockType, NamedWindowDefinition, NonBlock, Offset, OffsetRows, OrderByExpr, Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table, - TableAlias, TableFactor, TableWithJoins, Top, Values, WildcardAdditionalOptions, With, + TableAlias, TableFactor, TableVersion, TableWithJoins, Top, Values, WildcardAdditionalOptions, + With, }; pub use self::value::{ escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value, diff --git a/src/ast/query.rs b/src/ast/query.rs index 5f4c289d..b7001765 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -646,6 +646,7 @@ 
impl fmt::Display for TableWithJoins { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "visitor", visit(with = "visit_table_factor"))] pub enum TableFactor { Table { #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] @@ -661,6 +662,9 @@ pub enum TableFactor { args: Option>, /// MSSQL-specific `WITH (...)` hints such as NOLOCK. with_hints: Vec, + /// Optional version qualifier to facilitate table time-travel, as + /// supported by BigQuery and MSSQL. + version: Option, }, Derived { lateral: bool, @@ -720,6 +724,7 @@ impl fmt::Display for TableFactor { alias, args, with_hints, + version, } => { write!(f, "{name}")?; if let Some(args) = args { @@ -731,6 +736,9 @@ impl fmt::Display for TableFactor { if !with_hints.is_empty() { write!(f, " WITH ({})", display_comma_separated(with_hints))?; } + if let Some(version) = version { + write!(f, "{version}")?; + } Ok(()) } TableFactor::Derived { @@ -835,6 +843,22 @@ impl fmt::Display for TableAlias { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableVersion { + ForSystemTimeAsOf(Expr), +} + +impl Display for TableVersion { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TableVersion::ForSystemTimeAsOf(e) => write!(f, " FOR SYSTEM_TIME AS OF {e}")?, + } + Ok(()) + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 8aa038db..bb7c1967 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -12,7 +12,7 @@ //! Recursive visitors for ast Nodes. See [`Visitor`] for more details. 
-use crate::ast::{Expr, ObjectName, Statement}; +use crate::ast::{Expr, ObjectName, Statement, TableFactor}; use core::ops::ControlFlow; /// A type that can be visited by a [`Visitor`]. See [`Visitor`] for @@ -115,8 +115,8 @@ visit_noop!(bigdecimal::BigDecimal); /// A visitor that can be used to walk an AST tree. /// -/// `previst_` methods are invoked before visiting all children of the -/// node and `postvisit_` methods are invoked after visiting all +/// `pre_visit_` methods are invoked before visiting all children of the +/// node and `post_visit_` methods are invoked after visiting all /// children of the node. /// /// # See also @@ -139,7 +139,7 @@ visit_noop!(bigdecimal::BigDecimal); /// } /// /// // Visit relations and exprs before children are visited (depth first walk) -/// // Note you can also visit statements and visit exprs after children have been visitoed +/// // Note you can also visit statements and visit exprs after children have been visited /// impl Visitor for V { /// type Break = (); /// @@ -189,6 +189,16 @@ pub trait Visitor { ControlFlow::Continue(()) } + /// Invoked for any table factors that appear in the AST before visiting children + fn pre_visit_table_factor(&mut self, _table_factor: &TableFactor) -> ControlFlow { + ControlFlow::Continue(()) + } + + /// Invoked for any table factors that appear in the AST after visiting children + fn post_visit_table_factor(&mut self, _table_factor: &TableFactor) -> ControlFlow { + ControlFlow::Continue(()) + } + /// Invoked for any expressions that appear in the AST before visiting children fn pre_visit_expr(&mut self, _expr: &Expr) -> ControlFlow { ControlFlow::Continue(()) @@ -212,8 +222,8 @@ pub trait Visitor { /// A visitor that can be used to mutate an AST tree. 
/// -/// `previst_` methods are invoked before visiting all children of the -/// node and `postvisit_` methods are invoked after visiting all +/// `pre_visit_` methods are invoked before visiting all children of the +/// node and `post_visit_` methods are invoked after visiting all /// children of the node. /// /// # See also @@ -267,6 +277,22 @@ pub trait VisitorMut { ControlFlow::Continue(()) } + /// Invoked for any table factors that appear in the AST before visiting children + fn pre_visit_table_factor( + &mut self, + _table_factor: &mut TableFactor, + ) -> ControlFlow { + ControlFlow::Continue(()) + } + + /// Invoked for any table factors that appear in the AST after visiting children + fn post_visit_table_factor( + &mut self, + _table_factor: &mut TableFactor, + ) -> ControlFlow { + ControlFlow::Continue(()) + } + /// Invoked for any expressions that appear in the AST before visiting children fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow { ControlFlow::Continue(()) @@ -609,6 +635,24 @@ mod tests { ControlFlow::Continue(()) } + fn pre_visit_table_factor( + &mut self, + table_factor: &TableFactor, + ) -> ControlFlow { + self.visited + .push(format!("PRE: TABLE FACTOR: {table_factor}")); + ControlFlow::Continue(()) + } + + fn post_visit_table_factor( + &mut self, + table_factor: &TableFactor, + ) -> ControlFlow { + self.visited + .push(format!("POST: TABLE FACTOR: {table_factor}")); + ControlFlow::Continue(()) + } + fn pre_visit_expr(&mut self, expr: &Expr) -> ControlFlow { self.visited.push(format!("PRE: EXPR: {expr}")); ControlFlow::Continue(()) @@ -647,22 +691,28 @@ mod tests { fn test_sql() { let tests = vec![ ( - "SELECT * from table_name", + "SELECT * from table_name as my_table", vec![ - "PRE: STATEMENT: SELECT * FROM table_name", + "PRE: STATEMENT: SELECT * FROM table_name AS my_table", + "PRE: TABLE FACTOR: table_name AS my_table", "PRE: RELATION: table_name", "POST: RELATION: table_name", - "POST: STATEMENT: SELECT * FROM table_name", + 
"POST: TABLE FACTOR: table_name AS my_table", + "POST: STATEMENT: SELECT * FROM table_name AS my_table", ], ), ( "SELECT * from t1 join t2 on t1.id = t2.t1_id", vec![ "PRE: STATEMENT: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id", + "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", + "POST: TABLE FACTOR: t1", + "PRE: TABLE FACTOR: t2", "PRE: RELATION: t2", "POST: RELATION: t2", + "POST: TABLE FACTOR: t2", "PRE: EXPR: t1.id = t2.t1_id", "PRE: EXPR: t1.id", "POST: EXPR: t1.id", @@ -676,13 +726,17 @@ mod tests { "SELECT * from t1 where EXISTS(SELECT column from t2)", vec![ "PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", + "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", + "POST: TABLE FACTOR: t1", "PRE: EXPR: EXISTS (SELECT column FROM t2)", "PRE: EXPR: column", "POST: EXPR: column", + "PRE: TABLE FACTOR: t2", "PRE: RELATION: t2", "POST: RELATION: t2", + "POST: TABLE FACTOR: t2", "POST: EXPR: EXISTS (SELECT column FROM t2)", "POST: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", ], @@ -691,13 +745,17 @@ mod tests { "SELECT * from t1 where EXISTS(SELECT column from t2)", vec![ "PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", + "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", + "POST: TABLE FACTOR: t1", "PRE: EXPR: EXISTS (SELECT column FROM t2)", "PRE: EXPR: column", "POST: EXPR: column", + "PRE: TABLE FACTOR: t2", "PRE: RELATION: t2", "POST: RELATION: t2", + "POST: TABLE FACTOR: t2", "POST: EXPR: EXISTS (SELECT column FROM t2)", "POST: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", ], @@ -706,16 +764,22 @@ mod tests { "SELECT * from t1 where EXISTS(SELECT column from t2) UNION SELECT * from t3", vec![ "PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2) UNION SELECT * FROM t3", + "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", + "POST: TABLE FACTOR: t1", "PRE: EXPR: EXISTS (SELECT column 
FROM t2)", "PRE: EXPR: column", "POST: EXPR: column", + "PRE: TABLE FACTOR: t2", "PRE: RELATION: t2", "POST: RELATION: t2", + "POST: TABLE FACTOR: t2", "POST: EXPR: EXISTS (SELECT column FROM t2)", + "PRE: TABLE FACTOR: t3", "PRE: RELATION: t3", "POST: RELATION: t3", + "POST: TABLE FACTOR: t3", "POST: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2) UNION SELECT * FROM t3", ], ), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1b11237f..c2a33b42 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6210,6 +6210,9 @@ impl<'a> Parser<'a> { } else { let name = self.parse_object_name()?; + // Parse potential version qualifier + let version = self.parse_table_version()?; + // Postgres, MSSQL: table-valued functions: let args = if self.consume_token(&Token::LParen) { Some(self.parse_optional_args()?) @@ -6240,10 +6243,25 @@ impl<'a> Parser<'a> { alias, args, with_hints, + version, }) } } + /// Parse a given table version specifier. + /// + /// For now it only supports timestamp versioning for BigQuery and MSSQL dialects. 
+ pub fn parse_table_version(&mut self) -> Result, ParserError> { + if dialect_of!(self is BigQueryDialect | MsSqlDialect) + && self.parse_keywords(&[Keyword::FOR, Keyword::SYSTEM_TIME, Keyword::AS, Keyword::OF]) + { + let expr = self.parse_expr()?; + Ok(Some(TableVersion::ForSystemTimeAsOf(expr))) + } else { + Ok(None) + } + } + pub fn parse_derived_table_factor( &mut self, lateral: IsLateral, diff --git a/src/test_utils.rs b/src/test_utils.rs index 0ec59509..91130fb5 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -221,6 +221,7 @@ pub fn table(name: impl Into) -> TableFactor { alias: None, args: None, with_hints: vec![], + version: None, } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index bbe1a6e9..ca711b26 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -95,6 +95,7 @@ fn parse_table_identifiers() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![] },] @@ -143,6 +144,31 @@ fn parse_table_identifiers() { test_table_ident("abc5.GROUP", vec![Ident::new("abc5"), Ident::new("GROUP")]); } +#[test] +fn parse_table_time_travel() { + let version = "2023-08-18 23:08:18".to_string(); + let sql = format!("SELECT 1 FROM t1 FOR SYSTEM_TIME AS OF '{version}'"); + let select = bigquery().verified_only_select(&sql); + assert_eq!( + select.from, + vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident::new("t1")]), + alias: None, + args: None, + with_hints: vec![], + version: Some(TableVersion::ForSystemTimeAsOf(Expr::Value( + Value::SingleQuotedString(version) + ))), + }, + joins: vec![] + },] + ); + + let sql = "SELECT 1 FROM t1 FOR SYSTEM TIME AS OF 'some_timestamp'".to_string(); + assert!(bigquery().parse_sql_statements(&sql).is_err()); +} + #[test] fn parse_join_constraint_unnest_alias() { assert_eq!( diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 24c64156..77b936d5 100644 --- a/tests/sqlparser_clickhouse.rs +++ 
b/tests/sqlparser_clickhouse.rs @@ -62,6 +62,7 @@ fn parse_map_access_expr() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![] }], @@ -169,11 +170,13 @@ fn parse_delimited_identifiers() { alias, args, with_hints, + version, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); assert!(args.is_none()); assert!(with_hints.is_empty()); + assert!(version.is_none()); } _ => panic!("Expecting TableFactor::Table"), } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 8dfcc6e7..96dac3da 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -214,6 +214,7 @@ fn parse_update_set_from() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }, @@ -240,6 +241,7 @@ fn parse_update_set_from() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], @@ -303,6 +305,7 @@ fn parse_update_with_table_alias() { }), args: None, with_hints: vec![], + version: None, }, joins: vec![], }, @@ -365,6 +368,7 @@ fn parse_select_with_table_alias() { }), args: None, with_hints: vec![], + version: None, }, joins: vec![], }] @@ -395,6 +399,7 @@ fn parse_delete_statement() { alias: None, args: None, with_hints: vec![], + version: None, }, from[0].relation ); @@ -422,6 +427,7 @@ fn parse_delete_statement_for_multi_tables() { alias: None, args: None, with_hints: vec![], + version: None, }, from[0].relation ); @@ -431,6 +437,7 @@ fn parse_delete_statement_for_multi_tables() { alias: None, args: None, with_hints: vec![], + version: None, }, from[0].joins[0].relation ); @@ -454,6 +461,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { alias: None, args: None, with_hints: vec![], + version: None, }, from[0].relation ); @@ -463,6 +471,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { alias: None, args: None, with_hints: vec![], + version: None, }, from[1].relation ); @@ -472,6 
+481,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { alias: None, args: None, with_hints: vec![], + version: None, }, using[0].relation ); @@ -481,6 +491,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { alias: None, args: None, with_hints: vec![], + version: None, }, using[0].joins[0].relation ); @@ -508,6 +519,7 @@ fn parse_where_delete_statement() { alias: None, args: None, with_hints: vec![], + version: None, }, from[0].relation, ); @@ -549,6 +561,7 @@ fn parse_where_delete_with_alias_statement() { }), args: None, with_hints: vec![], + version: None, }, from[0].relation, ); @@ -562,6 +575,7 @@ fn parse_where_delete_with_alias_statement() { }), args: None, with_hints: vec![], + version: None, }, joins: vec![], }]), @@ -3564,6 +3578,7 @@ fn test_parse_named_window() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], @@ -3902,6 +3917,7 @@ fn parse_interval_and_or_xor() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], @@ -4506,6 +4522,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }, @@ -4515,6 +4532,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }, @@ -4532,6 +4550,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![Join { relation: TableFactor::Table { @@ -4539,6 +4558,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + version: None, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -4549,6 +4569,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![Join { relation: TableFactor::Table { @@ -4556,6 +4577,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + version: None, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -4576,6 
+4598,7 @@ fn parse_cross_join() { alias: None, args: None, with_hints: vec![], + version: None, }, join_operator: JoinOperator::CrossJoin, }, @@ -4596,6 +4619,7 @@ fn parse_joins_on() { alias, args: None, with_hints: vec![], + version: None, }, join_operator: f(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -4665,6 +4689,7 @@ fn parse_joins_using() { alias, args: None, with_hints: vec![], + version: None, }, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), } @@ -4726,6 +4751,7 @@ fn parse_natural_join() { alias, args: None, with_hints: vec![], + version: None, }, join_operator: f(JoinConstraint::Natural), } @@ -4990,6 +5016,7 @@ fn parse_derived_tables() { alias: None, args: None, with_hints: vec![], + version: None, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -6317,6 +6344,7 @@ fn parse_merge() { }), args: None, with_hints: vec![], + version: None, } ); assert_eq!(table, table_no_into); @@ -6340,6 +6368,7 @@ fn parse_merge() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 83b1e537..3587e8d9 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -155,6 +155,7 @@ fn test_select_union_by_name() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], @@ -187,6 +188,7 @@ fn test_select_union_by_name() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], @@ -228,6 +230,7 @@ fn test_select_union_by_name() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], @@ -260,6 +263,7 @@ fn test_select_union_by_name() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index ddc5a8cc..8cdfe924 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -322,11 +322,13 @@ fn 
parse_delimited_identifiers() { alias, args, with_hints, + version, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); assert!(args.is_none()); assert!(with_hints.is_empty()); + assert!(version.is_none()); } _ => panic!("Expecting TableFactor::Table"), } diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 0bb2ba3d..c4e0f327 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -42,6 +42,31 @@ fn parse_mssql_identifiers() { }; } +#[test] +fn parse_table_time_travel() { + let version = "2023-08-18 23:08:18".to_string(); + let sql = format!("SELECT 1 FROM t1 FOR SYSTEM_TIME AS OF '{version}'"); + let select = ms().verified_only_select(&sql); + assert_eq!( + select.from, + vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident::new("t1")]), + alias: None, + args: None, + with_hints: vec![], + version: Some(TableVersion::ForSystemTimeAsOf(Expr::Value( + Value::SingleQuotedString(version) + ))), + }, + joins: vec![] + },] + ); + + let sql = "SELECT 1 FROM t1 FOR SYSTEM TIME AS OF 'some_timestamp'".to_string(); + assert!(ms().parse_sql_statements(&sql).is_err()); +} + #[test] fn parse_mssql_single_quoted_aliases() { let _ = ms_and_generic().one_statement_parses_to("SELECT foo 'alias'", "SELECT foo AS 'alias'"); @@ -283,11 +308,13 @@ fn parse_delimited_identifiers() { alias, args, with_hints, + version, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); assert!(args.is_none()); assert!(with_hints.is_empty()); + assert!(version.is_none()); } _ => panic!("Expecting TableFactor::Table"), } @@ -485,7 +512,8 @@ fn parse_substring_in_select() { }]), alias: None, args: None, - with_hints: vec![] + with_hints: vec![], + version: None, }, joins: vec![] }], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 209143b1..3a0177df 100644 
--- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1093,6 +1093,7 @@ fn parse_select_with_numeric_prefix_column_name() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![] }], @@ -1141,6 +1142,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![] }], @@ -1200,6 +1202,7 @@ fn parse_update_with_joins() { }), args: None, with_hints: vec![], + version: None, }, joins: vec![Join { relation: TableFactor::Table { @@ -1210,6 +1213,7 @@ fn parse_update_with_joins() { }), args: None, with_hints: vec![], + version: None, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier(vec![ @@ -1324,7 +1328,8 @@ fn parse_substring_in_select() { }]), alias: None, args: None, - with_hints: vec![] + with_hints: vec![], + version: None, }, joins: vec![] }], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index a62c41e4..b3621a34 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2855,11 +2855,13 @@ fn parse_delimited_identifiers() { alias, args, with_hints, + version, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); assert!(args.is_none()); assert!(with_hints.is_empty()); + assert!(version.is_none()); } _ => panic!("Expecting TableFactor::Table"), } diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index c44f6dee..9f5f62f7 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -45,6 +45,7 @@ fn test_square_brackets_over_db_schema_table_name() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], } @@ -89,6 +90,7 @@ fn test_double_quotes_over_db_schema_table_name() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], } @@ -108,11 +110,13 @@ fn 
parse_delimited_identifiers() { alias, args, with_hints, + version, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); assert!(args.is_none()); assert!(with_hints.is_empty()); + assert!(version.is_none()); } _ => panic!("Expecting TableFactor::Table"), } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 43ebb8b1..20084989 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -223,11 +223,13 @@ fn parse_delimited_identifiers() { alias, args, with_hints, + version, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); assert!(args.is_none()); assert!(with_hints.is_empty()); + assert!(version.is_none()); } _ => panic!("Expecting TableFactor::Table"), } From a2533c20fe5cd1c0060f64190f6ef19fe893fdac Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 22 Aug 2023 08:23:43 -0400 Subject: [PATCH 038/109] Changelog for version 0.37.0 (#953) --- CHANGELOG.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8669f058..4c6c9dcc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,25 @@ Given that the parser produces a typed AST, any changes to the AST will technica ## [Unreleased] Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. 
+## [0.37.0] 2023-08-22 + +### Added +* Support `FOR SYSTEM_TIME AS OF` table time travel clause, add `visit_table_factor` to Visitor (#951) - Thanks @gruuya +* Support MySQL `auto_increment` offset in table definition (#950) - Thanks @ehoeve +* Test for mssql table name in square brackets (#952) - Thanks @lovasoa +* Support additional Postgres `CREATE INDEX` syntax (#943) - Thanks @ForbesLindesay +* Support `ALTER ROLE` syntax of PostgreSQL and MS SQL Server (#942) - Thanks @r4ntix +* Support table-level comments (#946) - Thanks @ehoeve +* Support `DROP TEMPORARY TABLE`, MySQL syntax (#916) - Thanks @liadgiladi +* Support postgres type alias (#933) - Thanks @Kikkon + +### Fixed +* Clarify the value of the special flag (#948) - Thanks @alamb +* Fix `SUBSTRING` from/to argument construction for mssql (#947) - Thanks @jmaness +* Fix: use Rust idiomatic capitalization for newly added DataType enums (#939) - Thanks @Kikkon +* Fix `BEGIN TRANSACTION` being serialized as `START TRANSACTION` (#935) - Thanks @lovasoa +* Fix parsing of datetime functions without parenthesis (#930) - Thanks @lovasoa + ## [0.36.1] 2023-07-19 ### Fixed From b8a58bbc1186503d523726f8f18f06b6e30a3313 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 22 Aug 2023 08:28:03 -0400 Subject: [PATCH 039/109] chore: Release sqlparser version 0.37.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 777e9f4a..43d9f981 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.36.1" +version = "0.37.0" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 9c2e8bcdbc437e715031540981ff971ef715afdd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 22 Aug 2023 08:32:14 -0400 Subject: [PATCH 040/109] Break test and coverage test into
separate jobs (#949) --- .github/workflows/rust.yml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 836d5561..64c4d114 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -75,11 +75,25 @@ jobs: uses: actions/checkout@v2 - name: Test run: cargo test --all-features + + test-coverage: + runs-on: ubuntu-latest + steps: + - name: Setup Rust + uses: hecrj/setup-rust-action@v1 + with: + rust-version: stable + - name: Install Tarpaulin + uses: actions-rs/install@v0.1 + with: + crate: cargo-tarpaulin + version: 0.14.2 + use-tool-cache: true + - name: Checkout + uses: actions/checkout@v2 - name: Coverage - if: matrix.rust == 'stable' run: cargo tarpaulin -o Lcov --output-dir ./coverage - name: Coveralls - if: matrix.rust == 'stable' uses: coverallsapp/github-action@master with: github-token: ${{ secrets.GITHUB_TOKEN }} From 4a2fa66b553cd2e4e6ff9e837e0f04e1e7687c04 Mon Sep 17 00:00:00 2001 From: David Dolphin <445312+ddol@users.noreply.github.com> Date: Fri, 25 Aug 2023 09:06:25 -0700 Subject: [PATCH 041/109] [cli] add --sqlite param (#956) --- examples/cli.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/cli.rs b/examples/cli.rs index 8af6246a..72f963b1 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -47,6 +47,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] "--redshift" => Box::new(RedshiftSqlDialect {}), "--clickhouse" => Box::new(ClickHouseDialect {}), "--duckdb" => Box::new(DuckDbDialect {}), + "--sqlite" => Box::new(SQLiteDialect {}), "--generic" | "" => Box::new(GenericDialect {}), s => panic!("Unexpected parameter: {s}"), }; From 14da37d1822ab6b7e96304cec9e409fb96ffdf09 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 25 Aug 2023 12:21:31 -0400 Subject: [PATCH 042/109] Fix Rust 1.72 clippy lints (#957) --- src/parser/mod.rs | 2 +- tests/sqlparser_bigquery.rs | 6 +++--- 
tests/sqlparser_common.rs | 4 ++-- tests/sqlparser_mysql.rs | 22 +++++++++++----------- tests/sqlparser_postgres.rs | 7 +++---- tests/sqlparser_snowflake.rs | 2 +- 6 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c2a33b42..84621524 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5881,7 +5881,7 @@ impl<'a> Parser<'a> { Some(_) => { let db_name = vec![self.parse_identifier()?]; let ObjectName(table_name) = object_name; - let object_name = db_name.into_iter().chain(table_name.into_iter()).collect(); + let object_name = db_name.into_iter().chain(table_name).collect(); ObjectName(object_name) } None => object_name, diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index ca711b26..d6a28fd0 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -55,7 +55,7 @@ fn parse_raw_literal() { let sql = r#"SELECT R'abc', R"abc", R'f\(abc,(.*),def\)', R"f\(abc,(.*),def\)""#; let stmt = bigquery().one_statement_parses_to( sql, - r#"SELECT R'abc', R'abc', R'f\(abc,(.*),def\)', R'f\(abc,(.*),def\)'"#, + r"SELECT R'abc', R'abc', R'f\(abc,(.*),def\)', R'f\(abc,(.*),def\)'", ); if let Statement::Query(query) = stmt { if let SetExpr::Select(select) = *query.body { @@ -69,11 +69,11 @@ fn parse_raw_literal() { expr_from_projection(&select.projection[1]) ); assert_eq!( - &Expr::Value(Value::RawStringLiteral(r#"f\(abc,(.*),def\)"#.to_string())), + &Expr::Value(Value::RawStringLiteral(r"f\(abc,(.*),def\)".to_string())), expr_from_projection(&select.projection[2]) ); assert_eq!( - &Expr::Value(Value::RawStringLiteral(r#"f\(abc,(.*),def\)"#.to_string())), + &Expr::Value(Value::RawStringLiteral(r"f\(abc,(.*),def\)".to_string())), expr_from_projection(&select.projection[3]) ); return; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 96dac3da..3fdf3d21 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -7212,7 +7212,7 @@ fn 
parse_with_recursion_limit() { .expect("tokenize to work") .parse_statements(); - assert!(matches!(res, Ok(_)), "{res:?}"); + assert!(res.is_ok(), "{res:?}"); // limit recursion to something smaller, expect parsing to fail let res = Parser::new(&dialect) @@ -7230,7 +7230,7 @@ fn parse_with_recursion_limit() { .with_recursion_limit(50) .parse_statements(); - assert!(matches!(res, Ok(_)), "{res:?}"); + assert!(res.is_ok(), "{res:?}"); } #[test] diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 3a0177df..ab7997cd 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -634,11 +634,11 @@ fn parse_escaped_backticks_with_no_escape() { #[test] fn parse_unterminated_escape() { - let sql = r#"SELECT 'I\'m not fine\'"#; + let sql = r"SELECT 'I\'m not fine\'"; let result = std::panic::catch_unwind(|| mysql().one_statement_parses_to(sql, "")); assert!(result.is_err()); - let sql = r#"SELECT 'I\\'m not fine'"#; + let sql = r"SELECT 'I\\'m not fine'"; let result = std::panic::catch_unwind(|| mysql().one_statement_parses_to(sql, "")); assert!(result.is_err()); } @@ -666,7 +666,7 @@ fn parse_escaped_string_with_escape() { _ => unreachable!(), }; } - let sql = r#"SELECT 'I\'m fine'"#; + let sql = r"SELECT 'I\'m fine'"; assert_mysql_query_value(sql, "I'm fine"); let sql = r#"SELECT 'I''m fine'"#; @@ -675,7 +675,7 @@ fn parse_escaped_string_with_escape() { let sql = r#"SELECT 'I\"m fine'"#; assert_mysql_query_value(sql, "I\"m fine"); - let sql = r#"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"#; + let sql = r"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"; assert_mysql_query_value(sql, "Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} a "); } @@ -702,8 +702,8 @@ fn parse_escaped_string_with_no_escape() { _ => unreachable!(), }; } - let sql = r#"SELECT 'I\'m fine'"#; - assert_mysql_query_value(sql, r#"I\'m fine"#); + let sql = r"SELECT 'I\'m fine'"; + assert_mysql_query_value(sql, r"I\'m fine"); let sql = r#"SELECT 'I''m fine'"#; 
assert_mysql_query_value(sql, r#"I''m fine"#); @@ -711,8 +711,8 @@ fn parse_escaped_string_with_no_escape() { let sql = r#"SELECT 'I\"m fine'"#; assert_mysql_query_value(sql, r#"I\"m fine"#); - let sql = r#"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"#; - assert_mysql_query_value(sql, r#"Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ "#); + let sql = r"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"; + assert_mysql_query_value(sql, r"Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ "); } #[test] @@ -723,7 +723,7 @@ fn check_roundtrip_of_escaped_string() { dialects: vec![Box::new(MySqlDialect {})], options: options.clone(), } - .verified_stmt(r#"SELECT 'I\'m fine'"#); + .verified_stmt(r"SELECT 'I\'m fine'"); TestedDialects { dialects: vec![Box::new(MySqlDialect {})], options: options.clone(), @@ -733,12 +733,12 @@ fn check_roundtrip_of_escaped_string() { dialects: vec![Box::new(MySqlDialect {})], options: options.clone(), } - .verified_stmt(r#"SELECT 'I\\\'m fine'"#); + .verified_stmt(r"SELECT 'I\\\'m fine'"); TestedDialects { dialects: vec![Box::new(MySqlDialect {})], options: options.clone(), } - .verified_stmt(r#"SELECT 'I\\\'m fine'"#); + .verified_stmt(r"SELECT 'I\\\'m fine'"); TestedDialects { dialects: vec![Box::new(MySqlDialect {})], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index b3621a34..c34ba75a 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2332,8 +2332,7 @@ fn pg_and_generic() -> TestedDialects { #[test] fn parse_escaped_literal_string() { - let sql = - r#"SELECT E's1 \n s1', E's2 \\n s2', E's3 \\\n s3', E's4 \\\\n s4', E'\'', E'foo \\'"#; + let sql = r"SELECT E's1 \n s1', E's2 \\n s2', E's3 \\\n s3', E's4 \\\\n s4', E'\'', E'foo \\'"; let select = pg_and_generic().verified_only_select(sql); assert_eq!(6, select.projection.len()); assert_eq!( @@ -2361,7 +2360,7 @@ fn parse_escaped_literal_string() { expr_from_projection(&select.projection[5]) ); - let sql = r#"SELECT E'\'"#; + let sql = r"SELECT 
E'\'"; assert_eq!( pg_and_generic() .parse_sql_statements(sql) @@ -2631,7 +2630,7 @@ fn parse_create_role() { err => panic!("Failed to parse CREATE ROLE test case: {err:?}"), } - let negatables = vec![ + let negatables = [ "BYPASSRLS", "CREATEDB", "CREATEROLE", diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 20084989..ecd60852 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1004,7 +1004,7 @@ fn test_copy_into_copy_options() { #[test] fn test_snowflake_stage_object_names() { - let allowed_formatted_names = vec![ + let allowed_formatted_names = [ "my_company.emp_basic", "@namespace.%table_name", "@namespace.%table_name/path", From 4c3a4ad5a87ef8b4dd24a6e8540243e14fbfa148 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 25 Aug 2023 12:21:40 -0400 Subject: [PATCH 043/109] Update release documentation (#954) --- docs/releasing.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/docs/releasing.md b/docs/releasing.md index b71c9759..efe0ab3d 100644 --- a/docs/releasing.md +++ b/docs/releasing.md @@ -45,13 +45,6 @@ $ cargo install cargo-release Note that credentials for authoring in this way are securely stored in the (GitHub) repo secrets as `CRATE_TOKEN`. - * Bump the crate version again (to something like `0.8.1-alpha.0`) to - indicate the start of new development cycle. - -3. Push the updates to the `main` branch upstream: - ``` - $ git push upstream - ``` 4. 
Check that the new version of the crate is available on crates.io: https://crates.io/crates/sqlparser From b02c3f87ec0b70b608ddbf6e92bf3b94ffc9d211 Mon Sep 17 00:00:00 2001 From: dawg Date: Thu, 7 Sep 2023 22:23:09 +0200 Subject: [PATCH 044/109] feat: show location info in parse errors (#958) --- src/parser/mod.rs | 101 +++++++++++++++++++++++++------------- src/test_utils.rs | 8 ++- src/tokenizer.rs | 48 +++++++++--------- tests/sqlparser_common.rs | 30 ++++++----- 4 files changed, 116 insertions(+), 71 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 84621524..6902fc56 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -44,8 +44,8 @@ pub enum ParserError { // Use `Parser::expected` instead, if possible macro_rules! parser_err { - ($MSG:expr) => { - Err(ParserError::ParserError($MSG.to_string())) + ($MSG:expr, $loc:expr) => { + Err(ParserError::ParserError(format!("{}{}", $MSG, $loc))) }; } @@ -368,8 +368,8 @@ impl<'a> Parser<'a> { debug!("Parsing sql '{}'...", sql); let tokens = Tokenizer::new(self.dialect, sql) .with_unescape(self.options.unescape) - .tokenize()?; - Ok(self.with_tokens(tokens)) + .tokenize_with_location()?; + Ok(self.with_tokens_with_locations(tokens)) } /// Parse potentially multiple statements @@ -724,6 +724,7 @@ impl<'a> Parser<'a> { // Note also that naively `SELECT date` looks like a syntax error because the `date` type // name is not followed by a string literal, but in fact in PostgreSQL it is a valid // expression that should parse as the column name "date". + let loc = self.peek_token().location; return_ok_if_some!(self.maybe_parse(|parser| { match parser.parse_data_type()? { DataType::Interval => parser.parse_interval(), @@ -734,7 +735,7 @@ impl<'a> Parser<'a> { // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the // `type 'string'` syntax for the custom data types at all. - DataType::Custom(..) 
=> parser_err!("dummy"), + DataType::Custom(..) => parser_err!("dummy", loc), data_type => Ok(Expr::TypedString { data_type, value: parser.parse_literal_string()?, @@ -909,7 +910,12 @@ impl<'a> Parser<'a> { let tok = self.next_token(); let key = match tok.token { Token::Word(word) => word.to_ident(), - _ => return parser_err!(format!("Expected identifier, found: {tok}")), + _ => { + return parser_err!( + format!("Expected identifier, found: {tok}"), + tok.location + ) + } }; Ok(Expr::CompositeAccess { expr: Box::new(expr), @@ -1220,7 +1226,10 @@ impl<'a> Parser<'a> { r#in: Box::new(from), }) } else { - parser_err!("Position function must include IN keyword".to_string()) + parser_err!( + "Position function must include IN keyword".to_string(), + self.peek_token().location + ) } } @@ -1884,7 +1893,10 @@ impl<'a> Parser<'a> { } } // Can only happen if `get_next_precedence` got out of sync with this function - _ => parser_err!(format!("No infix parser for token {:?}", tok.token)), + _ => parser_err!( + format!("No infix parser for token {:?}", tok.token), + tok.location + ), } } else if Token::DoubleColon == tok { self.parse_pg_cast(expr) @@ -1935,7 +1947,10 @@ impl<'a> Parser<'a> { }) } else { // Can only happen if `get_next_precedence` got out of sync with this function - parser_err!(format!("No infix parser for token {:?}", tok.token)) + parser_err!( + format!("No infix parser for token {:?}", tok.token), + tok.location + ) } } @@ -2213,7 +2228,10 @@ impl<'a> Parser<'a> { /// Report unexpected token pub fn expected(&self, expected: &str, found: TokenWithLocation) -> Result { - parser_err!(format!("Expected {expected}, found: {found}")) + parser_err!( + format!("Expected {expected}, found: {found}"), + found.location + ) } /// Look for an expected keyword and consume it if it exists @@ -2378,13 +2396,14 @@ impl<'a> Parser<'a> { /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. 
Returns `None` if `ALL` is parsed /// and results in a `ParserError` if both `ALL` and `DISTINCT` are found. pub fn parse_all_or_distinct(&mut self) -> Result, ParserError> { + let loc = self.peek_token().location; let all = self.parse_keyword(Keyword::ALL); let distinct = self.parse_keyword(Keyword::DISTINCT); if !distinct { return Ok(None); } if all { - return parser_err!("Cannot specify both ALL and DISTINCT".to_string()); + return parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc); } let on = self.parse_keyword(Keyword::ON); if !on { @@ -2986,10 +3005,14 @@ impl<'a> Parser<'a> { let mut admin = vec![]; while let Some(keyword) = self.parse_one_of_keywords(&optional_keywords) { + let loc = self + .tokens + .get(self.index - 1) + .map_or(Location { line: 0, column: 0 }, |t| t.location); match keyword { Keyword::AUTHORIZATION => { if authorization_owner.is_some() { - parser_err!("Found multiple AUTHORIZATION") + parser_err!("Found multiple AUTHORIZATION", loc) } else { authorization_owner = Some(self.parse_object_name()?); Ok(()) @@ -2997,7 +3020,7 @@ impl<'a> Parser<'a> { } Keyword::LOGIN | Keyword::NOLOGIN => { if login.is_some() { - parser_err!("Found multiple LOGIN or NOLOGIN") + parser_err!("Found multiple LOGIN or NOLOGIN", loc) } else { login = Some(keyword == Keyword::LOGIN); Ok(()) @@ -3005,7 +3028,7 @@ impl<'a> Parser<'a> { } Keyword::INHERIT | Keyword::NOINHERIT => { if inherit.is_some() { - parser_err!("Found multiple INHERIT or NOINHERIT") + parser_err!("Found multiple INHERIT or NOINHERIT", loc) } else { inherit = Some(keyword == Keyword::INHERIT); Ok(()) @@ -3013,7 +3036,7 @@ impl<'a> Parser<'a> { } Keyword::BYPASSRLS | Keyword::NOBYPASSRLS => { if bypassrls.is_some() { - parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS") + parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS", loc) } else { bypassrls = Some(keyword == Keyword::BYPASSRLS); Ok(()) @@ -3021,7 +3044,7 @@ impl<'a> Parser<'a> { } Keyword::CREATEDB | 
Keyword::NOCREATEDB => { if create_db.is_some() { - parser_err!("Found multiple CREATEDB or NOCREATEDB") + parser_err!("Found multiple CREATEDB or NOCREATEDB", loc) } else { create_db = Some(keyword == Keyword::CREATEDB); Ok(()) @@ -3029,7 +3052,7 @@ impl<'a> Parser<'a> { } Keyword::CREATEROLE | Keyword::NOCREATEROLE => { if create_role.is_some() { - parser_err!("Found multiple CREATEROLE or NOCREATEROLE") + parser_err!("Found multiple CREATEROLE or NOCREATEROLE", loc) } else { create_role = Some(keyword == Keyword::CREATEROLE); Ok(()) @@ -3037,7 +3060,7 @@ impl<'a> Parser<'a> { } Keyword::SUPERUSER | Keyword::NOSUPERUSER => { if superuser.is_some() { - parser_err!("Found multiple SUPERUSER or NOSUPERUSER") + parser_err!("Found multiple SUPERUSER or NOSUPERUSER", loc) } else { superuser = Some(keyword == Keyword::SUPERUSER); Ok(()) @@ -3045,7 +3068,7 @@ impl<'a> Parser<'a> { } Keyword::REPLICATION | Keyword::NOREPLICATION => { if replication.is_some() { - parser_err!("Found multiple REPLICATION or NOREPLICATION") + parser_err!("Found multiple REPLICATION or NOREPLICATION", loc) } else { replication = Some(keyword == Keyword::REPLICATION); Ok(()) @@ -3053,7 +3076,7 @@ impl<'a> Parser<'a> { } Keyword::PASSWORD => { if password.is_some() { - parser_err!("Found multiple PASSWORD") + parser_err!("Found multiple PASSWORD", loc) } else { password = if self.parse_keyword(Keyword::NULL) { Some(Password::NullPassword) @@ -3066,7 +3089,7 @@ impl<'a> Parser<'a> { Keyword::CONNECTION => { self.expect_keyword(Keyword::LIMIT)?; if connection_limit.is_some() { - parser_err!("Found multiple CONNECTION LIMIT") + parser_err!("Found multiple CONNECTION LIMIT", loc) } else { connection_limit = Some(Expr::Value(self.parse_number_value()?)); Ok(()) @@ -3075,7 +3098,7 @@ impl<'a> Parser<'a> { Keyword::VALID => { self.expect_keyword(Keyword::UNTIL)?; if valid_until.is_some() { - parser_err!("Found multiple VALID UNTIL") + parser_err!("Found multiple VALID UNTIL", loc) } else { valid_until 
= Some(Expr::Value(self.parse_value()?)); Ok(()) @@ -3084,14 +3107,14 @@ impl<'a> Parser<'a> { Keyword::IN => { if self.parse_keyword(Keyword::ROLE) { if !in_role.is_empty() { - parser_err!("Found multiple IN ROLE") + parser_err!("Found multiple IN ROLE", loc) } else { in_role = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) } } else if self.parse_keyword(Keyword::GROUP) { if !in_group.is_empty() { - parser_err!("Found multiple IN GROUP") + parser_err!("Found multiple IN GROUP", loc) } else { in_group = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) @@ -3102,7 +3125,7 @@ impl<'a> Parser<'a> { } Keyword::ROLE => { if !role.is_empty() { - parser_err!("Found multiple ROLE") + parser_err!("Found multiple ROLE", loc) } else { role = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) @@ -3110,7 +3133,7 @@ impl<'a> Parser<'a> { } Keyword::USER => { if !user.is_empty() { - parser_err!("Found multiple USER") + parser_err!("Found multiple USER", loc) } else { user = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) @@ -3118,7 +3141,7 @@ impl<'a> Parser<'a> { } Keyword::ADMIN => { if !admin.is_empty() { - parser_err!("Found multiple ADMIN") + parser_err!("Found multiple ADMIN", loc) } else { admin = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) @@ -3181,14 +3204,19 @@ impl<'a> Parser<'a> { // specifying multiple objects to delete in a single statement let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let names = self.parse_comma_separated(Parser::parse_object_name)?; + + let loc = self.peek_token().location; let cascade = self.parse_keyword(Keyword::CASCADE); let restrict = self.parse_keyword(Keyword::RESTRICT); let purge = self.parse_keyword(Keyword::PURGE); if cascade && restrict { - return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP"); + return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP", loc); } if object_type == ObjectType::Role && (cascade || 
restrict || purge) { - return parser_err!("Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE"); + return parser_err!( + "Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE", + loc + ); } Ok(Statement::Drop { object_type, @@ -4446,7 +4474,11 @@ impl<'a> Parser<'a> { fn parse_literal_char(&mut self) -> Result { let s = self.parse_literal_string()?; if s.len() != 1 { - return parser_err!(format!("Expect a char, found {s:?}")); + let loc = self + .tokens + .get(self.index - 1) + .map_or(Location { line: 0, column: 0 }, |t| t.location); + return parser_err!(format!("Expect a char, found {s:?}"), loc); } Ok(s.chars().next().unwrap()) } @@ -4525,7 +4557,7 @@ impl<'a> Parser<'a> { // (i.e., it returns the input string). Token::Number(ref n, l) => match n.parse() { Ok(n) => Ok(Value::Number(n, l)), - Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}")), + Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}"), location), }, Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), @@ -6465,10 +6497,11 @@ impl<'a> Parser<'a> { .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) .then(|| self.parse_identifier().unwrap()); + let loc = self.peek_token().location; let cascade = self.parse_keyword(Keyword::CASCADE); let restrict = self.parse_keyword(Keyword::RESTRICT); if cascade && restrict { - return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE"); + return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE", loc); } Ok(Statement::Revoke { @@ -7929,14 +7962,12 @@ mod tests { #[test] fn test_parser_error_loc() { - // TODO: Once we thread token locations through the parser, we should update this - // test to assert the locations of the referenced token let sql = "SELECT this is a syntax error"; let ast = Parser::parse_sql(&GenericDialect, sql); assert_eq!( ast, Err(ParserError::ParserError( - "Expected 
[NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a" + "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column 16" .to_string() )) ); diff --git a/src/test_utils.rs b/src/test_utils.rs index 91130fb5..a5e9e739 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -28,6 +28,7 @@ use core::fmt::Debug; use crate::dialect::*; use crate::parser::{Parser, ParserError}; +use crate::tokenizer::Tokenizer; use crate::{ast::*, parser::ParserOptions}; /// Tests use the methods on this struct to invoke the parser on one or @@ -82,8 +83,13 @@ impl TestedDialects { /// the result is the same for all tested dialects. pub fn parse_sql_statements(&self, sql: &str) -> Result, ParserError> { self.one_of_identical_results(|dialect| { + let mut tokenizer = Tokenizer::new(dialect, sql); + if let Some(options) = &self.options { + tokenizer = tokenizer.with_unescape(options.unescape); + } + let tokens = tokenizer.tokenize()?; self.new_parser(dialect) - .try_with_sql(sql)? 
+ .with_tokens(tokens) .parse_statements() }) // To fail the `ensure_multiple_dialects_are_tested` test: diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f20e01b7..1b1b1e96 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -350,7 +350,7 @@ impl fmt::Display for Whitespace { } /// Location in input string -#[derive(Debug, Eq, PartialEq, Clone)] +#[derive(Debug, Eq, PartialEq, Clone, Copy)] pub struct Location { /// Line number, starting from 1 pub line: u64, @@ -358,6 +358,20 @@ pub struct Location { pub column: u64, } +impl fmt::Display for Location { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.line == 0 { + return Ok(()); + } + write!( + f, + // TODO: use standard compiler location syntax (::) + " at Line: {}, Column {}", + self.line, self.column, + ) + } +} + /// A [Token] with [Location] attached to it #[derive(Debug, Eq, PartialEq, Clone)] pub struct TokenWithLocation { @@ -400,17 +414,12 @@ impl fmt::Display for TokenWithLocation { #[derive(Debug, PartialEq, Eq)] pub struct TokenizerError { pub message: String, - pub line: u64, - pub col: u64, + pub location: Location, } impl fmt::Display for TokenizerError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{} at Line: {}, Column {}", - self.message, self.line, self.col - ) + write!(f, "{}{}", self.message, self.location,) } } @@ -546,10 +555,7 @@ impl<'a> Tokenizer<'a> { let mut location = state.location(); while let Some(token) = self.next_token(&mut state)? 
{ - tokens.push(TokenWithLocation { - token, - location: location.clone(), - }); + tokens.push(TokenWithLocation { token, location }); location = state.location(); } @@ -1122,8 +1128,7 @@ impl<'a> Tokenizer<'a> { ) -> Result { Err(TokenizerError { message: message.into(), - col: loc.column, - line: loc.line, + location: loc, }) } @@ -1368,8 +1373,7 @@ mod tests { fn tokenizer_error_impl() { let err = TokenizerError { message: "test".into(), - line: 1, - col: 1, + location: Location { line: 1, column: 1 }, }; #[cfg(feature = "std")] { @@ -1694,8 +1698,7 @@ mod tests { tokenizer.tokenize(), Err(TokenizerError { message: "Unterminated string literal".to_string(), - line: 1, - col: 8 + location: Location { line: 1, column: 8 }, }) ); } @@ -1710,8 +1713,10 @@ mod tests { tokenizer.tokenize(), Err(TokenizerError { message: "Unterminated string literal".to_string(), - line: 1, - col: 35 + location: Location { + line: 1, + column: 35 + } }) ); } @@ -1873,8 +1878,7 @@ mod tests { tokenizer.tokenize(), Err(TokenizerError { message: "Expected close delimiter '\"' before EOF.".to_string(), - line: 1, - col: 1 + location: Location { line: 1, column: 1 }, }) ); } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3fdf3d21..45074336 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -836,7 +836,12 @@ fn test_eof_after_as() { #[test] fn test_no_infix_error() { - let res = Parser::parse_sql(&ClickHouseDialect {}, "ASSERT-URA<<"); + let dialects = TestedDialects { + dialects: vec![Box::new(ClickHouseDialect {})], + options: None, + }; + + let res = dialects.parse_sql_statements("ASSERT-URA<<"); assert_eq!( ParserError::ParserError("No infix parser for token ShiftLeft".to_string()), res.unwrap_err() @@ -3238,19 +3243,21 @@ fn parse_alter_table_alter_column_type() { _ => unreachable!(), } - let res = Parser::parse_sql( - &GenericDialect {}, - &format!("{alter_stmt} ALTER COLUMN is_active TYPE TEXT"), - ); + let dialect = TestedDialects { + 
dialects: vec![Box::new(GenericDialect {})], + options: None, + }; + + let res = + dialect.parse_sql_statements(&format!("{alter_stmt} ALTER COLUMN is_active TYPE TEXT")); assert_eq!( ParserError::ParserError("Expected SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE after ALTER COLUMN, found: TYPE".to_string()), res.unwrap_err() ); - let res = Parser::parse_sql( - &GenericDialect {}, - &format!("{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'"), - ); + let res = dialect.parse_sql_statements(&format!( + "{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'" + )); assert_eq!( ParserError::ParserError("Expected end of statement, found: USING".to_string()), res.unwrap_err() @@ -3295,10 +3302,7 @@ fn parse_alter_table_drop_constraint() { _ => unreachable!(), } - let res = Parser::parse_sql( - &GenericDialect {}, - &format!("{alter_stmt} DROP CONSTRAINT is_active TEXT"), - ); + let res = parse_sql_statements(&format!("{alter_stmt} DROP CONSTRAINT is_active TEXT")); assert_eq!( ParserError::ParserError("Expected end of statement, found: TEXT".to_string()), res.unwrap_err() From e0afd4b179bd5209be6140c0aa02ef49c5a32707 Mon Sep 17 00:00:00 2001 From: SeanTroyUWO Date: Thu, 7 Sep 2023 14:32:50 -0600 Subject: [PATCH 045/109] `ANY` and `ALL` contains their operators (#963) --- src/ast/mod.rs | 28 ++++++++++++++++++++++------ src/parser/mod.rs | 32 ++++++++++++++++++++++++-------- tests/sqlparser_common.rs | 12 ++++++------ 3 files changed, 52 insertions(+), 20 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a241f950..41d66166 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -419,10 +419,18 @@ pub enum Expr { pattern: Box, escape_char: Option, }, - /// Any operation e.g. `1 ANY (1)` or `foo > ANY(bar)`, It will be wrapped in the right side of BinaryExpr - AnyOp(Box), - /// ALL operation e.g. `1 ALL (1)` or `foo > ALL(bar)`, It will be wrapped in the right side of BinaryExpr - AllOp(Box), + /// Any operation e.g. 
`foo > ANY(bar)`, comparison operator is one of [=, >, <, =>, =<, !=] + AnyOp { + left: Box, + compare_op: BinaryOperator, + right: Box, + }, + /// ALL operation e.g. `foo > ALL(bar)`, comparison operator is one of [=, >, <, =>, =<, !=] + AllOp { + left: Box, + compare_op: BinaryOperator, + right: Box, + }, /// Unary operation e.g. `NOT foo` UnaryOp { op: UnaryOperator, expr: Box }, /// CAST an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))` @@ -724,8 +732,16 @@ impl fmt::Display for Expr { pattern ), }, - Expr::AnyOp(expr) => write!(f, "ANY({expr})"), - Expr::AllOp(expr) => write!(f, "ALL({expr})"), + Expr::AnyOp { + left, + compare_op, + right, + } => write!(f, "{left} {compare_op} ANY({right})"), + Expr::AllOp { + left, + compare_op, + right, + } => write!(f, "{left} {compare_op} ALL({right})"), Expr::UnaryOp { op, expr } => { if op == &UnaryOperator::PGPostfixFactorial { write!(f, "{expr}{op}") diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6902fc56..9de67a51 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1782,16 +1782,32 @@ impl<'a> Parser<'a> { let right = self.parse_subexpr(precedence)?; self.expect_token(&Token::RParen)?; - let right = match keyword { - Keyword::ALL => Box::new(Expr::AllOp(Box::new(right))), - Keyword::ANY => Box::new(Expr::AnyOp(Box::new(right))), - _ => unreachable!(), + if !matches!( + op, + BinaryOperator::Gt + | BinaryOperator::Lt + | BinaryOperator::GtEq + | BinaryOperator::LtEq + | BinaryOperator::Eq + | BinaryOperator::NotEq + ) { + return parser_err!(format!( + "Expected one of [=, >, <, =>, =<, !=] as comparison operator, found: {op}" + )); }; - Ok(Expr::BinaryOp { - left: Box::new(expr), - op, - right, + Ok(match keyword { + Keyword::ALL => Expr::AllOp { + left: Box::new(expr), + compare_op: op, + right: Box::new(right), + }, + Keyword::ANY => Expr::AnyOp { + left: Box::new(expr), + compare_op: op, + right: Box::new(right), + }, + _ => unreachable!(), }) } else { Ok(Expr::BinaryOp { diff 
--git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 45074336..a9c4130b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1558,10 +1558,10 @@ fn parse_bitwise_ops() { fn parse_binary_any() { let select = verified_only_select("SELECT a = ANY(b)"); assert_eq!( - SelectItem::UnnamedExpr(Expr::BinaryOp { + SelectItem::UnnamedExpr(Expr::AnyOp { left: Box::new(Expr::Identifier(Ident::new("a"))), - op: BinaryOperator::Eq, - right: Box::new(Expr::AnyOp(Box::new(Expr::Identifier(Ident::new("b"))))), + compare_op: BinaryOperator::Eq, + right: Box::new(Expr::Identifier(Ident::new("b"))), }), select.projection[0] ); @@ -1571,10 +1571,10 @@ fn parse_binary_any() { fn parse_binary_all() { let select = verified_only_select("SELECT a = ALL(b)"); assert_eq!( - SelectItem::UnnamedExpr(Expr::BinaryOp { + SelectItem::UnnamedExpr(Expr::AllOp { left: Box::new(Expr::Identifier(Ident::new("a"))), - op: BinaryOperator::Eq, - right: Box::new(Expr::AllOp(Box::new(Expr::Identifier(Ident::new("b"))))), + compare_op: BinaryOperator::Eq, + right: Box::new(Expr::Identifier(Ident::new("b"))), }), select.projection[0] ); From 25e037c50fc46bc061126d583bc64c2da7c6cabf Mon Sep 17 00:00:00 2001 From: Forbes Lindesay Date: Thu, 7 Sep 2023 21:39:47 +0100 Subject: [PATCH 046/109] feat: allow multiple actions in one `ALTER TABLE` statement (#960) --- src/ast/mod.rs | 24 ++- src/parser/mod.rs | 342 ++++++++++++++++++----------------- src/test_utils.rs | 20 ++ tests/sqlparser_common.rs | 179 +++++++----------- tests/sqlparser_mysql.rs | 36 ++-- tests/sqlparser_postgres.rs | 66 +++++-- tests/sqlparser_snowflake.rs | 8 +- 7 files changed, 349 insertions(+), 326 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 41d66166..79edea7d 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1407,7 +1407,9 @@ pub enum Statement { /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] name: ObjectName, - operation: AlterTableOperation, + 
if_exists: bool, + only: bool, + operations: Vec, }, AlterIndex { name: ObjectName, @@ -2618,8 +2620,24 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::AlterTable { name, operation } => { - write!(f, "ALTER TABLE {name} {operation}") + Statement::AlterTable { + name, + if_exists, + only, + operations, + } => { + write!(f, "ALTER TABLE ")?; + if *if_exists { + write!(f, "IF EXISTS ")?; + } + if *only { + write!(f, "ONLY ")?; + } + write!( + f, + "{name} {operations}", + operations = display_comma_separated(operations) + ) } Statement::AlterIndex { name, operation } => { write!(f, "ALTER INDEX {name} {operation}") diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9de67a51..b872bbfc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4081,6 +4081,174 @@ impl<'a> Parser<'a> { Ok(SqlOption { name, value }) } + pub fn parse_alter_table_operation(&mut self) -> Result { + let operation = if self.parse_keyword(Keyword::ADD) { + if let Some(constraint) = self.parse_optional_table_constraint()? 
{ + AlterTableOperation::AddConstraint(constraint) + } else { + let if_not_exists = + self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::AddPartitions { + if_not_exists, + new_partitions: partitions, + } + } else { + let column_keyword = self.parse_keyword(Keyword::COLUMN); + + let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | DuckDbDialect | GenericDialect) + { + self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]) + || if_not_exists + } else { + false + }; + + let column_def = self.parse_column_def()?; + AlterTableOperation::AddColumn { + column_keyword, + if_not_exists, + column_def, + } + } + } + } else if self.parse_keyword(Keyword::RENAME) { + if dialect_of!(self is PostgreSqlDialect) && self.parse_keyword(Keyword::CONSTRAINT) { + let old_name = self.parse_identifier()?; + self.expect_keyword(Keyword::TO)?; + let new_name = self.parse_identifier()?; + AlterTableOperation::RenameConstraint { old_name, new_name } + } else if self.parse_keyword(Keyword::TO) { + let table_name = self.parse_object_name()?; + AlterTableOperation::RenameTable { table_name } + } else { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let old_column_name = self.parse_identifier()?; + self.expect_keyword(Keyword::TO)?; + let new_column_name = self.parse_identifier()?; + AlterTableOperation::RenameColumn { + old_column_name, + new_column_name, + } + } + } else if self.parse_keyword(Keyword::DROP) { + if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: true, + } + } else 
if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: false, + } + } else if self.parse_keyword(Keyword::CONSTRAINT) { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier()?; + let cascade = self.parse_keyword(Keyword::CASCADE); + AlterTableOperation::DropConstraint { + if_exists, + name, + cascade, + } + } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) + && dialect_of!(self is MySqlDialect | GenericDialect) + { + AlterTableOperation::DropPrimaryKey + } else { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let column_name = self.parse_identifier()?; + let cascade = self.parse_keyword(Keyword::CASCADE); + AlterTableOperation::DropColumn { + column_name, + if_exists, + cascade, + } + } + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let before = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::RENAME)?; + self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; + self.expect_token(&Token::LParen)?; + let renames = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::RenamePartitions { + old_partitions: before, + new_partitions: renames, + } + } else if self.parse_keyword(Keyword::CHANGE) { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let old_name = self.parse_identifier()?; + let new_name = self.parse_identifier()?; + let data_type = self.parse_data_type()?; + let mut options = vec![]; + while let Some(option) = self.parse_optional_column_option()? 
{ + options.push(option); + } + + AlterTableOperation::ChangeColumn { + old_name, + new_name, + data_type, + options, + } + } else if self.parse_keyword(Keyword::ALTER) { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let column_name = self.parse_identifier()?; + let is_postgresql = dialect_of!(self is PostgreSqlDialect); + + let op = if self.parse_keywords(&[Keyword::SET, Keyword::NOT, Keyword::NULL]) { + AlterColumnOperation::SetNotNull {} + } else if self.parse_keywords(&[Keyword::DROP, Keyword::NOT, Keyword::NULL]) { + AlterColumnOperation::DropNotNull {} + } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { + AlterColumnOperation::SetDefault { + value: self.parse_expr()?, + } + } else if self.parse_keywords(&[Keyword::DROP, Keyword::DEFAULT]) { + AlterColumnOperation::DropDefault {} + } else if self.parse_keywords(&[Keyword::SET, Keyword::DATA, Keyword::TYPE]) + || (is_postgresql && self.parse_keyword(Keyword::TYPE)) + { + let data_type = self.parse_data_type()?; + let using = if is_postgresql && self.parse_keyword(Keyword::USING) { + Some(self.parse_expr()?) 
+ } else { + None + }; + AlterColumnOperation::SetDataType { data_type, using } + } else { + return self.expected( + "SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE after ALTER COLUMN", + self.peek_token(), + ); + }; + AlterTableOperation::AlterColumn { column_name, op } + } else if self.parse_keyword(Keyword::SWAP) { + self.expect_keyword(Keyword::WITH)?; + let table_name = self.parse_object_name()?; + AlterTableOperation::SwapWith { table_name } + } else { + return self.expected( + "ADD, RENAME, PARTITION, SWAP or DROP after ALTER TABLE", + self.peek_token(), + ); + }; + Ok(operation) + } + pub fn parse_alter(&mut self) -> Result { let object_type = self.expect_one_of_keywords(&[ Keyword::VIEW, @@ -4091,177 +4259,15 @@ impl<'a> Parser<'a> { match object_type { Keyword::VIEW => self.parse_alter_view(), Keyword::TABLE => { - let _ = self.parse_keyword(Keyword::ONLY); // [ ONLY ] + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let only = self.parse_keyword(Keyword::ONLY); // [ ONLY ] let table_name = self.parse_object_name()?; - let operation = if self.parse_keyword(Keyword::ADD) { - if let Some(constraint) = self.parse_optional_table_constraint()? 
{ - AlterTableOperation::AddConstraint(constraint) - } else { - let if_not_exists = - self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::AddPartitions { - if_not_exists, - new_partitions: partitions, - } - } else { - let column_keyword = self.parse_keyword(Keyword::COLUMN); - - let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | DuckDbDialect | GenericDialect) - { - self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]) - || if_not_exists - } else { - false - }; - - let column_def = self.parse_column_def()?; - AlterTableOperation::AddColumn { - column_keyword, - if_not_exists, - column_def, - } - } - } - } else if self.parse_keyword(Keyword::RENAME) { - if dialect_of!(self is PostgreSqlDialect) - && self.parse_keyword(Keyword::CONSTRAINT) - { - let old_name = self.parse_identifier()?; - self.expect_keyword(Keyword::TO)?; - let new_name = self.parse_identifier()?; - AlterTableOperation::RenameConstraint { old_name, new_name } - } else if self.parse_keyword(Keyword::TO) { - let table_name = self.parse_object_name()?; - AlterTableOperation::RenameTable { table_name } - } else { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let old_column_name = self.parse_identifier()?; - self.expect_keyword(Keyword::TO)?; - let new_column_name = self.parse_identifier()?; - AlterTableOperation::RenameColumn { - old_column_name, - new_column_name, - } - } - } else if self.parse_keyword(Keyword::DROP) { - if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::DropPartitions { - partitions, - if_exists: true, - } - } 
else if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::DropPartitions { - partitions, - if_exists: false, - } - } else if self.parse_keyword(Keyword::CONSTRAINT) { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier()?; - let cascade = self.parse_keyword(Keyword::CASCADE); - AlterTableOperation::DropConstraint { - if_exists, - name, - cascade, - } - } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - AlterTableOperation::DropPrimaryKey - } else { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let column_name = self.parse_identifier()?; - let cascade = self.parse_keyword(Keyword::CASCADE); - AlterTableOperation::DropColumn { - column_name, - if_exists, - cascade, - } - } - } else if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let before = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - self.expect_keyword(Keyword::RENAME)?; - self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; - self.expect_token(&Token::LParen)?; - let renames = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::RenamePartitions { - old_partitions: before, - new_partitions: renames, - } - } else if self.parse_keyword(Keyword::CHANGE) { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let old_name = self.parse_identifier()?; - let new_name = self.parse_identifier()?; - let data_type = self.parse_data_type()?; - let mut options = vec![]; - while let Some(option) = self.parse_optional_column_option()? 
{ - options.push(option); - } - - AlterTableOperation::ChangeColumn { - old_name, - new_name, - data_type, - options, - } - } else if self.parse_keyword(Keyword::ALTER) { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let column_name = self.parse_identifier()?; - let is_postgresql = dialect_of!(self is PostgreSqlDialect); - - let op = if self.parse_keywords(&[Keyword::SET, Keyword::NOT, Keyword::NULL]) { - AlterColumnOperation::SetNotNull {} - } else if self.parse_keywords(&[Keyword::DROP, Keyword::NOT, Keyword::NULL]) { - AlterColumnOperation::DropNotNull {} - } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { - AlterColumnOperation::SetDefault { - value: self.parse_expr()?, - } - } else if self.parse_keywords(&[Keyword::DROP, Keyword::DEFAULT]) { - AlterColumnOperation::DropDefault {} - } else if self.parse_keywords(&[Keyword::SET, Keyword::DATA, Keyword::TYPE]) - || (is_postgresql && self.parse_keyword(Keyword::TYPE)) - { - let data_type = self.parse_data_type()?; - let using = if is_postgresql && self.parse_keyword(Keyword::USING) { - Some(self.parse_expr()?) 
- } else { - None - }; - AlterColumnOperation::SetDataType { data_type, using } - } else { - return self.expected( - "SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE after ALTER COLUMN", - self.peek_token(), - ); - }; - AlterTableOperation::AlterColumn { column_name, op } - } else if self.parse_keyword(Keyword::SWAP) { - self.expect_keyword(Keyword::WITH)?; - let table_name = self.parse_object_name()?; - AlterTableOperation::SwapWith { table_name } - } else { - return self.expected( - "ADD, RENAME, PARTITION, SWAP or DROP after ALTER TABLE", - self.peek_token(), - ); - }; + let operations = self.parse_comma_separated(Parser::parse_alter_table_operation)?; Ok(Statement::AlterTable { name: table_name, - operation, + if_exists, + only, + operations, }) } Keyword::INDEX => { diff --git a/src/test_utils.rs b/src/test_utils.rs index a5e9e739..0db1e7d2 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -209,6 +209,26 @@ pub fn expr_from_projection(item: &SelectItem) -> &Expr { } } +pub fn alter_table_op_with_name(stmt: Statement, expected_name: &str) -> AlterTableOperation { + match stmt { + Statement::AlterTable { + name, + if_exists, + only: is_only, + operations, + } => { + assert_eq!(name.to_string(), expected_name); + assert!(!if_exists); + assert!(!is_only); + only(operations) + } + _ => panic!("Expected ALTER TABLE statement"), + } +} +pub fn alter_table_op(stmt: Statement) -> AlterTableOperation { + alter_table_op_with_name(stmt, "tab") +} + /// Creates a `Value::Number`, panic'ing if n is not a number pub fn number(n: &str) -> Value { Value::Number(n.parse().unwrap(), false) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a9c4130b..1bf108cb 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -30,8 +30,8 @@ use sqlparser::dialect::{ use sqlparser::keywords::ALL_KEYWORDS; use sqlparser::parser::{Parser, ParserError, ParserOptions}; use test_utils::{ - all_dialects, assert_eq_vec, expr_from_projection, join, 
number, only, table, table_alias, - TestedDialects, + all_dialects, alter_table_op, assert_eq_vec, expr_from_projection, join, number, only, table, + table_alias, TestedDialects, }; #[macro_use] @@ -2920,19 +2920,17 @@ fn parse_create_external_table_lowercase() { #[test] fn parse_alter_table() { let add_column = "ALTER TABLE tab ADD COLUMN foo TEXT;"; - match one_statement_parses_to(add_column, "ALTER TABLE tab ADD COLUMN foo TEXT") { - Statement::AlterTable { - name, - operation: - AlterTableOperation::AddColumn { - column_keyword, - if_not_exists, - column_def, - }, + match alter_table_op(one_statement_parses_to( + add_column, + "ALTER TABLE tab ADD COLUMN foo TEXT", + )) { + AlterTableOperation::AddColumn { + column_keyword, + if_not_exists, + column_def, } => { assert!(column_keyword); assert!(!if_not_exists); - assert_eq!("tab", name.to_string()); assert_eq!("foo", column_def.name.to_string()); assert_eq!("TEXT", column_def.data_type.to_string()); } @@ -2940,28 +2938,19 @@ fn parse_alter_table() { }; let rename_table = "ALTER TABLE tab RENAME TO new_tab"; - match verified_stmt(rename_table) { - Statement::AlterTable { - name, - operation: AlterTableOperation::RenameTable { table_name }, - } => { - assert_eq!("tab", name.to_string()); - assert_eq!("new_tab", table_name.to_string()) + match alter_table_op(verified_stmt(rename_table)) { + AlterTableOperation::RenameTable { table_name } => { + assert_eq!("new_tab", table_name.to_string()); } _ => unreachable!(), }; let rename_column = "ALTER TABLE tab RENAME COLUMN foo TO new_foo"; - match verified_stmt(rename_column) { - Statement::AlterTable { - name, - operation: - AlterTableOperation::RenameColumn { - old_column_name, - new_column_name, - }, + match alter_table_op(verified_stmt(rename_column)) { + AlterTableOperation::RenameColumn { + old_column_name, + new_column_name, } => { - assert_eq!("tab", name.to_string()); assert_eq!(old_column_name.to_string(), "foo"); assert_eq!(new_column_name.to_string(), 
"new_foo"); } @@ -3047,21 +3036,15 @@ fn parse_alter_view_with_columns() { #[test] fn parse_alter_table_add_column() { - match verified_stmt("ALTER TABLE tab ADD foo TEXT") { - Statement::AlterTable { - operation: AlterTableOperation::AddColumn { column_keyword, .. }, - .. - } => { + match alter_table_op(verified_stmt("ALTER TABLE tab ADD foo TEXT")) { + AlterTableOperation::AddColumn { column_keyword, .. } => { assert!(!column_keyword); } _ => unreachable!(), }; - match verified_stmt("ALTER TABLE tab ADD COLUMN foo TEXT") { - Statement::AlterTable { - operation: AlterTableOperation::AddColumn { column_keyword, .. }, - .. - } => { + match alter_table_op(verified_stmt("ALTER TABLE tab ADD COLUMN foo TEXT")) { + AlterTableOperation::AddColumn { column_keyword, .. } => { assert!(column_keyword); } _ => unreachable!(), @@ -3080,24 +3063,19 @@ fn parse_alter_table_add_column_if_not_exists() { options: None, }; - match dialects.verified_stmt("ALTER TABLE tab ADD IF NOT EXISTS foo TEXT") { - Statement::AlterTable { - operation: AlterTableOperation::AddColumn { if_not_exists, .. }, - .. - } => { + match alter_table_op(dialects.verified_stmt("ALTER TABLE tab ADD IF NOT EXISTS foo TEXT")) { + AlterTableOperation::AddColumn { if_not_exists, .. } => { assert!(if_not_exists); } _ => unreachable!(), }; - match dialects.verified_stmt("ALTER TABLE tab ADD COLUMN IF NOT EXISTS foo TEXT") { - Statement::AlterTable { - operation: - AlterTableOperation::AddColumn { - column_keyword, - if_not_exists, - .. - }, + match alter_table_op( + dialects.verified_stmt("ALTER TABLE tab ADD COLUMN IF NOT EXISTS foo TEXT"), + ) { + AlterTableOperation::AddColumn { + column_keyword, + if_not_exists, .. 
} => { assert!(column_keyword); @@ -3123,12 +3101,10 @@ fn parse_alter_table_constraints() { check_one("CHECK (end_date > start_date OR end_date IS NULL)"); fn check_one(constraint_text: &str) { - match verified_stmt(&format!("ALTER TABLE tab ADD {constraint_text}")) { - Statement::AlterTable { - name, - operation: AlterTableOperation::AddConstraint(constraint), - } => { - assert_eq!("tab", name.to_string()); + match alter_table_op(verified_stmt(&format!( + "ALTER TABLE tab ADD {constraint_text}" + ))) { + AlterTableOperation::AddConstraint(constraint) => { assert_eq!(constraint_text, constraint.to_string()); } _ => unreachable!(), @@ -3150,17 +3126,12 @@ fn parse_alter_table_drop_column() { ); fn check_one(constraint_text: &str) { - match verified_stmt(&format!("ALTER TABLE tab {constraint_text}")) { - Statement::AlterTable { - name, - operation: - AlterTableOperation::DropColumn { - column_name, - if_exists, - cascade, - }, + match alter_table_op(verified_stmt(&format!("ALTER TABLE tab {constraint_text}"))) { + AlterTableOperation::DropColumn { + column_name, + if_exists, + cascade, } => { - assert_eq!("tab", name.to_string()); assert_eq!("is_active", column_name.to_string()); assert!(if_exists); assert!(cascade); @@ -3173,12 +3144,10 @@ fn parse_alter_table_drop_column() { #[test] fn parse_alter_table_alter_column() { let alter_stmt = "ALTER TABLE tab"; - match verified_stmt(&format!("{alter_stmt} ALTER COLUMN is_active SET NOT NULL")) { - Statement::AlterTable { - name, - operation: AlterTableOperation::AlterColumn { column_name, op }, - } => { - assert_eq!("tab", name.to_string()); + match alter_table_op(verified_stmt(&format!( + "{alter_stmt} ALTER COLUMN is_active SET NOT NULL" + ))) { + AlterTableOperation::AlterColumn { column_name, op } => { assert_eq!("is_active", column_name.to_string()); assert_eq!(op, AlterColumnOperation::SetNotNull {}); } @@ -3190,14 +3159,10 @@ fn parse_alter_table_alter_column() { "ALTER TABLE tab ALTER COLUMN is_active DROP NOT 
NULL", ); - match verified_stmt(&format!( + match alter_table_op(verified_stmt(&format!( "{alter_stmt} ALTER COLUMN is_active SET DEFAULT false" - )) { - Statement::AlterTable { - name, - operation: AlterTableOperation::AlterColumn { column_name, op }, - } => { - assert_eq!("tab", name.to_string()); + ))) { + AlterTableOperation::AlterColumn { column_name, op } => { assert_eq!("is_active", column_name.to_string()); assert_eq!( op, @@ -3209,12 +3174,10 @@ fn parse_alter_table_alter_column() { _ => unreachable!(), } - match verified_stmt(&format!("{alter_stmt} ALTER COLUMN is_active DROP DEFAULT")) { - Statement::AlterTable { - name, - operation: AlterTableOperation::AlterColumn { column_name, op }, - } => { - assert_eq!("tab", name.to_string()); + match alter_table_op(verified_stmt(&format!( + "{alter_stmt} ALTER COLUMN is_active DROP DEFAULT" + ))) { + AlterTableOperation::AlterColumn { column_name, op } => { assert_eq!("is_active", column_name.to_string()); assert_eq!(op, AlterColumnOperation::DropDefault {}); } @@ -3225,12 +3188,10 @@ fn parse_alter_table_alter_column() { #[test] fn parse_alter_table_alter_column_type() { let alter_stmt = "ALTER TABLE tab"; - match verified_stmt("ALTER TABLE tab ALTER COLUMN is_active SET DATA TYPE TEXT") { - Statement::AlterTable { - name, - operation: AlterTableOperation::AlterColumn { column_name, op }, - } => { - assert_eq!("tab", name.to_string()); + match alter_table_op(verified_stmt( + "ALTER TABLE tab ALTER COLUMN is_active SET DATA TYPE TEXT", + )) { + AlterTableOperation::AlterColumn { column_name, op } => { assert_eq!("is_active", column_name.to_string()); assert_eq!( op, @@ -3267,34 +3228,28 @@ fn parse_alter_table_alter_column_type() { #[test] fn parse_alter_table_drop_constraint() { let alter_stmt = "ALTER TABLE tab"; - match verified_stmt("ALTER TABLE tab DROP CONSTRAINT constraint_name CASCADE") { - Statement::AlterTable { - name, - operation: - AlterTableOperation::DropConstraint { - name: constr_name, - 
if_exists, - cascade, - }, + match alter_table_op(verified_stmt( + "ALTER TABLE tab DROP CONSTRAINT constraint_name CASCADE", + )) { + AlterTableOperation::DropConstraint { + name: constr_name, + if_exists, + cascade, } => { - assert_eq!("tab", name.to_string()); assert_eq!("constraint_name", constr_name.to_string()); assert!(!if_exists); assert!(cascade); } _ => unreachable!(), } - match verified_stmt("ALTER TABLE tab DROP CONSTRAINT IF EXISTS constraint_name") { - Statement::AlterTable { - name, - operation: - AlterTableOperation::DropConstraint { - name: constr_name, - if_exists, - cascade, - }, + match alter_table_op(verified_stmt( + "ALTER TABLE tab DROP CONSTRAINT IF EXISTS constraint_name", + )) { + AlterTableOperation::DropConstraint { + name: constr_name, + if_exists, + cascade, } => { - assert_eq!("tab", name.to_string()); assert_eq!("constraint_name", constr_name.to_string()); assert!(if_exists); assert!(!cascade); diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index ab7997cd..7346b9c0 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -14,6 +14,7 @@ //! Test SQL syntax specific to MySQL. The parser based on the generic dialect //! is also tested (on the inputs it can handle). 
+use matches::assert_matches; use sqlparser::ast::Expr; use sqlparser::ast::Value; use sqlparser::ast::*; @@ -1256,15 +1257,10 @@ fn parse_update_with_joins() { #[test] fn parse_alter_table_drop_primary_key() { - match mysql_and_generic().verified_stmt("ALTER TABLE tab DROP PRIMARY KEY") { - Statement::AlterTable { - name, - operation: AlterTableOperation::DropPrimaryKey, - } => { - assert_eq!("tab", name.to_string()); - } - _ => unreachable!(), - } + assert_matches!( + alter_table_op(mysql_and_generic().verified_stmt("ALTER TABLE tab DROP PRIMARY KEY")), + AlterTableOperation::DropPrimaryKey + ); } #[test] @@ -1278,22 +1274,16 @@ fn parse_alter_table_change_column() { }; let sql1 = "ALTER TABLE orders CHANGE COLUMN description desc TEXT NOT NULL"; - match mysql().verified_stmt(sql1) { - Statement::AlterTable { name, operation } => { - assert_eq!(expected_name, name); - assert_eq!(expected_operation, operation); - } - _ => unreachable!(), - } + let operation = + alter_table_op_with_name(mysql().verified_stmt(sql1), &expected_name.to_string()); + assert_eq!(expected_operation, operation); let sql2 = "ALTER TABLE orders CHANGE description desc TEXT NOT NULL"; - match mysql().one_statement_parses_to(sql2, sql1) { - Statement::AlterTable { name, operation } => { - assert_eq!(expected_name, name); - assert_eq!(expected_operation, operation); - } - _ => unreachable!(), - } + let operation = alter_table_op_with_name( + mysql().one_statement_parses_to(sql2, sql1), + &expected_name.to_string(), + ); + assert_eq!(expected_operation, operation); } #[test] diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index c34ba75a..093fac8c 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -546,12 +546,10 @@ fn parse_create_table_constraints_only() { #[test] fn parse_alter_table_constraints_rename() { - match pg().verified_stmt("ALTER TABLE tab RENAME CONSTRAINT old_name TO new_name") { - Statement::AlterTable { - name, - operation: 
AlterTableOperation::RenameConstraint { old_name, new_name }, - } => { - assert_eq!("tab", name.to_string()); + match alter_table_op( + pg().verified_stmt("ALTER TABLE tab RENAME CONSTRAINT old_name TO new_name"), + ) { + AlterTableOperation::RenameConstraint { old_name, new_name } => { assert_eq!(old_name.to_string(), "old_name"); assert_eq!(new_name.to_string(), "new_name"); } @@ -566,14 +564,12 @@ fn parse_alter_table_alter_column() { "ALTER TABLE tab ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'", ); - match pg() - .verified_stmt("ALTER TABLE tab ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'") - { - Statement::AlterTable { - name, - operation: AlterTableOperation::AlterColumn { column_name, op }, - } => { - assert_eq!("tab", name.to_string()); + match alter_table_op( + pg().verified_stmt( + "ALTER TABLE tab ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'", + ), + ) { + AlterTableOperation::AlterColumn { column_name, op } => { assert_eq!("is_active", column_name.to_string()); let using_expr = Expr::Value(Value::SingleQuotedString("text".to_string())); assert_eq!( @@ -588,6 +584,48 @@ fn parse_alter_table_alter_column() { } } +#[test] +fn parse_alter_table_add_columns() { + match pg().verified_stmt("ALTER TABLE IF EXISTS ONLY tab ADD COLUMN a TEXT, ADD COLUMN b INT") { + Statement::AlterTable { + name, + if_exists, + only, + operations, + } => { + assert_eq!(name.to_string(), "tab"); + assert!(if_exists); + assert!(only); + assert_eq!( + operations, + vec![ + AlterTableOperation::AddColumn { + column_keyword: true, + if_not_exists: false, + column_def: ColumnDef { + name: "a".into(), + data_type: DataType::Text, + collation: None, + options: vec![], + }, + }, + AlterTableOperation::AddColumn { + column_keyword: true, + if_not_exists: false, + column_def: ColumnDef { + name: "b".into(), + data_type: DataType::Int(None), + collation: None, + options: vec![], + }, + }, + ] + ); + } + _ => unreachable!(), + } +} + #[test] fn 
parse_create_table_if_not_exists() { let sql = "CREATE TABLE IF NOT EXISTS uk_cities ()"; diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index ecd60852..dabe66e1 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -502,12 +502,8 @@ fn test_select_wildcard_with_exclude_and_rename() { #[test] fn test_alter_table_swap_with() { let sql = "ALTER TABLE tab1 SWAP WITH tab2"; - match snowflake_and_generic().verified_stmt(sql) { - Statement::AlterTable { - name, - operation: AlterTableOperation::SwapWith { table_name }, - } => { - assert_eq!("tab1", name.to_string()); + match alter_table_op_with_name(snowflake_and_generic().verified_stmt(sql), "tab1") { + AlterTableOperation::SwapWith { table_name } => { assert_eq!("tab2", table_name.to_string()); } _ => unreachable!(), From 2593dcfb79bb8709a6014673d47ffefcf9a68e20 Mon Sep 17 00:00:00 2001 From: ding-young Date: Fri, 8 Sep 2023 19:12:44 +0900 Subject: [PATCH 047/109] Add missing token loc in parse err msg (#965) --- src/parser/mod.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b872bbfc..348267fd 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1791,9 +1791,12 @@ impl<'a> Parser<'a> { | BinaryOperator::Eq | BinaryOperator::NotEq ) { - return parser_err!(format!( + return parser_err!( + format!( "Expected one of [=, >, <, =>, =<, !=] as comparison operator, found: {op}" - )); + ), + tok.location + ); }; Ok(match keyword { From bb7b05e106496c204b96d5371ed91bfb1edef728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Berkay=20=C5=9Eahin?= <124376117+berkaysynnada@users.noreply.github.com> Date: Fri, 8 Sep 2023 13:47:56 +0300 Subject: [PATCH 048/109] feat: Group By All (#964) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 2 +- src/ast/query.rs | 37 ++++++++++++++++++++++++++++++++--- src/parser/mod.rs | 8 ++++++-- tests/sqlparser_clickhouse.rs | 2 +- tests/sqlparser_common.rs | 26 
++++++++++++++++++------ tests/sqlparser_duckdb.rs | 8 ++++---- tests/sqlparser_mssql.rs | 4 ++-- tests/sqlparser_mysql.rs | 16 +++++++-------- tests/sqlparser_postgres.rs | 18 ++++++++--------- 9 files changed, 85 insertions(+), 36 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 79edea7d..eb8830bb 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -36,7 +36,7 @@ pub use self::ddl::{ }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ - Cte, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, IdentWithAlias, Join, + Cte, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, GroupByExpr, IdentWithAlias, Join, JoinConstraint, JoinOperator, LateralView, LockClause, LockType, NamedWindowDefinition, NonBlock, Offset, OffsetRows, OrderByExpr, Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table, diff --git a/src/ast/query.rs b/src/ast/query.rs index b7001765..c9fcdecc 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -214,7 +214,7 @@ pub struct Select { /// WHERE pub selection: Option, /// GROUP BY - pub group_by: Vec, + pub group_by: GroupByExpr, /// CLUSTER BY (Hive) pub cluster_by: Vec, /// DISTRIBUTE BY (Hive) @@ -255,8 +255,13 @@ impl fmt::Display for Select { if let Some(ref selection) = self.selection { write!(f, " WHERE {selection}")?; } - if !self.group_by.is_empty() { - write!(f, " GROUP BY {}", display_comma_separated(&self.group_by))?; + match &self.group_by { + GroupByExpr::All => write!(f, " GROUP BY ALL")?, + GroupByExpr::Expressions(exprs) => { + if !exprs.is_empty() { + write!(f, " GROUP BY {}", display_comma_separated(exprs))?; + } + } } if !self.cluster_by.is_empty() { write!( @@ -1218,3 +1223,29 @@ impl fmt::Display for SelectInto { write!(f, "INTO{}{}{} {}", temporary, unlogged, table, self.name) } } + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", 
derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum GroupByExpr { + /// ALL syntax of [Snowflake], and [DuckDB] + /// + /// [Snowflake]: + /// [DuckDB]: + All, + + /// Expressions + Expressions(Vec), +} + +impl fmt::Display for GroupByExpr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + GroupByExpr::All => write!(f, "GROUP BY ALL"), + GroupByExpr::Expressions(col_names) => { + let col_names = display_comma_separated(col_names); + write!(f, "GROUP BY ({col_names})") + } + } + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 348267fd..6f81e1e4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5662,9 +5662,13 @@ impl<'a> Parser<'a> { }; let group_by = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_group_by_expr)? + if self.parse_keyword(Keyword::ALL) { + GroupByExpr::All + } else { + GroupByExpr::Expressions(self.parse_comma_separated(Parser::parse_group_by_expr)?) 
+ } } else { - vec![] + GroupByExpr::Expressions(vec![]) }; let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 77b936d5..5241777e 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -97,7 +97,7 @@ fn parse_map_access_expr() { right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))) }) }), - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1bf108cb..be10914d 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -247,7 +247,9 @@ fn parse_update_set_from() { }], lateral_views: vec![], selection: None, - group_by: vec![Expr::Identifier(Ident::new("id"))], + group_by: GroupByExpr::Expressions(vec![Expr::Identifier(Ident::new( + "id" + ))]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1808,10 +1810,10 @@ fn parse_select_group_by() { let sql = "SELECT id, fname, lname FROM customer GROUP BY lname, fname"; let select = verified_only_select(sql); assert_eq!( - vec![ + GroupByExpr::Expressions(vec![ Expr::Identifier(Ident::new("lname")), Expr::Identifier(Ident::new("fname")), - ], + ]), select.group_by ); @@ -1822,6 +1824,18 @@ fn parse_select_group_by() { ); } +#[test] +fn parse_select_group_by_all() { + let sql = "SELECT id, fname, lname, SUM(order) FROM customer GROUP BY ALL"; + let select = verified_only_select(sql); + assert_eq!(GroupByExpr::All, select.group_by); + + one_statement_parses_to( + "SELECT id, fname, lname, SUM(order) FROM customer GROUP BY ALL", + "SELECT id, fname, lname, SUM(order) FROM customer GROUP BY ALL", + ); +} + #[test] fn parse_select_having() { let sql = "SELECT foo FROM bar GROUP BY foo HAVING COUNT(*) > 1"; @@ -3543,7 +3557,7 @@ fn test_parse_named_window() { }], lateral_views: vec![], selection: None, - 
group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -3930,7 +3944,7 @@ fn parse_interval_and_or_xor() { }), }), }), - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -6333,7 +6347,7 @@ fn parse_merge() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 3587e8d9..0fe4bb0e 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -161,7 +161,7 @@ fn test_select_union_by_name() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -194,7 +194,7 @@ fn test_select_union_by_name() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -236,7 +236,7 @@ fn test_select_union_by_name() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -269,7 +269,7 @@ fn test_select_union_by_name() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index c4e0f327..9ed12ac2 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -103,7 +103,7 @@ fn parse_create_procedure() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -519,7 +519,7 @@ fn 
parse_substring_in_select() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 7346b9c0..00901799 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -496,7 +496,7 @@ fn parse_escaped_quote_identifiers_with_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -538,7 +538,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -577,7 +577,7 @@ fn parse_escaped_backticks_with_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -616,7 +616,7 @@ fn parse_escaped_backticks_with_no_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1100,7 +1100,7 @@ fn parse_select_with_numeric_prefix_column_name() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1149,7 +1149,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1325,7 +1325,7 @@ fn parse_substring_in_select() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: 
GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1604,7 +1604,7 @@ fn parse_hex_string_introducer() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 093fac8c..079e4695 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -990,7 +990,7 @@ fn parse_copy_to() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), having: None, named_window: vec![], cluster_by: vec![], @@ -2019,7 +2019,7 @@ fn parse_array_subquery_expr() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -2035,7 +2035,7 @@ fn parse_array_subquery_expr() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -3321,14 +3321,14 @@ fn parse_select_group_by_grouping_sets() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, GROUPING SETS ((brand), (size), ())" ); assert_eq!( - vec![ + GroupByExpr::Expressions(vec![ Expr::Identifier(Ident::new("size")), Expr::GroupingSets(vec![ vec![Expr::Identifier(Ident::new("brand"))], vec![Expr::Identifier(Ident::new("size"))], vec![], ]), - ], + ]), select.group_by ); } @@ -3339,13 +3339,13 @@ fn parse_select_group_by_rollup() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, ROLLUP (brand, size)", ); assert_eq!( - vec![ + GroupByExpr::Expressions(vec![ Expr::Identifier(Ident::new("size")), Expr::Rollup(vec![ vec![Expr::Identifier(Ident::new("brand"))], vec![Expr::Identifier(Ident::new("size"))], ]), - ], + ]), select.group_by ); } @@ 
-3356,13 +3356,13 @@ fn parse_select_group_by_cube() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, CUBE (brand, size)", ); assert_eq!( - vec![ + GroupByExpr::Expressions(vec![ Expr::Identifier(Ident::new("size")), Expr::Cube(vec![ vec![Expr::Identifier(Ident::new("brand"))], vec![Expr::Identifier(Ident::new("size"))], ]), - ], + ]), select.group_by ); } From 0480ee9886e73affaebe330844cd5c26e0e8e1b0 Mon Sep 17 00:00:00 2001 From: artorias1024 <82564604+artorias1024@users.noreply.github.com> Date: Fri, 8 Sep 2023 18:58:31 +0800 Subject: [PATCH 049/109] feat: Add support for parsing the syntax of MySQL UNIQUE KEY. (#962) Co-authored-by: yukunpeng Co-authored-by: Andrew Lamb --- src/parser/mod.rs | 12 ++++++--- tests/sqlparser_mysql.rs | 56 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6f81e1e4..4d642919 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3941,9 +3941,15 @@ impl<'a> Parser<'a> { match next_token.token { Token::Word(w) if w.keyword == Keyword::PRIMARY || w.keyword == Keyword::UNIQUE => { let is_primary = w.keyword == Keyword::PRIMARY; - if is_primary { - self.expect_keyword(Keyword::KEY)?; - } + + // parse optional [KEY] + let _ = self.parse_keyword(Keyword::KEY); + + // optional constraint name + let name = self + .maybe_parse(|parser| parser.parse_identifier()) + .or(name); + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; Ok(Some(TableConstraint::Unique { name, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 00901799..5aeffb0b 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -297,6 +297,62 @@ fn parse_create_table_auto_increment() { } } +#[test] +fn parse_create_table_unique_key() { + let sql = "CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, UNIQUE KEY bar_key (bar))"; + let canonical = "CREATE TABLE foo (id INT PRIMARY KEY 
AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key UNIQUE (bar))"; + match mysql().one_statement_parses_to(sql, canonical) { + Statement::CreateTable { + name, + columns, + constraints, + .. + } => { + assert_eq!(name.to_string(), "foo"); + assert_eq!( + vec![TableConstraint::Unique { + name: Some(Ident::new("bar_key")), + columns: vec![Ident::new("bar")], + is_primary: false + }], + constraints + ); + assert_eq!( + vec![ + ColumnDef { + name: Ident::new("id"), + data_type: DataType::Int(None), + collation: None, + options: vec![ + ColumnOptionDef { + name: None, + option: ColumnOption::Unique { is_primary: true }, + }, + ColumnOptionDef { + name: None, + option: ColumnOption::DialectSpecific(vec![Token::make_keyword( + "AUTO_INCREMENT" + )]), + }, + ], + }, + ColumnDef { + name: Ident::new("bar"), + data_type: DataType::Int(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::NotNull, + },], + }, + ], + columns + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_table_comment() { let canonical = "CREATE TABLE foo (bar INT) COMMENT 'baz'"; From a16791d0199633333909044b2e56ca21d96ac83b Mon Sep 17 00:00:00 2001 From: William Date: Thu, 14 Sep 2023 13:56:49 -0400 Subject: [PATCH 050/109] Support `UNNEST` as a table factor for PostgreSQL (#968) --- src/parser/mod.rs | 2 +- tests/sqlparser_postgres.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4d642919..11f96787 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6240,7 +6240,7 @@ impl<'a> Parser<'a> { // appearing alone in parentheses (e.g. 
`FROM (mytable)`) self.expected("joined table", self.peek_token()) } - } else if dialect_of!(self is BigQueryDialect | GenericDialect) + } else if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) && self.parse_keyword(Keyword::UNNEST) { self.expect_token(&Token::LParen)?; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 079e4695..77e5ba45 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3450,3 +3450,30 @@ fn parse_create_table_with_alias() { _ => unreachable!(), } } + +#[test] +fn parse_join_constraint_unnest_alias() { + assert_eq!( + only( + pg().verified_only_select("SELECT * FROM t1 JOIN UNNEST(t1.a) AS f ON c1 = c2") + .from + ) + .joins, + vec![Join { + relation: TableFactor::UNNEST { + alias: table_alias("f"), + array_exprs: vec![Expr::CompoundIdentifier(vec![ + Ident::new("t1"), + Ident::new("a") + ])], + with_offset: false, + with_offset_alias: None + }, + join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { + left: Box::new(Expr::Identifier("c1".into())), + op: BinaryOperator::Eq, + right: Box::new(Expr::Identifier("c2".into())), + })), + }] + ); +} From f6e4be4c154e81cf457249a310004e95d16ecf6f Mon Sep 17 00:00:00 2001 From: "chunshao.rcs" Date: Fri, 15 Sep 2023 02:21:47 +0800 Subject: [PATCH 051/109] Support mysql `partition` to table selection (#959) Co-authored-by: Andrew Lamb --- src/ast/query.rs | 6 +++++ src/keywords.rs | 2 ++ src/parser/mod.rs | 41 +++++++++++++++++++++++++++++++++++ src/test_utils.rs | 1 + tests/sqlparser_bigquery.rs | 2 ++ tests/sqlparser_clickhouse.rs | 16 ++++++++------ tests/sqlparser_common.rs | 29 +++++++++++++++++++++++++ tests/sqlparser_duckdb.rs | 4 ++++ tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 3 +++ tests/sqlparser_mysql.rs | 5 +++++ tests/sqlparser_postgres.rs | 1 + tests/sqlparser_redshift.rs | 3 +++ tests/sqlparser_snowflake.rs | 1 + 14 files changed, 108 insertions(+), 7 deletions(-) diff --git 
a/src/ast/query.rs b/src/ast/query.rs index c9fcdecc..af35c37a 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -670,6 +670,8 @@ pub enum TableFactor { /// Optional version qualifier to facilitate table time-travel, as /// supported by BigQuery and MSSQL. version: Option, + /// [Partition selection](https://dev.mysql.com/doc/refman/8.0/en/partitioning-selection.html), supported by MySQL. + partitions: Vec, }, Derived { lateral: bool, @@ -730,8 +732,12 @@ impl fmt::Display for TableFactor { args, with_hints, version, + partitions, } => { write!(f, "{name}")?; + if !partitions.is_empty() { + write!(f, "PARTITION ({})", display_comma_separated(partitions))?; + } if let Some(args) = args { write!(f, "({})", display_comma_separated(args))?; } diff --git a/src/keywords.rs b/src/keywords.rs index c73535fc..ad0526cc 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -716,6 +716,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::QUALIFY, Keyword::WINDOW, Keyword::END, + // for MYSQL PARTITION SELECTION + Keyword::PARTITION, ]; /// Can't be used as a column alias, so that `SELECT alias` diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 11f96787..ba8f5784 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6277,6 +6277,14 @@ impl<'a> Parser<'a> { } else { let name = self.parse_object_name()?; + let partitions: Vec = if dialect_of!(self is MySqlDialect | GenericDialect) + && self.parse_keyword(Keyword::PARTITION) + { + self.parse_partitions()? 
+ } else { + vec![] + }; + // Parse potential version qualifier let version = self.parse_table_version()?; @@ -6311,6 +6319,7 @@ impl<'a> Parser<'a> { args, with_hints, version, + partitions, }) } } @@ -7483,6 +7492,13 @@ impl<'a> Parser<'a> { representation: UserDefinedTypeRepresentation::Composite { attributes }, }) } + + fn parse_partitions(&mut self) -> Result, ParserError> { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_identifier)?; + self.expect_token(&Token::RParen)?; + Ok(partitions) + } } impl Word { @@ -8100,4 +8116,29 @@ mod tests { "sql parser error: Unexpected token following period in identifier: *", ); } + + #[test] + fn test_mysql_partition_selection() { + let sql = "SELECT * FROM employees PARTITION (p0, p2)"; + let expected = vec!["p0", "p2"]; + + let ast: Vec = Parser::parse_sql(&MySqlDialect {}, sql).unwrap(); + assert_eq!(ast.len(), 1); + if let Statement::Query(v) = &ast[0] { + if let SetExpr::Select(select) = &*v.body { + assert_eq!(select.from.len(), 1); + let from: &TableWithJoins = &select.from[0]; + let table_factor = &from.relation; + if let TableFactor::Table { partitions, .. 
} = table_factor { + let actual: Vec<&str> = partitions + .iter() + .map(|ident| ident.value.as_str()) + .collect(); + assert_eq!(expected, actual); + } + } + } else { + panic!("fail to parse mysql partition selection"); + } + } } diff --git a/src/test_utils.rs b/src/test_utils.rs index 0db1e7d2..b81cd5f4 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -248,6 +248,7 @@ pub fn table(name: impl Into) -> TableFactor { args: None, with_hints: vec![], version: None, + partitions: vec![], } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index d6a28fd0..3502d7df 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -96,6 +96,7 @@ fn parse_table_identifiers() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![] },] @@ -160,6 +161,7 @@ fn parse_table_time_travel() { version: Some(TableVersion::ForSystemTimeAsOf(Expr::Value( Value::SingleQuotedString(version) ))), + partitions: vec![], }, joins: vec![] },] diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 5241777e..a14598b3 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -63,15 +63,16 @@ fn parse_map_access_expr() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, - joins: vec![] + joins: vec![], }], lateral_views: vec![], selection: Some(BinaryOp { left: Box::new(BinaryOp { left: Box::new(Identifier(Ident::new("id"))), op: BinaryOperator::Eq, - right: Box::new(Expr::Value(Value::SingleQuotedString("test".to_string()))) + right: Box::new(Expr::Value(Value::SingleQuotedString("test".to_string()))), }), op: BinaryOperator::And, right: Box::new(BinaryOp { @@ -91,11 +92,11 @@ fn parse_map_access_expr() { distinct: false, special: false, order_by: vec![], - })] + })], }), op: BinaryOperator::NotEq, - right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))) - }) + right: 
Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))), + }), }), group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], @@ -103,7 +104,7 @@ fn parse_map_access_expr() { sort_by: vec![], having: None, named_window: vec![], - qualify: None + qualify: None, }, select ); @@ -117,7 +118,7 @@ fn parse_array_expr() { &Expr::Array(Array { elem: vec![ Expr::Value(Value::SingleQuotedString("1".to_string())), - Expr::Value(Value::SingleQuotedString("2".to_string())) + Expr::Value(Value::SingleQuotedString("2".to_string())), ], named: false, }), @@ -171,6 +172,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index be10914d..6f780de9 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -215,6 +215,7 @@ fn parse_update_set_from() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }, @@ -242,6 +243,7 @@ fn parse_update_set_from() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], @@ -308,6 +310,7 @@ fn parse_update_with_table_alias() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }, @@ -371,6 +374,7 @@ fn parse_select_with_table_alias() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }] @@ -402,6 +406,7 @@ fn parse_delete_statement() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, from[0].relation ); @@ -430,6 +435,7 @@ fn parse_delete_statement_for_multi_tables() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, from[0].relation ); @@ -440,6 +446,7 @@ fn parse_delete_statement_for_multi_tables() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, from[0].joins[0].relation 
); @@ -464,6 +471,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, from[0].relation ); @@ -474,6 +482,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, from[1].relation ); @@ -484,6 +493,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, using[0].relation ); @@ -494,6 +504,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, using[0].joins[0].relation ); @@ -522,6 +533,7 @@ fn parse_where_delete_statement() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, from[0].relation, ); @@ -564,6 +576,7 @@ fn parse_where_delete_with_alias_statement() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, from[0].relation, ); @@ -578,6 +591,7 @@ fn parse_where_delete_with_alias_statement() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }]), @@ -3552,6 +3566,7 @@ fn test_parse_named_window() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], @@ -3891,6 +3906,7 @@ fn parse_interval_and_or_xor() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], @@ -4496,6 +4512,7 @@ fn parse_implicit_join() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }, @@ -4506,6 +4523,7 @@ fn parse_implicit_join() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }, @@ -4524,6 +4542,7 @@ fn parse_implicit_join() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![Join { relation: TableFactor::Table { @@ -4532,6 +4551,7 @@ fn parse_implicit_join() { args: None, with_hints: vec![], version: None, + 
partitions: vec![], }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -4543,6 +4563,7 @@ fn parse_implicit_join() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![Join { relation: TableFactor::Table { @@ -4551,6 +4572,7 @@ fn parse_implicit_join() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -4572,6 +4594,7 @@ fn parse_cross_join() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, join_operator: JoinOperator::CrossJoin, }, @@ -4593,6 +4616,7 @@ fn parse_joins_on() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, join_operator: f(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -4663,6 +4687,7 @@ fn parse_joins_using() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), } @@ -4725,6 +4750,7 @@ fn parse_natural_join() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, join_operator: f(JoinConstraint::Natural), } @@ -4990,6 +5016,7 @@ fn parse_derived_tables() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -6318,6 +6345,7 @@ fn parse_merge() { args: None, with_hints: vec![], version: None, + partitions: vec![], } ); assert_eq!(table, table_no_into); @@ -6342,6 +6370,7 @@ fn parse_merge() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 0fe4bb0e..b05cc0dd 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -156,6 +156,7 @@ fn test_select_union_by_name() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], @@ -189,6 +190,7 @@ fn test_select_union_by_name() { 
args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], @@ -231,6 +233,7 @@ fn test_select_union_by_name() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], @@ -264,6 +267,7 @@ fn test_select_union_by_name() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 8cdfe924..6ca47e12 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -323,6 +323,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 9ed12ac2..135e5d13 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -58,6 +58,7 @@ fn parse_table_time_travel() { version: Some(TableVersion::ForSystemTimeAsOf(Expr::Value( Value::SingleQuotedString(version) ))), + partitions: vec![], }, joins: vec![] },] @@ -309,6 +310,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -514,6 +516,7 @@ fn parse_substring_in_select() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![] }], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 5aeffb0b..80ef9f98 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1151,6 +1151,7 @@ fn parse_select_with_numeric_prefix_column_name() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![] }], @@ -1200,6 +1201,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: 
vec![] }], @@ -1260,6 +1262,7 @@ fn parse_update_with_joins() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![Join { relation: TableFactor::Table { @@ -1271,6 +1274,7 @@ fn parse_update_with_joins() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier(vec![ @@ -1376,6 +1380,7 @@ fn parse_substring_in_select() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![] }], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 77e5ba45..bb385781 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2893,6 +2893,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 9f5f62f7..f17ca584 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -46,6 +46,7 @@ fn test_square_brackets_over_db_schema_table_name() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], } @@ -91,6 +92,7 @@ fn test_double_quotes_over_db_schema_table_name() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], } @@ -111,6 +113,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index dabe66e1..e1db7ec6 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -224,6 +224,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + partitions: _, } => { 
assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); From 71c35d4dfddc89cfdd8f67bdc4c5d4e701a355e8 Mon Sep 17 00:00:00 2001 From: Ilya Date: Wed, 20 Sep 2023 04:31:11 +0300 Subject: [PATCH 052/109] Add support for == operator for Sqlite (#970) --- src/tokenizer.rs | 1 + tests/sqlparser_sqlite.rs | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 1b1b1e96..175b5d3b 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -885,6 +885,7 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume match chars.peek() { Some('>') => self.consume_and_return(chars, Token::RArrow), + Some('=') => self.consume_and_return(chars, Token::DoubleEq), _ => Ok(Some(Token::Eq)), } } diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 8f6cc757..fd7a2246 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -61,6 +61,14 @@ fn parse_create_virtual_table() { sqlite_and_generic().verified_stmt(sql); } +#[test] +fn double_equality_operator() { + // Sqlite supports this operator: https://www.sqlite.org/lang_expr.html#binaryops + let input = "SELECT a==b FROM t"; + let expected = "SELECT a = b FROM t"; + let _ = sqlite_and_generic().one_statement_parses_to(input, expected); +} + #[test] fn parse_create_table_auto_increment() { let sql = "CREATE TABLE foo (bar INT PRIMARY KEY AUTOINCREMENT)"; From 521ffa945c9b9562d1622cb2860faf0830994bc6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 21 Sep 2023 13:45:19 -0400 Subject: [PATCH 053/109] Changelog for 0.38.0 release (#973) --- CHANGELOG.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c6c9dcc..9e3e4e1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,29 @@ Given that the parser produces a typed AST, any changes to the AST will technica ## [Unreleased] Check 
https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. +## [0.38.0] 2023-09-21 + +### Added + +* Support `==`operator for Sqlite (#970) - Thanks @marhoily +* Support mysql `PARTITION` to table selection (#959) - Thanks @chunshao90 +* Support `UNNEST` as a table factor for PostgreSQL (#968) @hexedpackets +* Support MySQL `UNIQUE KEY` syntax (#962) - Thanks @artorias1024 +* Support` `GROUP BY ALL` (#964) - @berkaysynnada +* Support multiple actions in one ALTER TABLE statement (#960) - Thanks @ForbesLindesay +* Add `--sqlite param` to CLI (#956) - Thanks @ddol + +### Fixed +* Fix Rust 1.72 clippy lints (#957) - Thanks @alamb + +### Changed +* Add missing token loc in parse err msg (#965) - Thanks @ding-young +* Change how `ANY` and `ALL` expressions are represented in AST (#963) - Thanks @SeanTroyUWO +* Show location info in parse errors (#958) - Thanks @MartinNowak +* Update release documentation (#954) - Thanks @alamb +* Break test and coverage test into separate jobs (#949) - Thanks @alamb + + ## [0.37.0] 2023-08-22 ### Added From 7723ea56c5119c7d1a15233c18eb2aaf48b60dc0 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 21 Sep 2023 13:47:15 -0400 Subject: [PATCH 054/109] chore: Release sqlparser version 0.38.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 43d9f981..4cc2f1ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.37.0" +version = "0.38.0" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 495d0a02d5680c93a8886fde7cec4d9998c9b595 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 2 Oct 2023 13:10:56 +0200 Subject: [PATCH 055/109] Add support for ATTACH DATABASE (#989) --- src/ast/mod.rs | 18 ++++++++++++++++++ src/keywords.rs | 1 
+ src/parser/mod.rs | 13 +++++++++++++ tests/sqlparser_sqlite.rs | 18 ++++++++++++++++++ 4 files changed, 50 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index eb8830bb..48274f68 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1429,6 +1429,16 @@ pub enum Statement { name: Ident, operation: AlterRoleOperation, }, + /// ATTACH DATABASE 'path/to/file' AS alias + /// (SQLite-specific) + AttachDatabase { + /// The name to bind to the newly attached database + schema_name: Ident, + /// An expression that indicates the path to the database file + database_file_name: Expr, + /// true if the syntax is 'ATTACH DATABASE', false if it's just 'ATTACH' + database: bool, + }, /// DROP Drop { /// The type of the object to drop: TABLE, VIEW, etc. @@ -1969,6 +1979,14 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::AttachDatabase { + schema_name, + database_file_name, + database, + } => { + let keyword = if *database { "DATABASE " } else { "" }; + write!(f, "ATTACH {keyword}{database_file_name} AS {schema_name}") + } Statement::Analyze { table_name, partitions, diff --git a/src/keywords.rs b/src/keywords.rs index ad0526cc..eee96135 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -95,6 +95,7 @@ define_keywords!( ASYMMETRIC, AT, ATOMIC, + ATTACH, AUTHORIZATION, AUTOINCREMENT, AUTO_INCREMENT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ba8f5784..49cd2489 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -456,6 +456,7 @@ impl<'a> Parser<'a> { Ok(Statement::Query(Box::new(self.parse_query()?))) } Keyword::TRUNCATE => Ok(self.parse_truncate()?), + Keyword::ATTACH => Ok(self.parse_attach_database()?), Keyword::MSCK => Ok(self.parse_msck()?), Keyword::CREATE => Ok(self.parse_create()?), Keyword::CACHE => Ok(self.parse_cache_table()?), @@ -543,6 +544,18 @@ impl<'a> Parser<'a> { }) } + pub fn parse_attach_database(&mut self) -> Result { + let database = self.parse_keyword(Keyword::DATABASE); + let database_file_name = 
self.parse_expr()?; + self.expect_keyword(Keyword::AS)?; + let schema_name = self.parse_identifier()?; + Ok(Statement::AttachDatabase { + database, + schema_name, + database_file_name, + }) + } + pub fn parse_analyze(&mut self) -> Result { self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index fd7a2246..c4e69d53 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -259,6 +259,24 @@ fn parse_create_table_with_strict() { } } +#[test] +fn parse_attach_database() { + let sql = "ATTACH DATABASE 'test.db' AS test"; + let verified_stmt = sqlite().verified_stmt(sql); + assert_eq!(sql, format!("{}", verified_stmt)); + match verified_stmt { + Statement::AttachDatabase { + schema_name, + database_file_name: Expr::Value(Value::SingleQuotedString(literal_name)), + database: true, + } => { + assert_eq!(schema_name.value, "test"); + assert_eq!(literal_name, "test.db"); + } + _ => unreachable!(), + } +} + fn sqlite() -> TestedDialects { TestedDialects { dialects: vec![Box::new(SQLiteDialect {})], From e718ce6c42365d4fb987ce4977716c28091020a5 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Mon, 2 Oct 2023 13:23:25 +0200 Subject: [PATCH 056/109] bigquery: EXTRACT support For DAYOFWEEK, DAYOFYEAR, ISOWEEK, TIME (#980) --- src/ast/value.rs | 8 ++++++++ src/keywords.rs | 3 +++ src/parser/mod.rs | 4 ++++ tests/sqlparser_common.rs | 4 ++++ 4 files changed, 19 insertions(+) diff --git a/src/ast/value.rs b/src/ast/value.rs index 9c18a325..e6f13925 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -117,6 +117,8 @@ pub enum DateTimeField { Month, Week, Day, + DayOfWeek, + DayOfYear, Date, Hour, Minute, @@ -127,6 +129,7 @@ pub enum DateTimeField { Doy, Epoch, Isodow, + IsoWeek, Isoyear, Julian, Microsecond, @@ -138,6 +141,7 @@ pub enum DateTimeField { Nanosecond, Nanoseconds, Quarter, + Time, Timezone, TimezoneHour, TimezoneMinute, @@ -151,6 +155,8 @@ impl 
fmt::Display for DateTimeField { DateTimeField::Month => "MONTH", DateTimeField::Week => "WEEK", DateTimeField::Day => "DAY", + DateTimeField::DayOfWeek => "DAYOFWEEK", + DateTimeField::DayOfYear => "DAYOFYEAR", DateTimeField::Date => "DATE", DateTimeField::Hour => "HOUR", DateTimeField::Minute => "MINUTE", @@ -162,6 +168,7 @@ impl fmt::Display for DateTimeField { DateTimeField::Epoch => "EPOCH", DateTimeField::Isodow => "ISODOW", DateTimeField::Isoyear => "ISOYEAR", + DateTimeField::IsoWeek => "ISOWEEK", DateTimeField::Julian => "JULIAN", DateTimeField::Microsecond => "MICROSECOND", DateTimeField::Microseconds => "MICROSECONDS", @@ -172,6 +179,7 @@ impl fmt::Display for DateTimeField { DateTimeField::Nanosecond => "NANOSECOND", DateTimeField::Nanoseconds => "NANOSECONDS", DateTimeField::Quarter => "QUARTER", + DateTimeField::Time => "TIME", DateTimeField::Timezone => "TIMEZONE", DateTimeField::TimezoneHour => "TIMEZONE_HOUR", DateTimeField::TimezoneMinute => "TIMEZONE_MINUTE", diff --git a/src/keywords.rs b/src/keywords.rs index eee96135..6fb74a8e 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -196,6 +196,8 @@ define_keywords!( DATE, DATETIME, DAY, + DAYOFWEEK, + DAYOFYEAR, DEALLOCATE, DEC, DECADE, @@ -334,6 +336,7 @@ define_keywords!( IS, ISODOW, ISOLATION, + ISOWEEK, ISOYEAR, JAR, JOIN, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 49cd2489..279abf96 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1508,6 +1508,8 @@ impl<'a> Parser<'a> { Keyword::MONTH => Ok(DateTimeField::Month), Keyword::WEEK => Ok(DateTimeField::Week), Keyword::DAY => Ok(DateTimeField::Day), + Keyword::DAYOFWEEK => Ok(DateTimeField::DayOfWeek), + Keyword::DAYOFYEAR => Ok(DateTimeField::DayOfYear), Keyword::DATE => Ok(DateTimeField::Date), Keyword::HOUR => Ok(DateTimeField::Hour), Keyword::MINUTE => Ok(DateTimeField::Minute), @@ -1519,6 +1521,7 @@ impl<'a> Parser<'a> { Keyword::EPOCH => Ok(DateTimeField::Epoch), Keyword::ISODOW => Ok(DateTimeField::Isodow), 
Keyword::ISOYEAR => Ok(DateTimeField::Isoyear), + Keyword::ISOWEEK => Ok(DateTimeField::IsoWeek), Keyword::JULIAN => Ok(DateTimeField::Julian), Keyword::MICROSECOND => Ok(DateTimeField::Microsecond), Keyword::MICROSECONDS => Ok(DateTimeField::Microseconds), @@ -1529,6 +1532,7 @@ impl<'a> Parser<'a> { Keyword::NANOSECOND => Ok(DateTimeField::Nanosecond), Keyword::NANOSECONDS => Ok(DateTimeField::Nanoseconds), Keyword::QUARTER => Ok(DateTimeField::Quarter), + Keyword::TIME => Ok(DateTimeField::Time), Keyword::TIMEZONE => Ok(DateTimeField::Timezone), Keyword::TIMEZONE_HOUR => Ok(DateTimeField::TimezoneHour), Keyword::TIMEZONE_MINUTE => Ok(DateTimeField::TimezoneMinute), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 6f780de9..a9ce3cd6 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2069,6 +2069,8 @@ fn parse_extract() { verified_stmt("SELECT EXTRACT(MONTH FROM d)"); verified_stmt("SELECT EXTRACT(WEEK FROM d)"); verified_stmt("SELECT EXTRACT(DAY FROM d)"); + verified_stmt("SELECT EXTRACT(DAYOFWEEK FROM d)"); + verified_stmt("SELECT EXTRACT(DAYOFYEAR FROM d)"); verified_stmt("SELECT EXTRACT(DATE FROM d)"); verified_stmt("SELECT EXTRACT(HOUR FROM d)"); verified_stmt("SELECT EXTRACT(MINUTE FROM d)"); @@ -2082,6 +2084,7 @@ fn parse_extract() { verified_stmt("SELECT EXTRACT(DOY FROM d)"); verified_stmt("SELECT EXTRACT(EPOCH FROM d)"); verified_stmt("SELECT EXTRACT(ISODOW FROM d)"); + verified_stmt("SELECT EXTRACT(ISOWEEK FROM d)"); verified_stmt("SELECT EXTRACT(ISOYEAR FROM d)"); verified_stmt("SELECT EXTRACT(JULIAN FROM d)"); verified_stmt("SELECT EXTRACT(MICROSECOND FROM d)"); @@ -2094,6 +2097,7 @@ fn parse_extract() { verified_stmt("SELECT EXTRACT(TIMEZONE FROM d)"); verified_stmt("SELECT EXTRACT(TIMEZONE_HOUR FROM d)"); verified_stmt("SELECT EXTRACT(TIMEZONE_MINUTE FROM d)"); + verified_stmt("SELECT EXTRACT(TIME FROM d)"); let res = parse_sql_statements("SELECT EXTRACT(JIFFY FROM d)"); assert_eq!( From 
4903bd4b8b8b615a2d1eb0bfad028952321ede56 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Mon, 2 Oct 2023 13:39:44 +0200 Subject: [PATCH 057/109] Add test for clickhouse: tokenize `==` as Token::DoubleEq (#981) --- src/test_utils.rs | 3 +++ src/tokenizer.rs | 24 +++++++++++++++++++++++- tests/sqlparser_clickhouse.rs | 8 ++++++++ tests/sqlparser_common.rs | 4 ++-- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/test_utils.rs b/src/test_utils.rs index b81cd5f4..8c64bfac 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -31,6 +31,9 @@ use crate::parser::{Parser, ParserError}; use crate::tokenizer::Tokenizer; use crate::{ast::*, parser::ParserOptions}; +#[cfg(test)] +use pretty_assertions::assert_eq; + /// Tests use the methods on this struct to invoke the parser on one or /// multiple dialects. pub struct TestedDialects { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 175b5d3b..067aa5a8 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1368,7 +1368,7 @@ fn peeking_take_while(chars: &mut State, mut predicate: impl FnMut(char) -> bool #[cfg(test)] mod tests { use super::*; - use crate::dialect::{GenericDialect, MsSqlDialect}; + use crate::dialect::{ClickHouseDialect, GenericDialect, MsSqlDialect}; #[test] fn tokenizer_error_impl() { @@ -1414,6 +1414,28 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_clickhouse_double_equal() { + let sql = String::from("SELECT foo=='1'"); + let dialect = ClickHouseDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + let tokens = tokenizer.tokenize().unwrap(); + + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Word(Word { + value: "foo".to_string(), + quote_style: None, + keyword: Keyword::NoKeyword, + }), + Token::DoubleEq, + Token::SingleQuotedString("1".to_string()), + ]; + + compare(expected, tokens); + } + #[test] fn tokenize_select_exponent() { let sql = String::from("SELECT 1e10, 1e-10, 1e+10, 
1ea, 1e-10a, 1e-10-10"); diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index a14598b3..936b0799 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -336,6 +336,14 @@ fn parse_create_table() { ); } +#[test] +fn parse_double_equal() { + clickhouse().one_statement_parses_to( + r#"SELECT foo FROM bar WHERE buz == 'buz'"#, + r#"SELECT foo FROM bar WHERE buz = 'buz'"#, + ); +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a9ce3cd6..46503c7f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -6792,10 +6792,10 @@ fn parse_time_functions() { // Validating Parenthesis let sql_without_parens = format!("SELECT {}", func_name); - let mut ast_without_parens = select_localtime_func_call_ast.clone(); + let mut ast_without_parens = select_localtime_func_call_ast; ast_without_parens.special = true; assert_eq!( - &Expr::Function(ast_without_parens.clone()), + &Expr::Function(ast_without_parens), expr_from_projection(&verified_only_select(&sql_without_parens).projection[0]) ); } From ed39329060bbc34d4fc7655f0d9973dacdea5534 Mon Sep 17 00:00:00 2001 From: William Date: Mon, 2 Oct 2023 08:36:17 -0400 Subject: [PATCH 058/109] Add JumpWire to users in README (#990) --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1195cc94..454ea6c2 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ $ cargo run --features json_example --example cli FILENAME.sql [--dialectname] ## Users This parser is currently being used by the [DataFusion] query engine, -[LocustDB], [Ballista], [GlueSQL], and [Opteryx]. +[LocustDB], [Ballista], [GlueSQL], [Opteryx], and [JumpWire]. If your project is using sqlparser-rs feel free to make a PR to add it to this list. @@ -179,6 +179,7 @@ licensed as above, without any additional terms or conditions. 
[Ballista]: https://github.com/apache/arrow-ballista [GlueSQL]: https://github.com/gluesql/gluesql [Opteryx]: https://github.com/mabel-dev/opteryx +[JumpWire]: https://github.com/extragoodlabs/jumpwire [Pratt Parser]: https://tdop.github.io/ [sql-2016-grammar]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html [sql-standard]: https://en.wikipedia.org/wiki/ISO/IEC_9075 From 6ffc3b3a52c6dc49d4f62733ddf3451ff3f9039d Mon Sep 17 00:00:00 2001 From: Ulrich Schmidt-Goertz Date: Mon, 2 Oct 2023 14:42:58 +0200 Subject: [PATCH 059/109] Support DELETE with ORDER BY and LIMIT (MySQL) (#992) --- src/ast/mod.rs | 12 ++++++++++++ src/parser/mod.rs | 13 ++++++++++++- tests/sqlparser_common.rs | 2 ++ tests/sqlparser_mysql.rs | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 48274f68..f2dbb889 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1301,6 +1301,10 @@ pub enum Statement { selection: Option<Expr>, /// RETURNING returning: Option<Vec<SelectItem>>, + /// ORDER BY (MySQL) + order_by: Vec<OrderByExpr>, + /// LIMIT (MySQL) + limit: Option<Expr>, }, /// CREATE VIEW CreateView { @@ -2141,6 +2145,8 @@ impl fmt::Display for Statement { using, selection, returning, + order_by, + limit, } => { write!(f, "DELETE ")?; if !tables.is_empty() { @@ -2156,6 +2162,12 @@ impl fmt::Display for Statement { if let Some(returning) = returning { write!(f, " RETURNING {}", display_comma_separated(returning))?; } + if !order_by.is_empty() { + write!(f, " ORDER BY {}", display_comma_separated(order_by))?; + } + if let Some(limit) = limit { + write!(f, " LIMIT {limit}")?; + } Ok(()) } Statement::Close { cursor } => { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 279abf96..a3ebcc47 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5306,12 +5306,21 @@ impl<'a> Parser<'a> { } else { None }; - let returning = if self.parse_keyword(Keyword::RETURNING) {
Some(self.parse_comma_separated(Parser::parse_select_item)?) } else { None }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + let limit = if self.parse_keyword(Keyword::LIMIT) { + self.parse_limit()? + } else { + None + }; Ok(Statement::Delete { tables, @@ -5319,6 +5328,8 @@ impl<'a> Parser<'a> { using, selection, returning, + order_by, + limit, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 46503c7f..d73061f7 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -525,6 +525,7 @@ fn parse_where_delete_statement() { using, selection, returning, + .. } => { assert_eq!( TableFactor::Table { @@ -565,6 +566,7 @@ fn parse_where_delete_with_alias_statement() { using, selection, returning, + .. } => { assert_eq!( TableFactor::Table { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 80ef9f98..f1a054bf 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1315,6 +1315,38 @@ fn parse_update_with_joins() { } } +#[test] +fn parse_delete_with_order_by() { + let sql = "DELETE FROM customers ORDER BY id DESC"; + match mysql().verified_stmt(sql) { + Statement::Delete { order_by, .. } => { + assert_eq!( + vec![OrderByExpr { + expr: Expr::Identifier(Ident { + value: "id".to_owned(), + quote_style: None + }), + asc: Some(false), + nulls_first: None, + }], + order_by + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_delete_with_limit() { + let sql = "DELETE FROM customers LIMIT 100"; + match mysql().verified_stmt(sql) { + Statement::Delete { limit, .. 
} => { + assert_eq!(Some(Expr::Value(number("100"))), limit); + } + _ => unreachable!(), + } +} + #[test] fn parse_alter_table_drop_primary_key() { assert_matches!( From 993769ec0267e8684a034ebeb268ae0360785822 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Mon, 2 Oct 2023 14:48:51 +0200 Subject: [PATCH 060/109] Add support for mixed BigQuery table name quoting (#971) Co-authored-by: ifeanyi --- src/parser/mod.rs | 21 ++++++++ src/test_utils.rs | 18 +++++++ tests/sqlparser_bigquery.rs | 95 +++++++++++++++++++++++++++++++++++-- 3 files changed, 129 insertions(+), 5 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a3ebcc47..a388a913 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5041,6 +5041,27 @@ impl<'a> Parser<'a> { break; } } + + // BigQuery accepts any number of quoted identifiers of a table name. + // https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_identifiers + if dialect_of!(self is BigQueryDialect) + && idents.iter().any(|ident| ident.value.contains('.')) + { + idents = idents + .into_iter() + .flat_map(|ident| { + ident + .value + .split('.') + .map(|value| Ident { + value: value.into(), + quote_style: ident.quote_style, + }) + .collect::>() + }) + .collect() + } + Ok(ObjectName(idents)) } diff --git a/src/test_utils.rs b/src/test_utils.rs index 8c64bfac..f0c5e425 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -162,6 +162,24 @@ impl TestedDialects { } } + /// Ensures that `sql` parses as a single [`Select`], and that additionally: + /// + /// 1. parsing `sql` results in the same [`Statement`] as parsing + /// `canonical`. + /// + /// 2. 
re-serializing the result of parsing `sql` produces the same + /// `canonical` sql string + pub fn verified_only_select_with_canonical(&self, query: &str, canonical: &str) -> Select { + let q = match self.one_statement_parses_to(query, canonical) { + Statement::Query(query) => *query, + _ => panic!("Expected Query"), + }; + match *q.body { + SetExpr::Select(s) => *s, + _ => panic!("Expected SetExpr::Select"), + } + } + /// Ensures that `sql` parses as an [`Expr`], and that /// re-serializing the parse result produces the same `sql` /// string (is not modified after a serialization round-trip). diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 3502d7df..e05581d5 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -13,6 +13,8 @@ #[macro_use] mod test_utils; +use std::ops::Deref; + use sqlparser::ast::*; use sqlparser::dialect::{BigQueryDialect, GenericDialect}; use test_utils::*; @@ -84,9 +86,24 @@ fn parse_raw_literal() { #[test] fn parse_table_identifiers() { - fn test_table_ident(ident: &str, expected: Vec) { + /// Parses a table identifier ident and verifies that re-serializing the + /// parsed identifier produces the original ident string. + /// + /// In some cases, re-serializing the result of the parsed ident is not + /// expected to produce the original ident string. canonical is provided + /// instead as the canonical representation of the identifier for comparison. 
+ /// For example, re-serializing the result of ident `foo.bar` produces + /// the equivalent canonical representation `foo`.`bar` + fn test_table_ident(ident: &str, canonical: Option<&str>, expected: Vec) { let sql = format!("SELECT 1 FROM {ident}"); - let select = bigquery().verified_only_select(&sql); + let canonical = canonical.map(|ident| format!("SELECT 1 FROM {ident}")); + + let select = if let Some(canonical) = canonical { + bigquery().verified_only_select_with_canonical(&sql, canonical.deref()) + } else { + bigquery().verified_only_select(&sql) + }; + assert_eq!( select.from, vec![TableWithJoins { @@ -102,26 +119,30 @@ fn parse_table_identifiers() { },] ); } + fn test_table_ident_err(ident: &str) { let sql = format!("SELECT 1 FROM {ident}"); assert!(bigquery().parse_sql_statements(&sql).is_err()); } - test_table_ident("da-sh-es", vec![Ident::new("da-sh-es")]); + test_table_ident("da-sh-es", None, vec![Ident::new("da-sh-es")]); - test_table_ident("`spa ce`", vec![Ident::with_quote('`', "spa ce")]); + test_table_ident("`spa ce`", None, vec![Ident::with_quote('`', "spa ce")]); test_table_ident( "`!@#$%^&*()-=_+`", + None, vec![Ident::with_quote('`', "!@#$%^&*()-=_+")], ); test_table_ident( "_5abc.dataField", + None, vec![Ident::new("_5abc"), Ident::new("dataField")], ); test_table_ident( "`5abc`.dataField", + None, vec![Ident::with_quote('`', "5abc"), Ident::new("dataField")], ); @@ -129,6 +150,7 @@ fn parse_table_identifiers() { test_table_ident( "abc5.dataField", + None, vec![Ident::new("abc5"), Ident::new("dataField")], ); @@ -136,13 +158,76 @@ fn parse_table_identifiers() { test_table_ident( "`GROUP`.dataField", + None, vec![Ident::with_quote('`', "GROUP"), Ident::new("dataField")], ); // TODO: this should be error // test_table_ident_err("GROUP.dataField"); - test_table_ident("abc5.GROUP", vec![Ident::new("abc5"), Ident::new("GROUP")]); + test_table_ident( + "abc5.GROUP", + None, + vec![Ident::new("abc5"), Ident::new("GROUP")], + ); + + test_table_ident( 
+ "`foo.bar.baz`", + Some("`foo`.`bar`.`baz`"), + vec![ + Ident::with_quote('`', "foo"), + Ident::with_quote('`', "bar"), + Ident::with_quote('`', "baz"), + ], + ); + + test_table_ident( + "`foo.bar`.`baz`", + Some("`foo`.`bar`.`baz`"), + vec![ + Ident::with_quote('`', "foo"), + Ident::with_quote('`', "bar"), + Ident::with_quote('`', "baz"), + ], + ); + + test_table_ident( + "`foo`.`bar.baz`", + Some("`foo`.`bar`.`baz`"), + vec![ + Ident::with_quote('`', "foo"), + Ident::with_quote('`', "bar"), + Ident::with_quote('`', "baz"), + ], + ); + + test_table_ident( + "`foo`.`bar`.`baz`", + Some("`foo`.`bar`.`baz`"), + vec![ + Ident::with_quote('`', "foo"), + Ident::with_quote('`', "bar"), + Ident::with_quote('`', "baz"), + ], + ); + + test_table_ident( + "`5abc.dataField`", + Some("`5abc`.`dataField`"), + vec![ + Ident::with_quote('`', "5abc"), + Ident::with_quote('`', "dataField"), + ], + ); + + test_table_ident( + "`_5abc.da-sh-es`", + Some("`_5abc`.`da-sh-es`"), + vec![ + Ident::with_quote('`', "_5abc"), + Ident::with_quote('`', "da-sh-es"), + ], + ); } #[test] From 2786c7eaf1d0b420ca5a5f338b45b6cc0fbd68be Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Mon, 2 Oct 2023 17:53:32 +0200 Subject: [PATCH 061/109] clickhouse: add support for LIMIT BY (#977) --- src/ast/query.rs | 7 +++++++ src/parser/mod.rs | 13 ++++++++++++- tests/sqlparser_clickhouse.rs | 18 ++++++++++++++++++ tests/sqlparser_common.rs | 5 +++++ tests/sqlparser_mssql.rs | 3 +++ tests/sqlparser_mysql.rs | 9 +++++++++ tests/sqlparser_postgres.rs | 2 ++ 7 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index af35c37a..d5f17079 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -35,6 +35,10 @@ pub struct Query { pub order_by: Vec, /// `LIMIT { | ALL }` pub limit: Option, + + /// `LIMIT { } BY { ,,... 
} }` + pub limit_by: Vec<Expr>, + /// `OFFSET <N> [ { ROW | ROWS } ]` pub offset: Option<Offset>, /// `FETCH { FIRST | NEXT } <N> [ PERCENT ] { ROW | ROWS } | { ONLY | WITH TIES }` @@ -58,6 +62,9 @@ impl fmt::Display for Query { if let Some(ref offset) = self.offset { write!(f, " {offset}")?; } + if !self.limit_by.is_empty() { + write!(f, " BY {}", display_separated(&self.limit_by, ", "))?; + } if let Some(ref fetch) = self.fetch { write!(f, " {fetch}")?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a388a913..dcd731a6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5431,6 +5431,7 @@ impl<'a> Parser<'a> { with, body: Box::new(SetExpr::Insert(insert)), limit: None, + limit_by: vec![], order_by: vec![], offset: None, fetch: None, @@ -5442,6 +5443,7 @@ impl<'a> Parser<'a> { with, body: Box::new(SetExpr::Update(update)), limit: None, + limit_by: vec![], order_by: vec![], offset: None, fetch: None, @@ -5468,7 +5470,7 @@ impl<'a> Parser<'a> { offset = Some(self.parse_offset()?) } - if dialect_of!(self is GenericDialect | MySqlDialect) + if dialect_of!(self is GenericDialect | MySqlDialect | ClickHouseDialect) && limit.is_some() && offset.is_none() && self.consume_token(&Token::Comma) @@ -5483,6 +5485,14 @@ impl<'a> Parser<'a> { } } + let limit_by = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keyword(Keyword::BY) + { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + let fetch = if self.parse_keyword(Keyword::FETCH) { Some(self.parse_fetch()?)
} else { @@ -5499,6 +5509,7 @@ impl<'a> Parser<'a> { body, order_by, limit, + limit_by, offset, fetch, locks, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 936b0799..9efe4a36 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -25,6 +25,7 @@ use sqlparser::ast::TableFactor::Table; use sqlparser::ast::*; use sqlparser::dialect::ClickHouseDialect; +use sqlparser::dialect::GenericDialect; #[test] fn parse_map_access_expr() { @@ -344,9 +345,26 @@ fn parse_double_equal() { ); } +#[test] +fn parse_limit_by() { + clickhouse_and_generic().verified_stmt( + r#"SELECT * FROM default.last_asset_runs_mv ORDER BY created_at DESC LIMIT 1 BY asset"#, + ); + clickhouse_and_generic().verified_stmt( + r#"SELECT * FROM default.last_asset_runs_mv ORDER BY created_at DESC LIMIT 1 BY asset, toStartOfDay(created_at)"#, + ); +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], options: None, } } + +fn clickhouse_and_generic() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(ClickHouseDialect {}), Box::new(GenericDialect {})], + options: None, + } +} diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index d73061f7..80e4cdf0 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -261,6 +261,7 @@ fn parse_update_set_from() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -2662,6 +2663,7 @@ fn parse_create_table_as_table() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -2685,6 +2687,7 @@ fn parse_create_table_as_table() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -3976,6 +3979,7 @@ fn parse_interval_and_or_xor() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -6392,6 +6396,7 @@ fn parse_merge() { }))), 
order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 135e5d13..f9eb4d8f 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -92,6 +92,7 @@ fn parse_create_procedure() { body: vec![Statement::Query(Box::new(Query { with: None, limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -493,6 +494,7 @@ fn parse_substring_in_select() { assert_eq!( Box::new(Query { with: None, + body: Box::new(SetExpr::Select(Box::new(Select { distinct: Some(Distinct::Distinct), top: None, @@ -532,6 +534,7 @@ fn parse_substring_in_select() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index f1a054bf..80b9dcfd 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -562,6 +562,7 @@ fn parse_escaped_quote_identifiers_with_escape() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -604,6 +605,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -643,6 +645,7 @@ fn parse_escaped_backticks_with_escape() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -682,6 +685,7 @@ fn parse_escaped_backticks_with_no_escape() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -956,6 +960,7 @@ fn parse_simple_insert() { })), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -991,6 +996,7 @@ fn parse_empty_row_insert() { })), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -1049,6 +1055,7 @@ fn parse_insert_with_on_duplicate_update() { })), order_by: vec![], limit: 
None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -1428,6 +1435,7 @@ fn parse_substring_in_select() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -1708,6 +1716,7 @@ fn parse_hex_string_introducer() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index bb385781..fe336bda 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1000,6 +1000,7 @@ fn parse_copy_to() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -2046,6 +2047,7 @@ fn parse_array_subquery_expr() { }), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], From 40e2ecbdf34f0068863ed74b0ae3a8eb410c6401 Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Mon, 2 Oct 2023 10:28:13 -0700 Subject: [PATCH 062/109] snowflake: support for UNPIVOT and a fix for chained PIVOTs (#983) --- src/ast/query.rs | 67 ++++++++++------ src/keywords.rs | 2 + src/parser/mod.rs | 54 +++++++++---- tests/sqlparser_common.rs | 155 +++++++++++++++++++++++++++++++++++--- 4 files changed, 231 insertions(+), 47 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index d5f17079..88b0931d 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -720,13 +720,28 @@ pub enum TableFactor { /// For example `FROM monthly_sales PIVOT(sum(amount) FOR MONTH IN ('JAN', 'FEB'))` /// See Pivot { - #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] - name: ObjectName, - table_alias: Option, + #[cfg_attr(feature = "visitor", visit(with = "visit_table_factor"))] + table: Box, aggregate_function: Expr, // Function expression value_column: Vec, pivot_values: Vec, - pivot_alias: Option, + alias: Option, + }, + /// An UNPIVOT operation on a table. 
+ /// + /// Syntax: + /// ```sql + /// table UNPIVOT(value FOR name IN (column1, [ column2, ... ])) [ alias ] + /// ``` + /// + /// See . + Unpivot { + #[cfg_attr(feature = "visitor", visit(with = "visit_table_factor"))] + table: Box, + value: Ident, + name: Ident, + columns: Vec, + alias: Option, }, } @@ -810,32 +825,42 @@ impl fmt::Display for TableFactor { Ok(()) } TableFactor::Pivot { - name, - table_alias, + table, aggregate_function, value_column, pivot_values, - pivot_alias, + alias, } => { - write!(f, "{}", name)?; - if table_alias.is_some() { - write!(f, " AS {}", table_alias.as_ref().unwrap())?; - } write!( f, - " PIVOT({} FOR {} IN (", + "{} PIVOT({} FOR {} IN ({}))", + table, aggregate_function, - Expr::CompoundIdentifier(value_column.to_vec()) + Expr::CompoundIdentifier(value_column.to_vec()), + display_comma_separated(pivot_values) )?; - for value in pivot_values { - write!(f, "{}", value)?; - if !value.eq(pivot_values.last().unwrap()) { - write!(f, ", ")?; - } + if alias.is_some() { + write!(f, " AS {}", alias.as_ref().unwrap())?; } - write!(f, "))")?; - if pivot_alias.is_some() { - write!(f, " AS {}", pivot_alias.as_ref().unwrap())?; + Ok(()) + } + TableFactor::Unpivot { + table, + value, + name, + columns, + alias, + } => { + write!( + f, + "{} UNPIVOT({} FOR {} IN ({}))", + table, + value, + name, + display_comma_separated(columns) + )?; + if alias.is_some() { + write!(f, " AS {}", alias.as_ref().unwrap())?; } Ok(()) } diff --git a/src/keywords.rs b/src/keywords.rs index 6fb74a8e..d8570803 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -635,6 +635,7 @@ define_keywords!( UNKNOWN, UNLOGGED, UNNEST, + UNPIVOT, UNSIGNED, UNTIL, UPDATE, @@ -693,6 +694,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::HAVING, Keyword::ORDER, Keyword::PIVOT, + Keyword::UNPIVOT, Keyword::TOP, Keyword::LATERAL, Keyword::VIEW, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index dcd731a6..45600f42 100644 --- a/src/parser/mod.rs +++ 
b/src/parser/mod.rs @@ -6276,9 +6276,8 @@ impl<'a> Parser<'a> { | TableFactor::Table { alias, .. } | TableFactor::UNNEST { alias, .. } | TableFactor::TableFunction { alias, .. } - | TableFactor::Pivot { - pivot_alias: alias, .. - } + | TableFactor::Pivot { alias, .. } + | TableFactor::Unpivot { alias, .. } | TableFactor::NestedJoin { alias, .. } => { // but not `FROM (mytable AS alias1) AS alias2`. if let Some(inner_alias) = alias { @@ -6357,11 +6356,6 @@ impl<'a> Parser<'a> { let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - // Pivot - if self.parse_keyword(Keyword::PIVOT) { - return self.parse_pivot_table_factor(name, alias); - } - // MSSQL-specific table hints: let mut with_hints = vec![]; if self.parse_keyword(Keyword::WITH) { @@ -6373,14 +6367,25 @@ impl<'a> Parser<'a> { self.prev_token(); } }; - Ok(TableFactor::Table { + + let mut table = TableFactor::Table { name, alias, args, with_hints, version, partitions, - }) + }; + + while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) { + table = match kw { + Keyword::PIVOT => self.parse_pivot_table_factor(table)?, + Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, + _ => unreachable!(), + } + } + + Ok(table) } } @@ -6417,8 +6422,7 @@ impl<'a> Parser<'a> { pub fn parse_pivot_table_factor( &mut self, - name: ObjectName, - table_alias: Option, + table: TableFactor, ) -> Result { self.expect_token(&Token::LParen)?; let function_name = match self.next_token().token { @@ -6435,12 +6439,32 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; Ok(TableFactor::Pivot { - name, - table_alias, + table: Box::new(table), aggregate_function: function, value_column, pivot_values, - pivot_alias: alias, + alias, + }) + } + + pub fn parse_unpivot_table_factor( + &mut self, + table: TableFactor, + ) -> Result { + self.expect_token(&Token::LParen)?; + let value = 
self.parse_identifier()?; + self.expect_keyword(Keyword::FOR)?; + let name = self.parse_identifier()?; + self.expect_keyword(Keyword::IN)?; + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::Unpivot { + table: Box::new(table), + value, + name, + columns, + alias, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 80e4cdf0..3c4a2d9e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -20,7 +20,7 @@ use matches::assert_matches; use sqlparser::ast::SelectItem::UnnamedExpr; -use sqlparser::ast::TableFactor::Pivot; +use sqlparser::ast::TableFactor::{Pivot, Unpivot}; use sqlparser::ast::*; use sqlparser::dialect::{ AnsiDialect, BigQueryDialect, ClickHouseDialect, DuckDbDialect, GenericDialect, HiveDialect, @@ -7257,10 +7257,16 @@ fn parse_pivot_table() { assert_eq!( verified_only_select(sql).from[0].relation, Pivot { - name: ObjectName(vec![Ident::new("monthly_sales")]), - table_alias: Some(TableAlias { - name: Ident::new("a"), - columns: vec![] + table: Box::new(TableFactor::Table { + name: ObjectName(vec![Ident::new("monthly_sales")]), + alias: Some(TableAlias { + name: Ident::new("a"), + columns: vec![] + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], }), aggregate_function: Expr::Function(Function { name: ObjectName(vec![Ident::new("SUM")]), @@ -7279,7 +7285,7 @@ fn parse_pivot_table() { Value::SingleQuotedString("MAR".to_string()), Value::SingleQuotedString("APR".to_string()), ], - pivot_alias: Some(TableAlias { + alias: Some(TableAlias { name: Ident { value: "p".to_string(), quote_style: None @@ -7290,17 +7296,15 @@ fn parse_pivot_table() { ); assert_eq!(verified_stmt(sql).to_string(), sql); + // parsing should succeed with empty alias let sql_without_table_alias = concat!( "SELECT * FROM monthly_sales ", "PIVOT(SUM(a.amount) 
FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 'APR')) AS p (c, d) ", "ORDER BY EMPID" ); assert_matches!( - verified_only_select(sql_without_table_alias).from[0].relation, - Pivot { - table_alias: None, // parsing should succeed with empty alias - .. - } + &verified_only_select(sql_without_table_alias).from[0].relation, + Pivot { table, .. } if matches!(&**table, TableFactor::Table { alias: None, .. }) ); assert_eq!( verified_stmt(sql_without_table_alias).to_string(), @@ -7308,6 +7312,135 @@ fn parse_pivot_table() { ); } +#[test] +fn parse_unpivot_table() { + let sql = concat!( + "SELECT * FROM sales AS s ", + "UNPIVOT(quantity FOR quarter IN (Q1, Q2, Q3, Q4)) AS u (product, quarter, quantity)" + ); + + pretty_assertions::assert_eq!( + verified_only_select(sql).from[0].relation, + Unpivot { + table: Box::new(TableFactor::Table { + name: ObjectName(vec![Ident::new("sales")]), + alias: Some(TableAlias { + name: Ident::new("s"), + columns: vec![] + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }), + value: Ident { + value: "quantity".to_string(), + quote_style: None + }, + + name: Ident { + value: "quarter".to_string(), + quote_style: None + }, + columns: ["Q1", "Q2", "Q3", "Q4"] + .into_iter() + .map(Ident::new) + .collect(), + alias: Some(TableAlias { + name: Ident::new("u"), + columns: ["product", "quarter", "quantity"] + .into_iter() + .map(Ident::new) + .collect() + }), + } + ); + assert_eq!(verified_stmt(sql).to_string(), sql); + + let sql_without_aliases = concat!( + "SELECT * FROM sales ", + "UNPIVOT(quantity FOR quarter IN (Q1, Q2, Q3, Q4))" + ); + + assert_matches!( + &verified_only_select(sql_without_aliases).from[0].relation, + Unpivot { + table, + alias: None, + .. + } if matches!(&**table, TableFactor::Table { alias: None, .. 
}) + ); + assert_eq!( + verified_stmt(sql_without_aliases).to_string(), + sql_without_aliases + ); +} + +#[test] +fn parse_pivot_unpivot_table() { + let sql = concat!( + "SELECT * FROM census AS c ", + "UNPIVOT(population FOR year IN (population_2000, population_2010)) AS u ", + "PIVOT(sum(population) FOR year IN ('population_2000', 'population_2010')) AS p" + ); + + pretty_assertions::assert_eq!( + verified_only_select(sql).from[0].relation, + Pivot { + table: Box::new(Unpivot { + table: Box::new(TableFactor::Table { + name: ObjectName(vec![Ident::new("census")]), + alias: Some(TableAlias { + name: Ident::new("c"), + columns: vec![] + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }), + value: Ident { + value: "population".to_string(), + quote_style: None + }, + + name: Ident { + value: "year".to_string(), + quote_style: None + }, + columns: ["population_2000", "population_2010"] + .into_iter() + .map(Ident::new) + .collect(), + alias: Some(TableAlias { + name: Ident::new("u"), + columns: vec![] + }), + }), + aggregate_function: Expr::Function(Function { + name: ObjectName(vec![Ident::new("sum")]), + args: (vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("population")) + ))]), + over: None, + distinct: false, + special: false, + order_by: vec![], + }), + value_column: vec![Ident::new("year")], + pivot_values: vec![ + Value::SingleQuotedString("population_2000".to_string()), + Value::SingleQuotedString("population_2010".to_string()) + ], + alias: Some(TableAlias { + name: Ident::new("p"), + columns: vec![] + }), + } + ); + assert_eq!(verified_stmt(sql).to_string(), sql); +} + /// Makes a predicate that looks like ((user_id = $id) OR user_id = $2...) 
fn make_where_clause(num: usize) -> String { use std::fmt::Write; From c811e2260505e2651b04e85e886ec09d237a0602 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Mon, 2 Oct 2023 19:42:01 +0200 Subject: [PATCH 063/109] =?UTF-8?q?redshift:=20add=20support=20for=20CREAT?= =?UTF-8?q?E=20VIEW=20=E2=80=A6=20WITH=20NO=20SCHEMA=20BINDING=20(#979)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/ast/mod.rs | 11 +++++++++-- src/keywords.rs | 1 + src/parser/mod.rs | 10 ++++++++++ tests/sqlparser_common.rs | 12 ++++++++++++ tests/sqlparser_redshift.rs | 14 ++++++++++++++ 5 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f2dbb889..6f9d32c8 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -577,7 +577,7 @@ pub enum Expr { /// /// Syntax: /// ```sql - /// MARCH (, , ...) AGAINST ( []) + /// MATCH (, , ...) AGAINST ( []) /// /// = CompoundIdentifier /// = String literal @@ -1316,6 +1316,8 @@ pub enum Statement { query: Box, with_options: Vec, cluster_by: Vec, + /// if true, has RedShift [`WITH NO SCHEMA BINDING`] clause + with_no_schema_binding: bool, }, /// CREATE TABLE CreateTable { @@ -2271,6 +2273,7 @@ impl fmt::Display for Statement { materialized, with_options, cluster_by, + with_no_schema_binding, } => { write!( f, @@ -2288,7 +2291,11 @@ impl fmt::Display for Statement { if !cluster_by.is_empty() { write!(f, " CLUSTER BY ({})", display_comma_separated(cluster_by))?; } - write!(f, " AS {query}") + write!(f, " AS {query}")?; + if *with_no_schema_binding { + write!(f, " WITH NO SCHEMA BINDING")?; + } + Ok(()) } Statement::CreateTable { name, diff --git a/src/keywords.rs b/src/keywords.rs index d8570803..e1bbf44a 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -110,6 +110,7 @@ define_keywords!( BIGINT, BIGNUMERIC, BINARY, + BINDING, BLOB, BLOOMFILTER, BOOL, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 45600f42..5f678869 100644 --- a/src/parser/mod.rs +++ 
b/src/parser/mod.rs @@ -2974,6 +2974,15 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::AS)?; let query = Box::new(self.parse_query()?); // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. + + let with_no_schema_binding = dialect_of!(self is RedshiftSqlDialect | GenericDialect) + && self.parse_keywords(&[ + Keyword::WITH, + Keyword::NO, + Keyword::SCHEMA, + Keyword::BINDING, + ]); + Ok(Statement::CreateView { name, columns, @@ -2982,6 +2991,7 @@ impl<'a> Parser<'a> { or_replace, with_options, cluster_by, + with_no_schema_binding, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3c4a2d9e..027dc312 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5320,6 +5320,7 @@ fn parse_create_view() { materialized, with_options, cluster_by, + with_no_schema_binding: late_binding, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5328,6 +5329,7 @@ fn parse_create_view() { assert!(!or_replace); assert_eq!(with_options, vec![]); assert_eq!(cluster_by, vec![]); + assert!(!late_binding); } _ => unreachable!(), } @@ -5368,6 +5370,7 @@ fn parse_create_view_with_columns() { query, materialized, cluster_by, + with_no_schema_binding: late_binding, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![Ident::new("has"), Ident::new("cols")]); @@ -5376,6 +5379,7 @@ fn parse_create_view_with_columns() { assert!(!materialized); assert!(!or_replace); assert_eq!(cluster_by, vec![]); + assert!(!late_binding); } _ => unreachable!(), } @@ -5393,6 +5397,7 @@ fn parse_create_or_replace_view() { query, materialized, cluster_by, + with_no_schema_binding: late_binding, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -5401,6 +5406,7 @@ fn parse_create_or_replace_view() { assert!(!materialized); assert!(or_replace); assert_eq!(cluster_by, vec![]); + assert!(!late_binding); } _ => unreachable!(), } @@ -5422,6 +5428,7 @@ fn 
parse_create_or_replace_materialized_view() { query, materialized, cluster_by, + with_no_schema_binding: late_binding, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -5430,6 +5437,7 @@ fn parse_create_or_replace_materialized_view() { assert!(materialized); assert!(or_replace); assert_eq!(cluster_by, vec![]); + assert!(!late_binding); } _ => unreachable!(), } @@ -5447,6 +5455,7 @@ fn parse_create_materialized_view() { materialized, with_options, cluster_by, + with_no_schema_binding: late_binding, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5455,6 +5464,7 @@ fn parse_create_materialized_view() { assert_eq!(with_options, vec![]); assert!(!or_replace); assert_eq!(cluster_by, vec![]); + assert!(!late_binding); } _ => unreachable!(), } @@ -5472,6 +5482,7 @@ fn parse_create_materialized_view_with_cluster_by() { materialized, with_options, cluster_by, + with_no_schema_binding: late_binding, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5480,6 +5491,7 @@ fn parse_create_materialized_view_with_cluster_by() { assert_eq!(with_options, vec![]); assert!(!or_replace); assert_eq!(cluster_by, vec![Ident::new("foo")]); + assert!(!late_binding); } _ => unreachable!(), } diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index f17ca584..5ae539b3 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -16,6 +16,7 @@ mod test_utils; use test_utils::*; use sqlparser::ast::*; +use sqlparser::dialect::GenericDialect; use sqlparser::dialect::RedshiftSqlDialect; #[test] @@ -272,6 +273,13 @@ fn redshift() -> TestedDialects { } } +fn redshift_and_generic() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(RedshiftSqlDialect {}), Box::new(GenericDialect {})], + options: None, + } +} + #[test] fn test_sharp() { let sql = "SELECT #_of_values"; @@ -281,3 +289,9 @@ fn test_sharp() { select.projection[0] ); } + 
+#[test] +fn test_create_view_with_no_schema_binding() { + redshift_and_generic() + .verified_stmt("CREATE VIEW myevent AS SELECT eventname FROM event WITH NO SCHEMA BINDING"); +} From 02f3d78a920ad90a91d318fbb3b08890c53e1a28 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Oct 2023 15:25:03 -0400 Subject: [PATCH 064/109] Fix for clippy 1.73 (#995) --- src/ast/mod.rs | 2 +- src/ast/visitor.rs | 2 +- src/parser/mod.rs | 12 ++++++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6f9d32c8..d4e2f26e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -155,7 +155,7 @@ impl fmt::Display for Ident { let escaped = value::escape_quoted_string(&self.value, q); write!(f, "{q}{escaped}{q}") } - Some(q) if q == '[' => write!(f, "[{}]", self.value), + Some('[') => write!(f, "[{}]", self.value), None => f.write_str(&self.value), _ => panic!("unexpected quote style"), } diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index bb7c1967..09cb20a0 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -490,7 +490,7 @@ where /// /// This demonstrates how to effectively replace an expression with another more complicated one /// that references the original. This example avoids unnecessary allocations by using the -/// [`std::mem`](std::mem) family of functions. +/// [`std::mem`] family of functions. /// /// ``` /// # use sqlparser::parser::Parser; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5f678869..d0b3cef7 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4701,7 +4701,11 @@ impl<'a> Parser<'a> { pub fn parse_literal_string(&mut self) -> Result { let next_token = self.next_token(); match next_token.token { - Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => Ok(value), + Token::Word(Word { + value, + keyword: Keyword::NoKeyword, + .. 
+ }) => Ok(value), Token::SingleQuotedString(s) => Ok(s), Token::DoubleQuotedString(s) => Ok(s), Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { @@ -5853,8 +5857,8 @@ impl<'a> Parser<'a> { self.expect_token(&Token::Colon)?; } else if self.parse_keyword(Keyword::ROLE) { let context_modifier = match modifier { - Some(keyword) if keyword == Keyword::LOCAL => ContextModifier::Local, - Some(keyword) if keyword == Keyword::SESSION => ContextModifier::Session, + Some(Keyword::LOCAL) => ContextModifier::Local, + Some(Keyword::SESSION) => ContextModifier::Session, _ => ContextModifier::None, }; @@ -6897,7 +6901,7 @@ impl<'a> Parser<'a> { } } - /// Parse an [`WildcardAdditionalOptions`](WildcardAdditionalOptions) information for wildcard select items. + /// Parse an [`WildcardAdditionalOptions`] information for wildcard select items. /// /// If it is not possible to parse it, will return an option. pub fn parse_wildcard_additional_options( From 5263da68cdaa052dfd4f8989760569eae253253e Mon Sep 17 00:00:00 2001 From: Gabriel Villalonga Simon Date: Thu, 5 Oct 2023 20:32:43 +0100 Subject: [PATCH 065/109] Handle CREATE [TEMPORARY|TEMP] VIEW [IF NOT EXISTS] (#993) --- src/ast/mod.rs | 12 +++++++-- src/parser/mod.rs | 12 +++++++-- tests/sqlparser_common.rs | 55 +++++++++++++++++++++++++++++++++++++++ tests/sqlparser_sqlite.rs | 31 ++++++++++++++++++++++ 4 files changed, 106 insertions(+), 4 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d4e2f26e..d048ccc1 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1318,6 +1318,10 @@ pub enum Statement { cluster_by: Vec, /// if true, has RedShift [`WITH NO SCHEMA BINDING`] clause with_no_schema_binding: bool, + /// if true, has SQLite `IF NOT EXISTS` clause + if_not_exists: bool, + /// if true, has SQLite `TEMP` or `TEMPORARY` clause + temporary: bool, }, /// CREATE TABLE CreateTable { @@ -2274,13 +2278,17 @@ impl fmt::Display for Statement { with_options, cluster_by, 
with_no_schema_binding, + if_not_exists, + temporary, } => { write!( f, - "CREATE {or_replace}{materialized}VIEW {name}", + "CREATE {or_replace}{materialized}{temporary}VIEW {if_not_exists}{name}", or_replace = if *or_replace { "OR REPLACE " } else { "" }, materialized = if *materialized { "MATERIALIZED " } else { "" }, - name = name + name = name, + temporary = if *temporary { "TEMPORARY " } else { "" }, + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" } )?; if !with_options.is_empty() { write!(f, " WITH ({})", display_comma_separated(with_options))?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d0b3cef7..922a791f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2478,7 +2478,7 @@ impl<'a> Parser<'a> { self.parse_create_table(or_replace, temporary, global, transient) } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { self.prev_token(); - self.parse_create_view(or_replace) + self.parse_create_view(or_replace, temporary) } else if self.parse_keyword(Keyword::EXTERNAL) { self.parse_create_external_table(or_replace) } else if self.parse_keyword(Keyword::FUNCTION) { @@ -2955,9 +2955,15 @@ impl<'a> Parser<'a> { } } - pub fn parse_create_view(&mut self, or_replace: bool) -> Result { + pub fn parse_create_view( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { let materialized = self.parse_keyword(Keyword::MATERIALIZED); self.expect_keyword(Keyword::VIEW)?; + let if_not_exists = dialect_of!(self is SQLiteDialect|GenericDialect) + && self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); // Many dialects support `OR ALTER` right after `CREATE`, but we don't (yet). // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. 
let name = self.parse_object_name()?; @@ -2992,6 +2998,8 @@ impl<'a> Parser<'a> { with_options, cluster_by, with_no_schema_binding, + if_not_exists, + temporary, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 027dc312..c0ec456a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5321,6 +5321,8 @@ fn parse_create_view() { with_options, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5330,6 +5332,8 @@ fn parse_create_view() { assert_eq!(with_options, vec![]); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } @@ -5371,6 +5375,8 @@ fn parse_create_view_with_columns() { materialized, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![Ident::new("has"), Ident::new("cols")]); @@ -5380,6 +5386,39 @@ fn parse_create_view_with_columns() { assert!(!or_replace); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_view_temporary() { + let sql = "CREATE TEMPORARY VIEW myschema.myview AS SELECT foo FROM bar"; + match verified_stmt(sql) { + Statement::CreateView { + name, + columns, + query, + or_replace, + materialized, + with_options, + cluster_by, + with_no_schema_binding: late_binding, + if_not_exists, + temporary, + } => { + assert_eq!("myschema.myview", name.to_string()); + assert_eq!(Vec::::new(), columns); + assert_eq!("SELECT foo FROM bar", query.to_string()); + assert!(!materialized); + assert!(!or_replace); + assert_eq!(with_options, vec![]); + assert_eq!(cluster_by, vec![]); + assert!(!late_binding); + assert!(!if_not_exists); + assert!(temporary); } _ => unreachable!(), } @@ -5398,6 +5437,8 @@ fn 
parse_create_or_replace_view() { materialized, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -5407,6 +5448,8 @@ fn parse_create_or_replace_view() { assert!(or_replace); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } @@ -5429,6 +5472,8 @@ fn parse_create_or_replace_materialized_view() { materialized, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -5438,6 +5483,8 @@ fn parse_create_or_replace_materialized_view() { assert!(or_replace); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } @@ -5456,6 +5503,8 @@ fn parse_create_materialized_view() { with_options, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5465,6 +5514,8 @@ fn parse_create_materialized_view() { assert!(!or_replace); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } @@ -5483,6 +5534,8 @@ fn parse_create_materialized_view_with_cluster_by() { with_options, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5492,6 +5545,8 @@ fn parse_create_materialized_view_with_cluster_by() { assert!(!or_replace); assert_eq!(cluster_by, vec![Ident::new("foo")]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index c4e69d53..39a82cc8 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ 
-61,6 +61,37 @@ fn parse_create_virtual_table() { sqlite_and_generic().verified_stmt(sql); } +#[test] +fn parse_create_view_temporary_if_not_exists() { + let sql = "CREATE TEMPORARY VIEW IF NOT EXISTS myschema.myview AS SELECT foo FROM bar"; + match sqlite_and_generic().verified_stmt(sql) { + Statement::CreateView { + name, + columns, + query, + or_replace, + materialized, + with_options, + cluster_by, + with_no_schema_binding: late_binding, + if_not_exists, + temporary, + } => { + assert_eq!("myschema.myview", name.to_string()); + assert_eq!(Vec::::new(), columns); + assert_eq!("SELECT foo FROM bar", query.to_string()); + assert!(!materialized); + assert!(!or_replace); + assert_eq!(with_options, vec![]); + assert_eq!(cluster_by, vec![]); + assert!(!late_binding); + assert!(if_not_exists); + assert!(temporary); + } + _ => unreachable!(), + } +} + #[test] fn double_equality_operator() { // Sqlite supports this operator: https://www.sqlite.org/lang_expr.html#binaryops From 83cb734b3c206502dd73998def455da554c37eef Mon Sep 17 00:00:00 2001 From: Zdenko Nevrala Date: Fri, 6 Oct 2023 20:48:18 +0200 Subject: [PATCH 066/109] Support Snowflake/BigQuery TRIM. 
(#975) --- src/ast/mod.rs | 6 ++++++ src/parser/mod.rs | 14 ++++++++++++++ tests/sqlparser_bigquery.rs | 26 ++++++++++++++++++++++++++ tests/sqlparser_common.rs | 24 ++++++++++++++++++++++++ tests/sqlparser_snowflake.rs | 25 +++++++++++++++++++++++++ 5 files changed, 95 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d048ccc1..87f7ebb3 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -496,12 +496,14 @@ pub enum Expr { /// ```sql /// TRIM([BOTH | LEADING | TRAILING] [ FROM] ) /// TRIM() + /// TRIM(, [, characters]) -- only Snowflake or Bigquery /// ``` Trim { expr: Box, // ([BOTH | LEADING | TRAILING] trim_where: Option, trim_what: Option>, + trim_characters: Option>, }, /// ```sql /// OVERLAY( PLACING FROM [ FOR ] @@ -895,6 +897,7 @@ impl fmt::Display for Expr { expr, trim_where, trim_what, + trim_characters, } => { write!(f, "TRIM(")?; if let Some(ident) = trim_where { @@ -905,6 +908,9 @@ impl fmt::Display for Expr { } else { write!(f, "{expr}")?; } + if let Some(characters) = trim_characters { + write!(f, ", {}", display_comma_separated(characters))?; + } write!(f, ")") } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 922a791f..95f1f8ed 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1315,6 +1315,7 @@ impl<'a> Parser<'a> { /// ```sql /// TRIM ([WHERE] ['text' FROM] 'text') /// TRIM ('text') + /// TRIM(, [, characters]) -- only Snowflake or BigQuery /// ``` pub fn parse_trim_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; @@ -1336,6 +1337,18 @@ impl<'a> Parser<'a> { expr: Box::new(expr), trim_where, trim_what: Some(trim_what), + trim_characters: None, + }) + } else if self.consume_token(&Token::Comma) + && dialect_of!(self is SnowflakeDialect | BigQueryDialect | GenericDialect) + { + let characters = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Trim { + expr: Box::new(expr), + trim_where: None, + trim_what: None, + trim_characters: Some(characters), }) } 
else { self.expect_token(&Token::RParen)?; @@ -1343,6 +1356,7 @@ impl<'a> Parser<'a> { expr: Box::new(expr), trim_where, trim_what: None, + trim_characters: None, }) } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index e05581d5..7a9a8d1c 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -17,6 +17,7 @@ use std::ops::Deref; use sqlparser::ast::*; use sqlparser::dialect::{BigQueryDialect, GenericDialect}; +use sqlparser::parser::ParserError; use test_utils::*; #[test] @@ -549,3 +550,28 @@ fn parse_map_access_offset() { bigquery().verified_only_select(sql); } } + +#[test] +fn test_bigquery_trim() { + let real_sql = r#"SELECT customer_id, TRIM(item_price_id, '"', "a") AS item_price_id FROM models_staging.subscriptions"#; + assert_eq!(bigquery().verified_stmt(real_sql).to_string(), real_sql); + + let sql_only_select = "SELECT TRIM('xyz', 'a')"; + let select = bigquery().verified_only_select(sql_only_select); + assert_eq!( + &Expr::Trim { + expr: Box::new(Expr::Value(Value::SingleQuotedString("xyz".to_owned()))), + trim_where: None, + trim_what: None, + trim_characters: Some(vec![Expr::Value(Value::SingleQuotedString("a".to_owned()))]), + }, + expr_from_projection(only(&select.projection)) + ); + + // missing comma separation + let error_sql = "SELECT TRIM('xyz' 'a')"; + assert_eq!( + ParserError::ParserError("Expected ), found: 'a'".to_owned()), + bigquery().parse_sql_statements(error_sql).unwrap_err() + ); +} diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c0ec456a..1511aa76 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5225,6 +5225,30 @@ fn parse_trim() { ParserError::ParserError("Expected ), found: 'xyz'".to_owned()), parse_sql_statements("SELECT TRIM(FOO 'xyz' FROM 'xyzfooxyz')").unwrap_err() ); + + //keep Snowflake/BigQuery TRIM syntax failing + let all_expected_snowflake = TestedDialects { + dialects: vec![ + //Box::new(GenericDialect {}), + 
Box::new(PostgreSqlDialect {}), + Box::new(MsSqlDialect {}), + Box::new(AnsiDialect {}), + //Box::new(SnowflakeDialect {}), + Box::new(HiveDialect {}), + Box::new(RedshiftSqlDialect {}), + Box::new(MySqlDialect {}), + //Box::new(BigQueryDialect {}), + Box::new(SQLiteDialect {}), + Box::new(DuckDbDialect {}), + ], + options: None, + }; + assert_eq!( + ParserError::ParserError("Expected ), found: 'a'".to_owned()), + all_expected_snowflake + .parse_sql_statements("SELECT TRIM('xyz', 'a')") + .unwrap_err() + ); } #[test] diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index e1db7ec6..e92656d0 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1039,3 +1039,28 @@ fn test_snowflake_stage_object_names() { } } } + +#[test] +fn test_snowflake_trim() { + let real_sql = r#"SELECT customer_id, TRIM(sub_items.value:item_price_id, '"', "a") AS item_price_id FROM models_staging.subscriptions"#; + assert_eq!(snowflake().verified_stmt(real_sql).to_string(), real_sql); + + let sql_only_select = "SELECT TRIM('xyz', 'a')"; + let select = snowflake().verified_only_select(sql_only_select); + assert_eq!( + &Expr::Trim { + expr: Box::new(Expr::Value(Value::SingleQuotedString("xyz".to_owned()))), + trim_where: None, + trim_what: None, + trim_characters: Some(vec![Expr::Value(Value::SingleQuotedString("a".to_owned()))]), + }, + expr_from_projection(only(&select.projection)) + ); + + // missing comma separation + let error_sql = "SELECT TRIM('xyz' 'a')"; + assert_eq!( + ParserError::ParserError("Expected ), found: 'a'".to_owned()), + snowflake().parse_sql_statements(error_sql).unwrap_err() + ); +} From c68e9775a22acf00e54b33542b10ac6d1a8cf887 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Fri, 20 Oct 2023 20:33:12 +0200 Subject: [PATCH 067/109] Support bigquery `CAST AS x [STRING|DATE] FORMAT` syntax (#978) --- src/ast/mod.rs | 64 ++++++++++++++++++++++++++++++++++-- src/parser/mod.rs | 23 +++++++++++++ tests/sqlparser_bigquery.rs 
| 35 ++++++++++++++++++-- tests/sqlparser_common.rs | 10 ++++++ tests/sqlparser_postgres.rs | 3 +- tests/sqlparser_snowflake.rs | 1 + 6 files changed, 130 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 87f7ebb3..fc15efbc 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -322,6 +322,16 @@ impl fmt::Display for JsonOperator { } } +/// Options for `CAST` / `TRY_CAST` +/// BigQuery: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum CastFormat { + Value(Value), + ValueAtTimeZone(Value, Value), +} + /// An SQL expression of any type. /// /// The parser does not distinguish between expressions of different types @@ -437,12 +447,18 @@ pub enum Expr { Cast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// TRY_CAST an expression to a different data type e.g. `TRY_CAST(foo AS VARCHAR(123))` // this differs from CAST in the choice of how to implement invalid conversions TryCast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// SAFE_CAST an expression to a different data type e.g. 
`SAFE_CAST(foo AS FLOAT64)` // only available for BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#safe_casting @@ -450,6 +466,9 @@ pub enum Expr { SafeCast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'` AtTimeZone { @@ -597,6 +616,15 @@ pub enum Expr { }, } +impl fmt::Display for CastFormat { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CastFormat::Value(v) => write!(f, "{v}"), + CastFormat::ValueAtTimeZone(v, tz) => write!(f, "{v} AT TIME ZONE {tz}"), + } + } +} + impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -753,9 +781,39 @@ impl fmt::Display for Expr { write!(f, "{op}{expr}") } } - Expr::Cast { expr, data_type } => write!(f, "CAST({expr} AS {data_type})"), - Expr::TryCast { expr, data_type } => write!(f, "TRY_CAST({expr} AS {data_type})"), - Expr::SafeCast { expr, data_type } => write!(f, "SAFE_CAST({expr} AS {data_type})"), + Expr::Cast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "CAST({expr} AS {data_type})") + } + } + Expr::TryCast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "TRY_CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "TRY_CAST({expr} AS {data_type})") + } + } + Expr::SafeCast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "SAFE_CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "SAFE_CAST({expr} AS {data_type})") + } + } Expr::Extract { field, expr } => write!(f, "EXTRACT({field} FROM {expr})"), 
Expr::Ceil { expr, field } => { if field == &DateTimeField::NoDateTime { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 95f1f8ed..829b299a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1139,16 +1139,34 @@ impl<'a> Parser<'a> { }) } + pub fn parse_optional_cast_format(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::FORMAT) { + let value = self.parse_value()?; + if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) { + Ok(Some(CastFormat::ValueAtTimeZone( + value, + self.parse_value()?, + ))) + } else { + Ok(Some(CastFormat::Value(value))) + } + } else { + Ok(None) + } + } + /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` pub fn parse_cast_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::Cast { expr: Box::new(expr), data_type, + format, }) } @@ -1158,10 +1176,12 @@ impl<'a> Parser<'a> { let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::TryCast { expr: Box::new(expr), data_type, + format, }) } @@ -1171,10 +1191,12 @@ impl<'a> Parser<'a> { let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::SafeCast { expr: Box::new(expr), data_type, + format, }) } @@ -2101,6 +2123,7 @@ impl<'a> Parser<'a> { Ok(Expr::Cast { expr: Box::new(expr), data_type: self.parse_data_type()?, + format: None, }) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 7a9a8d1c..b3f683b9 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -304,8 +304,39 @@ fn 
parse_trailing_comma() { #[test] fn parse_cast_type() { - let sql = r#"SELECT SAFE_CAST(1 AS INT64)"#; - bigquery().verified_only_select(sql); + let sql = r"SELECT SAFE_CAST(1 AS INT64)"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_date_format() { + let sql = + r"SELECT CAST(date_valid_from AS DATE FORMAT 'YYYY-MM-DD') AS date_valid_from FROM foo"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_time_format() { + let sql = r"SELECT CAST(TIME '21:30:00' AS STRING FORMAT 'PM') AS date_time_to_string"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_timestamp_format_tz() { + let sql = r"SELECT CAST(TIMESTAMP '2008-12-25 00:00:00+00:00' AS STRING FORMAT 'TZH' AT TIME ZONE 'Asia/Kolkata') AS date_time_to_string"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_string_to_bytes_format() { + let sql = r"SELECT CAST('Hello' AS BYTES FORMAT 'ASCII') AS string_to_bytes"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_bytes_to_string_format() { + let sql = r"SELECT CAST(B'\x48\x65\x6c\x6c\x6f' AS STRING FORMAT 'ASCII') AS bytes_to_string"; + bigquery_and_generic().verified_only_select(sql); } #[test] diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1511aa76..ff8bdd7a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1934,6 +1934,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1944,6 +1945,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::TinyInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1970,6 +1972,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: 
DataType::Nvarchar(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1980,6 +1983,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1990,6 +1994,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2000,6 +2005,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Binary(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2010,6 +2016,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Varbinary(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2020,6 +2027,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2030,6 +2038,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2043,6 +2052,7 @@ fn parse_try_cast() { &Expr::TryCast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index fe336bda..65472366 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1782,7 +1782,8 @@ fn parse_array_index_expr() { })), data_type: DataType::Array(Some(Box::new(DataType::Array(Some(Box::new( DataType::Int(None) - )))))) + )))))), + format: None, }))), indexes: vec![num[1].clone(), num[2].clone()], }, 
diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index e92656d0..bb988665 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -167,6 +167,7 @@ fn parse_array() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("a"))), data_type: DataType::Array(None), + format: None, }, expr_from_projection(only(&select.projection)) ); From 88510f662563786a6e3af6b1ed109444bcd332e7 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Fri, 20 Oct 2023 21:49:18 +0200 Subject: [PATCH 068/109] fix column `COLLATE` not displayed (#1012) --- src/ast/ddl.rs | 3 +++ tests/sqlparser_common.rs | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index a4640d55..f1575d97 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -517,6 +517,9 @@ pub struct ColumnDef { impl fmt::Display for ColumnDef { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{} {}", self.name, self.data_type)?; + if let Some(collation) = &self.collation { + write!(f, " COLLATE {collation}")?; + } for option in &self.options { write!(f, " {option}")?; } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ff8bdd7a..3b8775e4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -7635,3 +7635,8 @@ fn parse_create_type() { create_type ); } + +#[test] +fn parse_create_table_collate() { + pg_and_generic().verified_stmt("CREATE TABLE tbl (foo INT, bar TEXT COLLATE \"de_DE\")"); +} From c03586b727a659bb6d22d77910f4d4e9b9d9688c Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Fri, 20 Oct 2023 22:13:22 +0200 Subject: [PATCH 069/109] Support mysql `RLIKE` and `REGEXP` binary operators (#1017) --- src/ast/mod.rs | 21 +++++++++++++++++++++ src/keywords.rs | 2 ++ src/parser/mod.rs | 19 +++++++++++++++++-- src/test_utils.rs | 2 +- tests/sqlparser_mysql.rs | 12 ++++++++++++ 5 files changed, 53 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 
fc15efbc..3b003001 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -429,6 +429,14 @@ pub enum Expr { pattern: Box, escape_char: Option, }, + /// MySQL: RLIKE regex or REGEXP regex + RLike { + negated: bool, + expr: Box, + pattern: Box, + // true for REGEXP, false for RLIKE (no difference in semantics) + regexp: bool, + }, /// Any operation e.g. `foo > ANY(bar)`, comparison operator is one of [=, >, <, =>, =<, !=] AnyOp { left: Box, @@ -740,6 +748,19 @@ impl fmt::Display for Expr { pattern ), }, + Expr::RLike { + negated, + expr, + pattern, + regexp, + } => write!( + f, + "{} {}{} {}", + expr, + if *negated { "NOT " } else { "" }, + if *regexp { "REGEXP" } else { "RLIKE" }, + pattern + ), Expr::SimilarTo { negated, expr, diff --git a/src/keywords.rs b/src/keywords.rs index e1bbf44a..6327ccc8 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -498,6 +498,7 @@ define_keywords!( REFERENCES, REFERENCING, REGCLASS, + REGEXP, REGR_AVGX, REGR_AVGY, REGR_COUNT, @@ -524,6 +525,7 @@ define_keywords!( RETURNS, REVOKE, RIGHT, + RLIKE, ROLE, ROLLBACK, ROLLUP, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 829b299a..0065f798 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1932,10 +1932,21 @@ impl<'a> Parser<'a> { | Keyword::BETWEEN | Keyword::LIKE | Keyword::ILIKE - | Keyword::SIMILAR => { + | Keyword::SIMILAR + | Keyword::REGEXP + | Keyword::RLIKE => { self.prev_token(); let negated = self.parse_keyword(Keyword::NOT); - if self.parse_keyword(Keyword::IN) { + let regexp = self.parse_keyword(Keyword::REGEXP); + let rlike = self.parse_keyword(Keyword::RLIKE); + if regexp || rlike { + Ok(Expr::RLike { + negated, + expr: Box::new(expr), + pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?), + regexp, + }) + } else if self.parse_keyword(Keyword::IN) { self.parse_in(expr, negated) } else if self.parse_keyword(Keyword::BETWEEN) { self.parse_between(expr, negated) @@ -2178,6 +2189,8 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::BETWEEN 
=> Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), _ => Ok(0), }, @@ -2186,6 +2199,8 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::MUL_DIV_MOD_OP_PREC), diff --git a/src/test_utils.rs b/src/test_utils.rs index f0c5e425..76a3e073 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -111,7 +111,7 @@ impl TestedDialects { /// 2. 
re-serializing the result of parsing `sql` produces the same /// `canonical` sql string pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement { - let mut statements = self.parse_sql_statements(sql).unwrap(); + let mut statements = self.parse_sql_statements(sql).expect(sql); assert_eq!(statements.len(), 1); if !canonical.is_empty() && sql != canonical { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 80b9dcfd..6e59198d 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1454,6 +1454,18 @@ fn parse_show_variables() { mysql_and_generic().verified_stmt("SHOW VARIABLES WHERE value = '3306'"); } +#[test] +fn parse_rlike_and_regexp() { + for s in &[ + "SELECT 1 WHERE 'a' RLIKE '^a$'", + "SELECT 1 WHERE 'a' REGEXP '^a$'", + "SELECT 1 WHERE 'a' NOT RLIKE '^a$'", + "SELECT 1 WHERE 'a' NOT REGEXP '^a$'", + ] { + mysql_and_generic().verified_only_select(s); + } +} + #[test] fn parse_kill() { let stmt = mysql_and_generic().verified_stmt("KILL CONNECTION 5"); From 5c10668dbb60bccaf11f224013d333a48e32ec38 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Tue, 24 Oct 2023 01:37:31 +0400 Subject: [PATCH 070/109] Add support for UNION DISTINCT BY NAME syntax (#997) Co-authored-by: Andrew Lamb --- src/ast/query.rs | 5 +- src/parser/mod.rs | 4 +- tests/sqlparser_duckdb.rs | 232 ++++++++++++++------------------------ 3 files changed, 89 insertions(+), 152 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 88b0931d..824fab1b 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -120,7 +120,8 @@ impl fmt::Display for SetExpr { SetQuantifier::All | SetQuantifier::Distinct | SetQuantifier::ByName - | SetQuantifier::AllByName => write!(f, " {set_quantifier}")?, + | SetQuantifier::AllByName + | SetQuantifier::DistinctByName => write!(f, " {set_quantifier}")?, SetQuantifier::None => write!(f, "{set_quantifier}")?, } write!(f, " {right}")?; @@ -160,6 +161,7 @@ pub enum SetQuantifier { Distinct, ByName, 
AllByName, + DistinctByName, None, } @@ -170,6 +172,7 @@ impl fmt::Display for SetQuantifier { SetQuantifier::Distinct => write!(f, "DISTINCT"), SetQuantifier::ByName => write!(f, "BY NAME"), SetQuantifier::AllByName => write!(f, "ALL BY NAME"), + SetQuantifier::DistinctByName => write!(f, "DISTINCT BY NAME"), SetQuantifier::None => write!(f, ""), } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0065f798..68a8cef1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5696,7 +5696,9 @@ impl<'a> Parser<'a> { pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { match op { Some(SetOperator::Union) => { - if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { + if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + SetQuantifier::DistinctByName + } else if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { SetQuantifier::ByName } else if self.parse_keyword(Keyword::ALL) { if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index b05cc0dd..db11d1e7 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -132,155 +132,87 @@ fn test_create_table_macro() { #[test] fn test_select_union_by_name() { - let ast = duckdb().verified_query("SELECT * FROM capitals UNION BY NAME SELECT * FROM weather"); - let expected = Box::::new(SetExpr::SetOperation { - op: SetOperator::Union, - set_quantifier: SetQuantifier::ByName, - left: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "capitals".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - joins: vec![], - 
}], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - right: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "weather".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - joins: vec![], - }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - }); - - assert_eq!(ast.body, expected); + let q1 = "SELECT * FROM capitals UNION BY NAME SELECT * FROM weather"; + let q2 = "SELECT * FROM capitals UNION ALL BY NAME SELECT * FROM weather"; + let q3 = "SELECT * FROM capitals UNION DISTINCT BY NAME SELECT * FROM weather"; - let ast = - duckdb().verified_query("SELECT * FROM capitals UNION ALL BY NAME SELECT * FROM weather"); - let expected = Box::::new(SetExpr::SetOperation { - op: SetOperator::Union, - set_quantifier: SetQuantifier::AllByName, - left: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "capitals".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - joins: vec![], 
- }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - right: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "weather".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - joins: vec![], - }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - }); - assert_eq!(ast.body, expected); + for (ast, expected_quantifier) in &[ + (duckdb().verified_query(q1), SetQuantifier::ByName), + (duckdb().verified_query(q2), SetQuantifier::AllByName), + (duckdb().verified_query(q3), SetQuantifier::DistinctByName), + ] { + let expected = Box::::new(SetExpr::SetOperation { + op: SetOperator::Union, + set_quantifier: *expected_quantifier, + left: Box::::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: None, + opt_except: None, + opt_rename: None, + opt_replace: None, + })], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "capitals".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: None, + group_by: GroupByExpr::Expressions(vec![]), + cluster_by: vec![], 
+ distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + }))), + right: Box::::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: None, + opt_except: None, + opt_rename: None, + opt_replace: None, + })], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "weather".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: None, + group_by: GroupByExpr::Expressions(vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + }))), + }); + assert_eq!(ast.body, expected); + } } From 56f24ce2361bb2f9ee9d7566c3b1ce256ee02d8b Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Mon, 23 Oct 2023 14:50:45 -0700 Subject: [PATCH 071/109] Support subquery as function arg w/o parens in Snowflake dialect (#996) --- src/parser/mod.rs | 20 +++++++++++++++++++- tests/sqlparser_snowflake.rs | 20 ++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 68a8cef1..1c1d8b23 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1507,7 +1507,7 @@ impl<'a> Parser<'a> { within_group: false, })); } - // Snowflake defines ORDERY BY in within group instead of inside the function like + // Snowflake defines ORDER BY in within group instead of inside the function like // ANSI SQL. 
self.expect_token(&Token::RParen)?; let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { @@ -6914,6 +6914,24 @@ impl<'a> Parser<'a> { if self.consume_token(&Token::RParen) { Ok((vec![], vec![])) } else { + // Snowflake permits a subquery to be passed as an argument without + // an enclosing set of parens if it's the only argument. + if dialect_of!(self is SnowflakeDialect) + && self + .parse_one_of_keywords(&[Keyword::WITH, Keyword::SELECT]) + .is_some() + { + self.prev_token(); + let subquery = self.parse_query()?; + self.expect_token(&Token::RParen)?; + return Ok(( + vec![FunctionArg::Unnamed(FunctionArgExpr::from( + WildcardExpr::Expr(Expr::Subquery(Box::new(subquery))), + ))], + vec![], + )); + } + let args = self.parse_comma_separated(Parser::parse_function_args)?; let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { self.parse_comma_separated(Parser::parse_order_by_expr)? diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index bb988665..79c9eb1e 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1065,3 +1065,23 @@ fn test_snowflake_trim() { snowflake().parse_sql_statements(error_sql).unwrap_err() ); } + +#[test] +fn parse_subquery_function_argument() { + // Snowflake allows passing an unparenthesized subquery as the single + // argument to a function. + snowflake().one_statement_parses_to( + "SELECT parse_json(SELECT '{}')", + "SELECT parse_json((SELECT '{}'))", + ); + + // Subqueries that begin with WITH work too. + snowflake().one_statement_parses_to( + "SELECT parse_json(WITH q AS (SELECT '{}' AS foo) SELECT foo FROM q)", + "SELECT parse_json((WITH q AS (SELECT '{}' AS foo) SELECT foo FROM q))", + ); + + // Commas are parsed as part of the subquery, not additional arguments to + // the function. 
+ snowflake().one_statement_parses_to("SELECT func(SELECT 1, 2)", "SELECT func((SELECT 1, 2))"); +} From e857a452016d82dfc00398a5483ce9551dff9565 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Mon, 23 Oct 2023 23:55:11 +0200 Subject: [PATCH 072/109] Support `SELECT * EXCEPT/REPLACE` syntax from ClickHouse (#1013) --- src/ast/query.rs | 2 ++ src/parser/mod.rs | 37 +++++++++++++++++++++++------------ tests/sqlparser_clickhouse.rs | 18 +++++++++++++++++ 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 824fab1b..4289b0bd 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -434,11 +434,13 @@ pub struct WildcardAdditionalOptions { /// `[EXCLUDE...]`. pub opt_exclude: Option, /// `[EXCEPT...]`. + /// Clickhouse syntax: pub opt_except: Option, /// `[RENAME ...]`. pub opt_rename: Option, /// `[REPLACE]` /// BigQuery syntax: + /// Clickhouse syntax: pub opt_replace: Option, } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1c1d8b23..9e0d595c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6993,7 +6993,8 @@ impl<'a> Parser<'a> { } else { None }; - let opt_except = if dialect_of!(self is GenericDialect | BigQueryDialect) { + let opt_except = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect) + { self.parse_optional_select_item_except()? } else { None @@ -7004,7 +7005,8 @@ impl<'a> Parser<'a> { None }; - let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect) { + let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect) + { self.parse_optional_select_item_replace()? } else { None @@ -7047,18 +7049,27 @@ impl<'a> Parser<'a> { &mut self, ) -> Result, ParserError> { let opt_except = if self.parse_keyword(Keyword::EXCEPT) { - let idents = self.parse_parenthesized_column_list(Mandatory, false)?; - match &idents[..] 
{ - [] => { - return self.expected( - "at least one column should be parsed by the expect clause", - self.peek_token(), - )?; + if self.peek_token().token == Token::LParen { + let idents = self.parse_parenthesized_column_list(Mandatory, false)?; + match &idents[..] { + [] => { + return self.expected( + "at least one column should be parsed by the expect clause", + self.peek_token(), + )?; + } + [first, idents @ ..] => Some(ExceptSelectItem { + first_element: first.clone(), + additional_elements: idents.to_vec(), + }), } - [first, idents @ ..] => Some(ExceptSelectItem { - first_element: first.clone(), - additional_elements: idents.to_vec(), - }), + } else { + // Clickhouse allows EXCEPT column_name + let ident = self.parse_identifier()?; + Some(ExceptSelectItem { + first_element: ident, + additional_elements: vec![], + }) } } else { None diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 9efe4a36..8cca0da0 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -355,6 +355,24 @@ fn parse_limit_by() { ); } +#[test] +fn parse_select_star_except() { + clickhouse().verified_stmt("SELECT * EXCEPT (prev_status) FROM anomalies"); +} + +#[test] +fn parse_select_star_except_no_parens() { + clickhouse().one_statement_parses_to( + "SELECT * EXCEPT prev_status FROM anomalies", + "SELECT * EXCEPT (prev_status) FROM anomalies", + ); +} + +#[test] +fn parse_select_star_replace() { + clickhouse().verified_stmt("SELECT * REPLACE (i + 1 AS i) FROM columns_transformers"); +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], From ce62fe6d274d354fef34fad919b58f6ba16c61a3 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Tue, 24 Oct 2023 00:06:39 +0200 Subject: [PATCH 073/109] Support `FILTER` in over clause (#1007) Co-authored-by: Andrew Lamb --- README.md | 2 +- src/ast/mod.rs | 9 +++++++++ src/ast/visitor.rs | 2 +- src/dialect/sqlite.rs | 4 ++++ src/parser/mod.rs | 14 
++++++++++++++ tests/sqlparser_bigquery.rs | 1 + tests/sqlparser_clickhouse.rs | 4 ++++ tests/sqlparser_common.rs | 19 +++++++++++++++++++ tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 1 + tests/sqlparser_mysql.rs | 6 ++++++ tests/sqlparser_postgres.rs | 6 ++++++ tests/sqlparser_redshift.rs | 1 + tests/sqlparser_snowflake.rs | 1 + tests/sqlparser_sqlite.rs | 33 +++++++++++++++++++++++++++++++++ 15 files changed, 102 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 454ea6c2..e987c2a2 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ println!("AST: {:?}", ast); This outputs ```rust -AST: [Query(Query { ctes: [], body: Select(Select { distinct: false, projection: [UnnamedExpr(Identifier("a")), UnnamedExpr(Identifier("b")), UnnamedExpr(Value(Long(123))), UnnamedExpr(Function(Function { name: ObjectName(["myfunc"]), args: [Identifier("b")], over: None, distinct: false }))], from: [TableWithJoins { relation: Table { name: ObjectName(["table_1"]), alias: None, args: [], with_hints: [] }, joins: [] }], selection: Some(BinaryOp { left: BinaryOp { left: Identifier("a"), op: Gt, right: Identifier("b") }, op: And, right: BinaryOp { left: Identifier("b"), op: Lt, right: Value(Long(100)) } }), group_by: [], having: None }), order_by: [OrderByExpr { expr: Identifier("a"), asc: Some(false) }, OrderByExpr { expr: Identifier("b"), asc: None }], limit: None, offset: None, fetch: None })] +AST: [Query(Query { ctes: [], body: Select(Select { distinct: false, projection: [UnnamedExpr(Identifier("a")), UnnamedExpr(Identifier("b")), UnnamedExpr(Value(Long(123))), UnnamedExpr(Function(Function { name: ObjectName(["myfunc"]), args: [Identifier("b")], filter: None, over: None, distinct: false }))], from: [TableWithJoins { relation: Table { name: ObjectName(["table_1"]), alias: None, args: [], with_hints: [] }, joins: [] }], selection: Some(BinaryOp { left: BinaryOp { left: Identifier("a"), op: Gt, right: Identifier("b") }, op: And, right: BinaryOp { 
left: Identifier("b"), op: Lt, right: Value(Long(100)) } }), group_by: [], having: None }), order_by: [OrderByExpr { expr: Identifier("a"), asc: Some(false) }, OrderByExpr { expr: Identifier("b"), asc: None }], limit: None, offset: None, fetch: None })] ``` diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 3b003001..11ce9b81 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1070,8 +1070,11 @@ impl Display for WindowType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct WindowSpec { + /// `OVER (PARTITION BY ...)` pub partition_by: Vec, + /// `OVER (ORDER BY ...)` pub order_by: Vec, + /// `OVER (window frame)` pub window_frame: Option, } @@ -3729,6 +3732,8 @@ impl fmt::Display for CloseCursor { pub struct Function { pub name: ObjectName, pub args: Vec, + /// e.g. `x > 5` in `COUNT(x) FILTER (WHERE x > 5)` + pub filter: Option>, pub over: Option, // aggregate functions may specify eg `COUNT(DISTINCT x)` pub distinct: bool, @@ -3777,6 +3782,10 @@ impl fmt::Display for Function { display_comma_separated(&self.order_by), )?; + if let Some(filter_cond) = &self.filter { + write!(f, " FILTER (WHERE {filter_cond})")?; + } + if let Some(o) = &self.over { write!(f, " OVER {o}")?; } diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 09cb20a0..4e025f96 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -506,7 +506,7 @@ where /// *expr = Expr::Function(Function { /// name: ObjectName(vec![Ident::new("f")]), /// args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(old_expr))], -/// over: None, distinct: false, special: false, order_by: vec![], +/// filter: None, over: None, distinct: false, special: false, order_by: vec![], /// }); /// } /// ControlFlow::<()>::Continue(()) diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index fa21224f..37c7c7fa 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -35,6 +35,10 @@ impl Dialect for SQLiteDialect { || 
('\u{007f}'..='\u{ffff}').contains(&ch) } + fn supports_filter_during_aggregation(&self) -> bool { + true + } + fn is_identifier_part(&self, ch: char) -> bool { self.is_identifier_start(ch) || ch.is_ascii_digit() } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9e0d595c..3bf5228c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -772,6 +772,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name: ObjectName(vec![w.to_ident()]), args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -957,6 +958,17 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let distinct = self.parse_all_or_distinct()?.is_some(); let (args, order_by) = self.parse_optional_args_with_orderby()?; + let filter = if self.dialect.supports_filter_during_aggregation() + && self.parse_keyword(Keyword::FILTER) + && self.consume_token(&Token::LParen) + && self.parse_keyword(Keyword::WHERE) + { + let filter = Some(Box::new(self.parse_expr()?)); + self.expect_token(&Token::RParen)?; + filter + } else { + None + }; let over = if self.parse_keyword(Keyword::OVER) { if self.consume_token(&Token::LParen) { let window_spec = self.parse_window_spec()?; @@ -970,6 +982,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name, args, + filter, over, distinct, special: false, @@ -987,6 +1000,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name, args, + filter: None, over: None, distinct: false, special, diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index b3f683b9..fe95b187 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -564,6 +564,7 @@ fn parse_map_access_offset() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( number("0") ))),], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 8cca0da0..7d9cb030 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -50,6 
+50,7 @@ fn parse_map_access_expr() { Value::SingleQuotedString("endpoint".to_string()) ))), ], + filter: None, over: None, distinct: false, special: false, @@ -89,6 +90,7 @@ fn parse_map_access_expr() { Value::SingleQuotedString("app".to_string()) ))), ], + filter: None, over: None, distinct: false, special: false, @@ -138,6 +140,7 @@ fn parse_array_fn() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new("x1")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new("x2")))), ], + filter: None, over: None, distinct: false, special: false, @@ -196,6 +199,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3b8775e4..9eb52f6e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -875,6 +875,7 @@ fn parse_select_count_wildcard() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], + filter: None, over: None, distinct: false, special: false, @@ -895,6 +896,7 @@ fn parse_select_count_distinct() { op: UnaryOperator::Plus, expr: Box::new(Expr::Identifier(Ident::new("x"))), }))], + filter: None, over: None, distinct: true, special: false, @@ -1862,6 +1864,7 @@ fn parse_select_having() { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], + filter: None, over: None, distinct: false, special: false, @@ -1887,6 +1890,7 @@ fn parse_select_qualify() { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("ROW_NUMBER")]), args: vec![], + filter: None, over: Some(WindowType::WindowSpec(WindowSpec { partition_by: vec![Expr::Identifier(Ident::new("p"))], order_by: vec![OrderByExpr { @@ -3342,6 +3346,7 @@ fn 
parse_scalar_function_in_projection() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("id")) ))], + filter: None, over: None, distinct: false, special: false, @@ -3461,6 +3466,7 @@ fn parse_named_argument_function() { ))), }, ], + filter: None, over: None, distinct: false, special: false, @@ -3492,6 +3498,7 @@ fn parse_window_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("row_number")]), args: vec![], + filter: None, over: Some(WindowType::WindowSpec(WindowSpec { partition_by: vec![], order_by: vec![OrderByExpr { @@ -3535,6 +3542,7 @@ fn test_parse_named_window() { quote_style: None, }), ))], + filter: None, over: Some(WindowType::NamedWindow(Ident { value: "window1".to_string(), quote_style: None, @@ -3560,6 +3568,7 @@ fn test_parse_named_window() { quote_style: None, }), ))], + filter: None, over: Some(WindowType::NamedWindow(Ident { value: "window2".to_string(), quote_style: None, @@ -4029,6 +4038,7 @@ fn parse_at_timezone() { quote_style: None, }]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(zero.clone()))], + filter: None, over: None, distinct: false, special: false, @@ -4056,6 +4066,7 @@ fn parse_at_timezone() { quote_style: None, },],), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(zero))], + filter: None, over: None, distinct: false, special: false, @@ -4067,6 +4078,7 @@ fn parse_at_timezone() { Value::SingleQuotedString("%Y-%m-%dT%H".to_string()), ),),), ], + filter: None, over: None, distinct: false, special: false, @@ -4225,6 +4237,7 @@ fn parse_table_function() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( Value::SingleQuotedString("1".to_owned()), )))], + filter: None, over: None, distinct: false, special: false, @@ -4376,6 +4389,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("2")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("3")))), ], + filter: None, over: None, 
distinct: false, special: false, @@ -4405,6 +4419,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("2")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("3")))), ], + filter: None, over: None, distinct: false, special: false, @@ -4416,6 +4431,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("5")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("6")))), ], + filter: None, over: None, distinct: false, special: false, @@ -6888,6 +6904,7 @@ fn parse_time_functions() { let select_localtime_func_call_ast = Function { name: ObjectName(vec![Ident::new(func_name)]), args: vec![], + filter: None, over: None, distinct: false, special: false, @@ -7374,6 +7391,7 @@ fn parse_pivot_table() { args: (vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::CompoundIdentifier(vec![Ident::new("a"), Ident::new("amount"),]) ))]), + filter: None, over: None, distinct: false, special: false, @@ -7523,6 +7541,7 @@ fn parse_pivot_unpivot_table() { args: (vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("population")) ))]), + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 6ca47e12..6f3a8f99 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -346,6 +346,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index f9eb4d8f..ebadf95f 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -334,6 +334,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git 
a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 6e59198d..3bcb8443 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1071,6 +1071,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("description")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1084,6 +1085,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_create")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1097,6 +1099,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_read")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1110,6 +1113,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_update")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1123,6 +1127,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_delete")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1512,6 +1517,7 @@ fn parse_table_colum_option_on_update() { option: ColumnOption::OnUpdate(Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_TIMESTAMP")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 65472366..0256579d 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2275,6 +2275,7 @@ fn test_composite_value() { named: true } )))], + filter: None, over: None, distinct: false, special: false, @@ -2436,6 +2437,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_CATALOG")]), 
args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -2447,6 +2449,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_USER")]), args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -2458,6 +2461,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("SESSION_USER")]), args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -2469,6 +2473,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("USER")]), args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -2919,6 +2924,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 5ae539b3..6238d1ec 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -137,6 +137,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 79c9eb1e..3319af7b 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -248,6 +248,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 39a82cc8..8d7ccf31 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -290,6 +290,39 @@ fn parse_create_table_with_strict() { } } +#[test] +fn parse_window_function_with_filter() { + for func_name in [ + "row_number", + "rank", + 
"max", + "count", + "user_defined_function", + ] { + let sql = format!("SELECT {}(x) FILTER (WHERE y) OVER () FROM t", func_name); + let select = sqlite().verified_only_select(&sql); + assert_eq!(select.to_string(), sql); + assert_eq!( + select.projection, + vec![SelectItem::UnnamedExpr(Expr::Function(Function { + name: ObjectName(vec![Ident::new(func_name)]), + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("x")) + ))], + over: Some(WindowType::WindowSpec(WindowSpec { + partition_by: vec![], + order_by: vec![], + window_frame: None, + })), + filter: Some(Box::new(Expr::Identifier(Ident::new("y")))), + distinct: false, + special: false, + order_by: vec![] + }))] + ); + } +} + #[test] fn parse_attach_database() { let sql = "ATTACH DATABASE 'test.db' AS test"; From 2798b65b42c529bd089742a2028e94d59d82e493 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 23 Oct 2023 18:07:00 -0400 Subject: [PATCH 074/109] snowflake/generic: `position` can be the name of a column (#1022) Co-authored-by: Lukasz Stefaniak --- src/parser/mod.rs | 4 +++- tests/sqlparser_snowflake.rs | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3bf5228c..e79f31ba 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -794,7 +794,9 @@ impl<'a> Parser<'a> { Keyword::EXTRACT => self.parse_extract_expr(), Keyword::CEIL => self.parse_ceil_floor_expr(true), Keyword::FLOOR => self.parse_ceil_floor_expr(false), - Keyword::POSITION => self.parse_position_expr(), + Keyword::POSITION if self.peek_token().token == Token::LParen => { + self.parse_position_expr() + } Keyword::SUBSTRING => self.parse_substring_expr(), Keyword::OVERLAY => self.parse_overlay_expr(), Keyword::TRIM => self.parse_trim_expr(), diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 3319af7b..7e6f1813 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1067,6 +1067,12 @@ fn 
test_snowflake_trim() { ); } +#[test] +fn parse_position_not_function_columns() { + snowflake_and_generic() + .verified_stmt("SELECT position FROM tbl1 WHERE position NOT IN ('first', 'last')"); +} + #[test] fn parse_subquery_function_argument() { // Snowflake allows passing an unparenthesized subquery as the single From 8b2a248d7b90edce93e8c443d31a790d553fc0c2 Mon Sep 17 00:00:00 2001 From: Ilya Date: Tue, 24 Oct 2023 01:07:39 +0300 Subject: [PATCH 075/109] parse SQLite pragma statement (#969) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 18 ++++++++++++++++ src/keywords.rs | 1 + src/parser/mod.rs | 28 ++++++++++++++++++++++++ tests/sqlparser_sqlite.rs | 45 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 11ce9b81..5aa42c96 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1914,6 +1914,12 @@ pub enum Statement { name: ObjectName, representation: UserDefinedTypeRepresentation, }, + // PRAGMA . = + Pragma { + name: ObjectName, + value: Option, + is_eq: bool, + }, } impl fmt::Display for Statement { @@ -3276,6 +3282,18 @@ impl fmt::Display for Statement { } => { write!(f, "CREATE TYPE {name} AS {representation}") } + Statement::Pragma { name, value, is_eq } => { + write!(f, "PRAGMA {name}")?; + if value.is_some() { + let val = value.as_ref().unwrap(); + if *is_eq { + write!(f, " = {val}")?; + } else { + write!(f, "({val})")?; + } + } + Ok(()) + } } } } diff --git a/src/keywords.rs b/src/keywords.rs index 6327ccc8..40520360 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -472,6 +472,7 @@ define_keywords!( POSITION, POSITION_REGEX, POWER, + PRAGMA, PRECEDES, PRECEDING, PRECISION, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e79f31ba..f83f019e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -491,6 +491,8 @@ impl<'a> Parser<'a> { Keyword::EXECUTE => Ok(self.parse_execute()?), Keyword::PREPARE => Ok(self.parse_prepare()?), Keyword::MERGE => 
Ok(self.parse_merge()?), + // `PRAGMA` is sqlite specific https://www.sqlite.org/pragma.html + Keyword::PRAGMA => Ok(self.parse_pragma()?), _ => self.expected("an SQL statement", next_token), }, Token::LParen => { @@ -7502,6 +7504,32 @@ impl<'a> Parser<'a> { }) } + // PRAGMA [schema-name '.'] pragma-name [('=' pragma-value) | '(' pragma-value ')'] + pub fn parse_pragma(&mut self) -> Result { + let name = self.parse_object_name()?; + if self.consume_token(&Token::LParen) { + let value = self.parse_number_value()?; + self.expect_token(&Token::RParen)?; + Ok(Statement::Pragma { + name, + value: Some(value), + is_eq: false, + }) + } else if self.consume_token(&Token::Eq) { + Ok(Statement::Pragma { + name, + value: Some(self.parse_number_value()?), + is_eq: true, + }) + } else { + Ok(Statement::Pragma { + name, + value: None, + is_eq: false, + }) + } + } + /// ```sql /// CREATE [ { TEMPORARY | TEMP } ] SEQUENCE [ IF NOT EXISTS ] /// ``` diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 8d7ccf31..2fdd4e3d 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -24,6 +24,51 @@ use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, SQLiteDialect}; use sqlparser::tokenizer::Token; +#[test] +fn pragma_no_value() { + let sql = "PRAGMA cache_size"; + match sqlite_and_generic().verified_stmt(sql) { + Statement::Pragma { + name, + value: None, + is_eq: false, + } => { + assert_eq!("cache_size", name.to_string()); + } + _ => unreachable!(), + } +} +#[test] +fn pragma_eq_style() { + let sql = "PRAGMA cache_size = 10"; + match sqlite_and_generic().verified_stmt(sql) { + Statement::Pragma { + name, + value: Some(val), + is_eq: true, + } => { + assert_eq!("cache_size", name.to_string()); + assert_eq!("10", val.to_string()); + } + _ => unreachable!(), + } +} +#[test] +fn pragma_funciton_style() { + let sql = "PRAGMA cache_size(10)"; + match sqlite_and_generic().verified_stmt(sql) { + Statement::Pragma { + name, + value: Some(val), + 
is_eq: false, + } => { + assert_eq!("cache_size", name.to_string()); + assert_eq!("10", val.to_string()); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_table_without_rowid() { let sql = "CREATE TABLE t (a INT) WITHOUT ROWID"; From 6739d377bd2c5acfbc4d4631651ee7a857caefec Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 23 Oct 2023 18:09:02 -0400 Subject: [PATCH 076/109] Add docstrings for `Dialect`s, update README (#1016) --- README.md | 31 +++++++++++++++++++++---------- src/dialect/ansi.rs | 1 + src/dialect/bigquery.rs | 1 + src/dialect/clickhouse.rs | 1 + src/dialect/duckdb.rs | 1 + src/dialect/generic.rs | 2 ++ src/dialect/hive.rs | 1 + src/dialect/mod.rs | 3 +++ src/dialect/mssql.rs | 2 +- src/dialect/mysql.rs | 2 +- src/dialect/postgresql.rs | 1 + src/dialect/redshift.rs | 1 + src/dialect/snowflake.rs | 1 + src/dialect/sqlite.rs | 1 + 14 files changed, 37 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index e987c2a2..58f5b8d4 100644 --- a/README.md +++ b/README.md @@ -124,28 +124,36 @@ parser](docs/custom_sql_parser.md). ## Contributing Contributions are highly encouraged! However, the bandwidth we have to -maintain this crate is fairly limited. +maintain this crate is limited. Please read the following sections carefully. -Pull requests that add support for or fix a bug in a feature in the -SQL standard, or a feature in a popular RDBMS, like Microsoft SQL +### New Syntax + +The most commonly accepted PRs add support for or fix a bug in a feature in the +SQL standard, or a a popular RDBMS, such as Microsoft SQL Server or PostgreSQL, will likely be accepted after a brief -review. +review. Any SQL feature that is dialect specific should be parsed by *both* the relevant [`Dialect`] +as well as [`GenericDialect`]. + +### Major API Changes The current maintainers do not plan for any substantial changes to -this crate's API at this time. And thus, PRs proposing major refactors +this crate's API. 
PRs proposing major refactors are not likely to be accepted. -Please be aware that, while we hope to review PRs in a reasonably -timely fashion, it may take a while. In order to speed the process, +### Testing + +While we hope to review PRs in a reasonably +timely fashion, it may take a week or more. In order to speed the process, please make sure the PR passes all CI checks, and includes tests demonstrating your code works as intended (and to avoid regressions). Remember to also test error paths. PRs without tests will not be reviewed or merged. Since the CI ensures that `cargo test`, `cargo fmt`, and `cargo clippy`, pass you -will likely want to run all three commands locally before submitting +should likely to run all three commands locally before submitting your PR. +### Filing Issues If you are unable to submit a patch, feel free to file an issue instead. Please try to include: @@ -156,8 +164,9 @@ try to include: * links to documentation for the feature for a few of the most popular databases that support it. -If you need support for a feature, you will likely need to implement -it yourself. Our goal as maintainers is to facilitate the integration +Unfortunately, if you need support for a feature, you will likely need to implement +it yourself, or file a well enough described ticket that another member of the community can do so. +Our goal as maintainers is to facilitate the integration of various features from various contributors, but not to provide the implementations ourselves, as we simply don't have the resources. @@ -183,3 +192,5 @@ licensed as above, without any additional terms or conditions. 
[Pratt Parser]: https://tdop.github.io/ [sql-2016-grammar]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html [sql-standard]: https://en.wikipedia.org/wiki/ISO/IEC_9075 +[`Dialect`]: https://docs.rs/sqlparser/latest/sqlparser/dialect/trait.Dialect.html +[`GenericDialect`]: https://docs.rs/sqlparser/latest/sqlparser/dialect/struct.GenericDialect.html diff --git a/src/dialect/ansi.rs b/src/dialect/ansi.rs index 14c83ae1..d07bc07e 100644 --- a/src/dialect/ansi.rs +++ b/src/dialect/ansi.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +/// A [`Dialect`] for [ANSI SQL](https://en.wikipedia.org/wiki/SQL:2011). #[derive(Debug)] pub struct AnsiDialect {} diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 8266a32f..46f27fea 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +/// A [`Dialect`] for [Google Bigquery](https://cloud.google.com/bigquery/) #[derive(Debug, Default)] pub struct BigQueryDialect; diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 395116f9..50fbde99 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +// A [`Dialect`] for [ClickHouse](https://clickhouse.com/). #[derive(Debug)] pub struct ClickHouseDialect {} diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index 4e6e9d9a..a4f9309e 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +/// A [`Dialect`] for [DuckDB](https://duckdb.org/) #[derive(Debug, Default)] pub struct DuckDbDialect; diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 8310954c..4be4b9e2 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -12,6 +12,8 @@ use crate::dialect::Dialect; +/// A permissive, general purpose [`Dialect`], which parses a wide variety of SQL +/// statements, from many different dialects. 
#[derive(Debug, Default)] pub struct GenericDialect; diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 96cefb1d..20800c1d 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +/// A [`Dialect`] for [Hive](https://hive.apache.org/). #[derive(Debug)] pub struct HiveDialect {} diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index e174528b..625f9ce0 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -64,6 +64,9 @@ macro_rules! dialect_of { /// custom extensions or various historical reasons. This trait /// encapsulates the parsing differences between dialects. /// +/// [`GenericDialect`] is the most permissive dialect, and parses the union of +/// all the other dialects, when there is no ambiguity. +/// /// # Examples /// Most users create a [`Dialect`] directly, as shown on the [module /// level documentation]: diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index f0439810..26ecd478 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -12,7 +12,7 @@ use crate::dialect::Dialect; -// [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/) dialect +/// A [`Dialect`] for [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/) #[derive(Debug)] pub struct MsSqlDialect {} diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 0f914ed0..8c3de74b 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -19,7 +19,7 @@ use crate::{ keywords::Keyword, }; -/// [MySQL](https://www.mysql.com/) +/// A [`Dialect`] for [MySQL](https://www.mysql.com/) #[derive(Debug)] pub struct MySqlDialect {} diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index d131ff9c..a0b192c8 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -16,6 +16,7 @@ use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; use crate::tokenizer::Token; +/// A [`Dialect`] for [PostgreSQL](https://www.postgresql.org/) #[derive(Debug)] 
pub struct PostgreSqlDialect {} diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index c85f3dc2..73457ab3 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -16,6 +16,7 @@ use core::str::Chars; use super::PostgreSqlDialect; +/// A [`Dialect`] for [RedShift](https://aws.amazon.com/redshift/) #[derive(Debug)] pub struct RedshiftSqlDialect {} diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 713394a1..33425e84 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -28,6 +28,7 @@ use alloc::vec::Vec; #[cfg(not(feature = "std"))] use alloc::{format, vec}; +/// A [`Dialect`] for [Snowflake](https://www.snowflake.com/) #[derive(Debug, Default)] pub struct SnowflakeDialect; diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 37c7c7fa..68515d24 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -15,6 +15,7 @@ use crate::dialect::Dialect; use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; +/// A [`Dialect`] for [SQLite](https://www.sqlite.org) #[derive(Debug)] pub struct SQLiteDialect {} From 86aa1b96be1c1fbf56cbe7cb04e12370df53605c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mehmet=20Emin=20KARAKA=C5=9E?= Date: Tue, 24 Oct 2023 12:45:25 +0300 Subject: [PATCH 077/109] Support `INSERT IGNORE` in `MySql` and `GenericDialect` (#1004) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 6 +++++- src/parser/mod.rs | 4 ++++ tests/sqlparser_mysql.rs | 41 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 5aa42c96..17f6d3a0 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1296,6 +1296,8 @@ pub enum Statement { Insert { /// Only for Sqlite or: Option, + /// Only for mysql + ignore: bool, /// INTO - optional keyword into: bool, /// TABLE @@ -2126,6 +2128,7 @@ impl fmt::Display for Statement { } Statement::Insert { or, + ignore, into, table_name, overwrite, @@ -2142,8 +2145,9 @@ 
impl fmt::Display for Statement { } else { write!( f, - "INSERT{over}{int}{tbl} {table_name} ", + "INSERT{ignore}{over}{int}{tbl} {table_name} ", table_name = table_name, + ignore = if *ignore { " IGNORE" } else { "" }, over = if *overwrite { " OVERWRITE" } else { "" }, int = if *into { " INTO" } else { "" }, tbl = if *table { " TABLE" } else { "" } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f83f019e..d0b11ffe 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6755,6 +6755,9 @@ impl<'a> Parser<'a> { None }; + let ignore = dialect_of!(self is MySqlDialect | GenericDialect) + && self.parse_keyword(Keyword::IGNORE); + let action = self.parse_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE]); let into = action == Some(Keyword::INTO); let overwrite = action == Some(Keyword::OVERWRITE); @@ -6852,6 +6855,7 @@ impl<'a> Parser<'a> { Ok(Statement::Insert { or, table_name, + ignore, into, overwrite, partitioned, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 3bcb8443..8391bbad 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -972,6 +972,47 @@ fn parse_simple_insert() { } } +#[test] +fn parse_ignore_insert() { + let sql = r"INSERT IGNORE INTO tasks (title, priority) VALUES ('Test Some Inserts', 1)"; + + match mysql_and_generic().verified_stmt(sql) { + Statement::Insert { + table_name, + columns, + source, + on, + ignore, + .. 
+ } => { + assert_eq!(ObjectName(vec![Ident::new("tasks")]), table_name); + assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); + assert!(on.is_none()); + assert!(ignore); + assert_eq!( + Box::new(Query { + with: None, + body: Box::new(SetExpr::Values(Values { + explicit_row: false, + rows: vec![vec![ + Expr::Value(Value::SingleQuotedString("Test Some Inserts".to_string())), + Expr::Value(number("1")) + ]] + })), + order_by: vec![], + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![] + }), + source + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_empty_row_insert() { let sql = "INSERT INTO tb () VALUES (), ()"; From 57090537f0b2984681ff9333c57f8a8ce7c995cb Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Tue, 24 Oct 2023 12:30:05 +0200 Subject: [PATCH 078/109] Test that `regexp` can be used as an identifier in postgres (#1018) --- tests/sqlparser_postgres.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 0256579d..64fcbd38 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3390,6 +3390,13 @@ fn parse_truncate() { ); } +#[test] +fn parse_select_regexp_as_column_name() { + pg_and_generic().verified_only_select( + "SELECT REGEXP.REGEXP AS REGEXP FROM REGEXP AS REGEXP WHERE REGEXP.REGEXP", + ); +} + #[test] fn parse_create_table_with_alias() { let sql = "CREATE TABLE public.datatype_aliases From 9832adb37651da83483263cd652ff6ab01a7060f Mon Sep 17 00:00:00 2001 From: Chris A Date: Tue, 24 Oct 2023 05:33:51 -0500 Subject: [PATCH 079/109] Support "with" identifiers surrounded by backticks in `GenericDialect` (#1010) --- src/dialect/mod.rs | 2 +- tests/sqlparser_hive.rs | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 625f9ce0..856cfe1c 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -95,7 +95,7 @@ pub trait 
Dialect: Debug + Any { /// MySQL, MS SQL, and sqlite). You can accept one of characters listed /// in `Word::matching_end_quote` here fn is_delimited_identifier_start(&self, ch: char) -> bool { - ch == '"' + ch == '"' || ch == '`' } /// Determine if quoted characters are proper for identifier fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable>) -> bool { diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 6f3a8f99..f63b9cef 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -20,7 +20,7 @@ use sqlparser::ast::{ SelectItem, Statement, TableFactor, UnaryOperator, Value, }; use sqlparser::dialect::{GenericDialect, HiveDialect}; -use sqlparser::parser::ParserError; +use sqlparser::parser::{ParserError, ParserOptions}; use sqlparser::test_utils::*; #[test] @@ -32,6 +32,20 @@ fn parse_table_create() { hive().verified_stmt(iof); } +fn generic(options: Option) -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(GenericDialect {})], + options, + } +} + +#[test] +fn parse_describe() { + let describe = r#"DESCRIBE namespace.`table`"#; + hive().verified_stmt(describe); + generic(None).verified_stmt(describe); +} + #[test] fn parse_insert_overwrite() { let insert_partitions = r#"INSERT OVERWRITE TABLE db.new_table PARTITION (a = '1', b) SELECT a, b, c FROM db.table"#; @@ -265,13 +279,8 @@ fn parse_create_function() { _ => unreachable!(), } - let generic = TestedDialects { - dialects: vec![Box::new(GenericDialect {})], - options: None, - }; - assert_eq!( - generic.parse_sql_statements(sql).unwrap_err(), + generic(None).parse_sql_statements(sql).unwrap_err(), ParserError::ParserError( "Expected an object type after CREATE, found: FUNCTION".to_string() ) From 004a8dc5ddbbbfc0935c09fb572cd6161af33525 Mon Sep 17 00:00:00 2001 From: Chris A Date: Tue, 24 Oct 2023 06:19:01 -0500 Subject: [PATCH 080/109] Support multiple `PARTITION` statements in `ALTER TABLE ADD` statement (#1011) Co-authored-by: Chris A Co-authored-by: 
Andrew Lamb --- src/ast/ddl.rs | 24 +++++++++++++++++++++--- src/ast/mod.rs | 5 +++-- src/dialect/generic.rs | 4 ++++ src/parser/mod.rs | 22 +++++++++++++++++----- tests/sqlparser_hive.rs | 6 ++++++ 5 files changed, 51 insertions(+), 10 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index f1575d97..da2c8c9e 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -69,7 +69,7 @@ pub enum AlterTableOperation { /// Add Partitions AddPartitions { if_not_exists: bool, - new_partitions: Vec, + new_partitions: Vec, }, DropPartitions { partitions: Vec, @@ -119,8 +119,8 @@ impl fmt::Display for AlterTableOperation { new_partitions, } => write!( f, - "ADD{ine} PARTITION ({})", - display_comma_separated(new_partitions), + "ADD{ine} {}", + display_separated(new_partitions, " "), ine = if *if_not_exists { " IF NOT EXISTS" } else { "" } ), AlterTableOperation::AddConstraint(c) => write!(f, "ADD {c}"), @@ -771,3 +771,21 @@ impl fmt::Display for UserDefinedTypeCompositeAttributeDef { Ok(()) } } + +/// PARTITION statement used in ALTER TABLE et al. 
such as in Hive SQL +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Partition { + pub partitions: Vec, +} + +impl fmt::Display for Partition { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "PARTITION ({})", + display_comma_separated(&self.partitions) + ) + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 17f6d3a0..4c69d3ed 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -31,8 +31,9 @@ pub use self::data_type::{ pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue}; pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption, - ColumnOptionDef, GeneratedAs, IndexType, KeyOrIndexDisplay, ProcedureParam, ReferentialAction, - TableConstraint, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, + ColumnOptionDef, GeneratedAs, IndexType, KeyOrIndexDisplay, Partition, ProcedureParam, + ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef, + UserDefinedTypeRepresentation, }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 4be4b9e2..ea5cc6c3 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -18,6 +18,10 @@ use crate::dialect::Dialect; pub struct GenericDialect; impl Dialect for GenericDialect { + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '"' || ch == '`' + } + fn is_identifier_start(&self, ch: char) -> bool { ch.is_alphabetic() || ch == '_' || ch == '#' || ch == '@' } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d0b11ffe..8930b0f4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4195,6 +4195,13 @@ impl<'a> Parser<'a> { Ok(SqlOption { name, value }) } + pub fn parse_partition(&mut self) -> Result { + 
self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Ok(Partition { partitions }) + } + pub fn parse_alter_table_operation(&mut self) -> Result { let operation = if self.parse_keyword(Keyword::ADD) { if let Some(constraint) = self.parse_optional_table_constraint()? { @@ -4202,13 +4209,18 @@ impl<'a> Parser<'a> { } else { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; + let mut new_partitions = vec![]; + loop { + if self.parse_keyword(Keyword::PARTITION) { + new_partitions.push(self.parse_partition()?); + } else { + break; + } + } + if !new_partitions.is_empty() { AlterTableOperation::AddPartitions { if_not_exists, - new_partitions: partitions, + new_partitions, } } else { let column_keyword = self.parse_keyword(Keyword::COLUMN); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index f63b9cef..534a224e 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -128,6 +128,12 @@ fn test_add_partition() { hive().verified_stmt(add); } +#[test] +fn test_add_multiple_partitions() { + let add = "ALTER TABLE db.table ADD IF NOT EXISTS PARTITION (`a` = 'asdf', `b` = 2) PARTITION (`a` = 'asdh', `b` = 3)"; + hive().verified_stmt(add); +} + #[test] fn test_drop_partition() { let drop = "ALTER TABLE db.table DROP PARTITION (a = 1)"; From c5a7d6ccb97292ace1399f24f88dbb1027c0987f Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Tue, 24 Oct 2023 13:20:12 +0200 Subject: [PATCH 081/109] Support for single-quoted identifiers (#1021) Co-authored-by: Andrew Lamb --- src/parser/mod.rs | 38 ++++++++++++++++++++++++++------------ tests/sqlparser_sqlite.rs | 5 +++++ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/parser/mod.rs 
b/src/parser/mod.rs index 8930b0f4..eb7c4a00 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -620,18 +620,29 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { - Token::Word(w) if self.peek_token().token == Token::Period => { - let mut id_parts: Vec = vec![w.to_ident()]; - - while self.consume_token(&Token::Period) { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => id_parts.push(w.to_ident()), - Token::Mul => { - return Ok(WildcardExpr::QualifiedWildcard(ObjectName(id_parts))); - } - _ => { - return self.expected("an identifier or a '*' after '.'", next_token); + t @ (Token::Word(_) | Token::SingleQuotedString(_)) => { + if self.peek_token().token == Token::Period { + let mut id_parts: Vec = vec![match t { + Token::Word(w) => w.to_ident(), + Token::SingleQuotedString(s) => Ident::with_quote('\'', s), + _ => unreachable!(), // We matched above + }]; + + while self.consume_token(&Token::Period) { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => id_parts.push(w.to_ident()), + Token::SingleQuotedString(s) => { + // SQLite has single-quoted identifiers + id_parts.push(Ident::with_quote('\'', s)) + } + Token::Mul => { + return Ok(WildcardExpr::QualifiedWildcard(ObjectName(id_parts))); + } + _ => { + return self + .expected("an identifier or a '*' after '.'", next_token); + } } } } @@ -830,6 +841,9 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { Token::Word(w) => id_parts.push(w.to_ident()), + Token::SingleQuotedString(s) => { + id_parts.push(Ident::with_quote('\'', s)) + } _ => { return self .expected("an identifier or a '*' after '.'", next_token); diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 2fdd4e3d..b657acdd 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -335,6 +335,11 @@ fn parse_create_table_with_strict() { } } +#[test] +fn parse_single_quoted_identified() { 
+ sqlite().verified_only_select("SELECT 't'.*, t.'x' FROM 't'"); + // TODO: add support for select 't'.x +} #[test] fn parse_window_function_with_filter() { for func_name in [ From 8262abcd311e2b129cfba369e7332efb833db188 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 24 Oct 2023 07:35:59 -0400 Subject: [PATCH 082/109] Improve documentation on Parser::consume_token and friends (#994) --- src/parser/mod.rs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index eb7c4a00..a1323f7a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2346,7 +2346,7 @@ impl<'a> Parser<'a> { } } - /// Report unexpected token + /// Report `found` was encountered instead of `expected` pub fn expected(&self, expected: &str, found: TokenWithLocation) -> Result { parser_err!( format!("Expected {expected}, found: {found}"), @@ -2354,7 +2354,8 @@ impl<'a> Parser<'a> { ) } - /// Look for an expected keyword and consume it if it exists + /// If the current token is the `expected` keyword, consume it and returns + /// true. Otherwise, no tokens are consumed and returns false. #[must_use] pub fn parse_keyword(&mut self, expected: Keyword) -> bool { match self.peek_token().token { @@ -2366,7 +2367,9 @@ impl<'a> Parser<'a> { } } - /// Look for an expected sequence of keywords and consume them if they exist + /// If the current and subsequent tokens exactly match the `keywords` + /// sequence, consume them and returns true. Otherwise, no tokens are + /// consumed and returns false #[must_use] pub fn parse_keywords(&mut self, keywords: &[Keyword]) -> bool { let index = self.index; @@ -2381,7 +2384,9 @@ impl<'a> Parser<'a> { true } - /// Look for one of the given keywords and return the one that matches. + /// If the current token is one of the given `keywords`, consume the token + /// and return the keyword that matches. Otherwise, no tokens are consumed + /// and returns `None`. 
#[must_use] pub fn parse_one_of_keywords(&mut self, keywords: &[Keyword]) -> Option { match self.peek_token().token { @@ -2398,7 +2403,8 @@ impl<'a> Parser<'a> { } } - /// Bail out if the current token is not one of the expected keywords, or consume it if it is + /// If the current token is one of the expected keywords, consume the token + /// and return the keyword that matches. Otherwise, return an error. pub fn expect_one_of_keywords(&mut self, keywords: &[Keyword]) -> Result { if let Some(keyword) = self.parse_one_of_keywords(keywords) { Ok(keyword) @@ -2411,7 +2417,8 @@ impl<'a> Parser<'a> { } } - /// Bail out if the current token is not an expected keyword, or consume it if it is + /// If the current token is the `expected` keyword, consume the token. + /// Otherwise return an error. pub fn expect_keyword(&mut self, expected: Keyword) -> Result<(), ParserError> { if self.parse_keyword(expected) { Ok(()) @@ -2420,8 +2427,8 @@ impl<'a> Parser<'a> { } } - /// Bail out if the following tokens are not the expected sequence of - /// keywords, or consume them if they are. + /// If the current and subsequent tokens exactly match the `keywords` + /// sequence, consume them and returns Ok. Otherwise, return an Error. 
pub fn expect_keywords(&mut self, expected: &[Keyword]) -> Result<(), ParserError> { for &kw in expected { self.expect_keyword(kw)?; From b89edaa98b6f8cbea105b59fdc455c300d28a828 Mon Sep 17 00:00:00 2001 From: yuval-illumex <85674443+yuval-illumex@users.noreply.github.com> Date: Tue, 24 Oct 2023 16:45:59 +0300 Subject: [PATCH 083/109] Support `IGNORE|RESPECT` NULLs clause in window functions (#998) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 26 ++++++++++++++++ src/ast/visitor.rs | 1 + src/keywords.rs | 1 + src/parser/mod.rs | 16 ++++++++++ tests/sqlparser_bigquery.rs | 1 + tests/sqlparser_clickhouse.rs | 4 +++ tests/sqlparser_common.rs | 58 +++++++++++++++++++++++++++++++++++ tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 1 + tests/sqlparser_mysql.rs | 6 ++++ tests/sqlparser_postgres.rs | 6 ++++ tests/sqlparser_redshift.rs | 1 + tests/sqlparser_snowflake.rs | 1 + tests/sqlparser_sqlite.rs | 1 + 14 files changed, 124 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 4c69d3ed..b52bdf84 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1161,6 +1161,26 @@ impl fmt::Display for WindowFrameUnits { } } +/// Specifies Ignore / Respect NULL within window functions. 
+/// For example +/// `FIRST_VALUE(column2) IGNORE NULLS OVER (PARTITION BY column1)` +#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum NullTreatment { + IgnoreNulls, + RespectNulls, +} + +impl fmt::Display for NullTreatment { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + NullTreatment::IgnoreNulls => "IGNORE NULLS", + NullTreatment::RespectNulls => "RESPECT NULLS", + }) + } +} + /// Specifies [WindowFrame]'s `start_bound` and `end_bound` #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -3757,6 +3777,8 @@ pub struct Function { pub args: Vec, /// e.g. `x > 5` in `COUNT(x) FILTER (WHERE x > 5)` pub filter: Option>, + // Snowflake/MSSQL supports diffrent options for null treatment in rank functions + pub null_treatment: Option, pub over: Option, // aggregate functions may specify eg `COUNT(DISTINCT x)` pub distinct: bool, @@ -3809,6 +3831,10 @@ impl fmt::Display for Function { write!(f, " FILTER (WHERE {filter_cond})")?; } + if let Some(o) = &self.null_treatment { + write!(f, " {o}")?; + } + if let Some(o) = &self.over { write!(f, " OVER {o}")?; } diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 4e025f96..99db1610 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -506,6 +506,7 @@ where /// *expr = Expr::Function(Function { /// name: ObjectName(vec![Ident::new("f")]), /// args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(old_expr))], +/// null_treatment: None, /// filter: None, over: None, distinct: false, special: false, order_by: vec![], /// }); /// } diff --git a/src/keywords.rs b/src/keywords.rs index 40520360..dec324cf 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -518,6 +518,7 @@ define_keywords!( REPLACE, REPLICATION, RESET, + RESPECT, RESTRICT, RESULT, RETAIN, 
diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a1323f7a..4a465ec9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -785,6 +785,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name: ObjectName(vec![w.to_ident()]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, @@ -987,6 +988,19 @@ impl<'a> Parser<'a> { } else { None }; + let null_treatment = match self.parse_one_of_keywords(&[Keyword::RESPECT, Keyword::IGNORE]) + { + Some(keyword) => { + self.expect_keyword(Keyword::NULLS)?; + + match keyword { + Keyword::RESPECT => Some(NullTreatment::RespectNulls), + Keyword::IGNORE => Some(NullTreatment::IgnoreNulls), + _ => None, + } + } + None => None, + }; let over = if self.parse_keyword(Keyword::OVER) { if self.consume_token(&Token::LParen) { let window_spec = self.parse_window_spec()?; @@ -1000,6 +1014,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name, args, + null_treatment, filter, over, distinct, @@ -1018,6 +1033,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name, args, + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index fe95b187..e72a99b4 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -564,6 +564,7 @@ fn parse_map_access_offset() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( number("0") ))),], + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 7d9cb030..e7c85c2a 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -50,6 +50,7 @@ fn parse_map_access_expr() { Value::SingleQuotedString("endpoint".to_string()) ))), ], + null_treatment: None, filter: None, over: None, distinct: false, @@ -90,6 +91,7 @@ fn parse_map_access_expr() { Value::SingleQuotedString("app".to_string()) ))), ], + null_treatment: None, filter: None, over: 
None, distinct: false, @@ -140,6 +142,7 @@ fn parse_array_fn() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new("x1")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new("x2")))), ], + null_treatment: None, filter: None, over: None, distinct: false, @@ -199,6 +202,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9eb52f6e..5eb70b09 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -875,6 +875,7 @@ fn parse_select_count_wildcard() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], + null_treatment: None, filter: None, over: None, distinct: false, @@ -896,6 +897,7 @@ fn parse_select_count_distinct() { op: UnaryOperator::Plus, expr: Box::new(Expr::Identifier(Ident::new("x"))), }))], + null_treatment: None, filter: None, over: None, distinct: true, @@ -1864,6 +1866,7 @@ fn parse_select_having() { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], + null_treatment: None, filter: None, over: None, distinct: false, @@ -1890,6 +1893,7 @@ fn parse_select_qualify() { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("ROW_NUMBER")]), args: vec![], + null_treatment: None, filter: None, over: Some(WindowType::WindowSpec(WindowSpec { partition_by: vec![Expr::Identifier(Ident::new("p"))], @@ -2287,6 +2291,45 @@ fn parse_agg_with_order_by() { } } +#[test] +fn parse_window_rank_function() { + let supported_dialects = TestedDialects { + dialects: vec![ + Box::new(GenericDialect {}), + Box::new(PostgreSqlDialect {}), + Box::new(MsSqlDialect {}), + Box::new(AnsiDialect {}), + 
Box::new(HiveDialect {}), + Box::new(SnowflakeDialect {}), + ], + options: None, + }; + + for sql in [ + "SELECT column1, column2, FIRST_VALUE(column2) OVER (PARTITION BY column1 ORDER BY column2 NULLS LAST) AS column2_first FROM t1", + "SELECT column1, column2, FIRST_VALUE(column2) OVER (ORDER BY column2 NULLS LAST) AS column2_first FROM t1", + "SELECT col_1, col_2, LAG(col_2) OVER (ORDER BY col_1) FROM t1", + "SELECT LAG(col_2, 1, 0) OVER (ORDER BY col_1) FROM t1", + "SELECT LAG(col_2, 1, 0) OVER (PARTITION BY col_3 ORDER BY col_1)", + ] { + supported_dialects.verified_stmt(sql); + } + + let supported_dialects_nulls = TestedDialects { + dialects: vec![Box::new(MsSqlDialect {}), Box::new(SnowflakeDialect {})], + options: None, + }; + + for sql in [ + "SELECT column1, column2, FIRST_VALUE(column2) IGNORE NULLS OVER (PARTITION BY column1 ORDER BY column2 NULLS LAST) AS column2_first FROM t1", + "SELECT column1, column2, FIRST_VALUE(column2) RESPECT NULLS OVER (PARTITION BY column1 ORDER BY column2 NULLS LAST) AS column2_first FROM t1", + "SELECT LAG(col_2, 1, 0) IGNORE NULLS OVER (ORDER BY col_1) FROM t1", + "SELECT LAG(col_2, 1, 0) RESPECT NULLS OVER (ORDER BY col_1) FROM t1", + ] { + supported_dialects_nulls.verified_stmt(sql); + } +} + #[test] fn parse_create_table() { let sql = "CREATE TABLE uk_cities (\ @@ -3346,6 +3389,7 @@ fn parse_scalar_function_in_projection() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("id")) ))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -3466,6 +3510,7 @@ fn parse_named_argument_function() { ))), }, ], + null_treatment: None, filter: None, over: None, distinct: false, @@ -3498,6 +3543,7 @@ fn parse_window_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("row_number")]), args: vec![], + null_treatment: None, filter: None, over: Some(WindowType::WindowSpec(WindowSpec { partition_by: vec![], @@ -3542,6 +3588,7 @@ fn test_parse_named_window() { 
quote_style: None, }), ))], + null_treatment: None, filter: None, over: Some(WindowType::NamedWindow(Ident { value: "window1".to_string(), @@ -3568,6 +3615,7 @@ fn test_parse_named_window() { quote_style: None, }), ))], + null_treatment: None, filter: None, over: Some(WindowType::NamedWindow(Ident { value: "window2".to_string(), @@ -4038,6 +4086,7 @@ fn parse_at_timezone() { quote_style: None, }]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(zero.clone()))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -4066,6 +4115,7 @@ fn parse_at_timezone() { quote_style: None, },],), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(zero))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -4078,6 +4128,7 @@ fn parse_at_timezone() { Value::SingleQuotedString("%Y-%m-%dT%H".to_string()), ),),), ], + null_treatment: None, filter: None, over: None, distinct: false, @@ -4237,6 +4288,7 @@ fn parse_table_function() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( Value::SingleQuotedString("1".to_owned()), )))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -4389,6 +4441,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("2")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("3")))), ], + null_treatment: None, filter: None, over: None, distinct: false, @@ -4419,6 +4472,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("2")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("3")))), ], + null_treatment: None, filter: None, over: None, distinct: false, @@ -4431,6 +4485,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("5")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("6")))), ], + null_treatment: None, filter: None, over: None, distinct: false, @@ -6904,6 +6959,7 @@ fn 
parse_time_functions() { let select_localtime_func_call_ast = Function { name: ObjectName(vec![Ident::new(func_name)]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, @@ -7391,6 +7447,7 @@ fn parse_pivot_table() { args: (vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::CompoundIdentifier(vec![Ident::new("a"), Ident::new("amount"),]) ))]), + null_treatment: None, filter: None, over: None, distinct: false, @@ -7541,6 +7598,7 @@ fn parse_pivot_unpivot_table() { args: (vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("population")) ))]), + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 534a224e..66eef09e 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -361,6 +361,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index ebadf95f..4aa993fa 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -334,6 +334,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 8391bbad..2788dfab 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1112,6 +1112,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("description")) ))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -1126,6 +1127,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_create")) ))], + 
null_treatment: None, filter: None, over: None, distinct: false, @@ -1140,6 +1142,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_read")) ))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -1154,6 +1157,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_update")) ))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -1168,6 +1172,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_delete")) ))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -1558,6 +1563,7 @@ fn parse_table_colum_option_on_update() { option: ColumnOption::OnUpdate(Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_TIMESTAMP")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 64fcbd38..18b5fe6f 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2275,6 +2275,7 @@ fn test_composite_value() { named: true } )))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -2437,6 +2438,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_CATALOG")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, @@ -2449,6 +2451,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_USER")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, @@ -2461,6 +2464,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("SESSION_USER")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, @@ -2473,6 
+2477,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("USER")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, @@ -2924,6 +2929,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 6238d1ec..6fa647d3 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -137,6 +137,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 7e6f1813..19a62b61 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -249,6 +249,7 @@ fn parse_delimited_identifiers() { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], filter: None, + null_treatment: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index b657acdd..4935f1f5 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -359,6 +359,7 @@ fn parse_window_function_with_filter() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("x")) ))], + null_treatment: None, over: Some(WindowType::WindowSpec(WindowSpec { partition_by: vec![], order_by: vec![], From 79933846861551a05afc00979516fe8111796492 Mon Sep 17 00:00:00 2001 From: yuval-illumex <85674443+yuval-illumex@users.noreply.github.com> Date: Tue, 24 Oct 2023 23:05:43 +0300 Subject: [PATCH 084/109] Support `date` 'key' when using semi structured data (#1023) --- src/parser/mod.rs | 2 +- tests/sqlparser_snowflake.rs | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 
deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4a465ec9..1a586514 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4726,7 +4726,7 @@ impl<'a> Parser<'a> { )?, }, // Case when Snowflake Semi-structured data like key:value - Keyword::NoKeyword | Keyword::LOCATION | Keyword::TYPE if dialect_of!(self is SnowflakeDialect | GenericDialect) => { + Keyword::NoKeyword | Keyword::LOCATION | Keyword::TYPE | Keyword::DATE if dialect_of!(self is SnowflakeDialect | GenericDialect) => { Ok(Value::UnQuotedString(w.value)) } _ => self.expected( diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 19a62b61..54d6b554 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -208,6 +208,17 @@ fn parse_json_using_colon() { select.projection[0] ); + let sql = "SELECT a:date FROM t"; + let select = snowflake().verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::JsonAccess { + left: Box::new(Expr::Identifier(Ident::new("a"))), + operator: JsonOperator::Colon, + right: Box::new(Expr::Value(Value::UnQuotedString("date".to_string()))), + }), + select.projection[0] + ); + snowflake().one_statement_parses_to("SELECT a:b::int FROM t", "SELECT CAST(a:b AS INT) FROM t"); } From 65317edcb9ac1cf58badf336bcd55d52fd6cecca Mon Sep 17 00:00:00 2001 From: yuval-illumex <85674443+yuval-illumex@users.noreply.github.com> Date: Wed, 25 Oct 2023 19:53:09 +0300 Subject: [PATCH 085/109] Support Snowflake - allow number as placeholder (e.g. 
`:1`) (#1001) --- src/parser/mod.rs | 9 ++++++++- tests/sqlparser_snowflake.rs | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1a586514..45ce81ac 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4759,7 +4759,14 @@ impl<'a> Parser<'a> { Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), Token::Placeholder(ref s) => Ok(Value::Placeholder(s.to_string())), tok @ Token::Colon | tok @ Token::AtSign => { - let ident = self.parse_identifier()?; + // Not calling self.parse_identifier()? because only in placeholder we want to check numbers as idfentifies + // This because snowflake allows numbers as placeholders + let next_token = self.next_token(); + let ident = match next_token.token { + Token::Word(w) => Ok(w.to_ident()), + Token::Number(w, false) => Ok(Ident::new(w)), + _ => self.expected("placeholder", next_token), + }?; let placeholder = tok.to_string() + &ident.value; Ok(Value::Placeholder(placeholder)) } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 54d6b554..a959a4a4 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1079,6 +1079,20 @@ fn test_snowflake_trim() { ); } +#[test] +fn test_number_placeholder() { + let sql_only_select = "SELECT :1"; + let select = snowflake().verified_only_select(sql_only_select); + assert_eq!( + &Expr::Value(Value::Placeholder(":1".into())), + expr_from_projection(only(&select.projection)) + ); + + snowflake() + .parse_sql_statements("alter role 1 with name = 'foo'") + .expect_err("should have failed"); +} + #[test] fn parse_position_not_function_columns() { snowflake_and_generic() From 2f437db2a68724e4ae709df22f53999d24804ac7 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Wed, 25 Oct 2023 18:57:33 +0200 Subject: [PATCH 086/109] Support for BigQuery `struct`, `array` and `bytes` , `int64`, `float64` datatypes (#1003) --- src/ast/data_type.rs | 67 ++++- 
src/ast/mod.rs | 58 ++++- src/keywords.rs | 5 + src/parser/mod.rs | 236 ++++++++++++++++- tests/sqlparser_bigquery.rs | 489 +++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 101 +++++--- tests/sqlparser_postgres.rs | 8 +- tests/sqlparser_snowflake.rs | 2 +- 8 files changed, 901 insertions(+), 65 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 2a6a004f..506de815 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -20,7 +20,7 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::ast::ObjectName; +use crate::ast::{display_comma_separated, ObjectName, StructField}; use super::value::escape_single_quote_string; @@ -71,6 +71,10 @@ pub enum DataType { /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-large-object-string-type /// [Oracle]: https://docs.oracle.com/javadb/10.8.3.0/ref/rrefblob.html Blob(Option), + /// Variable-length binary data with optional length. + /// + /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#bytes_type + Bytes(Option), /// Numeric type with optional precision and scale e.g. NUMERIC(10,2), [standard][1] /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type @@ -125,6 +129,10 @@ pub enum DataType { /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html Int4(Option), + /// Integer type in [bigquery] + /// + /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + Int64, /// Integer with optional display width e.g. INTEGER or INTEGER(11) Integer(Option), /// Unsigned int with optional display width e.g. 
INT UNSIGNED or INT(11) UNSIGNED @@ -149,6 +157,10 @@ pub enum DataType { /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html Float4, + /// Floating point in [bigquery] + /// + /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + Float64, /// Floating point e.g. REAL Real, /// Float8 as alias for Double in [postgresql] @@ -190,18 +202,23 @@ pub enum DataType { Regclass, /// Text Text, - /// String - String, + /// String with optional length. + String(Option), /// Bytea Bytea, /// Custom type such as enums Custom(ObjectName, Vec), /// Arrays - Array(Option>), + Array(ArrayElemTypeDef), /// Enums Enum(Vec), /// Set Set(Vec), + /// Struct + /// + /// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html + /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type + Struct(Vec), } impl fmt::Display for DataType { @@ -231,6 +248,7 @@ impl fmt::Display for DataType { format_type_with_optional_length(f, "VARBINARY", size, false) } DataType::Blob(size) => format_type_with_optional_length(f, "BLOB", size, false), + DataType::Bytes(size) => format_type_with_optional_length(f, "BYTES", size, false), DataType::Numeric(info) => { write!(f, "NUMERIC{info}") } @@ -274,6 +292,9 @@ impl fmt::Display for DataType { DataType::Int4(zerofill) => { format_type_with_optional_length(f, "INT4", zerofill, false) } + DataType::Int64 => { + write!(f, "INT64") + } DataType::UnsignedInt4(zerofill) => { format_type_with_optional_length(f, "INT4", zerofill, true) } @@ -297,6 +318,7 @@ impl fmt::Display for DataType { } DataType::Real => write!(f, "REAL"), DataType::Float4 => write!(f, "FLOAT4"), + DataType::Float64 => write!(f, "FLOAT64"), DataType::Double => write!(f, "DOUBLE"), DataType::Float8 => write!(f, "FLOAT8"), DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"), @@ -316,15 +338,13 @@ impl fmt::Display for DataType { 
DataType::JSON => write!(f, "JSON"), DataType::Regclass => write!(f, "REGCLASS"), DataType::Text => write!(f, "TEXT"), - DataType::String => write!(f, "STRING"), + DataType::String(size) => format_type_with_optional_length(f, "STRING", size, false), DataType::Bytea => write!(f, "BYTEA"), - DataType::Array(ty) => { - if let Some(t) = &ty { - write!(f, "{t}[]") - } else { - write!(f, "ARRAY") - } - } + DataType::Array(ty) => match ty { + ArrayElemTypeDef::None => write!(f, "ARRAY"), + ArrayElemTypeDef::SquareBracket(t) => write!(f, "{t}[]"), + ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"), + }, DataType::Custom(ty, modifiers) => { if modifiers.is_empty() { write!(f, "{ty}") @@ -352,6 +372,13 @@ impl fmt::Display for DataType { } write!(f, ")") } + DataType::Struct(fields) => { + if !fields.is_empty() { + write!(f, "STRUCT<{}>", display_comma_separated(fields)) + } else { + write!(f, "STRUCT") + } + } } } } @@ -533,3 +560,19 @@ impl fmt::Display for CharLengthUnits { } } } + +/// Represents the data type of the elements in an array (if any) as well as +/// the syntax used to declare the array. +/// +/// For example: Bigquery/Hive use `ARRAY` whereas snowflake uses ARRAY. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum ArrayElemTypeDef { + /// `ARRAY` + None, + /// `ARRAY` + AngleBracket(Box), + /// `[]INT` + SquareBracket(Box), +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b52bdf84..ab917dc4 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -26,7 +26,7 @@ use serde::{Deserialize, Serialize}; use sqlparser_derive::{Visit, VisitMut}; pub use self::data_type::{ - CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo, + ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo, }; pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue}; pub use self::ddl::{ @@ -323,6 +323,27 @@ impl fmt::Display for JsonOperator { } } +/// A field definition within a struct. +/// +/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct StructField { + pub field_name: Option, + pub field_type: DataType, +} + +impl fmt::Display for StructField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(name) = &self.field_name { + write!(f, "{name} {}", self.field_type) + } else { + write!(f, "{}", self.field_type) + } + } +} + /// Options for `CAST` / `TRY_CAST` /// BigQuery: #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -597,6 +618,26 @@ pub enum Expr { Rollup(Vec>), /// ROW / TUPLE a single value, such as `SELECT (1, 2)` Tuple(Vec), + /// `BigQuery` specific `Struct` literal expression [1] + /// Syntax: + /// ```sql + /// STRUCT<[field_name] field_type, ...>( expr1 [, ... 
]) + /// ``` + /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type + Struct { + /// Struct values. + values: Vec, + /// Struct field definitions. + fields: Vec, + }, + /// `BigQuery` specific: An named expression in a typeless struct [1] + /// + /// Syntax + /// ```sql + /// 1 AS A + /// ``` + /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type + Named { expr: Box, name: Ident }, /// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]` ArrayIndex { obj: Box, indexes: Vec }, /// An array expression e.g. `ARRAY[1, 2]` @@ -997,6 +1038,21 @@ impl fmt::Display for Expr { Expr::Tuple(exprs) => { write!(f, "({})", display_comma_separated(exprs)) } + Expr::Struct { values, fields } => { + if !fields.is_empty() { + write!( + f, + "STRUCT<{}>({})", + display_comma_separated(fields), + display_comma_separated(values) + ) + } else { + write!(f, "STRUCT({})", display_comma_separated(values)) + } + } + Expr::Named { expr, name } => { + write!(f, "{} AS {}", expr, name) + } Expr::ArrayIndex { obj, indexes } => { write!(f, "{obj}")?; for i in indexes { diff --git a/src/keywords.rs b/src/keywords.rs index dec324cf..2941c817 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -120,6 +120,7 @@ define_keywords!( BY, BYPASSRLS, BYTEA, + BYTES, CACHE, CALL, CALLED, @@ -270,6 +271,7 @@ define_keywords!( FIRST_VALUE, FLOAT, FLOAT4, + FLOAT64, FLOAT8, FLOOR, FOLLOWING, @@ -293,6 +295,7 @@ define_keywords!( FUSION, GENERATE, GENERATED, + GEOGRAPHY, GET, GLOBAL, GRANT, @@ -328,6 +331,7 @@ define_keywords!( INT, INT2, INT4, + INT64, INT8, INTEGER, INTERSECT, @@ -584,6 +588,7 @@ define_keywords!( STORED, STRICT, STRING, + STRUCT, SUBMULTISET, SUBSTRING, SUBSTRING_REGEX, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 45ce81ac..eb4ef68a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -30,7 +30,7 @@ use IsOptional::*; use 
crate::ast::helpers::stmt_create_table::CreateTableBuilder; use crate::ast::*; use crate::dialect::*; -use crate::keywords::{self, Keyword}; +use crate::keywords::{self, Keyword, ALL_KEYWORDS}; use crate::tokenizer::*; mod alter; @@ -197,6 +197,26 @@ impl std::error::Error for ParserError {} // By default, allow expressions up to this deep before erroring const DEFAULT_REMAINING_DEPTH: usize = 50; +/// Composite types declarations using angle brackets syntax can be arbitrary +/// nested such that the following declaration is possible: +/// `ARRAY>` +/// But the tokenizer recognizes the `>>` as a ShiftRight token. +/// We work-around that limitation when parsing a data type by accepting +/// either a `>` or `>>` token in such cases, remembering which variant we +/// matched. +/// In the latter case having matched a `>>`, the parent type will not look to +/// match its closing `>` as a result since that will have taken place at the +/// child type. +/// +/// See [Parser::parse_data_type] for details +struct MatchedTrailingBracket(bool); + +impl From for MatchedTrailingBracket { + fn from(value: bool) -> Self { + Self(value) + } +} + /// Options that control how the [`Parser`] parses SQL text #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParserOptions { @@ -833,6 +853,10 @@ impl<'a> Parser<'a> { Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { self.parse_match_against() } + Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => { + self.prev_token(); + self.parse_bigquery_struct_literal() + } // Here `w` is a word, check if it's a part of a multi-part // identifier, a function call, or a simple identifier: _ => match self.peek_token().token { @@ -1798,6 +1822,172 @@ impl<'a> Parser<'a> { })) } + /// Bigquery specific: Parse a struct literal + /// Syntax + /// ```sql + /// -- typed + /// STRUCT<[field_name] field_type, ...>( expr1 [, ... ]) + /// -- typeless + /// STRUCT( expr1 [AS field_name] [, ... 
]) + /// ``` + fn parse_bigquery_struct_literal(&mut self) -> Result { + let (fields, trailing_bracket) = + self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?; + if trailing_bracket.0 { + return parser_err!("unmatched > in STRUCT literal", self.peek_token().location); + } + + self.expect_token(&Token::LParen)?; + let values = self + .parse_comma_separated(|parser| parser.parse_struct_field_expr(!fields.is_empty()))?; + self.expect_token(&Token::RParen)?; + + Ok(Expr::Struct { values, fields }) + } + + /// Parse an expression value for a bigquery struct [1] + /// Syntax + /// ```sql + /// expr [AS name] + /// ``` + /// + /// Parameter typed_syntax is set to true if the expression + /// is to be parsed as a field expression declared using typed + /// struct syntax [2], and false if using typeless struct syntax [3]. + /// + /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#constructing_a_struct + /// [2]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax + /// [3]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax + fn parse_struct_field_expr(&mut self, typed_syntax: bool) -> Result { + let expr = self.parse_expr()?; + if self.parse_keyword(Keyword::AS) { + if typed_syntax { + return parser_err!("Typed syntax does not allow AS", { + self.prev_token(); + self.peek_token().location + }); + } + let field_name = self.parse_identifier()?; + Ok(Expr::Named { + expr: expr.into(), + name: field_name, + }) + } else { + Ok(expr) + } + } + + /// Parse a Struct type definition as a sequence of field-value pairs. + /// The syntax of the Struct elem differs by dialect so it is customised + /// by the `elem_parser` argument. 
+ /// + /// Syntax + /// ```sql + /// Hive: + /// STRUCT + /// + /// BigQuery: + /// STRUCT<[field_name] field_type> + /// ``` + fn parse_struct_type_def( + &mut self, + mut elem_parser: F, + ) -> Result<(Vec, MatchedTrailingBracket), ParserError> + where + F: FnMut(&mut Parser<'a>) -> Result<(StructField, MatchedTrailingBracket), ParserError>, + { + let start_token = self.peek_token(); + self.expect_keyword(Keyword::STRUCT)?; + + // Nothing to do if we have no type information. + if Token::Lt != self.peek_token() { + return Ok((Default::default(), false.into())); + } + self.next_token(); + + let mut field_defs = vec![]; + let trailing_bracket = loop { + let (def, trailing_bracket) = elem_parser(self)?; + field_defs.push(def); + if !self.consume_token(&Token::Comma) { + break trailing_bracket; + } + + // Angle brackets are balanced so we only expect the trailing `>>` after + // we've matched all field types for the current struct. + // e.g. this is invalid syntax `STRUCT>>, INT>(NULL)` + if trailing_bracket.0 { + return parser_err!("unmatched > in STRUCT definition", start_token.location); + } + }; + + Ok(( + field_defs, + self.expect_closing_angle_bracket(trailing_bracket)?, + )) + } + + /// Parse a field definition in a BigQuery struct. + /// Syntax: + /// + /// ```sql + /// [field_name] field_type + /// ``` + fn parse_big_query_struct_field_def( + &mut self, + ) -> Result<(StructField, MatchedTrailingBracket), ParserError> { + let is_anonymous_field = if let Token::Word(w) = self.peek_token().token { + ALL_KEYWORDS + .binary_search(&w.value.to_uppercase().as_str()) + .is_ok() + } else { + false + }; + + let field_name = if is_anonymous_field { + None + } else { + Some(self.parse_identifier()?) 
+ }; + + let (field_type, trailing_bracket) = self.parse_data_type_helper()?; + + Ok(( + StructField { + field_name, + field_type, + }, + trailing_bracket, + )) + } + + /// For nested types that use the angle bracket syntax, this matches either + /// `>`, `>>` or nothing depending on which variant is expected (specified by the previously + /// matched `trailing_bracket` argument). It returns whether there is a trailing + /// left to be matched - (i.e. if '>>' was matched). + fn expect_closing_angle_bracket( + &mut self, + trailing_bracket: MatchedTrailingBracket, + ) -> Result { + let trailing_bracket = if !trailing_bracket.0 { + match self.peek_token().token { + Token::Gt => { + self.next_token(); + false.into() + } + Token::ShiftRight => { + self.next_token(); + true.into() + } + _ => return self.expected(">", self.peek_token()), + } + } else { + false.into() + }; + + Ok(trailing_bracket) + } + /// Parse an operator following an expression pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result { // allow the dialect to override infix parsing @@ -4876,7 +5066,22 @@ impl<'a> Parser<'a> { /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) pub fn parse_data_type(&mut self) -> Result { + let (ty, trailing_bracket) = self.parse_data_type_helper()?; + if trailing_bracket.0 { + return parser_err!( + format!("unmatched > after parsing data type {ty}"), + self.peek_token() + ); + } + + Ok(ty) + } + + fn parse_data_type_helper( + &mut self, + ) -> Result<(DataType, MatchedTrailingBracket), ParserError> { let next_token = self.next_token(); + let mut trailing_bracket = false.into(); let mut data = match next_token.token { Token::Word(w) => match w.keyword { Keyword::BOOLEAN => Ok(DataType::Boolean), @@ -4884,6 +5089,7 @@ impl<'a> Parser<'a> { Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)), Keyword::REAL => Ok(DataType::Real), Keyword::FLOAT4 => Ok(DataType::Float4), + Keyword::FLOAT64 => 
Ok(DataType::Float64), Keyword::FLOAT8 => Ok(DataType::Float8), Keyword::DOUBLE => { if self.parse_keyword(Keyword::PRECISION) { @@ -4940,6 +5146,7 @@ impl<'a> Parser<'a> { Ok(DataType::Int4(optional_precision?)) } } + Keyword::INT64 => Ok(DataType::Int64), Keyword::INTEGER => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { @@ -4994,6 +5201,7 @@ impl<'a> Parser<'a> { Keyword::BINARY => Ok(DataType::Binary(self.parse_optional_precision()?)), Keyword::VARBINARY => Ok(DataType::Varbinary(self.parse_optional_precision()?)), Keyword::BLOB => Ok(DataType::Blob(self.parse_optional_precision()?)), + Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)), Keyword::UUID => Ok(DataType::Uuid), Keyword::DATE => Ok(DataType::Date), Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)), @@ -5037,7 +5245,7 @@ impl<'a> Parser<'a> { Keyword::INTERVAL => Ok(DataType::Interval), Keyword::JSON => Ok(DataType::JSON), Keyword::REGCLASS => Ok(DataType::Regclass), - Keyword::STRING => Ok(DataType::String), + Keyword::STRING => Ok(DataType::String(self.parse_optional_precision()?)), Keyword::TEXT => Ok(DataType::Text), Keyword::BYTEA => Ok(DataType::Bytea), Keyword::NUMERIC => Ok(DataType::Numeric( @@ -5059,17 +5267,23 @@ impl<'a> Parser<'a> { Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)), Keyword::ARRAY => { if dialect_of!(self is SnowflakeDialect) { - Ok(DataType::Array(None)) + Ok(DataType::Array(ArrayElemTypeDef::None)) } else { - // Hive array syntax. 
Note that nesting arrays - or other Hive syntax - // that ends with > will fail due to "C++" problem - >> is parsed as - // Token::ShiftRight self.expect_token(&Token::Lt)?; - let inside_type = self.parse_data_type()?; - self.expect_token(&Token::Gt)?; - Ok(DataType::Array(Some(Box::new(inside_type)))) + let (inside_type, _trailing_bracket) = self.parse_data_type_helper()?; + trailing_bracket = self.expect_closing_angle_bracket(_trailing_bracket)?; + Ok(DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + inside_type, + )))) } } + Keyword::STRUCT if dialect_of!(self is BigQueryDialect) => { + self.prev_token(); + let (field_defs, _trailing_bracket) = + self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?; + trailing_bracket = _trailing_bracket; + Ok(DataType::Struct(field_defs)) + } _ => { self.prev_token(); let type_name = self.parse_object_name()?; @@ -5087,9 +5301,9 @@ impl<'a> Parser<'a> { // Keyword::ARRAY syntax from above while self.consume_token(&Token::LBracket) { self.expect_token(&Token::RBracket)?; - data = DataType::Array(Some(Box::new(data))) + data = DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(data))) } - Ok(data) + Ok((data, trailing_bracket)) } pub fn parse_string_values(&mut self) -> Result, ParserError> { diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index e72a99b4..006927e4 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -13,6 +13,7 @@ #[macro_use] mod test_utils; +use sqlparser::ast; use std::ops::Deref; use sqlparser::ast::*; @@ -85,6 +86,494 @@ fn parse_raw_literal() { panic!("invalid query") } +#[test] +fn parse_nested_data_types() { + let sql = "CREATE TABLE table (x STRUCT, b BYTES(42)>, y ARRAY>)"; + match bigquery().one_statement_parses_to(sql, sql) { + Statement::CreateTable { name, columns, .. 
} => { + assert_eq!(name, ObjectName(vec!["table".into()])); + assert_eq!( + columns, + vec![ + ColumnDef { + name: Ident::new("x"), + data_type: DataType::Struct(vec![ + StructField { + field_name: Some("a".into()), + field_type: DataType::Array(ArrayElemTypeDef::AngleBracket( + Box::new(DataType::Int64,) + )) + }, + StructField { + field_name: Some("b".into()), + field_type: DataType::Bytes(Some(42)) + }, + ]), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("y"), + data_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + DataType::Struct(vec![StructField { + field_name: None, + field_type: DataType::Int64, + }]), + ))), + collation: None, + options: vec![], + }, + ] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_invalid_brackets() { + let sql = "SELECT STRUCT>(NULL)"; + assert_eq!( + bigquery().parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError("unmatched > in STRUCT literal".to_string()) + ); + + let sql = "SELECT STRUCT>>(NULL)"; + assert_eq!( + bigquery().parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError("Expected (, found: >".to_string()) + ); + + let sql = "CREATE TABLE table (x STRUCT>>)"; + assert_eq!( + bigquery().parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError( + "Expected ',' or ')' after column definition, found: >".to_string() + ) + ); +} + +#[test] +fn parse_tuple_struct_literal() { + // tuple syntax: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#tuple_syntax + // syntax: (expr1, expr2 [, ... 
]) + let sql = "SELECT (1, 2, 3), (1, 1.0, '123', true)"; + let select = bigquery().verified_only_select(sql); + assert_eq!(2, select.projection.len()); + assert_eq!( + &Expr::Tuple(vec![ + Expr::Value(number("1")), + Expr::Value(number("2")), + Expr::Value(number("3")), + ]), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Tuple(vec![ + Expr::Value(number("1")), + Expr::Value(number("1.0")), + Expr::Value(Value::SingleQuotedString("123".to_string())), + Expr::Value(Value::Boolean(true)) + ]), + expr_from_projection(&select.projection[1]) + ); +} + +#[test] +fn parse_typeless_struct_syntax() { + // typeless struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax + // syntax: STRUCT( expr1 [AS field_name] [, ... ]) + let sql = "SELECT STRUCT(1, 2, 3), STRUCT('abc'), STRUCT(1, t.str_col), STRUCT(1 AS a, 'abc' AS b), STRUCT(str_col AS abc)"; + let select = bigquery().verified_only_select(sql); + assert_eq!(5, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![ + Expr::Value(number("1")), + Expr::Value(number("2")), + Expr::Value(number("3")), + ], + fields: Default::default() + }, + expr_from_projection(&select.projection[0]) + ); + + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::SingleQuotedString("abc".to_string())),], + fields: Default::default() + }, + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Struct { + values: vec![ + Expr::Value(number("1")), + Expr::CompoundIdentifier(vec![Ident::from("t"), Ident::from("str_col")]), + ], + fields: Default::default() + }, + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Struct { + values: vec![ + Expr::Named { + expr: Expr::Value(number("1")).into(), + name: Ident::from("a") + }, + Expr::Named { + expr: Expr::Value(Value::SingleQuotedString("abc".to_string())).into(), + name: Ident::from("b") + }, + ], + fields: Default::default() + }, + 
expr_from_projection(&select.projection[3]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Named { + expr: Expr::Identifier(Ident::from("str_col")).into(), + name: Ident::from("abc") + }], + fields: Default::default() + }, + expr_from_projection(&select.projection[4]) + ); +} + +#[test] +fn parse_typed_struct_syntax() { + // typed struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax + // syntax: STRUCT<[field_name] field_type, ...>( expr1 [, ... ]) + + let sql = r#"SELECT STRUCT(5), STRUCT(1, t.str_col), STRUCT, str STRUCT>(nested_col)"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(number("5")),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Int64, + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![ + Expr::Value(number("1")), + Expr::CompoundIdentifier(vec![ + Ident { + value: "t".into(), + quote_style: None, + }, + Ident { + value: "str_col".into(), + quote_style: None, + }, + ]), + ], + fields: vec![ + StructField { + field_name: Some(Ident { + value: "x".into(), + quote_style: None, + }), + field_type: DataType::Int64 + }, + StructField { + field_name: Some(Ident { + value: "y".into(), + quote_style: None, + }), + field_type: DataType::String(None) + }, + ] + }, + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Identifier(Ident { + value: "nested_col".into(), + quote_style: None, + }),], + fields: vec![ + StructField { + field_name: Some("arr".into()), + field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + DataType::Float64 + ))) + }, + StructField { + field_name: Some("str".into()), + field_type: DataType::Struct(vec![StructField { + field_name: None, + field_type: DataType::Bool + }]) + }, + ] + }, + 
expr_from_projection(&select.projection[2]) + ); + + let sql = r#"SELECT STRUCT>(nested_col)"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Identifier(Ident { + value: "nested_col".into(), + quote_style: None, + }),], + fields: vec![ + StructField { + field_name: Some("x".into()), + field_type: DataType::Struct(Default::default()) + }, + StructField { + field_name: Some("y".into()), + field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + DataType::Struct(Default::default()) + ))) + }, + ] + }, + expr_from_projection(&select.projection[0]) + ); + + let sql = r#"SELECT STRUCT(true), STRUCT(B'abc')"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(2, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::Boolean(true)),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Bool + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::SingleQuotedByteStringLiteral( + "abc".into() + )),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Bytes(Some(42)) + }] + }, + expr_from_projection(&select.projection[1]) + ); + + let sql = r#"SELECT STRUCT("2011-05-05"), STRUCT(DATETIME '1999-01-01 01:23:34.45'), STRUCT(5.0), STRUCT(1)"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(4, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::DoubleQuotedString( + "2011-05-05".to_string() + )),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Date + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::TypedString { + data_type: DataType::Datetime(None), + value: "1999-01-01 01:23:34.45".to_string() + },], + fields: vec![StructField { + field_name: None, + 
field_type: DataType::Datetime(None) + }] + }, + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(number("5.0")),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Float64 + }] + }, + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(number("1")),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Int64 + }] + }, + expr_from_projection(&select.projection[3]) + ); + + let sql = r#"SELECT STRUCT(INTERVAL '1-2 3 4:5:6.789999'), STRUCT(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(2, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Interval(ast::Interval { + value: Box::new(Expr::Value(Value::SingleQuotedString( + "1-2 3 4:5:6.789999".to_string() + ))), + leading_field: None, + leading_precision: None, + last_field: None, + fractional_seconds_precision: None + }),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Interval + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::TypedString { + data_type: DataType::JSON, + value: r#"{"class" : {"students" : [{"name" : "Jane"}]}}"#.to_string() + },], + fields: vec![StructField { + field_name: None, + field_type: DataType::JSON + }] + }, + expr_from_projection(&select.projection[1]) + ); + + let sql = r#"SELECT STRUCT("foo"), STRUCT(TIMESTAMP '2008-12-25 15:30:00 America/Los_Angeles'), STRUCT