From 08fadd16c466d2b71bae028ca1e371c0f3445c19 Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Mon, 30 Dec 2024 14:09:31 +0100 Subject: [PATCH] Add support for ClickHouse `FORMAT` on `INSERT` Adds supports for the `SETTINGS` and `FORMAT` keywords used for ClickHouse when inserting data with other syntax than SQL. This can happen e.g. when using the ClickHouse CLI tool to pipe input from files or similar. --- src/ast/dml.rs | 30 +++++++++-- src/ast/query.rs | 15 +++++- src/ast/spans.rs | 8 +-- src/dialect/clickhouse.rs | 8 +++ src/keywords.rs | 2 +- src/parser/mod.rs | 93 ++++++++++++++++++++++++++--------- tests/sqlparser_clickhouse.rs | 26 +++++++++- tests/sqlparser_postgres.rs | 10 +++- 8 files changed, 157 insertions(+), 35 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 22309c8f8..256d372ad 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -33,10 +33,10 @@ pub use super::ddl::{ColumnDef, TableConstraint}; use super::{ display_comma_separated, display_separated, ClusteredBy, CommentDef, Expr, FileFormat, - FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident, + FormatClause, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert, OneOrManyWithParens, - OrderByExpr, Query, RowAccessPolicy, SelectItem, SqlOption, SqliteOnConflict, TableEngine, - TableWithJoins, Tag, WrappedCollection, + OrderByExpr, Query, RowAccessPolicy, SelectItem, Setting, SqlOption, SqliteOnConflict, + TableEngine, TableWithJoins, Tag, WrappedCollection, }; /// CREATE INDEX statement. @@ -495,6 +495,20 @@ pub struct Insert { pub priority: Option, /// Only for mysql pub insert_alias: Option, + /// Settings used in together with a specified `FORMAT`. + /// + /// ClickHouse syntax: `INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format'` + /// + /// [ClickHouse `INSERT INTO`](https://clickhouse.com/docs/en/sql-reference/statements/insert-into) + /// [ClickHouse Formats](https://clickhouse.com/docs/en/interfaces/formats) + pub settings: Option>, + /// Format for `INSERT` statement when not using standard SQL format. Can be e.g. `CSV`, + /// `JSON`, `JSONAsString`, `LineAsString` and more. + /// + /// ClickHouse syntax: `INSERT INTO tbl FORMAT JSONEachRow {"foo": 1, "bar": 2}, {"foo": 3}` + /// + /// [ClickHouse formats JSON insert](https://clickhouse.com/docs/en/interfaces/formats#json-inserting-data) + pub format_clause: Option, } impl Display for Insert { @@ -547,7 +561,15 @@ impl Display for Insert { write!(f, "{source}")?; } - if self.source.is_none() && self.columns.is_empty() { + if let Some(settings) = &self.settings { + write!(f, "SETTINGS {} ", display_comma_separated(settings))?; + } + + if let Some(format_clause) = &self.format_clause { + write!(f, "{format_clause}")?; + } + + if self.source.is_none() && self.columns.is_empty() && self.format_clause.is_none() { write!(f, "DEFAULT VALUES")?; } diff --git a/src/ast/query.rs b/src/ast/query.rs index 9e4e9e2ef..f4101c3d2 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2465,14 +2465,25 @@ impl fmt::Display for GroupByExpr { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FormatClause { - Identifier(Ident), + Identifier { + ident: Ident, + expr: Option>, + }, Null, } impl fmt::Display for FormatClause { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - FormatClause::Identifier(ident) => write!(f, "FORMAT {}", ident), + FormatClause::Identifier { ident, expr } => { + write!(f, "FORMAT {}", ident)?; + + if let Some(exprs) = expr { + write!(f, " {}", display_comma_separated(exprs))?; + } + + Ok(()) + } FormatClause::Null => write!(f, "FORMAT NULL"), } } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 574830ef5..4747421d5 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1148,9 +1148,11 @@ impl Spanned for Insert { table: _, // bool on, returning, - replace_into: _, // bool - priority: _, // todo, mysql specific - insert_alias: _, // todo, mysql specific + replace_into: _, // bool + priority: _, // todo, mysql specific + insert_alias: _, // todo, mysql specific + settings: _, // todo, clickhouse specific + format_clause: _, // todo, clickhouse specific } = self; union_spans( diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 0c8f08040..87ba85a15 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -50,4 +50,12 @@ impl Dialect for ClickHouseDialect { fn supports_limit_comma(&self) -> bool { true } + + // ClickHouse uses this for some FORMAT expressions in `INSERT` context, e.g. when inserting + // with FORMAT JSONEachRow a raw JSON key-value expression is valid and expected. + // + // [ClickHouse formats](https://clickhouse.com/docs/en/interfaces/formats) + fn supports_dictionary_syntax(&self) -> bool { + true + } } diff --git a/src/keywords.rs b/src/keywords.rs index 43abc2b03..2d06b1035 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -931,7 +931,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::PREWHERE, // for ClickHouse SELECT * FROM t SETTINGS ... Keyword::SETTINGS, - // for ClickHouse SELECT * FROM t FORMAT... + // for ClickHouse SELECT * FROM t FORMAT... or INSERT INTO t FORMAT... Keyword::FORMAT, // for Snowflake START WITH .. CONNECT BY Keyword::START, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 012314b42..6eed0f67a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9570,12 +9570,7 @@ impl<'a> Parser<'a> { let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect) && self.parse_keyword(Keyword::FORMAT) { - if self.parse_keyword(Keyword::NULL) { - Some(FormatClause::Null) - } else { - let ident = self.parse_identifier()?; - Some(FormatClause::Identifier(ident)) - } + Some(self.parse_format_clause(false)?) } else { None }; @@ -11824,30 +11819,53 @@ impl<'a> Parser<'a> { let is_mysql = dialect_of!(self is MySqlDialect); - let (columns, partitioned, after_columns, source) = - if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) { - (vec![], None, vec![], None) + let (columns, partitioned, after_columns, source) = if self + .parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) + { + (vec![], None, vec![], None) + } else { + let (columns, partitioned, after_columns) = if !self.peek_subquery_start() { + let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; + + let partitioned = self.parse_insert_partition()?; + // Hive allows you to specify columns after partitions as well if you want. + let after_columns = if dialect_of!(self is HiveDialect) { + self.parse_parenthesized_column_list(Optional, false)? + } else { + vec![] + }; + (columns, partitioned, after_columns) } else { - let (columns, partitioned, after_columns) = if !self.peek_subquery_start() { - let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; + Default::default() + }; - let partitioned = self.parse_insert_partition()?; - // Hive allows you to specify columns after partitions as well if you want. - let after_columns = if dialect_of!(self is HiveDialect) { - self.parse_parenthesized_column_list(Optional, false)? - } else { - vec![] - }; - (columns, partitioned, after_columns) + let source = + if self.peek_keyword(Keyword::FORMAT) || self.peek_keyword(Keyword::SETTINGS) { + None } else { - Default::default() + Some(self.parse_query()?) }; - let source = Some(self.parse_query()?); + (columns, partitioned, after_columns, source) + }; + + let (format_clause, settings) = if dialect_of!(self is ClickHouseDialect | GenericDialect) + { + // Settings always comes before `FORMAT` for ClickHouse: + // + let settings = self.parse_settings()?; - (columns, partitioned, after_columns, source) + let format = if self.parse_keyword(Keyword::FORMAT) { + Some(self.parse_format_clause(true)?) + } else { + None }; + (format, settings) + } else { + (None, None) + }; + let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect) && self.parse_keyword(Keyword::AS) { @@ -11931,10 +11949,41 @@ impl<'a> Parser<'a> { replace_into, priority, insert_alias, + settings, + format_clause, })) } } + // Parses format clause used for [ClickHouse]. Formats are different when using `SELECT` and + // `INSERT` and also when using the CLI for pipes. It may or may not take an additional + // expression after the format so we try to parse the expression but allow failure. + // + // Since we know we never take an additional expression in `SELECT` context we never only try + // to parse if `can_have_expression` is true. + // + // + pub fn parse_format_clause( + &mut self, + can_have_expression: bool, + ) -> Result { + if self.parse_keyword(Keyword::NULL) { + Ok(FormatClause::Null) + } else { + let ident = self.parse_identifier()?; + let expr = if can_have_expression { + match self.try_parse(|p| p.parse_comma_separated(|p| p.parse_expr())) { + Ok(expr) => Some(expr), + _ => None, + } + } else { + None + }; + + Ok(FormatClause::Identifier { ident, expr }) + } + } + /// Returns true if the immediate tokens look like the /// beginning of a subquery. `(SELECT ...` fn peek_subquery_start(&mut self) -> bool { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 2f1b043b6..ec4470737 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1378,7 +1378,10 @@ fn test_query_with_format_clause() { } else { assert_eq!( query.format_clause, - Some(FormatClause::Identifier(Ident::new(*format))) + Some(FormatClause::Identifier { + ident: Ident::new(*format), + expr: None + }) ); } } @@ -1398,6 +1401,27 @@ fn test_query_with_format_clause() { } } +#[test] +fn test_insert_query_with_format_clause() { + let cases = [ + r#"INSERT INTO tbl FORMAT JSONEachRow {"id": 1, "value": "foo"}, {"id": 2, "value": "bar"}"#, + r#"INSERT INTO tbl FORMAT JSONEachRow ["first", "second", "third"]"#, + r#"INSERT INTO tbl FORMAT JSONEachRow [{"first": 1}]"#, + r#"INSERT INTO tbl FORMAT jsoneachrow {"id": 1}"#, + r#"INSERT INTO tbl (foo) FORMAT JSONAsObject {"foo": {"bar": {"x": "y"}, "baz": 1}}"#, + r#"INSERT INTO tbl (foo, bar) FORMAT JSON {"foo": 1, "bar": 2}"#, + r#"INSERT INTO tbl FORMAT CSV col1, col2, col3"#, + r#"INSERT INTO tbl FORMAT LineAsString "I love apple", "I love banana", "I love orange""#, + r#"INSERT INTO tbl (foo) SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#, + r#"INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format', format_template_row = '/some/path/row.format' FORMAT Template"#, + r#"INSERT INTO tbl SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#, + ]; + + for sql in &cases { + clickhouse_and_generic().verified_stmt(sql); + } +} + #[test] fn parse_create_table_on_commit_and_as_query() { let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS SELECT 1"#; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index fd520d507..7dfe3dc32 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4430,7 +4430,9 @@ fn test_simple_postgres_insert_with_alias() { returning: None, replace_into: false, priority: None, - insert_alias: None + insert_alias: None, + settings: None, + format_clause: None, }) ) } @@ -4500,7 +4502,9 @@ fn test_simple_postgres_insert_with_alias() { returning: None, replace_into: false, priority: None, - insert_alias: None + insert_alias: None, + settings: None, + format_clause: None, }) ) } @@ -4567,6 +4571,8 @@ fn test_simple_insert_with_quoted_alias() { replace_into: false, priority: None, insert_alias: None, + settings: None, + format_clause: None, }) ) }