Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for ClickHouse FORMAT on INSERT #1628

Merged
merged 8 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions src/ast/dml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ use sqlparser_derive::{Visit, VisitMut};
pub use super::ddl::{ColumnDef, TableConstraint};

use super::{
display_comma_separated, display_separated, Assignment, ClusteredBy, CommentDef, Expr,
FileFormat, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident,
InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert, OneOrManyWithParens,
OrderByExpr, Query, RowAccessPolicy, SelectItem, SqlOption, SqliteOnConflict, TableEngine,
TableObject, TableWithJoins, Tag, WrappedCollection,
display_comma_separated, display_separated, query::InputFormatClause, Assignment, ClusteredBy,
CommentDef, Expr, FileFormat, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat,
HiveRowFormat, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert,
OneOrManyWithParens, OrderByExpr, Query, RowAccessPolicy, SelectItem, Setting, SqlOption,
SqliteOnConflict, TableEngine, TableObject, TableWithJoins, Tag, WrappedCollection,
};

/// CREATE INDEX statement.
Expand Down Expand Up @@ -497,6 +497,19 @@ pub struct Insert {
pub priority: Option<MysqlInsertPriority>,
/// Only for mysql
pub insert_alias: Option<InsertAliases>,
/// Settings used for ClickHouse.
///
/// ClickHouse syntax: `INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format'`
///
/// [ClickHouse `INSERT INTO`](https://clickhouse.com/docs/en/sql-reference/statements/insert-into)
pub settings: Option<Vec<Setting>>,
/// Format for `INSERT` statement when not using standard SQL format. Can be e.g. `CSV`,
/// `JSON`, `JSONAsString`, `LineAsString` and more.
///
/// ClickHouse syntax: `INSERT INTO tbl FORMAT JSONEachRow {"foo": 1, "bar": 2}, {"foo": 3}`
///
/// [ClickHouse formats JSON insert](https://clickhouse.com/docs/en/interfaces/formats#json-inserting-data)
pub format_clause: Option<InputFormatClause>,
}

impl Display for Insert {
Expand Down Expand Up @@ -545,12 +558,18 @@ impl Display for Insert {
write!(f, "({}) ", display_comma_separated(&self.after_columns))?;
}

if let Some(settings) = &self.settings {
write!(f, "SETTINGS {} ", display_comma_separated(settings))?;
}

if let Some(source) = &self.source {
write!(f, "{source}")?;
} else if !self.assignments.is_empty() {
write!(f, "SET ")?;
write!(f, "{}", display_comma_separated(&self.assignments))?;
} else if self.source.is_none() && self.columns.is_empty() {
} else if let Some(format_clause) = &self.format_clause {
write!(f, "{format_clause}")?;
} else if self.columns.is_empty() {
write!(f, "DEFAULT VALUES")?;
}

Expand Down
8 changes: 4 additions & 4 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,10 @@ pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode,
ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml,
FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Interpolate,
InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonTableColumn,
JsonTableColumnErrorHandling, JsonTableNamedColumn, JsonTableNestedColumn, LateralView,
LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure,
FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem,
InputFormatClause, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator,
JsonTableColumn, JsonTableColumnErrorHandling, JsonTableNamedColumn, JsonTableNestedColumn,
LateralView, LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure,
NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OpenJsonTableColumn,
OrderBy, OrderByExpr, PivotValueSource, ProjectionSelect, Query, RenameSelectItem,
RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select,
Expand Down
23 changes: 23 additions & 0 deletions src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2480,6 +2480,29 @@ impl fmt::Display for FormatClause {
}
}

/// FORMAT identifier in input context, specific to ClickHouse.
///
/// [ClickHouse]: <https://clickhouse.com/docs/en/interfaces/formats>
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct InputFormatClause {
pub ident: Ident,
pub values: Vec<Expr>,
}

impl fmt::Display for InputFormatClause {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "FORMAT {}", self.ident)?;

if !self.values.is_empty() {
write!(f, " {}", display_comma_separated(self.values.as_slice()))?;
}

Ok(())
}
}

/// FOR XML or FOR JSON clause, specific to MSSQL
/// (formats the output of a query as XML or JSON)
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
Expand Down
2 changes: 2 additions & 0 deletions src/ast/spans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,8 @@ impl Spanned for Insert {
priority: _, // todo, mysql specific
insert_alias: _, // todo, mysql specific
assignments,
settings: _, // todo, clickhouse specific
format_clause: _, // todo, clickhouse specific
} = self;

union_spans(
Expand Down
12 changes: 12 additions & 0 deletions src/dialect/clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,16 @@ impl Dialect for ClickHouseDialect {
fn supports_insert_table_function(&self) -> bool {
true
}

fn supports_insert_format(&self) -> bool {
true
}

// ClickHouse uses this for some FORMAT expressions in `INSERT` context, e.g. when inserting
// with FORMAT JSONEachRow a raw JSON key-value expression is valid and expected.
//
// [ClickHouse formats](https://clickhouse.com/docs/en/interfaces/formats)
fn supports_dictionary_syntax(&self) -> bool {
true
}
}
5 changes: 5 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,11 @@ pub trait Dialect: Debug + Any {
fn supports_insert_table_function(&self) -> bool {
false
}

/// Does the dialect support insert formats, e.g. `INSERT INTO ... FORMAT <format>`
fn supports_insert_format(&self) -> bool {
false
}
}

/// This represents the operators for which precedence must be defined
Expand Down
2 changes: 0 additions & 2 deletions src/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -949,9 +949,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
Keyword::PARTITION,
// for Clickhouse PREWHERE
Keyword::PREWHERE,
// for ClickHouse SELECT * FROM t SETTINGS ...
Keyword::SETTINGS,
// for ClickHouse SELECT * FROM t FORMAT...
Keyword::FORMAT,
// for Snowflake START WITH .. CONNECT BY
Keyword::START,
Expand Down
78 changes: 56 additions & 22 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12033,35 +12033,55 @@ impl<'a> Parser<'a> {

let is_mysql = dialect_of!(self is MySqlDialect);

let (columns, partitioned, after_columns, source, assignments) =
if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) {
(vec![], None, vec![], None, vec![])
} else {
let (columns, partitioned, after_columns) = if !self.peek_subquery_start() {
let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?;
let (columns, partitioned, after_columns, source, assignments) = if self
.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES])
{
(vec![], None, vec![], None, vec![])
} else {
let (columns, partitioned, after_columns) = if !self.peek_subquery_start() {
let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?;

let partitioned = self.parse_insert_partition()?;
// Hive allows you to specify columns after partitions as well if you want.
let after_columns = if dialect_of!(self is HiveDialect) {
self.parse_parenthesized_column_list(Optional, false)?
} else {
vec![]
};
(columns, partitioned, after_columns)
let partitioned = self.parse_insert_partition()?;
// Hive allows you to specify columns after partitions as well if you want.
let after_columns = if dialect_of!(self is HiveDialect) {
self.parse_parenthesized_column_list(Optional, false)?
} else {
Default::default()
vec![]
};
(columns, partitioned, after_columns)
} else {
Default::default()
};

let (source, assignments) =
if self.dialect.supports_insert_set() && self.parse_keyword(Keyword::SET) {
(None, self.parse_comma_separated(Parser::parse_assignment)?)
} else {
(Some(self.parse_query()?), vec![])
};
let (source, assignments) = if self.peek_keyword(Keyword::FORMAT)
|| self.peek_keyword(Keyword::SETTINGS)
{
(None, vec![])
} else if self.dialect.supports_insert_set() && self.parse_keyword(Keyword::SET) {
(None, self.parse_comma_separated(Parser::parse_assignment)?)
} else {
(Some(self.parse_query()?), vec![])
};

(columns, partitioned, after_columns, source, assignments)
};

(columns, partitioned, after_columns, source, assignments)
let (format_clause, settings) = if self.dialect.supports_insert_format() {
// Settings always comes before `FORMAT` for ClickHouse:
// <https://clickhouse.com/docs/en/sql-reference/statements/insert-into>
let settings = self.parse_settings()?;

let format = if self.parse_keyword(Keyword::FORMAT) {
Some(self.parse_input_format_clause()?)
} else {
None
};

(format, settings)
} else {
Default::default()
};

let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect)
&& self.parse_keyword(Keyword::AS)
{
Expand Down Expand Up @@ -12146,10 +12166,24 @@ impl<'a> Parser<'a> {
replace_into,
priority,
insert_alias,
settings,
format_clause,
}))
}
}

// Parses input format clause used for [ClickHouse].
//
// <https://clickhouse.com/docs/en/interfaces/formats>
pub fn parse_input_format_clause(&mut self) -> Result<InputFormatClause, ParserError> {
let ident = self.parse_identifier()?;
let values = self
.maybe_parse(|p| p.parse_comma_separated(|p| p.parse_expr()))?
.unwrap_or_default();

Ok(InputFormatClause { ident, values })
}

/// Returns true if the immediate tokens look like the
/// beginning of a subquery. `(SELECT ...`
fn peek_subquery_start(&mut self) -> bool {
Expand Down
20 changes: 20 additions & 0 deletions tests/sqlparser_clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1404,6 +1404,26 @@ fn test_query_with_format_clause() {
}
}

#[test]
fn test_insert_query_with_format_clause() {
let cases = [
r#"INSERT INTO tbl FORMAT JSONEachRow {"id": 1, "value": "foo"}, {"id": 2, "value": "bar"}"#,
r#"INSERT INTO tbl FORMAT JSONEachRow ["first", "second", "third"]"#,
r#"INSERT INTO tbl FORMAT JSONEachRow [{"first": 1}]"#,
r#"INSERT INTO tbl (foo) FORMAT JSONAsObject {"foo": {"bar": {"x": "y"}, "baz": 1}}"#,
r#"INSERT INTO tbl (foo, bar) FORMAT JSON {"foo": 1, "bar": 2}"#,
r#"INSERT INTO tbl FORMAT CSV col1, col2, col3"#,
r#"INSERT INTO tbl FORMAT LineAsString "I love apple", "I love banana", "I love orange""#,
r#"INSERT INTO tbl (foo) SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#,
r#"INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format', format_template_row = '/some/path/row.format' FORMAT Template"#,
r#"INSERT INTO tbl SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#,
];

for sql in &cases {
clickhouse().verified_stmt(sql);
}
}

#[test]
fn parse_create_table_on_commit_and_as_query() {
let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS SELECT 1"#;
Expand Down
10 changes: 8 additions & 2 deletions tests/sqlparser_postgres.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4431,7 +4431,9 @@ fn test_simple_postgres_insert_with_alias() {
returning: None,
replace_into: false,
priority: None,
insert_alias: None
insert_alias: None,
settings: None,
format_clause: None,
})
)
}
Expand Down Expand Up @@ -4502,7 +4504,9 @@ fn test_simple_postgres_insert_with_alias() {
returning: None,
replace_into: false,
priority: None,
insert_alias: None
insert_alias: None,
settings: None,
format_clause: None,
})
)
}
Expand Down Expand Up @@ -4570,6 +4574,8 @@ fn test_simple_insert_with_quoted_alias() {
replace_into: false,
priority: None,
insert_alias: None,
settings: None,
format_clause: None,
})
)
}
Expand Down
Loading