From 2b65fb3add471cc23701b12ff9cb236f6715af75 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Wed, 11 Dec 2024 23:27:31 +0800 Subject: [PATCH] Support unparsing `UNNEST` plan to `UNNEST` table factor SQL (#13660) * add `unnest_as_table_factor` and `UnnestRelationBuilder` * unparse unnest as table factor * fix typo * add tests for the default configs * add a static const for unnest_placeholder * fix tests * fix tests --- datafusion/sql/src/unparser/ast.rs | 73 ++++++++++++++ datafusion/sql/src/unparser/dialect.rs | 23 +++++ datafusion/sql/src/unparser/plan.rs | 55 ++++++++++- datafusion/sql/src/unparser/utils.rs | 2 +- datafusion/sql/src/utils.rs | 50 +++++----- datafusion/sql/tests/cases/plan_to_sql.rs | 99 ++++++++++++++++++- .../sqllogictest/test_files/encoding.slt | 2 +- datafusion/sqllogictest/test_files/joins.slt | 12 +-- .../test_files/push_down_filter.slt | 40 ++++---- .../test_files/table_functions.slt | 2 +- datafusion/sqllogictest/test_files/unnest.slt | 28 +++--- datafusion/sqllogictest/test_files/window.slt | 1 - 12 files changed, 313 insertions(+), 74 deletions(-) diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index cc0812cd71e1..ad0b5f16b283 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -353,6 +353,7 @@ pub(super) struct RelationBuilder { enum TableFactorBuilder { Table(TableRelationBuilder), Derived(DerivedRelationBuilder), + Unnest(UnnestRelationBuilder), Empty, } @@ -369,6 +370,12 @@ impl RelationBuilder { self.relation = Some(TableFactorBuilder::Derived(value)); self } + + pub fn unnest(&mut self, value: UnnestRelationBuilder) -> &mut Self { + self.relation = Some(TableFactorBuilder::Unnest(value)); + self + } + pub fn empty(&mut self) -> &mut Self { self.relation = Some(TableFactorBuilder::Empty); self @@ -382,6 +389,9 @@ impl RelationBuilder { Some(TableFactorBuilder::Derived(ref mut rel_builder)) => { rel_builder.alias = value; } + Some(TableFactorBuilder::Unnest(ref mut rel_builder)) => { + rel_builder.alias = value; + } Some(TableFactorBuilder::Empty) => (), None => (), } @@ -391,6 +401,7 @@ impl RelationBuilder { Ok(match self.relation { Some(TableFactorBuilder::Table(ref value)) => Some(value.build()?), Some(TableFactorBuilder::Derived(ref value)) => Some(value.build()?), + Some(TableFactorBuilder::Unnest(ref value)) => Some(value.build()?), Some(TableFactorBuilder::Empty) => None, None => return Err(Into::into(UninitializedFieldError::from("relation"))), }) @@ -526,6 +537,68 @@ impl Default for DerivedRelationBuilder { } } +#[derive(Clone)] +pub(super) struct UnnestRelationBuilder { + pub alias: Option, + pub array_exprs: Vec, + with_offset: bool, + with_offset_alias: Option, + with_ordinality: bool, +} + +#[allow(dead_code)] +impl UnnestRelationBuilder { + pub fn alias(&mut self, value: Option) -> &mut Self { + self.alias = value; + self + } + pub fn array_exprs(&mut self, value: Vec) -> &mut Self { + self.array_exprs = value; + self + } + + pub fn with_offset(&mut self, value: bool) -> &mut Self { + self.with_offset = value; + self + } + + pub fn with_offset_alias(&mut self, value: Option) -> &mut Self { + self.with_offset_alias = value; + self + } + + pub fn with_ordinality(&mut self, value: bool) -> &mut Self { + self.with_ordinality = value; + self + } + + pub fn build(&self) -> Result { + Ok(ast::TableFactor::UNNEST { + alias: self.alias.clone(), + array_exprs: self.array_exprs.clone(), + with_offset: self.with_offset, + with_offset_alias: self.with_offset_alias.clone(), + with_ordinality: self.with_ordinality, + }) + } + + fn create_empty() -> Self { + Self { + alias: Default::default(), + array_exprs: Default::default(), + with_offset: Default::default(), + with_offset_alias: Default::default(), + with_ordinality: Default::default(), + } + } +} + +impl Default for UnnestRelationBuilder { + fn default() -> Self { + Self::create_empty() + } +} + /// Runtime error when a `build()` method is called and one or more required fields /// do not have a value. #[derive(Debug, Clone)] diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index e979d8fd4ebd..ae387d441fa2 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -157,6 +157,15 @@ pub trait Dialect: Send + Sync { fn full_qualified_col(&self) -> bool { false } + + /// Allow to unparse the unnest plan as [ast::TableFactor::UNNEST]. + /// + /// Some dialects like BigQuery require UNNEST to be used in the FROM clause but + /// the LogicalPlan planner always puts UNNEST in the SELECT clause. This flag allows + /// to unparse the UNNEST plan as [ast::TableFactor::UNNEST] instead of a subquery. + fn unnest_as_table_factor(&self) -> bool { + false + } } /// `IntervalStyle` to use for unparsing @@ -448,6 +457,7 @@ pub struct CustomDialect { requires_derived_table_alias: bool, division_operator: BinaryOperator, full_qualified_col: bool, + unnest_as_table_factor: bool, } impl Default for CustomDialect { @@ -474,6 +484,7 @@ impl Default for CustomDialect { requires_derived_table_alias: false, division_operator: BinaryOperator::Divide, full_qualified_col: false, + unnest_as_table_factor: false, } } } @@ -582,6 +593,10 @@ impl Dialect for CustomDialect { fn full_qualified_col(&self) -> bool { self.full_qualified_col } + + fn unnest_as_table_factor(&self) -> bool { + self.unnest_as_table_factor + } } /// `CustomDialectBuilder` to build `CustomDialect` using builder pattern @@ -617,6 +632,7 @@ pub struct CustomDialectBuilder { requires_derived_table_alias: bool, division_operator: BinaryOperator, full_qualified_col: bool, + unnest_as_table_factor: bool, } impl Default for CustomDialectBuilder { @@ -649,6 +665,7 @@ impl CustomDialectBuilder { requires_derived_table_alias: false, division_operator: BinaryOperator::Divide, full_qualified_col: false, + unnest_as_table_factor: false, } } @@ -673,6 +690,7 @@ impl CustomDialectBuilder { requires_derived_table_alias: self.requires_derived_table_alias, division_operator: self.division_operator, full_qualified_col: self.full_qualified_col, + unnest_as_table_factor: self.unnest_as_table_factor, } } @@ -800,4 +818,9 @@ impl CustomDialectBuilder { self.full_qualified_col = full_qualified_col; self } + + pub fn with_unnest_as_table_factor(mut self, _unnest_as_table_factor: bool) -> Self { + self.unnest_as_table_factor = _unnest_as_table_factor; + self + } } diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index eaae4fe73d8c..e9f9f486ea9a 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -32,7 +32,9 @@ use super::{ }, Unparser, }; +use crate::unparser::ast::UnnestRelationBuilder; use crate::unparser::utils::unproject_agg_exprs; +use crate::utils::UNNEST_PLACEHOLDER; use datafusion_common::{ internal_err, not_impl_err, tree_node::{TransformedResult, TreeNode}, @@ -40,7 +42,7 @@ use datafusion_common::{ }; use datafusion_expr::{ expr::Alias, BinaryExpr, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan, - LogicalPlanBuilder, Operator, Projection, SortExpr, TableScan, + LogicalPlanBuilder, Operator, Projection, SortExpr, TableScan, Unnest, }; use sqlparser::ast::{self, Ident, SetExpr}; use std::sync::Arc; @@ -312,6 +314,19 @@ impl Unparser<'_> { .select_to_sql_recursively(&new_plan, query, select, relation); } + // Projection can be top-level plan for unnest relation + // The projection generated by the `RecursiveUnnestRewriter` from a UNNEST relation will have + // only one expression, which is the placeholder column generated by the rewriter. + if self.dialect.unnest_as_table_factor() + && p.expr.len() == 1 + && Self::is_unnest_placeholder(&p.expr[0]) + { + if let LogicalPlan::Unnest(unnest) = &p.input.as_ref() { + return self + .unnest_to_table_factor_sql(unnest, query, select, relation); + } + } + // Projection can be top-level plan for derived table if select.already_projected() { return self.derive_with_dialect_alias( @@ -678,7 +693,11 @@ impl Unparser<'_> { ) } LogicalPlan::EmptyRelation(_) => { - relation.empty(); + // An EmptyRelation could be behind an UNNEST node. If the dialect supports UNNEST as a table factor, + // a TableRelationBuilder will be created for the UNNEST node first. + if !relation.has_relation() { + relation.empty(); + } Ok(()) } LogicalPlan::Extension(_) => not_impl_err!("Unsupported operator: {plan:?}"), @@ -708,6 +727,38 @@ impl Unparser<'_> { } } + /// Try to find the placeholder column name generated by `RecursiveUnnestRewriter` + /// Only match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(...)"))` + fn is_unnest_placeholder(expr: &Expr) -> bool { + if let Expr::Alias(Alias { expr, .. }) = expr { + if let Expr::Column(Column { name, .. }) = expr.as_ref() { + return name.starts_with(UNNEST_PLACEHOLDER); + } + } + false + } + + fn unnest_to_table_factor_sql( + &self, + unnest: &Unnest, + query: &mut Option, + select: &mut SelectBuilder, + relation: &mut RelationBuilder, + ) -> Result<()> { + let mut unnest_relation = UnnestRelationBuilder::default(); + let LogicalPlan::Projection(p) = unnest.input.as_ref() else { + return internal_err!("Unnest input is not a Projection: {unnest:?}"); + }; + let exprs = p + .expr + .iter() + .map(|e| self.expr_to_sql(e)) + .collect::>>()?; + unnest_relation.array_exprs(exprs); + relation.unnest(unnest_relation); + self.select_to_sql_recursively(p.input.as_ref(), query, select, relation) + } + fn is_scan_with_pushdown(scan: &TableScan) -> bool { scan.projection.is_some() || !scan.filters.is_empty() || scan.fetch.is_some() } diff --git a/datafusion/sql/src/unparser/utils.rs b/datafusion/sql/src/unparser/utils.rs index 518781106c3b..354a68f60964 100644 --- a/datafusion/sql/src/unparser/utils.rs +++ b/datafusion/sql/src/unparser/utils.rs @@ -133,7 +133,7 @@ pub(crate) fn find_window_nodes_within_select<'a>( /// Recursively identify Column expressions and transform them into the appropriate unnest expression /// -/// For example, if expr contains the column expr "unnest_placeholder(make_array(Int64(1),Int64(2),Int64(2),Int64(5),NULL),depth=1)" +/// For example, if expr contains the column expr "__unnest_placeholder(make_array(Int64(1),Int64(2),Int64(2),Int64(5),NULL),depth=1)" /// it will be transformed into an actual unnest expression UNNEST([1, 2, 2, 5, NULL]) pub(crate) fn unproject_unnest_expr(expr: Expr, unnest: &Unnest) -> Result { expr.transform(|sub_expr| { diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 69e3953341ef..1c2a3ea91a2b 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -315,6 +315,8 @@ pub(crate) fn rewrite_recursive_unnests_bottom_up( .collect::>()) } +pub const UNNEST_PLACEHOLDER: &str = "__unnest_placeholder"; + /* This is only usedful when used with transform down up A full example of how the transformation works: @@ -360,9 +362,9 @@ impl RecursiveUnnestRewriter<'_> { // Full context, we are trying to plan the execution as InnerProjection->Unnest->OuterProjection // inside unnest execution, each column inside the inner projection // will be transformed into new columns. Thus we need to keep track of these placeholding column names - let placeholder_name = format!("unnest_placeholder({})", inner_expr_name); + let placeholder_name = format!("{UNNEST_PLACEHOLDER}({})", inner_expr_name); let post_unnest_name = - format!("unnest_placeholder({},depth={})", inner_expr_name, level); + format!("{UNNEST_PLACEHOLDER}({},depth={})", inner_expr_name, level); // This is due to the fact that unnest transformation should keep the original // column name as is, to comply with group by and order by let placeholder_column = Column::from_name(placeholder_name.clone()); @@ -693,17 +695,17 @@ mod tests { // Only the bottom most unnest exprs are transformed assert_eq!( transformed_exprs, - vec![col("unnest_placeholder(3d_col,depth=2)") + vec![col("__unnest_placeholder(3d_col,depth=2)") .alias("UNNEST(UNNEST(3d_col))") .add( - col("unnest_placeholder(3d_col,depth=2)") + col("__unnest_placeholder(3d_col,depth=2)") .alias("UNNEST(UNNEST(3d_col))") ) .add(col("i64_col"))] ); column_unnests_eq( vec![ - "unnest_placeholder(3d_col)=>[unnest_placeholder(3d_col,depth=2)|depth=2]", + "__unnest_placeholder(3d_col)=>[__unnest_placeholder(3d_col,depth=2)|depth=2]", ], &unnest_placeholder_columns, ); @@ -713,7 +715,7 @@ mod tests { assert_eq!( inner_projection_exprs, vec![ - col("3d_col").alias("unnest_placeholder(3d_col)"), + col("3d_col").alias("__unnest_placeholder(3d_col)"), col("i64_col") ] ); @@ -730,12 +732,12 @@ mod tests { assert_eq!( transformed_exprs, vec![ - (col("unnest_placeholder(3d_col,depth=1)").alias("UNNEST(3d_col)")) + (col("__unnest_placeholder(3d_col,depth=1)").alias("UNNEST(3d_col)")) .alias("2d_col") ] ); column_unnests_eq( - vec!["unnest_placeholder(3d_col)=>[unnest_placeholder(3d_col,depth=2)|depth=2, unnest_placeholder(3d_col,depth=1)|depth=1]"], + vec!["__unnest_placeholder(3d_col)=>[__unnest_placeholder(3d_col,depth=2)|depth=2, __unnest_placeholder(3d_col,depth=1)|depth=1]"], &unnest_placeholder_columns, ); // Still reference struct_col in original schema but with alias, @@ -743,7 +745,7 @@ mod tests { assert_eq!( inner_projection_exprs, vec![ - col("3d_col").alias("unnest_placeholder(3d_col)"), + col("3d_col").alias("__unnest_placeholder(3d_col)"), col("i64_col") ] ); @@ -794,19 +796,19 @@ mod tests { assert_eq!( transformed_exprs, vec![ - col("unnest_placeholder(struct_col).field1"), - col("unnest_placeholder(struct_col).field2"), + col("__unnest_placeholder(struct_col).field1"), + col("__unnest_placeholder(struct_col).field2"), ] ); column_unnests_eq( - vec!["unnest_placeholder(struct_col)"], + vec!["__unnest_placeholder(struct_col)"], &unnest_placeholder_columns, ); // Still reference struct_col in original schema but with alias, // to avoid colliding with the projection on the column itself if any assert_eq!( inner_projection_exprs, - vec![col("struct_col").alias("unnest_placeholder(struct_col)"),] + vec![col("struct_col").alias("__unnest_placeholder(struct_col)"),] ); // unnest(array_col) + 1 @@ -819,15 +821,15 @@ mod tests { )?; column_unnests_eq( vec![ - "unnest_placeholder(struct_col)", - "unnest_placeholder(array_col)=>[unnest_placeholder(array_col,depth=1)|depth=1]", + "__unnest_placeholder(struct_col)", + "__unnest_placeholder(array_col)=>[__unnest_placeholder(array_col,depth=1)|depth=1]", ], &unnest_placeholder_columns, ); // Only transform the unnest children assert_eq!( transformed_exprs, - vec![col("unnest_placeholder(array_col,depth=1)") + vec![col("__unnest_placeholder(array_col,depth=1)") .alias("UNNEST(array_col)") .add(lit(1i64))] ); @@ -838,8 +840,8 @@ mod tests { assert_eq!( inner_projection_exprs, vec![ - col("struct_col").alias("unnest_placeholder(struct_col)"), - col("array_col").alias("unnest_placeholder(array_col)") + col("struct_col").alias("__unnest_placeholder(struct_col)"), + col("array_col").alias("__unnest_placeholder(array_col)") ] ); @@ -907,7 +909,7 @@ mod tests { assert_eq!( transformed_exprs, vec![unnest( - col("unnest_placeholder(struct_list,depth=1)") + col("__unnest_placeholder(struct_list,depth=1)") .alias("UNNEST(struct_list)") .field("subfield1") )] @@ -915,14 +917,14 @@ mod tests { column_unnests_eq( vec![ - "unnest_placeholder(struct_list)=>[unnest_placeholder(struct_list,depth=1)|depth=1]", + "__unnest_placeholder(struct_list)=>[__unnest_placeholder(struct_list,depth=1)|depth=1]", ], &unnest_placeholder_columns, ); assert_eq!( inner_projection_exprs, - vec![col("struct_list").alias("unnest_placeholder(struct_list)")] + vec![col("struct_list").alias("__unnest_placeholder(struct_list)")] ); // continue rewrite another expr in select @@ -937,7 +939,7 @@ mod tests { assert_eq!( transformed_exprs, vec![unnest( - col("unnest_placeholder(struct_list,depth=1)") + col("__unnest_placeholder(struct_list,depth=1)") .alias("UNNEST(struct_list)") .field("subfield2") )] @@ -947,14 +949,14 @@ mod tests { // because expr1 and expr2 derive from the same unnest result column_unnests_eq( vec![ - "unnest_placeholder(struct_list)=>[unnest_placeholder(struct_list,depth=1)|depth=1]", + "__unnest_placeholder(struct_list)=>[__unnest_placeholder(struct_list,depth=1)|depth=1]", ], &unnest_placeholder_columns, ); assert_eq!( inner_projection_exprs, - vec![col("struct_list").alias("unnest_placeholder(struct_list)")] + vec![col("struct_list").alias("__unnest_placeholder(struct_list)")] ); Ok(()) diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index fcfee29f6ac9..236b59432a5f 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -525,6 +525,96 @@ fn roundtrip_statement_with_dialect() -> Result<()> { parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(SqliteDialect {}), }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3])", + expected: r#"SELECT * FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))")"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) AS t1 (c1)", + expected: r#"SELECT * FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") AS t1 (c1)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) AS t1 (c1)", + expected: r#"SELECT * FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") AS t1 (c1)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]), j1", + expected: r#"SELECT * FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") CROSS JOIN j1"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) JOIN j1 ON u.c1 = j1.j1_id", + expected: r#"SELECT * FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") AS u (c1) JOIN j1 ON (u.c1 = j1.j1_id)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) UNION ALL SELECT * FROM UNNEST([4,5,6]) u(c1)", + expected: r#"SELECT * FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") AS u (c1) UNION ALL SELECT * FROM (SELECT UNNEST([4, 5, 6]) AS "UNNEST(make_array(Int64(4),Int64(5),Int64(6)))") AS u (c1)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3])", + expected: r#"SELECT * FROM UNNEST([1, 2, 3])"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) AS t1 (c1)", + expected: r#"SELECT * FROM UNNEST([1, 2, 3]) AS t1 (c1)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) AS t1 (c1)", + expected: r#"SELECT * FROM UNNEST([1, 2, 3]) AS t1 (c1)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]), j1", + expected: r#"SELECT * FROM UNNEST([1, 2, 3]) CROSS JOIN j1"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) JOIN j1 ON u.c1 = j1.j1_id", + expected: r#"SELECT * FROM UNNEST([1, 2, 3]) AS u (c1) JOIN j1 ON (u.c1 = j1.j1_id)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) UNION ALL SELECT * FROM UNNEST([4,5,6]) u(c1)", + expected: r#"SELECT * FROM UNNEST([1, 2, 3]) AS u (c1) UNION ALL SELECT * FROM UNNEST([4, 5, 6]) AS u (c1)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT UNNEST([1,2,3])", + expected: r#"SELECT * FROM UNNEST([1, 2, 3])"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT UNNEST([1,2,3]) as c1", + expected: r#"SELECT UNNEST([1, 2, 3]) AS c1"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT UNNEST([1,2,3]), 1", + expected: r#"SELECT UNNEST([1, 2, 3]) AS UNNEST(make_array(Int64(1),Int64(2),Int64(3))), Int64(1)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, ]; for query in tests { @@ -535,7 +625,8 @@ fn roundtrip_statement_with_dialect() -> Result<()> { let state = MockSessionState::default() .with_aggregate_function(max_udaf()) .with_aggregate_function(min_udaf()) - .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); + .with_expr_planner(Arc::new(CoreFunctionPlanner::default())) + .with_expr_planner(Arc::new(NestedFunctionPlanner)); let context = MockContextProvider { state }; let sql_to_rel = SqlToRel::new(&context); @@ -571,9 +662,9 @@ fn test_unnest_logical_plan() -> Result<()> { let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); let expected = r#" -Projection: unnest_placeholder(unnest_table.struct_col).field1, unnest_placeholder(unnest_table.struct_col).field2, unnest_placeholder(unnest_table.array_col,depth=1) AS UNNEST(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col - Unnest: lists[unnest_placeholder(unnest_table.array_col)|depth=1] structs[unnest_placeholder(unnest_table.struct_col)] - Projection: unnest_table.struct_col AS unnest_placeholder(unnest_table.struct_col), unnest_table.array_col AS unnest_placeholder(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col +Projection: __unnest_placeholder(unnest_table.struct_col).field1, __unnest_placeholder(unnest_table.struct_col).field2, __unnest_placeholder(unnest_table.array_col,depth=1) AS UNNEST(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col + Unnest: lists[__unnest_placeholder(unnest_table.array_col)|depth=1] structs[__unnest_placeholder(unnest_table.struct_col)] + Projection: unnest_table.struct_col AS __unnest_placeholder(unnest_table.struct_col), unnest_table.array_col AS __unnest_placeholder(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col TableScan: unnest_table"#.trim_start(); assert_eq!(plan.to_string(), expected); diff --git a/datafusion/sqllogictest/test_files/encoding.slt b/datafusion/sqllogictest/test_files/encoding.slt index fc22cc8bf7a7..24efb33f7896 100644 --- a/datafusion/sqllogictest/test_files/encoding.slt +++ b/datafusion/sqllogictest/test_files/encoding.slt @@ -101,4 +101,4 @@ FROM test_utf8view; Andrew QW5kcmV3 416e64726577 X WA 58 Xiangpeng WGlhbmdwZW5n 5869616e6770656e67 Xiangpeng WGlhbmdwZW5n 5869616e6770656e67 Raphael UmFwaGFlbA 5261706861656c R Ug 52 -NULL NULL NULL R Ug 52 \ No newline at end of file +NULL NULL NULL R Ug 52 diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 62f625119897..49aaa877caa6 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -4058,9 +4058,9 @@ logical_plan 03)----TableScan: join_t1 projection=[t1_id, t1_name] 04)--SubqueryAlias: series 05)----Subquery: -06)------Projection: unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)),depth=1) AS i -07)--------Unnest: lists[unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)))|depth=1] structs[] -08)----------Projection: generate_series(Int64(1), CAST(outer_ref(t1.t1_int) AS Int64)) AS unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int))) +06)------Projection: __unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)),depth=1) AS i +07)--------Unnest: lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)))|depth=1] structs[] +08)----------Projection: generate_series(Int64(1), CAST(outer_ref(t1.t1_int) AS Int64)) AS __unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int))) 09)------------EmptyRelation physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(UInt32, Column { relation: Some(Bare { table: "t1" }), name: "t1_int" }) @@ -4081,9 +4081,9 @@ logical_plan 03)----TableScan: join_t1 projection=[t1_id, t1_name] 04)--SubqueryAlias: series 05)----Subquery: -06)------Projection: unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)),depth=1) AS i -07)--------Unnest: lists[unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)))|depth=1] structs[] -08)----------Projection: generate_series(Int64(1), CAST(outer_ref(t2.t1_int) AS Int64)) AS unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int))) +06)------Projection: __unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)),depth=1) AS i +07)--------Unnest: lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)))|depth=1] structs[] +08)----------Projection: generate_series(Int64(1), CAST(outer_ref(t2.t1_int) AS Int64)) AS __unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int))) 09)------------EmptyRelation physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(UInt32, Column { relation: Some(Bare { table: "t2" }), name: "t1_int" }) diff --git a/datafusion/sqllogictest/test_files/push_down_filter.slt b/datafusion/sqllogictest/test_files/push_down_filter.slt index 86aa07b04ce1..64cc51b3c4ff 100644 --- a/datafusion/sqllogictest/test_files/push_down_filter.slt +++ b/datafusion/sqllogictest/test_files/push_down_filter.slt @@ -36,9 +36,9 @@ query TT explain select uc2 from (select unnest(column2) as uc2, column1 from v) where column1 = 2; ---- logical_plan -01)Projection: unnest_placeholder(v.column2,depth=1) AS uc2 -02)--Unnest: lists[unnest_placeholder(v.column2)|depth=1] structs[] -03)----Projection: v.column2 AS unnest_placeholder(v.column2), v.column1 +01)Projection: __unnest_placeholder(v.column2,depth=1) AS uc2 +02)--Unnest: lists[__unnest_placeholder(v.column2)|depth=1] structs[] +03)----Projection: v.column2 AS __unnest_placeholder(v.column2), v.column1 04)------Filter: v.column1 = Int64(2) 05)--------TableScan: v projection=[column1, column2] @@ -53,11 +53,11 @@ query TT explain select uc2 from (select unnest(column2) as uc2, column1 from v) where uc2 > 3; ---- logical_plan -01)Projection: unnest_placeholder(v.column2,depth=1) AS uc2 -02)--Filter: unnest_placeholder(v.column2,depth=1) > Int64(3) -03)----Projection: unnest_placeholder(v.column2,depth=1) -04)------Unnest: lists[unnest_placeholder(v.column2)|depth=1] structs[] -05)--------Projection: v.column2 AS unnest_placeholder(v.column2), v.column1 +01)Projection: __unnest_placeholder(v.column2,depth=1) AS uc2 +02)--Filter: __unnest_placeholder(v.column2,depth=1) > Int64(3) +03)----Projection: __unnest_placeholder(v.column2,depth=1) +04)------Unnest: lists[__unnest_placeholder(v.column2)|depth=1] structs[] +05)--------Projection: v.column2 AS __unnest_placeholder(v.column2), v.column1 06)----------TableScan: v projection=[column1, column2] query II @@ -71,10 +71,10 @@ query TT explain select uc2, column1 from (select unnest(column2) as uc2, column1 from v) where uc2 > 3 AND column1 = 2; ---- logical_plan -01)Projection: unnest_placeholder(v.column2,depth=1) AS uc2, v.column1 -02)--Filter: unnest_placeholder(v.column2,depth=1) > Int64(3) -03)----Unnest: lists[unnest_placeholder(v.column2)|depth=1] structs[] -04)------Projection: v.column2 AS unnest_placeholder(v.column2), v.column1 +01)Projection: __unnest_placeholder(v.column2,depth=1) AS uc2, v.column1 +02)--Filter: __unnest_placeholder(v.column2,depth=1) > Int64(3) +03)----Unnest: lists[__unnest_placeholder(v.column2)|depth=1] structs[] +04)------Projection: v.column2 AS __unnest_placeholder(v.column2), v.column1 05)--------Filter: v.column1 = Int64(2) 06)----------TableScan: v projection=[column1, column2] @@ -90,10 +90,10 @@ query TT explain select uc2, column1 from (select unnest(column2) as uc2, column1 from v) where uc2 > 3 OR column1 = 2; ---- logical_plan -01)Projection: unnest_placeholder(v.column2,depth=1) AS uc2, v.column1 -02)--Filter: unnest_placeholder(v.column2,depth=1) > Int64(3) OR v.column1 = Int64(2) -03)----Unnest: lists[unnest_placeholder(v.column2)|depth=1] structs[] -04)------Projection: v.column2 AS unnest_placeholder(v.column2), v.column1 +01)Projection: __unnest_placeholder(v.column2,depth=1) AS uc2, v.column1 +02)--Filter: __unnest_placeholder(v.column2,depth=1) > Int64(3) OR v.column1 = Int64(2) +03)----Unnest: lists[__unnest_placeholder(v.column2)|depth=1] structs[] +04)------Projection: v.column2 AS __unnest_placeholder(v.column2), v.column1 05)--------TableScan: v projection=[column1, column2] statement ok @@ -112,10 +112,10 @@ query TT explain select * from (select column1, unnest(column2) as o from d) where o['a'] = 1; ---- logical_plan -01)Projection: d.column1, unnest_placeholder(d.column2,depth=1) AS o -02)--Filter: get_field(unnest_placeholder(d.column2,depth=1), Utf8("a")) = Int64(1) -03)----Unnest: lists[unnest_placeholder(d.column2)|depth=1] structs[] -04)------Projection: d.column1, d.column2 AS unnest_placeholder(d.column2) +01)Projection: d.column1, __unnest_placeholder(d.column2,depth=1) AS o +02)--Filter: get_field(__unnest_placeholder(d.column2,depth=1), Utf8("a")) = Int64(1) +03)----Unnest: lists[__unnest_placeholder(d.column2)|depth=1] structs[] +04)------Projection: d.column1, d.column2 AS __unnest_placeholder(d.column2) 05)--------TableScan: d projection=[column1, column2] diff --git a/datafusion/sqllogictest/test_files/table_functions.slt b/datafusion/sqllogictest/test_files/table_functions.slt index 12402e0d70c5..79294993dded 100644 --- a/datafusion/sqllogictest/test_files/table_functions.slt +++ b/datafusion/sqllogictest/test_files/table_functions.slt @@ -139,4 +139,4 @@ SELECT generate_series(1, t1.end) FROM generate_series(3, 5) as t1(end) ---- [1, 2, 3, 4, 5] [1, 2, 3, 4] -[1, 2, 3] \ No newline at end of file +[1, 2, 3] diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index d409e0902f7e..1c54006bd2a0 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -594,17 +594,17 @@ query TT explain select unnest(unnest(column3)), column3 from recursive_unnest_table; ---- logical_plan -01)Unnest: lists[] structs[unnest_placeholder(UNNEST(recursive_unnest_table.column3))] -02)--Projection: unnest_placeholder(recursive_unnest_table.column3,depth=1) AS UNNEST(recursive_unnest_table.column3) AS unnest_placeholder(UNNEST(recursive_unnest_table.column3)), recursive_unnest_table.column3 -03)----Unnest: lists[unnest_placeholder(recursive_unnest_table.column3)|depth=1] structs[] -04)------Projection: recursive_unnest_table.column3 AS unnest_placeholder(recursive_unnest_table.column3), recursive_unnest_table.column3 +01)Unnest: lists[] structs[__unnest_placeholder(UNNEST(recursive_unnest_table.column3))] +02)--Projection: __unnest_placeholder(recursive_unnest_table.column3,depth=1) AS UNNEST(recursive_unnest_table.column3) AS __unnest_placeholder(UNNEST(recursive_unnest_table.column3)), recursive_unnest_table.column3 +03)----Unnest: lists[__unnest_placeholder(recursive_unnest_table.column3)|depth=1] structs[] +04)------Projection: recursive_unnest_table.column3 AS __unnest_placeholder(recursive_unnest_table.column3), recursive_unnest_table.column3 05)--------TableScan: recursive_unnest_table projection=[column3] physical_plan 01)UnnestExec 02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -03)----ProjectionExec: expr=[unnest_placeholder(recursive_unnest_table.column3,depth=1)@0 as unnest_placeholder(UNNEST(recursive_unnest_table.column3)), column3@1 as column3] +03)----ProjectionExec: expr=[__unnest_placeholder(recursive_unnest_table.column3,depth=1)@0 as __unnest_placeholder(UNNEST(recursive_unnest_table.column3)), column3@1 as column3] 04)------UnnestExec -05)--------ProjectionExec: expr=[column3@0 as unnest_placeholder(recursive_unnest_table.column3), column3@0 as column3] +05)--------ProjectionExec: expr=[column3@0 as __unnest_placeholder(recursive_unnest_table.column3), column3@0 as column3] 06)----------MemoryExec: partitions=1, partition_sizes=[1] ## unnest->field_access->unnest->unnest @@ -650,19 +650,19 @@ query TT explain select unnest(unnest(unnest(column3)['c1'])), column3 from recursive_unnest_table; ---- logical_plan -01)Projection: unnest_placeholder(UNNEST(recursive_unnest_table.column3)[c1],depth=2) AS UNNEST(UNNEST(UNNEST(recursive_unnest_table.column3)[c1])), recursive_unnest_table.column3 -02)--Unnest: lists[unnest_placeholder(UNNEST(recursive_unnest_table.column3)[c1])|depth=2] structs[] -03)----Projection: get_field(unnest_placeholder(recursive_unnest_table.column3,depth=1) AS UNNEST(recursive_unnest_table.column3), Utf8("c1")) AS unnest_placeholder(UNNEST(recursive_unnest_table.column3)[c1]), recursive_unnest_table.column3 -04)------Unnest: lists[unnest_placeholder(recursive_unnest_table.column3)|depth=1] structs[] -05)--------Projection: recursive_unnest_table.column3 AS unnest_placeholder(recursive_unnest_table.column3), recursive_unnest_table.column3 +01)Projection: __unnest_placeholder(UNNEST(recursive_unnest_table.column3)[c1],depth=2) AS UNNEST(UNNEST(UNNEST(recursive_unnest_table.column3)[c1])), recursive_unnest_table.column3 +02)--Unnest: lists[__unnest_placeholder(UNNEST(recursive_unnest_table.column3)[c1])|depth=2] structs[] +03)----Projection: get_field(__unnest_placeholder(recursive_unnest_table.column3,depth=1) AS UNNEST(recursive_unnest_table.column3), Utf8("c1")) AS __unnest_placeholder(UNNEST(recursive_unnest_table.column3)[c1]), recursive_unnest_table.column3 +04)------Unnest: lists[__unnest_placeholder(recursive_unnest_table.column3)|depth=1] structs[] +05)--------Projection: recursive_unnest_table.column3 AS __unnest_placeholder(recursive_unnest_table.column3), recursive_unnest_table.column3 06)----------TableScan: recursive_unnest_table projection=[column3] physical_plan -01)ProjectionExec: expr=[unnest_placeholder(UNNEST(recursive_unnest_table.column3)[c1],depth=2)@0 as UNNEST(UNNEST(UNNEST(recursive_unnest_table.column3)[c1])), column3@1 as column3] +01)ProjectionExec: expr=[__unnest_placeholder(UNNEST(recursive_unnest_table.column3)[c1],depth=2)@0 as UNNEST(UNNEST(UNNEST(recursive_unnest_table.column3)[c1])), column3@1 as column3] 02)--UnnestExec -03)----ProjectionExec: expr=[get_field(unnest_placeholder(recursive_unnest_table.column3,depth=1)@0, c1) as unnest_placeholder(UNNEST(recursive_unnest_table.column3)[c1]), column3@1 as column3] +03)----ProjectionExec: expr=[get_field(__unnest_placeholder(recursive_unnest_table.column3,depth=1)@0, c1) as __unnest_placeholder(UNNEST(recursive_unnest_table.column3)[c1]), column3@1 as column3] 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 05)--------UnnestExec -06)----------ProjectionExec: expr=[column3@0 as unnest_placeholder(recursive_unnest_table.column3), column3@0 as column3] +06)----------ProjectionExec: expr=[column3@0 as __unnest_placeholder(recursive_unnest_table.column3), column3@0 as column3] 07)------------MemoryExec: partitions=1, partition_sizes=[1] diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 6c48ac68ab6b..188e2ae0915f 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -5127,4 +5127,3 @@ order by id; statement ok drop table t1; -