From 38a8e4e7d9dd1449778c0c4f0aaa28ab66e66da4 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 24 Nov 2024 07:31:32 -0500 Subject: [PATCH] Update for new sqlparser API --- datafusion/common/src/utils/mod.rs | 5 +- .../user_defined_scalar_functions.rs | 10 +- datafusion/expr/src/expr.rs | 2 + datafusion/sql/src/expr/function.rs | 5 + datafusion/sql/src/expr/mod.rs | 4 +- datafusion/sql/src/parser.rs | 9 ++ datafusion/sql/src/planner.rs | 3 +- datafusion/sql/src/select.rs | 1 + datafusion/sql/src/statement.rs | 144 +++++++++++++----- datafusion/sql/src/unparser/ast.rs | 3 + datafusion/sql/src/unparser/dialect.rs | 5 +- datafusion/sql/src/unparser/expr.rs | 16 +- datafusion/sql/src/unparser/plan.rs | 9 +- datafusion/sql/src/unparser/utils.rs | 11 +- .../test_files/information_schema.slt | 4 +- 15 files changed, 172 insertions(+), 59 deletions(-) diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index f3bba8e254d98..c4a43446e1e59 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -775,10 +775,10 @@ pub fn get_available_parallelism() -> usize { #[cfg(test)] mod tests { + use super::*; use crate::ScalarValue::Null; use arrow::array::Float64Array; - - use super::*; + use sqlparser::tokenizer::Span; #[test] fn test_bisect_linear_left_and_right() -> Result<()> { @@ -1006,6 +1006,7 @@ mod tests { let expected_parsed = vec![Ident { value: identifier.to_string(), quote_style, + span: Span::empty(), }]; assert_eq!( diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index a59394f908144..79577eaa1d509 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -27,10 +27,6 @@ use arrow_array::{ Array, ArrayRef, Float32Array, Float64Array, Int32Array, RecordBatch, StringArray, }; use arrow_schema::{DataType, Field, Schema}; -use parking_lot::Mutex; -use regex::Regex; -use sqlparser::ast::Ident; - use datafusion::execution::context::{FunctionFactory, RegisterFunction, SessionState}; use datafusion::prelude::*; use datafusion::{execution::registry::FunctionRegistry, test_util}; @@ -48,6 +44,10 @@ use datafusion_expr::{ Volatility, }; use datafusion_functions_nested::range::range_udf; +use parking_lot::Mutex; +use regex::Regex; +use sqlparser::ast::Ident; +use sqlparser::tokenizer::Span; /// test that casting happens on udfs. /// c11 is f32, but `custom_sqrt` requires f64. Casting happens but the logical plan and @@ -1187,6 +1187,7 @@ async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<( name: Some(Ident { value: "name".into(), quote_style: None, + span: Span::empty(), }), data_type: DataType::Utf8, default_expr: None, @@ -1196,6 +1197,7 @@ async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<( language: Some(Ident { value: "plrust".into(), quote_style: None, + span: Span::empty(), }), behavior: None, function_body: Some(lit(body)), diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index c495b5396f532..d0e98327f34b0 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -222,6 +222,8 @@ use sqlparser::ast::{ /// // to 42 = 5 AND b = 6 /// assert_eq!(rewritten.data, lit(42).eq(lit(5)).and(col("b").eq(lit(6)))); #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] +// TODO make the enum smaller with more boxing (looks like Wildcard is now bigger) +#[allow(clippy::large_enum_variant)] pub enum Expr { /// An expression with a specific name. Alias(Alias), diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 67fa23b869903..15b415e3dc427 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -169,6 +169,11 @@ impl FunctionArgs { "Calling {name}: SEPARATOR not supported in function arguments: {sep}" ) } + FunctionArgumentClause::JsonNullClause(jn) => { + return not_impl_err!( + "Calling {name}: JSON NULL clause not supported in function arguments: {jn}" + ) + } } } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 57ac96951f1f2..0195f3d320326 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -565,11 +565,11 @@ impl SqlToRel<'_, S> { } not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}") } - SQLExpr::Wildcard => Ok(Expr::Wildcard { + SQLExpr::Wildcard(_token) => Ok(Expr::Wildcard { qualifier: None, options: WildcardOptions::default(), }), - SQLExpr::QualifiedWildcard(object_name) => Ok(Expr::Wildcard { + SQLExpr::QualifiedWildcard(object_name, _token) => Ok(Expr::Wildcard { qualifier: Some(self.object_name_to_table_reference(object_name)?), options: WildcardOptions::default(), }), diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index bd1ed3145ef5d..21685949af06c 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -875,6 +875,7 @@ mod tests { use super::*; use sqlparser::ast::Expr::Identifier; use sqlparser::ast::{BinaryOperator, DataType, Expr, Ident}; + use sqlparser::tokenizer::Span; fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), ParserError> { let statements = DFParser::parse_sql(sql)?; @@ -910,6 +911,7 @@ mod tests { name: Ident { value: name.into(), quote_style: None, + span: Span::empty(), }, data_type, collation: None, @@ -1218,6 +1220,7 @@ mod tests { expr: Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), }), asc, nulls_first, @@ -1249,6 +1252,7 @@ mod tests { expr: Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), }), asc: Some(true), nulls_first: None, @@ -1258,6 +1262,7 @@ mod tests { expr: Identifier(Ident { value: "c2".to_owned(), quote_style: None, + span: Span::empty(), }), asc: Some(false), nulls_first: Some(true), @@ -1289,11 +1294,13 @@ mod tests { left: Box::new(Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), })), op: BinaryOperator::Minus, right: Box::new(Identifier(Ident { value: "c2".to_owned(), quote_style: None, + span: Span::empty(), })), }, asc: Some(true), @@ -1334,11 +1341,13 @@ mod tests { left: Box::new(Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), })), op: BinaryOperator::Minus, right: Box::new(Identifier(Ident { value: "c2".to_owned(), quote_style: None, + span: Span::empty(), })), }, asc: Some(true), diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 382c503154ddd..c8167138d3bfc 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -339,7 +339,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { plan: LogicalPlan, alias: TableAlias, ) -> Result { - let plan = self.apply_expr_alias(plan, alias.columns)?; + let idents = alias.columns.into_iter().map(|c| c.name).collect(); + let plan = self.apply_expr_alias(plan, idents)?; LogicalPlanBuilder::from(plan) .alias(TableReference::bare( diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 39b6eb6e81327..9899d92bde715 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -654,6 +654,7 @@ impl SqlToRel<'_, S> { opt_rename, opt_replace: _opt_replace, opt_ilike: _opt_ilike, + wildcard_token: _wildcard_token, } = options; if opt_rename.is_some() { diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 38695f98b5fe7..47a842643b8d5 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -54,13 +54,13 @@ use datafusion_expr::{ TransactionConclusion, TransactionEnd, TransactionIsolationLevel, TransactionStart, Volatility, WriteOp, }; -use sqlparser::ast::{self, SqliteOnConflict}; +use sqlparser::ast::{self, ShowStatementIn, ShowStatementOptions, SqliteOnConflict}; use sqlparser::ast::{ Assignment, AssignmentTarget, ColumnDef, CreateIndex, CreateTable, CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, FromTable, Ident, Insert, ObjectName, ObjectType, OneOrManyWithParens, Query, SchemaName, SetExpr, - ShowCreateObject, ShowStatementFilter, Statement, TableConstraint, TableFactor, - TableWithJoins, TransactionMode, UnaryOperator, Value, + ShowCreateObject, Statement, TableConstraint, TableFactor, TableWithJoins, + TransactionMode, UnaryOperator, Value, }; use sqlparser::parser::ParserError::ParserError; @@ -685,19 +685,99 @@ impl SqlToRel<'_, S> { Statement::ShowTables { extended, full, - db_name, - filter, - // SHOW TABLES IN/FROM are equivalent, this field specifies which the user - // specified, but it doesn't affect the plan so ignore the field - clause: _, - } => self.show_tables_to_plan(extended, full, db_name, filter), + terse, + history, + external, + show_options, + } => { + // We only support the basic "SHOW TABLES" + // https://github.com/apache/datafusion/issues/3188 + if extended { + return not_impl_err!("SHOW TABLES EXTENDED not supported")?; + } + if full { + return not_impl_err!("SHOW FULL TABLES not supported")?; + } + if terse { + return not_impl_err!("SHOW TERSE TABLES not supported")?; + } + if history { + return not_impl_err!("SHOW TABLES HISTORY not supported")?; + } + if external { + return not_impl_err!("SHOW EXTERNAL TABLES not supported")?; + } + let ShowStatementOptions { + show_in, + starts_with, + limit, + limit_from, + filter_position, + } = show_options; + if show_in.is_some() { + return not_impl_err!("SHOW TABLES IN not supported")?; + } + if starts_with.is_some() { + return not_impl_err!("SHOW TABLES LIKE not supported")?; + } + if limit.is_some() { + return not_impl_err!("SHOW TABLES LIMIT not supported")?; + } + if limit_from.is_some() { + return not_impl_err!("SHOW TABLES LIMIT FROM not supported")?; + } + if filter_position.is_some() { + return not_impl_err!("SHOW TABLES FILTER not supported")?; + } + self.show_tables_to_plan() + } Statement::ShowColumns { extended, full, - table_name, - filter, - } => self.show_columns_to_plan(extended, full, table_name, filter), + show_options, + } => { + let ShowStatementOptions { + show_in, + starts_with, + limit, + limit_from, + filter_position, + } = show_options; + if starts_with.is_some() { + return not_impl_err!("SHOW COLUMNS LIKE not supported")?; + } + if limit.is_some() { + return not_impl_err!("SHOW COLUMNS LIMIT not supported")?; + } + if limit_from.is_some() { + return not_impl_err!("SHOW COLUMNS LIMIT FROM not supported")?; + } + if filter_position.is_some() { + return not_impl_err!( + "SHOW COLUMNS with WHERE or LIKE is not supported" + )?; + } + let Some(ShowStatementIn { + // specifies if the syntax was `SHOW COLUMNS IN` or `SHOW + // COLUMNS FROM` which is not different in DataFusion + clause: _, + parent_type, + parent_name, + }) = show_in + else { + return plan_err!("SHOW COLUMNS requires a table name"); + }; + + if let Some(parent_type) = parent_type { + return not_impl_err!("SHOW COLUMNS IN {parent_type} not supported"); + } + let Some(table_name) = parent_name else { + return plan_err!("SHOW COLUMNS requires a table name"); + }; + + self.show_columns_to_plan(extended, full, table_name) + } Statement::Insert(Insert { or, @@ -766,10 +846,14 @@ impl SqlToRel<'_, S> { from, selection, returning, + or, } => { if returning.is_some() { plan_err!("Update-returning clause not yet supported")?; } + if or.is_some() { + plan_err!("ON conflict not supported")?; + } self.update_to_plan(table, assignments, from, selection) } @@ -1065,24 +1149,12 @@ impl SqlToRel<'_, S> { } /// Generate a logical plan from a "SHOW TABLES" query - fn show_tables_to_plan( - &self, - extended: bool, - full: bool, - db_name: Option, - filter: Option, - ) -> Result { + fn show_tables_to_plan(&self) -> Result { if self.has_table("information_schema", "tables") { - // We only support the basic "SHOW TABLES" - // https://github.com/apache/datafusion/issues/3188 - if db_name.is_some() || filter.is_some() || full || extended { - plan_err!("Unsupported parameters to SHOW TABLES") - } else { - let query = "SELECT * FROM information_schema.tables;"; - let mut rewrite = DFParser::parse_sql(query)?; - assert_eq!(rewrite.len(), 1); - self.statement_to_plan(rewrite.pop_front().unwrap()) // length of rewrite is 1 - } + let query = "SELECT * FROM information_schema.tables;"; + let mut rewrite = DFParser::parse_sql(query)?; + assert_eq!(rewrite.len(), 1); + self.statement_to_plan(rewrite.pop_front().unwrap()) // length of rewrite is 1 } else { plan_err!("SHOW TABLES is not supported unless information_schema is enabled") } @@ -1842,22 +1914,18 @@ impl SqlToRel<'_, S> { extended: bool, full: bool, sql_table_name: ObjectName, - filter: Option, ) -> Result { - if filter.is_some() { - return plan_err!("SHOW COLUMNS with WHERE or LIKE is not supported"); - } + // Figure out the where clause + let where_clause = object_name_to_qualifier( + &sql_table_name, + self.options.enable_ident_normalization, + ); if !self.has_table("information_schema", "columns") { return plan_err!( "SHOW COLUMNS is not supported unless information_schema is enabled" ); } - // Figure out the where clause - let where_clause = object_name_to_qualifier( - &sql_table_name, - self.options.enable_ident_normalization, - ); // Do a table lookup to verify the table exists let table_ref = self.object_name_to_table_reference(sql_table_name)?; diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index cc0812cd71e12..8812bbcbe0b74 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -24,6 +24,7 @@ use core::fmt; use sqlparser::ast; +use sqlparser::ast::helpers::attached_token::AttachedToken; #[derive(Clone)] pub(super) struct QueryBuilder { @@ -268,6 +269,7 @@ impl SelectBuilder { connect_by: None, window_before_qualify: false, prewhere: None, + select_token: AttachedToken::empty(), }) } fn create_empty() -> Self { @@ -458,6 +460,7 @@ impl TableRelationBuilder { version: self.version.clone(), partitions: self.partitions.clone(), with_ordinality: false, + json_path: None, }) } fn create_empty() -> Self { diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 57c89f74f5c94..69296ad584c0d 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -18,15 +18,15 @@ use std::sync::Arc; use arrow_schema::TimeUnit; +use datafusion_common::Result; use datafusion_expr::Expr; use regex::Regex; +use sqlparser::tokenizer::Span; use sqlparser::{ ast::{self, Function, Ident, ObjectName, TimezoneInfo}, keywords::ALL_KEYWORDS, }; -use datafusion_common::Result; - use super::{utils::character_length_to_sql, utils::date_part_to_sql, Unparser}; /// `Dialect` to use for Unparsing @@ -272,6 +272,7 @@ impl PostgreSqlDialect { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 70246360450d8..02d9d56f68306 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -43,6 +43,8 @@ use datafusion_expr::{ expr::{Alias, Exists, InList, ScalarFunction, Sort, WindowFunction}, Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, TryCast, }; +use sqlparser::ast::helpers::attached_token::AttachedToken; +use sqlparser::tokenizer::Span; /// Convert a DataFusion [`Expr`] to [`ast::Expr`] /// @@ -233,6 +235,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, @@ -278,6 +281,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: agg @@ -404,12 +408,16 @@ impl Unparser<'_> { } // TODO: unparsing wildcard addition options Expr::Wildcard { qualifier, .. } => { + let attached_token = AttachedToken::empty(); if let Some(qualifier) = qualifier { let idents: Vec = qualifier.to_vec().into_iter().map(Ident::new).collect(); - Ok(ast::Expr::QualifiedWildcard(ObjectName(idents))) + Ok(ast::Expr::QualifiedWildcard( + ObjectName(idents), + attached_token, + )) } else { - Ok(ast::Expr::Wildcard) + Ok(ast::Expr::Wildcard(attached_token)) } } Expr::GroupingSet(grouping_set) => match grouping_set { @@ -480,6 +488,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, @@ -709,6 +718,7 @@ impl Unparser<'_> { Ident { value: ident, quote_style, + span: Span::empty(), } } @@ -716,6 +726,7 @@ impl Unparser<'_> { Ident { value: str, quote_style: None, + span: Span::empty(), } } @@ -1481,6 +1492,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: "UNNEST".to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 81e47ed939f22..1a1d4c42b0ffd 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -42,7 +42,7 @@ use datafusion_expr::{ expr::Alias, BinaryExpr, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan, LogicalPlanBuilder, Operator, Projection, SortExpr, TableScan, }; -use sqlparser::ast::{self, Ident, SetExpr}; +use sqlparser::ast::{self, Ident, SetExpr, TableAliasColumnDef}; use std::sync::Arc; /// Convert a DataFusion [`LogicalPlan`] to [`ast::Statement`] @@ -1006,6 +1006,13 @@ impl Unparser<'_> { } fn new_table_alias(&self, alias: String, columns: Vec) -> ast::TableAlias { + let columns = columns + .into_iter() + .map(|ident| TableAliasColumnDef { + name: ident, + data_type: None, + }) + .collect(); ast::TableAlias { name: self.new_ident_quoted_if_needs(alias), columns, diff --git a/datafusion/sql/src/unparser/utils.rs b/datafusion/sql/src/unparser/utils.rs index c77af79243849..38dcbe981efab 100644 --- a/datafusion/sql/src/unparser/utils.rs +++ b/datafusion/sql/src/unparser/utils.rs @@ -17,6 +17,10 @@ use std::{cmp::Ordering, sync::Arc, vec}; +use super::{ + dialect::CharacterLengthStyle, dialect::DateFieldExtractStyle, + rewrite::TableAliasRewriter, Unparser, +}; use datafusion_common::{ internal_err, tree_node::{Transformed, TransformedResult, TreeNode}, @@ -28,11 +32,7 @@ use datafusion_expr::{ }; use indexmap::IndexSet; use sqlparser::ast; - -use super::{ - dialect::CharacterLengthStyle, dialect::DateFieldExtractStyle, - rewrite::TableAliasRewriter, Unparser, -}; +use sqlparser::tokenizer::Span; /// Recursively searches children of [LogicalPlan] to find an Aggregate node if exists /// prior to encountering a Join, TableScan, or a nested subquery (derived table factor). @@ -425,6 +425,7 @@ pub(crate) fn date_part_to_sql( name: ast::ObjectName(vec![ast::Ident { value: "strftime".to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 4d51a61c8a527..95098038ae2f3 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -483,10 +483,10 @@ set datafusion.catalog.information_schema = true; statement ok CREATE TABLE t AS SELECT 1::int as i; -statement error Error during planning: SHOW COLUMNS with WHERE or LIKE is not supported +statement error DataFusion error: This feature is not implemented: SHOW COLUMNS with WHERE or LIKE is not supported SHOW COLUMNS FROM t LIKE 'f'; -statement error Error during planning: SHOW COLUMNS with WHERE or LIKE is not supported +statement error DataFusion error: This feature is not implemented: SHOW COLUMNS with WHERE or LIKE is not supported SHOW COLUMNS FROM t WHERE column_name = 'bar'; query TTTTTT