From 95d296cd5aa1c58baf18e7625afe50f2e8989b07 Mon Sep 17 00:00:00 2001 From: Goksel Kabadayi <45314116+gokselk@users.noreply.github.com> Date: Fri, 20 Dec 2024 11:32:56 +0300 Subject: [PATCH 1/9] Support n-ary monotonic functions in ordering equivalence (#13841) * Support n-ary monotonic functions in `discover_new_orderings` * Add tests for n-ary monotonic functions in `discover_new_orderings` * Fix tests * Fix non-monotonic test case * Fix unintended simplification * Minor comment changes * Fix tests * Add `preserves_lex_ordering` field * Use `preserves_lex_ordering` on `discover_new_orderings()` * Add `output_ordering` and `output_preserves_lex_ordering` implementations for `ConcatFunc` * Update tests * Move logic to UDF * Cargo fmt * Refactor * Cargo fmt * Simply use false value on default implementation * Remove unnecessary import * Clippy fix * Update Cargo.lock * Move dep to dev-dependencies * Rename output_preserves_lex_ordering to preserves_lex_ordering * minor --------- Co-authored-by: berkaysynnada --- datafusion/expr-common/src/sort_properties.rs | 21 +- datafusion/expr/src/udf.rs | 36 ++- datafusion/functions/src/string/concat.rs | 5 + datafusion/physical-expr/Cargo.toml | 1 + .../src/equivalence/properties.rs | 232 +++++++++++++++--- .../physical-expr/src/expressions/binary.rs | 8 + .../physical-expr/src/expressions/literal.rs | 1 + .../physical-expr/src/expressions/negative.rs | 1 + .../physical-expr/src/scalar_function.rs | 2 + 9 files changed, 269 insertions(+), 38 deletions(-) mode change 100644 => 100755 datafusion/physical-expr/src/equivalence/properties.rs diff --git a/datafusion/expr-common/src/sort_properties.rs b/datafusion/expr-common/src/sort_properties.rs index 7778be2ecf0d..5d17a34a96fb 100644 --- a/datafusion/expr-common/src/sort_properties.rs +++ b/datafusion/expr-common/src/sort_properties.rs @@ -129,19 +129,30 @@ impl Neg for SortProperties { } } -/// Represents the properties of a `PhysicalExpr`, including its sorting and range attributes. +/// Represents the properties of a `PhysicalExpr`, including its sorting, +/// range, and whether it preserves lexicographical ordering. #[derive(Debug, Clone)] pub struct ExprProperties { + /// Properties that describe the sorting behavior of the expression, + /// such as whether it is ordered, unordered, or a singleton value. pub sort_properties: SortProperties, + /// A closed interval representing the range of possible values for + /// the expression. Used to compute reliable bounds. pub range: Interval, + /// Indicates whether the expression preserves lexicographical ordering + /// of its inputs. For example, string concatenation preserves ordering, + /// while addition does not. + pub preserves_lex_ordering: bool, } impl ExprProperties { - /// Creates a new `ExprProperties` instance with unknown sort properties and unknown range. + /// Creates a new `ExprProperties` instance with unknown sort properties, + /// unknown range, and unknown lexicographical ordering preservation. pub fn new_unknown() -> Self { Self { sort_properties: SortProperties::default(), range: Interval::make_unbounded(&DataType::Null).unwrap(), + preserves_lex_ordering: false, } } @@ -156,4 +167,10 @@ impl ExprProperties { self.range = range; self } + + /// Sets whether the expression maintains lexicographical ordering and returns the modified instance. 
+ pub fn with_preserves_lex_ordering(mut self, preserves_lex_ordering: bool) -> Self { + self.preserves_lex_ordering = preserves_lex_ordering; + self + } } diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 809c78f30eff..83200edfa24c 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -303,6 +303,10 @@ impl ScalarUDF { self.inner.output_ordering(inputs) } + pub fn preserves_lex_ordering(&self, inputs: &[ExprProperties]) -> Result { + self.inner.preserves_lex_ordering(inputs) + } + /// See [`ScalarUDFImpl::coerce_types`] for more details. pub fn coerce_types(&self, arg_types: &[DataType]) -> Result> { self.inner.coerce_types(arg_types) @@ -650,10 +654,30 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { Ok(Some(vec![])) } - /// Calculates the [`SortProperties`] of this function based on its - /// children's properties. - fn output_ordering(&self, _inputs: &[ExprProperties]) -> Result { - Ok(SortProperties::Unordered) + /// Calculates the [`SortProperties`] of this function based on its children's properties. + fn output_ordering(&self, inputs: &[ExprProperties]) -> Result { + if !self.preserves_lex_ordering(inputs)? { + return Ok(SortProperties::Unordered); + } + + let Some(first_order) = inputs.first().map(|p| &p.sort_properties) else { + return Ok(SortProperties::Singleton); + }; + + if inputs + .iter() + .skip(1) + .all(|input| &input.sort_properties == first_order) + { + Ok(*first_order) + } else { + Ok(SortProperties::Unordered) + } + } + + /// Whether the function preserves lexicographical ordering based on the input ordering + fn preserves_lex_ordering(&self, _inputs: &[ExprProperties]) -> Result { + Ok(false) } /// Coerce arguments of a function call to types that the function can evaluate. @@ -809,6 +833,10 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { self.inner.output_ordering(inputs) } + fn preserves_lex_ordering(&self, inputs: &[ExprProperties]) -> Result { + self.inner.preserves_lex_ordering(inputs) + } + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { self.inner.coerce_types(arg_types) } diff --git a/datafusion/functions/src/string/concat.rs b/datafusion/functions/src/string/concat.rs index 895a7cdbf308..f4848c131504 100644 --- a/datafusion/functions/src/string/concat.rs +++ b/datafusion/functions/src/string/concat.rs @@ -17,6 +17,7 @@ use arrow::array::{as_largestring_array, Array}; use arrow::datatypes::DataType; +use datafusion_expr::sort_properties::ExprProperties; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -265,6 +266,10 @@ impl ScalarUDFImpl for ConcatFunc { fn documentation(&self) -> Option<&Documentation> { Some(get_concat_doc()) } + + fn preserves_lex_ordering(&self, _inputs: &[ExprProperties]) -> Result { + Ok(true) + } } static DOCUMENTATION: OnceLock = OnceLock::new(); diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index db3e0e10d816..d1de63a1e8fc 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -57,6 +57,7 @@ petgraph = "0.6.2" [dev-dependencies] arrow = { workspace = true, features = ["test_utils"] } criterion = "0.5" +datafusion-functions = { workspace = true } rand = { workspace = true } rstest = { workspace = true } diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs old mode 100644 new mode 100755 index e37e6b8abdb3..d4814fb4d780 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ 
b/datafusion/physical-expr/src/equivalence/properties.rs @@ -346,40 +346,61 @@ impl EquivalenceProperties { .unwrap_or_else(|| vec![Arc::clone(&normalized_expr)]); let mut new_orderings: Vec = vec![]; - for (ordering, next_expr) in self - .normalized_oeq_class() - .iter() - .filter(|ordering| ordering[0].expr.eq(&normalized_expr)) - // First expression after leading ordering - .filter_map(|ordering| Some(ordering).zip(ordering.inner.get(1))) - { - let leading_ordering = ordering[0].options; - // Currently, we only handle expressions with a single child. - // TODO: It should be possible to handle expressions orderings like - // f(a, b, c), a, b, c if f is monotonic in all arguments. + for ordering in self.normalized_oeq_class().iter() { + if !ordering[0].expr.eq(&normalized_expr) { + continue; + } + + let leading_ordering_options = ordering[0].options; + for equivalent_expr in &eq_class { let children = equivalent_expr.children(); - if children.len() == 1 - && children[0].eq(&next_expr.expr) - && SortProperties::Ordered(leading_ordering) - == equivalent_expr - .get_properties(&[ExprProperties { - sort_properties: SortProperties::Ordered( - leading_ordering, - ), - range: Interval::make_unbounded( - &equivalent_expr.data_type(&self.schema)?, - )?, - }])? - .sort_properties - { - // Assume existing ordering is [a ASC, b ASC] - // When equality a = f(b) is given, If we know that given ordering `[b ASC]`, ordering `[f(b) ASC]` is valid, - // then we can deduce that ordering `[b ASC]` is also valid. - // Hence, ordering `[b ASC]` can be added to the state as valid ordering. - // (e.g. existing ordering where leading ordering is removed) - new_orderings.push(LexOrdering::new(ordering[1..].to_vec())); - break; + if children.is_empty() { + continue; + } + + // Check if all children match the next expressions in the ordering + let mut all_children_match = true; + let mut child_properties = vec![]; + + // Build properties for each child based on the next expressions + for (i, child) in children.iter().enumerate() { + if let Some(next) = ordering.get(i + 1) { + if !child.as_ref().eq(next.expr.as_ref()) { + all_children_match = false; + break; + } + child_properties.push(ExprProperties { + sort_properties: SortProperties::Ordered(next.options), + range: Interval::make_unbounded( + &child.data_type(&self.schema)?, + )?, + preserves_lex_ordering: true, + }); + } else { + all_children_match = false; + break; + } + } + + if all_children_match { + // Check if the expression is monotonic in all arguments + if let Ok(expr_properties) = + equivalent_expr.get_properties(&child_properties) + { + if expr_properties.preserves_lex_ordering + && SortProperties::Ordered(leading_ordering_options) + == expr_properties.sort_properties + { + // Assume existing ordering is [c ASC, a ASC, b ASC] + // When equality c = f(a,b) is given, if we know that given ordering `[a ASC, b ASC]`, + // ordering `[f(a,b) ASC]` is valid, then we can deduce that ordering `[a ASC, b ASC]` is also valid. + // Hence, ordering `[a ASC, b ASC]` can be added to the state as a valid ordering. + // (e.g. 
existing ordering where leading ordering is removed) + new_orderings.push(LexOrdering::new(ordering[1..].to_vec())); + break; + } + } } } } @@ -1428,16 +1449,19 @@ fn get_expr_properties( Ok(ExprProperties { sort_properties: SortProperties::Ordered(column_order.options), range: Interval::make_unbounded(&expr.data_type(schema)?)?, + preserves_lex_ordering: false, }) } else if expr.as_any().downcast_ref::().is_some() { Ok(ExprProperties { sort_properties: SortProperties::Unordered, range: Interval::make_unbounded(&expr.data_type(schema)?)?, + preserves_lex_ordering: false, }) } else if let Some(literal) = expr.as_any().downcast_ref::() { Ok(ExprProperties { sort_properties: SortProperties::Singleton, range: Interval::try_new(literal.value().clone(), literal.value().clone())?, + preserves_lex_ordering: true, }) } else { // Find orderings of its children @@ -2143,11 +2167,14 @@ mod tests { create_test_params, create_test_schema, output_schema, }; use crate::expressions::{col, BinaryExpr, Column}; + use crate::ScalarFunctionExpr; use arrow::datatypes::{DataType, Field, Schema}; use arrow_schema::{Fields, TimeUnit}; use datafusion_expr::Operator; + use datafusion_functions::string::concat; + #[test] fn project_equivalence_properties_test() -> Result<()> { let input_schema = Arc::new(Schema::new(vec![ @@ -3684,4 +3711,145 @@ mod tests { sort_expr } + + #[test] + fn test_ordering_equivalence_with_lex_monotonic_concat() -> Result<()> { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Utf8, false), + Field::new("b", DataType::Utf8, false), + Field::new("c", DataType::Utf8, false), + ])); + + let col_a = col("a", &schema)?; + let col_b = col("b", &schema)?; + let col_c = col("c", &schema)?; + + let a_concat_b: Arc = Arc::new(ScalarFunctionExpr::new( + "concat", + concat(), + vec![Arc::clone(&col_a), Arc::clone(&col_b)], + DataType::Utf8, + )); + + // Assume existing ordering is [c ASC, a ASC, b ASC] + let mut eq_properties = EquivalenceProperties::new(Arc::clone(&schema)); + + eq_properties.add_new_ordering(LexOrdering::from(vec![ + PhysicalSortExpr::new_default(Arc::clone(&col_c)).asc(), + PhysicalSortExpr::new_default(Arc::clone(&col_a)).asc(), + PhysicalSortExpr::new_default(Arc::clone(&col_b)).asc(), + ])); + + // Add equality condition c = concat(a, b) + eq_properties.add_equal_conditions(&col_c, &a_concat_b)?; + + let orderings = eq_properties.oeq_class().orderings.clone(); + + let expected_ordering1 = + LexOrdering::from(vec![ + PhysicalSortExpr::new_default(Arc::clone(&col_c)).asc() + ]); + let expected_ordering2 = LexOrdering::from(vec![ + PhysicalSortExpr::new_default(Arc::clone(&col_a)).asc(), + PhysicalSortExpr::new_default(Arc::clone(&col_b)).asc(), + ]); + + // The ordering should be [c ASC] and [a ASC, b ASC] + assert_eq!(orderings.len(), 2); + assert!(orderings.contains(&expected_ordering1)); + assert!(orderings.contains(&expected_ordering2)); + + Ok(()) + } + + #[test] + fn test_ordering_equivalence_with_non_lex_monotonic_multiply() -> Result<()> { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + Field::new("c", DataType::Int32, false), + ])); + + let col_a = col("a", &schema)?; + let col_b = col("b", &schema)?; + let col_c = col("c", &schema)?; + + let a_times_b: Arc = Arc::new(BinaryExpr::new( + Arc::clone(&col_a), + Operator::Multiply, + Arc::clone(&col_b), + )); + + // Assume existing ordering is [c ASC, a ASC, b ASC] + let mut eq_properties = 
EquivalenceProperties::new(Arc::clone(&schema)); + + let initial_ordering = LexOrdering::from(vec![ + PhysicalSortExpr::new_default(Arc::clone(&col_c)).asc(), + PhysicalSortExpr::new_default(Arc::clone(&col_a)).asc(), + PhysicalSortExpr::new_default(Arc::clone(&col_b)).asc(), + ]); + + eq_properties.add_new_ordering(initial_ordering.clone()); + + // Add equality condition c = a * b + eq_properties.add_equal_conditions(&col_c, &a_times_b)?; + + let orderings = eq_properties.oeq_class().orderings.clone(); + + // The ordering should remain unchanged since multiplication is not lex-monotonic + assert_eq!(orderings.len(), 1); + assert!(orderings.contains(&initial_ordering)); + + Ok(()) + } + + #[test] + fn test_ordering_equivalence_with_concat_equality() -> Result<()> { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Utf8, false), + Field::new("b", DataType::Utf8, false), + Field::new("c", DataType::Utf8, false), + ])); + + let col_a = col("a", &schema)?; + let col_b = col("b", &schema)?; + let col_c = col("c", &schema)?; + + let a_concat_b: Arc = Arc::new(ScalarFunctionExpr::new( + "concat", + concat(), + vec![Arc::clone(&col_a), Arc::clone(&col_b)], + DataType::Utf8, + )); + + // Assume existing ordering is [concat(a, b) ASC, a ASC, b ASC] + let mut eq_properties = EquivalenceProperties::new(Arc::clone(&schema)); + + eq_properties.add_new_ordering(LexOrdering::from(vec![ + PhysicalSortExpr::new_default(Arc::clone(&a_concat_b)).asc(), + PhysicalSortExpr::new_default(Arc::clone(&col_a)).asc(), + PhysicalSortExpr::new_default(Arc::clone(&col_b)).asc(), + ])); + + // Add equality condition c = concat(a, b) + eq_properties.add_equal_conditions(&col_c, &a_concat_b)?; + + let orderings = eq_properties.oeq_class().orderings.clone(); + + let expected_ordering1 = LexOrdering::from(vec![PhysicalSortExpr::new_default( + Arc::clone(&a_concat_b), + ) + .asc()]); + let expected_ordering2 = LexOrdering::from(vec![ + PhysicalSortExpr::new_default(Arc::clone(&col_a)).asc(), + PhysicalSortExpr::new_default(Arc::clone(&col_b)).asc(), + ]); + + // The ordering should be [concat(a, b) ASC] and [a ASC, b ASC] + assert_eq!(orderings.len(), 2); + assert!(orderings.contains(&expected_ordering1)); + assert!(orderings.contains(&expected_ordering2)); + + Ok(()) + } } diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index ae2bfe5b0bd4..938d775a2ad1 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -503,34 +503,42 @@ impl PhysicalExpr for BinaryExpr { Operator::Plus => Ok(ExprProperties { sort_properties: l_order.add(&r_order), range: l_range.add(r_range)?, + preserves_lex_ordering: false, }), Operator::Minus => Ok(ExprProperties { sort_properties: l_order.sub(&r_order), range: l_range.sub(r_range)?, + preserves_lex_ordering: false, }), Operator::Gt => Ok(ExprProperties { sort_properties: l_order.gt_or_gteq(&r_order), range: l_range.gt(r_range)?, + preserves_lex_ordering: false, }), Operator::GtEq => Ok(ExprProperties { sort_properties: l_order.gt_or_gteq(&r_order), range: l_range.gt_eq(r_range)?, + preserves_lex_ordering: false, }), Operator::Lt => Ok(ExprProperties { sort_properties: r_order.gt_or_gteq(&l_order), range: l_range.lt(r_range)?, + preserves_lex_ordering: false, }), Operator::LtEq => Ok(ExprProperties { sort_properties: r_order.gt_or_gteq(&l_order), range: l_range.lt_eq(r_range)?, + preserves_lex_ordering: false, }), Operator::And => 
Ok(ExprProperties { sort_properties: r_order.and_or(&l_order), range: l_range.and(r_range)?, + preserves_lex_ordering: false, }), Operator::Or => Ok(ExprProperties { sort_properties: r_order.and_or(&l_order), range: l_range.or(r_range)?, + preserves_lex_ordering: false, }), _ => Ok(ExprProperties::new_unknown()), } diff --git a/datafusion/physical-expr/src/expressions/literal.rs b/datafusion/physical-expr/src/expressions/literal.rs index f0d02eb605b2..c594f039ff2f 100644 --- a/datafusion/physical-expr/src/expressions/literal.rs +++ b/datafusion/physical-expr/src/expressions/literal.rs @@ -90,6 +90,7 @@ impl PhysicalExpr for Literal { Ok(ExprProperties { sort_properties: SortProperties::Singleton, range: Interval::try_new(self.value().clone(), self.value().clone())?, + preserves_lex_ordering: true, }) } } diff --git a/datafusion/physical-expr/src/expressions/negative.rs b/datafusion/physical-expr/src/expressions/negative.rs index 6235845fc028..03f2111aca33 100644 --- a/datafusion/physical-expr/src/expressions/negative.rs +++ b/datafusion/physical-expr/src/expressions/negative.rs @@ -145,6 +145,7 @@ impl PhysicalExpr for NegativeExpr { Ok(ExprProperties { sort_properties: -children[0].sort_properties, range: children[0].range.clone().arithmetic_negate()?, + preserves_lex_ordering: false, }) } } diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index e312d5de59fb..82c718cfaca3 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -202,6 +202,7 @@ impl PhysicalExpr for ScalarFunctionExpr { fn get_properties(&self, children: &[ExprProperties]) -> Result { let sort_properties = self.fun.output_ordering(children)?; + let preserves_lex_ordering = self.fun.preserves_lex_ordering(children)?; let children_range = children .iter() .map(|props| &props.range) @@ -211,6 +212,7 @@ impl PhysicalExpr for ScalarFunctionExpr { Ok(ExprProperties { sort_properties, range, + preserves_lex_ordering, }) } } From f3b1141d0f417e9d9e6c0ada03592c9d9ec60cd4 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Fri, 20 Dec 2024 17:51:06 +0800 Subject: [PATCH 2/9] Replace `execution_mode` with `emission_type` and `boundedness` (#13823) * feat: update execution modes and add bitflags dependency - Introduced `Incremental` execution mode alongside existing modes in the DataFusion execution plan. - Updated various execution plans to utilize the new `Incremental` mode where applicable, enhancing streaming capabilities. - Added `bitflags` dependency to `Cargo.toml` for better management of execution modes. - Adjusted execution mode handling in multiple files to ensure compatibility with the new structure. * add exec API Signed-off-by: Jay Zhan * replace done but has stackoverflow Signed-off-by: Jay Zhan * exec API done Signed-off-by: Jay Zhan * Refactor execution plan properties to remove execution mode - Removed the `ExecutionMode` parameter from `PlanProperties` across multiple physical plan implementations. - Updated related functions to utilize the new structure, ensuring compatibility with the changes. - Adjusted comments and cleaned up imports to reflect the removal of execution mode handling. This refactor simplifies the execution plan properties and enhances maintainability. * Refactor execution plan to remove `ExecutionMode` and introduce `EmissionType` - Removed the `ExecutionMode` parameter from `PlanProperties` and related implementations across multiple files. 
- Introduced `EmissionType` to better represent the output characteristics of execution plans. - Updated functions and tests to reflect the new structure, ensuring compatibility and enhancing maintainability. - Cleaned up imports and adjusted comments accordingly. This refactor simplifies the execution plan properties and improves the clarity of memory handling in execution plans. * fix test Signed-off-by: Jay Zhan * Refactor join handling and emission type logic - Updated test cases in `sanity_checker.rs` to reflect changes in expected outcomes for bounded and unbounded joins, ensuring accurate test coverage. - Simplified the `is_pipeline_breaking` method in `execution_plan.rs` to clarify the conditions under which a plan is considered pipeline-breaking. - Enhanced the emission type determination logic in `execution_plan.rs` to prioritize `Final` over `Both` and `Incremental`, improving clarity in execution plan behavior. - Adjusted join type handling in `hash_join.rs` to classify `Right` joins as `Incremental`, allowing for immediate row emission. These changes improve the accuracy of tests and the clarity of execution plan properties. * Implement emission type for execution plans - Updated multiple execution plan implementations to replace `unimplemented!()` with `EmissionType::Incremental`, ensuring that the emission type is correctly defined for various plans. - This change enhances the clarity and functionality of the execution plans by explicitly specifying their emission behavior. These updates contribute to a more robust execution plan framework within the DataFusion project. * Enhance join type documentation and refine emission type logic - Updated the `JoinType` enum in `join_type.rs` to include detailed descriptions for each join type, improving clarity on their behavior and expected results. - Modified the emission type logic in `hash_join.rs` to ensure that `Right` and `RightAnti` joins are classified as `Incremental`, allowing for immediate row emission when applicable. These changes improve the documentation and functionality of join operations within the DataFusion project. * Refactor emission type logic in join and sort execution plans - Updated the emission type determination in `SortMergeJoinExec` and `SymmetricHashJoinExec` to utilize the `emission_type_from_children` function, enhancing the accuracy of emission behavior based on input characteristics. - Clarified comments in `sort.rs` regarding the conditions under which results are emitted, emphasizing the relationship between input sorting and emission type. - These changes improve the clarity and functionality of the execution plans within the DataFusion project, ensuring more robust handling of emission types. * Refactor emission type handling in execution plans - Updated the `emission_type_from_children` function to accept an iterator instead of a slice, enhancing flexibility in how child execution plans are passed. - Modified the `SymmetricHashJoinExec` implementation to utilize the new function signature, improving code clarity and maintainability. These changes streamline the emission type determination process within the DataFusion project, contributing to a more robust execution plan framework. * Enhance execution plan properties with boundedness and emission type - Introduced `boundedness` and `pipeline_behavior` methods to the `ExecutionPlanProperties` trait, improving the handling of execution plan characteristics. 
- Updated the `CsvExec`, `SortExec`, and related implementations to utilize the new methods for determining boundedness and emission behavior. - Refactored the `ensure_distribution` function to use the new boundedness logic, enhancing clarity in distribution decisions. - These changes contribute to a more robust and maintainable execution plan framework within the DataFusion project. * Refactor execution plans to enhance boundedness and emission type handling - Updated multiple execution plan implementations to incorporate `Boundedness` and `EmissionType`, improving the clarity and functionality of execution plans. - Replaced instances of `unimplemented!()` with appropriate emission types, ensuring that plans correctly define their output behavior. - Refactored the `PlanProperties` structure to utilize the new boundedness logic, enhancing decision-making in execution plans. - These changes contribute to a more robust and maintainable execution plan framework within the DataFusion project. * Refactor memory handling in execution plans - Updated the condition for checking memory requirements in execution plans from `has_finite_memory()` to `boundedness().requires_finite_memory()`, improving clarity in memory management. - This change enhances the robustness of execution plans within the DataFusion project by ensuring more accurate assessments of memory constraints. * Refactor boundedness checks in execution plans - Updated conditions for checking boundedness in various execution plans to use `is_unbounded()` instead of `requires_finite_memory()`, enhancing clarity in memory management. - Adjusted the `PlanProperties` structure to reflect these changes, ensuring more accurate assessments of memory constraints across the DataFusion project. - These modifications contribute to a more robust and maintainable execution plan framework, improving the handling of boundedness in execution strategies. * Remove TODO comment regarding unbounded execution plans in `UnboundedExec` implementation - Eliminated the outdated comment suggesting a switch to unbounded execution with finite memory, streamlining the code and improving clarity. - This change contributes to a cleaner and more maintainable codebase within the DataFusion project. * Refactor execution plan boundedness and emission type handling - Updated the `is_pipeline_breaking` method to use `requires_finite_memory()` for improved clarity in determining pipeline behavior. - Enhanced the `Boundedness` enum to include detailed documentation on memory requirements for unbounded streams. - Refactored `compute_properties` methods in `GlobalLimitExec` and `LocalLimitExec` to directly use the input's boundedness, simplifying the logic. - Adjusted emission type determination in `NestedLoopJoinExec` to utilize the `emission_type_from_children` function, ensuring accurate output behavior based on input characteristics. These changes contribute to a more robust and maintainable execution plan framework within the DataFusion project, improving clarity and functionality in handling boundedness and emission types. * Refactor emission type and boundedness handling in execution plans - Removed the `OptionalEmissionType` struct from `plan_properties.rs`, simplifying the codebase. - Updated the `is_pipeline_breaking` function in `execution_plan.rs` for improved readability by formatting the condition across multiple lines. - Adjusted the `GlobalLimitExec` implementation in `limit.rs` to directly use the input's boundedness, enhancing clarity in memory management. 
These changes contribute to a more streamlined and maintainable execution plan framework within the DataFusion project, improving the handling of emission types and boundedness. * Refactor GlobalLimitExec and LocalLimitExec to enhance boundedness handling - Updated the `compute_properties` methods in both `GlobalLimitExec` and `LocalLimitExec` to replace `EmissionType::Final` with `Boundedness::Bounded`, reflecting that limit operations always produce a finite number of rows. - Changed the input's boundedness reference to `pipeline_behavior()` for improved clarity in execution plan properties. These changes contribute to a more streamlined and maintainable execution plan framework within the DataFusion project, enhancing the handling of boundedness in limit operations. * Review Part1 * Update sanity_checker.rs * addressing reviews * Review Part 1 * Update datafusion/physical-plan/src/execution_plan.rs * Update datafusion/physical-plan/src/execution_plan.rs * Shorten imports * Enhance documentation for JoinType and Boundedness enums - Improved descriptions for the Inner and Full join types in join_type.rs to clarify their behavior and examples. - Added explanations regarding the boundedness of output streams and memory requirements in execution_plan.rs, including specific examples for operators like Median and Min/Max. --------- Signed-off-by: Jay Zhan Co-authored-by: berkaysynnada Co-authored-by: Mehmet Ozan Kabak --- datafusion-cli/src/exec.rs | 14 +- .../examples/custom_datasource.rs | 8 +- datafusion/common/src/join_type.rs | 25 +- .../datasource/physical_plan/arrow_file.rs | 6 +- .../core/src/datasource/physical_plan/avro.rs | 8 +- .../core/src/datasource/physical_plan/csv.rs | 8 +- .../core/src/datasource/physical_plan/json.rs | 8 +- .../datasource/physical_plan/parquet/mod.rs | 11 +- .../enforce_distribution.rs | 15 +- .../src/physical_optimizer/enforce_sorting.rs | 2 +- .../src/physical_optimizer/join_selection.rs | 28 +- .../replace_with_order_preserving_variants.rs | 6 +- .../src/physical_optimizer/sanity_checker.rs | 19 +- datafusion/core/src/physical_planner.rs | 11 +- datafusion/core/src/test/mod.rs | 13 +- datafusion/core/src/test_util/mod.rs | 23 +- .../core/tests/custom_sources_cases/mod.rs | 12 +- .../provider_filter_pushdown.rs | 13 +- .../tests/custom_sources_cases/statistics.rs | 14 +- .../tests/user_defined/insert_operation.rs | 18 +- .../tests/user_defined/user_defined_plan.rs | 19 +- datafusion/ffi/src/execution_plan.rs | 11 +- datafusion/ffi/src/plan_properties.rs | 107 ++++++-- .../src/output_requirements.rs | 3 +- .../physical-plan/src/aggregates/mod.rs | 36 +-- datafusion/physical-plan/src/analyze.rs | 10 +- .../physical-plan/src/coalesce_batches.rs | 3 +- .../physical-plan/src/coalesce_partitions.rs | 3 +- datafusion/physical-plan/src/empty.rs | 20 +- .../physical-plan/src/execution_plan.rs | 253 ++++++++++++------ datafusion/physical-plan/src/explain.rs | 9 +- datafusion/physical-plan/src/filter.rs | 4 +- datafusion/physical-plan/src/insert.rs | 8 +- .../physical-plan/src/joins/cross_join.rs | 20 +- .../physical-plan/src/joins/hash_join.rs | 48 ++-- .../src/joins/nested_loop_join.rs | 37 ++- .../src/joins/sort_merge_join.rs | 21 +- .../src/joins/symmetric_hash_join.rs | 12 +- datafusion/physical-plan/src/lib.rs | 3 +- datafusion/physical-plan/src/limit.rs | 12 +- datafusion/physical-plan/src/memory.rs | 16 +- .../physical-plan/src/placeholder_row.rs | 17 +- datafusion/physical-plan/src/projection.rs | 3 +- .../physical-plan/src/recursive_query.rs | 6 +- 
.../physical-plan/src/repartition/mod.rs | 10 +- .../physical-plan/src/sorts/partial_sort.rs | 10 +- datafusion/physical-plan/src/sorts/sort.rs | 68 +++-- .../src/sorts/sort_preserving_merge.rs | 16 +- datafusion/physical-plan/src/streaming.rs | 23 +- datafusion/physical-plan/src/test/exec.rs | 51 ++-- datafusion/physical-plan/src/union.rs | 19 +- datafusion/physical-plan/src/unnest.rs | 9 +- datafusion/physical-plan/src/values.rs | 13 +- .../src/windows/bounded_window_agg_exec.rs | 8 +- datafusion/physical-plan/src/windows/mod.rs | 2 +- .../src/windows/window_agg_exec.rs | 23 +- datafusion/physical-plan/src/work_table.rs | 10 +- 57 files changed, 748 insertions(+), 457 deletions(-) diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs index 18906536691e..a4f154b2de92 100644 --- a/datafusion-cli/src/exec.rs +++ b/datafusion-cli/src/exec.rs @@ -33,11 +33,12 @@ use crate::{ }; use datafusion::common::instant::Instant; -use datafusion::common::plan_datafusion_err; +use datafusion::common::{plan_datafusion_err, plan_err}; use datafusion::config::ConfigFileType; use datafusion::datasource::listing::ListingTableUrl; use datafusion::error::{DataFusionError, Result}; use datafusion::logical_expr::{DdlStatement, LogicalPlan}; +use datafusion::physical_plan::execution_plan::EmissionType; use datafusion::physical_plan::{collect, execute_stream, ExecutionPlanProperties}; use datafusion::sql::parser::{DFParser, Statement}; use datafusion::sql::sqlparser::dialect::dialect_from_str; @@ -234,10 +235,19 @@ pub(super) async fn exec_and_print( let df = ctx.execute_logical_plan(plan).await?; let physical_plan = df.create_physical_plan().await?; - if physical_plan.execution_mode().is_unbounded() { + if physical_plan.boundedness().is_unbounded() { + if physical_plan.pipeline_behavior() == EmissionType::Final { + return plan_err!( + "The given query can generate a valid result only once \ + the source finishes, but the source is unbounded" + ); + } + // As the input stream comes, we can generate results. + // However, memory safety is not guaranteed. let stream = execute_stream(physical_plan, task_ctx.clone())?; print_options.print_stream(stream, now).await?; } else { + // Bounded stream; collected results are printed after all input consumed. 
let schema = physical_plan.schema(); let results = collect(physical_plan, task_ctx.clone()).await?; adjusted.into_inner().print_batches(schema, &results, now)?; diff --git a/datafusion-examples/examples/custom_datasource.rs b/datafusion-examples/examples/custom_datasource.rs index 90e9d2c7a632..bc865fac5a33 100644 --- a/datafusion-examples/examples/custom_datasource.rs +++ b/datafusion-examples/examples/custom_datasource.rs @@ -30,10 +30,11 @@ use datafusion::error::Result; use datafusion::execution::context::TaskContext; use datafusion::logical_expr::LogicalPlanBuilder; use datafusion::physical_expr::EquivalenceProperties; +use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion::physical_plan::memory::MemoryStream; use datafusion::physical_plan::{ - project_schema, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, - Partitioning, PlanProperties, SendableRecordBatchStream, + project_schema, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, + PlanProperties, SendableRecordBatchStream, }; use datafusion::prelude::*; @@ -214,7 +215,8 @@ impl CustomExec { PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } diff --git a/datafusion/common/src/join_type.rs b/datafusion/common/src/join_type.rs index e98f34199b27..bdca253c5f64 100644 --- a/datafusion/common/src/join_type.rs +++ b/datafusion/common/src/join_type.rs @@ -28,21 +28,30 @@ use crate::{DataFusionError, Result}; /// Join type #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Hash)] pub enum JoinType { - /// Inner Join + /// Inner Join - Returns only rows where there is a matching value in both tables based on the join condition. + /// For example, if joining table A and B on A.id = B.id, only rows where A.id equals B.id will be included. + /// All columns from both tables are returned for the matching rows. Non-matching rows are excluded entirely. Inner, - /// Left Join + /// Left Join - Returns all rows from the left table and matching rows from the right table. + /// If no match, NULL values are returned for columns from the right table. Left, - /// Right Join + /// Right Join - Returns all rows from the right table and matching rows from the left table. + /// If no match, NULL values are returned for columns from the left table. Right, - /// Full Join + /// Full Join (also called Full Outer Join) - Returns all rows from both tables, matching rows where possible. + /// When a row from either table has no match in the other table, the missing columns are filled with NULL values. + /// For example, if table A has row X with no match in table B, the result will contain row X with NULL values for all of table B's columns. + /// This join type preserves all records from both tables, making it useful when you need to see all data regardless of matches. Full, - /// Left Semi Join + /// Left Semi Join - Returns rows from the left table that have matching rows in the right table. + /// Only columns from the left table are returned. LeftSemi, - /// Right Semi Join + /// Right Semi Join - Returns rows from the right table that have matching rows in the left table. + /// Only columns from the right table are returned. RightSemi, - /// Left Anti Join + /// Left Anti Join - Returns rows from the left table that do not have a matching row in the right table. 
LeftAnti, - /// Right Anti Join + /// Right Anti Join - Returns rows from the right table that do not have a matching row in the left table. RightAnti, /// Left Mark join /// diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs index 8df5ef82cd0c..4e76b087abb1 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs @@ -38,7 +38,8 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::Statistics; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; -use datafusion_physical_plan::{ExecutionMode, PlanProperties}; +use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; +use datafusion_physical_plan::PlanProperties; use futures::StreamExt; use itertools::Itertools; @@ -97,7 +98,8 @@ impl ArrowExec { PlanProperties::new( eq_properties, Self::output_partitioning_helper(file_scan_config), // Output Partitioning - ExecutionMode::Bounded, // Execution Mode + EmissionType::Incremental, + Boundedness::Bounded, ) } diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index def6504189bb..fb36179c3cf6 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -24,13 +24,14 @@ use super::FileScanConfig; use crate::error::Result; use crate::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanProperties, SendableRecordBatchStream, Statistics, + DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, + SendableRecordBatchStream, Statistics, }; use arrow::datatypes::SchemaRef; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; +use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; /// Execution plan for scanning Avro data source #[derive(Debug, Clone)] @@ -81,7 +82,8 @@ impl AvroExec { PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(n_partitions), // Output Partitioning - ExecutionMode::Bounded, // Execution Mode + EmissionType::Incremental, + Boundedness::Bounded, ) } } diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index c54c663dca7d..a00e74cf4fcd 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -33,8 +33,8 @@ use crate::datasource::physical_plan::FileMeta; use crate::error::{DataFusionError, Result}; use crate::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, ExecutionPlanProperties, - Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, + DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, Partitioning, + PlanProperties, SendableRecordBatchStream, Statistics, }; use arrow::csv; @@ -43,6 +43,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; +use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use futures::{StreamExt, TryStreamExt}; use object_store::buffered::BufWriter; use 
object_store::{GetOptions, GetResultPayload, ObjectStore}; @@ -327,7 +328,8 @@ impl CsvExec { PlanProperties::new( eq_properties, Self::output_partitioning_helper(file_scan_config), // Output Partitioning - ExecutionMode::Bounded, // Execution Mode + EmissionType::Incremental, + Boundedness::Bounded, ) } diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index 5c70968fbb42..879c9817a382 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -33,14 +33,15 @@ use crate::datasource::physical_plan::FileMeta; use crate::error::{DataFusionError, Result}; use crate::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, ExecutionPlanProperties, - Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, + DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, Partitioning, + PlanProperties, SendableRecordBatchStream, Statistics, }; use arrow::json::ReaderBuilder; use arrow::{datatypes::SchemaRef, json}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; +use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use futures::{StreamExt, TryStreamExt}; use object_store::buffered::BufWriter; @@ -107,7 +108,8 @@ impl NdJsonExec { PlanProperties::new( eq_properties, Self::output_partitioning_helper(file_scan_config), // Output Partitioning - ExecutionMode::Bounded, // Execution Mode + EmissionType::Incremental, + Boundedness::Bounded, ) } diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 446d5f531185..cb79055ce301 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -34,13 +34,14 @@ use crate::{ physical_optimizer::pruning::PruningPredicate, physical_plan::{ metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}, - DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties, + DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, }, }; use arrow::datatypes::SchemaRef; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalExpr}; +use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use itertools::Itertools; use log::debug; @@ -654,13 +655,11 @@ impl ParquetExec { orderings: &[LexOrdering], file_config: &FileScanConfig, ) -> PlanProperties { - // Equivalence Properties - let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings); - PlanProperties::new( - eq_properties, + EquivalenceProperties::new_with_orderings(schema, orderings), Self::output_partitioning_helper(file_config), // Output Partitioning - ExecutionMode::Bounded, // Execution Mode + EmissionType::Incremental, + Boundedness::Bounded, ) } diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 27323eaedccc..76c4d668d797 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -52,12 +52,13 @@ use datafusion_physical_expr::utils::map_columns_before_projection; use datafusion_physical_expr::{ physical_exprs_equal, 
EquivalenceProperties, PhysicalExpr, PhysicalExprRef, }; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_optimizer::output_requirements::OutputRequirementExec; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_plan::execution_plan::EmissionType; use datafusion_physical_plan::windows::{get_best_fitting_window, BoundedWindowAggExec}; use datafusion_physical_plan::ExecutionPlanProperties; -use datafusion_physical_expr_common::sort_expr::LexOrdering; use itertools::izip; /// The `EnforceDistribution` rule ensures that distribution requirements are @@ -1161,12 +1162,17 @@ fn ensure_distribution( let should_use_estimates = config .execution .use_row_number_estimates_to_optimize_partitioning; - let is_unbounded = dist_context.plan.execution_mode().is_unbounded(); + let unbounded_and_pipeline_friendly = dist_context.plan.boundedness().is_unbounded() + && matches!( + dist_context.plan.pipeline_behavior(), + EmissionType::Incremental | EmissionType::Both + ); // Use order preserving variants either of the conditions true // - it is desired according to config // - when plan is unbounded + // - when it is pipeline friendly (can incrementally produce results) let order_preserving_variants_desirable = - is_unbounded || config.optimizer.prefer_existing_sort; + unbounded_and_pipeline_friendly || config.optimizer.prefer_existing_sort; // Remove unnecessary repartition from the physical plan if any let DistributionContext { @@ -1459,7 +1465,8 @@ pub(crate) mod tests { PlanProperties::new( input.equivalence_properties().clone(), // Equivalence Properties input.output_partitioning().clone(), // Output Partitioning - input.execution_mode(), // Execution Mode + input.pipeline_behavior(), // Pipeline Behavior + input.boundedness(), // Boundedness ) } } diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index cfc08562f7d7..85fe9ecfcdb0 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -214,7 +214,7 @@ fn replace_with_partial_sort( let plan_any = plan.as_any(); if let Some(sort_plan) = plan_any.downcast_ref::() { let child = Arc::clone(sort_plan.children()[0]); - if !child.execution_mode().is_unbounded() { + if !child.boundedness().is_unbounded() { return Ok(plan); } diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index 9d65c6ded423..009757f3a938 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -43,6 +43,7 @@ use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_plan::execution_plan::EmissionType; /// The [`JoinSelection`] rule tries to modify a given plan so that it can /// accommodate infinite sources and optimize joins in the plan according to @@ -516,7 +517,8 @@ fn statistical_join_selection_subrule( pub type PipelineFixerSubrule = dyn Fn(Arc, &ConfigOptions) -> Result>; -/// Converts a hash join to a symmetric hash join in the case of infinite inputs on both sides. +/// Converts a hash join to a symmetric hash join if both its inputs are +/// unbounded and incremental. 
/// /// This subrule checks if a hash join can be replaced with a symmetric hash join when dealing /// with unbounded (infinite) inputs on both sides. This replacement avoids pipeline breaking and @@ -537,10 +539,18 @@ fn hash_join_convert_symmetric_subrule( ) -> Result> { // Check if the current plan node is a HashJoinExec. if let Some(hash_join) = input.as_any().downcast_ref::() { - let left_unbounded = hash_join.left.execution_mode().is_unbounded(); - let right_unbounded = hash_join.right.execution_mode().is_unbounded(); - // Process only if both left and right sides are unbounded. - if left_unbounded && right_unbounded { + let left_unbounded = hash_join.left.boundedness().is_unbounded(); + let left_incremental = matches!( + hash_join.left.pipeline_behavior(), + EmissionType::Incremental | EmissionType::Both + ); + let right_unbounded = hash_join.right.boundedness().is_unbounded(); + let right_incremental = matches!( + hash_join.right.pipeline_behavior(), + EmissionType::Incremental | EmissionType::Both + ); + // Process only if both left and right sides are unbounded and incrementally emit. + if left_unbounded && right_unbounded & left_incremental & right_incremental { // Determine the partition mode based on configuration. let mode = if config_options.optimizer.repartition_joins { StreamJoinPartitionMode::Partitioned @@ -669,8 +679,8 @@ fn hash_join_swap_subrule( _config_options: &ConfigOptions, ) -> Result> { if let Some(hash_join) = input.as_any().downcast_ref::() { - if hash_join.left.execution_mode().is_unbounded() - && !hash_join.right.execution_mode().is_unbounded() + if hash_join.left.boundedness().is_unbounded() + && !hash_join.right.boundedness().is_unbounded() && matches!( *hash_join.join_type(), JoinType::Inner @@ -2025,12 +2035,12 @@ mod hash_join_tests { assert_eq!( ( t.case.as_str(), - if left.execution_mode().is_unbounded() { + if left.boundedness().is_unbounded() { SourceType::Unbounded } else { SourceType::Bounded }, - if right.execution_mode().is_unbounded() { + if right.boundedness().is_unbounded() { SourceType::Unbounded } else { SourceType::Bounded diff --git a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs index 2f6b7a51ee75..96b2454fa330 100644 --- a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs +++ b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs @@ -29,11 +29,12 @@ use crate::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::Transformed; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::execution_plan::EmissionType; use datafusion_physical_plan::tree_node::PlanContext; use datafusion_physical_plan::ExecutionPlanProperties; -use datafusion_physical_expr_common::sort_expr::LexOrdering; use itertools::izip; /// For a given `plan`, this object carries the information one needs from its @@ -246,7 +247,8 @@ pub(crate) fn replace_with_order_preserving_variants( // For unbounded cases, we replace with the order-preserving variant in any // case, as doing so helps fix the pipeline. Also replace if config allows. 
let use_order_preserving_variant = config.optimizer.prefer_existing_sort - || !requirements.plan.execution_mode().pipeline_friendly(); + || (requirements.plan.boundedness().is_unbounded() + && requirements.plan.pipeline_behavior() == EmissionType::Final); // Create an alternate plan with order-preserving variants: let mut alternate_plan = plan_with_order_preserving_variants( diff --git a/datafusion/core/src/physical_optimizer/sanity_checker.rs b/datafusion/core/src/physical_optimizer/sanity_checker.rs index 99bd1cab3ed4..b6d22320d086 100644 --- a/datafusion/core/src/physical_optimizer/sanity_checker.rs +++ b/datafusion/core/src/physical_optimizer/sanity_checker.rs @@ -30,6 +30,7 @@ use datafusion_common::config::{ConfigOptions, OptimizerOptions}; use datafusion_common::plan_err; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_physical_expr::intervals::utils::{check_support, is_datatype_supported}; +use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion_physical_plan::joins::SymmetricHashJoinExec; use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties}; @@ -85,7 +86,15 @@ pub fn check_finiteness_requirements( the 'allow_symmetric_joins_without_pruning' configuration flag"); } } - if !input.execution_mode().pipeline_friendly() { + + if matches!( + input.boundedness(), + Boundedness::Unbounded { + requires_infinite_memory: true + } + ) || (input.boundedness().is_unbounded() + && input.pipeline_behavior() == EmissionType::Final) + { plan_err!( "Cannot execute pipeline breaking queries, operator: {:?}", input @@ -215,7 +224,9 @@ mod tests { let test2 = BinaryTestCase { source_types: (SourceType::Bounded, SourceType::Unbounded), - expect_fail: true, + // Left join for bounded build side and unbounded probe side can generate + // both incremental matched rows and final non-matched rows. + expect_fail: false, }; let test3 = BinaryTestCase { source_types: (SourceType::Bounded, SourceType::Bounded), @@ -290,7 +301,9 @@ mod tests { }; let test2 = BinaryTestCase { source_types: (SourceType::Bounded, SourceType::Unbounded), - expect_fail: true, + // Full join for bounded build side and unbounded probe side can generate + // both incremental matched rows and final non-matched rows. 
+ expect_fail: false, }; let test3 = BinaryTestCase { source_types: (SourceType::Bounded, SourceType::Bounded), diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 44537c951f94..47b31d2f4e2d 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -2017,7 +2017,7 @@ mod tests { use crate::datasource::file_format::options::CsvReadOptions; use crate::datasource::MemTable; use crate::physical_plan::{ - expressions, DisplayAs, DisplayFormatType, ExecutionMode, PlanProperties, + expressions, DisplayAs, DisplayFormatType, PlanProperties, SendableRecordBatchStream, }; use crate::prelude::{SessionConfig, SessionContext}; @@ -2032,6 +2032,7 @@ mod tests { use datafusion_expr::{col, lit, LogicalPlanBuilder, UserDefinedLogicalNodeCore}; use datafusion_functions_aggregate::expr_fn::sum; use datafusion_physical_expr::EquivalenceProperties; + use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; fn make_session_state() -> SessionState { let runtime = Arc::new(RuntimeEnv::default()); @@ -2619,13 +2620,11 @@ mod tests { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); PlanProperties::new( - eq_properties, - // Output Partitioning + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(1), - // Execution Mode - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index d8304c2f0a86..e5ce28e73806 100644 --- a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -45,10 +45,9 @@ use arrow::record_batch::RecordBatch; use datafusion_common::{DataFusionError, Statistics}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr::{EquivalenceProperties, Partitioning, PhysicalSortExpr}; +use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; -use datafusion_physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionMode, PlanProperties, -}; +use datafusion_physical_plan::{DisplayAs, DisplayFormatType, PlanProperties}; #[cfg(feature = "compression")] use bzip2::write::BzEncoder; @@ -386,13 +385,11 @@ impl StatisticsExec { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(schema: SchemaRef) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); PlanProperties::new( - eq_properties, - // Output Partitioning + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(2), - // Execution Mode - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index 09608887c0f1..aa134f28fe5b 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -39,22 +39,23 @@ use crate::error::Result; use crate::execution::context::TaskContext; use crate::logical_expr::{LogicalPlanBuilder, UNNAMED_TABLE}; use crate::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanProperties, RecordBatchStream, SendableRecordBatchStream, + DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, + RecordBatchStream, SendableRecordBatchStream, }; use crate::prelude::{CsvReadOptions, SessionContext}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; +use datafusion_catalog::Session; use datafusion_common::TableReference; use datafusion_expr::utils::COUNT_STAR_EXPANSION; use datafusion_expr::{CreateExternalTable, Expr, SortExpr, TableType}; use datafusion_functions_aggregate::count::count_udaf; +use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; use datafusion_physical_expr::{expressions, EquivalenceProperties, PhysicalExpr}; +use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use async_trait::async_trait; -use datafusion_catalog::Session; -use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; use futures::Stream; use tempfile::TempDir; // backwards compatibility @@ -259,16 +260,18 @@ impl UnboundedExec { batch_produce: Option, n_partitions: usize, ) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); - let mode = if batch_produce.is_none() { - ExecutionMode::Unbounded + let boundedness = if batch_produce.is_none() { + Boundedness::Unbounded { + requires_infinite_memory: false, + } } else { - ExecutionMode::Bounded + Boundedness::Bounded }; PlanProperties::new( - eq_properties, + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(n_partitions), - mode, + EmissionType::Incremental, + boundedness, ) } } diff --git a/datafusion/core/tests/custom_sources_cases/mod.rs b/datafusion/core/tests/custom_sources_cases/mod.rs index e1bd14105e23..aafefac04e32 100644 --- a/datafusion/core/tests/custom_sources_cases/mod.rs +++ b/datafusion/core/tests/custom_sources_cases/mod.rs @@ -35,15 +35,16 @@ use datafusion::physical_plan::{ RecordBatchStream, SendableRecordBatchStream, Statistics, }; use datafusion::scalar::ScalarValue; +use datafusion_catalog::Session; use datafusion_common::cast::as_primitive_array; use datafusion_common::project_schema; use datafusion_common::stats::Precision; use datafusion_physical_expr::EquivalenceProperties; +use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; -use datafusion_physical_plan::{ExecutionMode, PlanProperties}; +use datafusion_physical_plan::PlanProperties; use async_trait::async_trait; -use datafusion_catalog::Session; use futures::stream::Stream; mod provider_filter_pushdown; @@ -91,12 +92,11 @@ impl CustomExecutionPlan { /// 
This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); PlanProperties::new( - eq_properties, - // Output Partitioning + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } diff --git a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs index 09f7265d639a..af0506a50558 100644 --- a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs +++ b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs @@ -28,19 +28,20 @@ use datafusion::execution::context::TaskContext; use datafusion::logical_expr::TableProviderFilterPushDown; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanProperties, SendableRecordBatchStream, Statistics, + DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, + SendableRecordBatchStream, Statistics, }; use datafusion::prelude::*; use datafusion::scalar::ScalarValue; +use datafusion_catalog::Session; use datafusion_common::cast::as_primitive_array; use datafusion_common::{internal_err, not_impl_err}; use datafusion_expr::expr::{BinaryExpr, Cast}; use datafusion_functions_aggregate::expr_fn::count; use datafusion_physical_expr::EquivalenceProperties; +use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use async_trait::async_trait; -use datafusion_catalog::Session; fn create_batch(value: i32, num_rows: usize) -> Result { let mut builder = Int32Builder::with_capacity(num_rows); @@ -72,11 +73,11 @@ impl CustomPlan { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(schema: SchemaRef) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); PlanProperties::new( - eq_properties, + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } diff --git a/datafusion/core/tests/custom_sources_cases/statistics.rs b/datafusion/core/tests/custom_sources_cases/statistics.rs index 41d182a3767b..9d3bd594a929 100644 --- a/datafusion/core/tests/custom_sources_cases/statistics.rs +++ b/datafusion/core/tests/custom_sources_cases/statistics.rs @@ -26,17 +26,18 @@ use datafusion::{ error::Result, logical_expr::Expr, physical_plan::{ - ColumnStatistics, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, - Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, + ColumnStatistics, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, + PlanProperties, SendableRecordBatchStream, Statistics, }, prelude::SessionContext, scalar::ScalarValue, }; +use datafusion_catalog::Session; use datafusion_common::{project_schema, stats::Precision}; use datafusion_physical_expr::EquivalenceProperties; +use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use async_trait::async_trait; -use datafusion_catalog::Session; /// This is a testing structure for statistics /// It will act both as a table provider and execution plan @@ -64,12 +65,11 @@ impl StatisticsValidation { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); - PlanProperties::new( - eq_properties, + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(2), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } diff --git a/datafusion/core/tests/user_defined/insert_operation.rs b/datafusion/core/tests/user_defined/insert_operation.rs index ff14fa0be3fb..aa531632c60b 100644 --- a/datafusion/core/tests/user_defined/insert_operation.rs +++ b/datafusion/core/tests/user_defined/insert_operation.rs @@ -26,7 +26,10 @@ use datafusion::{ use datafusion_catalog::{Session, TableProvider}; use datafusion_expr::{dml::InsertOp, Expr, TableType}; use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; -use datafusion_physical_plan::{DisplayAs, ExecutionMode, ExecutionPlan, PlanProperties}; +use datafusion_physical_plan::{ + execution_plan::{Boundedness, EmissionType}, + DisplayAs, ExecutionPlan, PlanProperties, +}; #[tokio::test] async fn insert_operation_is_passed_correctly_to_table_provider() { @@ -122,15 +125,14 @@ struct TestInsertExec { impl TestInsertExec { fn new(op: InsertOp) -> Self { - let eq_properties = EquivalenceProperties::new(make_count_schema()); - let plan_properties = PlanProperties::new( - eq_properties, - Partitioning::UnknownPartitioning(1), - ExecutionMode::Bounded, - ); Self { op, - plan_properties, + plan_properties: PlanProperties::new( + EquivalenceProperties::new(make_count_schema()), + Partitioning::UnknownPartitioning(1), + EmissionType::Incremental, + Boundedness::Bounded, + ), } } } diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs index 520a91aeb4d6..77753290c37e 100644 --- a/datafusion/core/tests/user_defined/user_defined_plan.rs +++ 
b/datafusion/core/tests/user_defined/user_defined_plan.rs @@ -68,9 +68,6 @@ use arrow::{ record_batch::RecordBatch, util::pretty::pretty_format_batches, }; -use async_trait::async_trait; -use futures::{Stream, StreamExt}; - use datafusion::execution::session_state::SessionStateBuilder; use datafusion::{ common::cast::{as_int64_array, as_string_array}, @@ -87,9 +84,8 @@ use datafusion::{ optimizer::{OptimizerConfig, OptimizerRule}, physical_expr::EquivalenceProperties, physical_plan::{ - DisplayAs, DisplayFormatType, Distribution, ExecutionMode, ExecutionPlan, - Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, - Statistics, + DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, + PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }, physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner}, prelude::{SessionConfig, SessionContext}, @@ -100,6 +96,10 @@ use datafusion_common::ScalarValue; use datafusion_expr::{FetchType, Projection, SortExpr}; use datafusion_optimizer::optimizer::ApplyOrder; use datafusion_optimizer::AnalyzerRule; +use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; + +use async_trait::async_trait; +use futures::{Stream, StreamExt}; /// Execute the specified sql and return the resulting record batches /// pretty printed as a String. @@ -495,12 +495,11 @@ impl TopKExec { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); - PlanProperties::new( - eq_properties, + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } diff --git a/datafusion/ffi/src/execution_plan.rs b/datafusion/ffi/src/execution_plan.rs index d10eda8990b8..5ab321cc0114 100644 --- a/datafusion/ffi/src/execution_plan.rs +++ b/datafusion/ffi/src/execution_plan.rs @@ -272,7 +272,13 @@ impl ExecutionPlan for ForeignExecutionPlan { #[cfg(test)] mod tests { - use datafusion::{physical_plan::Partitioning, prelude::SessionContext}; + use datafusion::{ + physical_plan::{ + execution_plan::{Boundedness, EmissionType}, + Partitioning, + }, + prelude::SessionContext, + }; use super::*; @@ -287,7 +293,8 @@ mod tests { props: PlanProperties::new( datafusion::physical_expr::EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(3), - datafusion::physical_plan::ExecutionMode::Unbounded, + EmissionType::Incremental, + Boundedness::Bounded, ), } } diff --git a/datafusion/ffi/src/plan_properties.rs b/datafusion/ffi/src/plan_properties.rs index 722681ae4a1d..3c7bc886aede 100644 --- a/datafusion/ffi/src/plan_properties.rs +++ b/datafusion/ffi/src/plan_properties.rs @@ -28,7 +28,10 @@ use arrow::datatypes::SchemaRef; use datafusion::{ error::{DataFusionError, Result}, physical_expr::EquivalenceProperties, - physical_plan::{ExecutionMode, PlanProperties}, + physical_plan::{ + execution_plan::{Boundedness, EmissionType}, + PlanProperties, + }, prelude::SessionContext, }; use datafusion_proto::{ @@ -53,8 +56,11 @@ pub struct FFI_PlanProperties { pub output_partitioning: unsafe extern "C" fn(plan: &Self) -> RResult, RStr<'static>>, - /// Return the execution mode of the plan. - pub execution_mode: unsafe extern "C" fn(plan: &Self) -> FFI_ExecutionMode, + /// Return the emission type of the plan. 
+ pub emission_type: unsafe extern "C" fn(plan: &Self) -> FFI_EmissionType, + + /// Indicate boundedness of the plan and its memory requirements. + pub boundedness: unsafe extern "C" fn(plan: &Self) -> FFI_Boundedness, /// The output ordering is a [`PhysicalSortExprNodeCollection`] protobuf message /// serialized into bytes to pass across the FFI boundary. @@ -98,12 +104,20 @@ unsafe extern "C" fn output_partitioning_fn_wrapper( ROk(output_partitioning.into()) } -unsafe extern "C" fn execution_mode_fn_wrapper( +unsafe extern "C" fn emission_type_fn_wrapper( properties: &FFI_PlanProperties, -) -> FFI_ExecutionMode { +) -> FFI_EmissionType { let private_data = properties.private_data as *const PlanPropertiesPrivateData; let props = &(*private_data).props; - props.execution_mode().into() + props.emission_type.into() +} + +unsafe extern "C" fn boundedness_fn_wrapper( + properties: &FFI_PlanProperties, +) -> FFI_Boundedness { + let private_data = properties.private_data as *const PlanPropertiesPrivateData; + let props = &(*private_data).props; + props.boundedness.into() } unsafe extern "C" fn output_ordering_fn_wrapper( @@ -164,7 +178,8 @@ impl From<&PlanProperties> for FFI_PlanProperties { FFI_PlanProperties { output_partitioning: output_partitioning_fn_wrapper, - execution_mode: execution_mode_fn_wrapper, + emission_type: emission_type_fn_wrapper, + boundedness: boundedness_fn_wrapper, output_ordering: output_ordering_fn_wrapper, schema: schema_fn_wrapper, release: release_fn_wrapper, @@ -220,9 +235,6 @@ impl TryFrom for PlanProperties { RErr(e) => Err(DataFusionError::Plan(e.to_string())), }?; - let execution_mode: ExecutionMode = - unsafe { (ffi_props.execution_mode)(&ffi_props).into() }; - let eq_properties = match orderings { Some(ordering) => { EquivalenceProperties::new_with_orderings(Arc::new(schema), &[ordering]) @@ -230,40 +242,82 @@ impl TryFrom for PlanProperties { None => EquivalenceProperties::new(Arc::new(schema)), }; + let emission_type: EmissionType = + unsafe { (ffi_props.emission_type)(&ffi_props).into() }; + + let boundedness: Boundedness = + unsafe { (ffi_props.boundedness)(&ffi_props).into() }; + Ok(PlanProperties::new( eq_properties, partitioning, - execution_mode, + emission_type, + boundedness, )) } } -/// FFI safe version of [`ExecutionMode`]. +/// FFI safe version of [`Boundedness`]. #[repr(C)] #[allow(non_camel_case_types)] #[derive(Clone, StableAbi)] -pub enum FFI_ExecutionMode { +pub enum FFI_Boundedness { Bounded, - Unbounded, - PipelineBreaking, + Unbounded { requires_infinite_memory: bool }, +} + +impl From for FFI_Boundedness { + fn from(value: Boundedness) -> Self { + match value { + Boundedness::Bounded => FFI_Boundedness::Bounded, + Boundedness::Unbounded { + requires_infinite_memory, + } => FFI_Boundedness::Unbounded { + requires_infinite_memory, + }, + } + } +} + +impl From for Boundedness { + fn from(value: FFI_Boundedness) -> Self { + match value { + FFI_Boundedness::Bounded => Boundedness::Bounded, + FFI_Boundedness::Unbounded { + requires_infinite_memory, + } => Boundedness::Unbounded { + requires_infinite_memory, + }, + } + } +} + +/// FFI safe version of [`EmissionType`]. 
+#[repr(C)] +#[allow(non_camel_case_types)] +#[derive(Clone, StableAbi)] +pub enum FFI_EmissionType { + Incremental, + Final, + Both, } -impl From for FFI_ExecutionMode { - fn from(value: ExecutionMode) -> Self { +impl From for FFI_EmissionType { + fn from(value: EmissionType) -> Self { match value { - ExecutionMode::Bounded => FFI_ExecutionMode::Bounded, - ExecutionMode::Unbounded => FFI_ExecutionMode::Unbounded, - ExecutionMode::PipelineBreaking => FFI_ExecutionMode::PipelineBreaking, + EmissionType::Incremental => FFI_EmissionType::Incremental, + EmissionType::Final => FFI_EmissionType::Final, + EmissionType::Both => FFI_EmissionType::Both, } } } -impl From for ExecutionMode { - fn from(value: FFI_ExecutionMode) -> Self { +impl From for EmissionType { + fn from(value: FFI_EmissionType) -> Self { match value { - FFI_ExecutionMode::Bounded => ExecutionMode::Bounded, - FFI_ExecutionMode::Unbounded => ExecutionMode::Unbounded, - FFI_ExecutionMode::PipelineBreaking => ExecutionMode::PipelineBreaking, + FFI_EmissionType::Incremental => EmissionType::Incremental, + FFI_EmissionType::Final => EmissionType::Final, + FFI_EmissionType::Both => EmissionType::Both, } } } @@ -283,7 +337,8 @@ mod tests { let original_props = PlanProperties::new( EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(3), - ExecutionMode::Unbounded, + EmissionType::Incremental, + Boundedness::Bounded, ); let local_props_ptr = FFI_PlanProperties::from(&original_props); diff --git a/datafusion/physical-optimizer/src/output_requirements.rs b/datafusion/physical-optimizer/src/output_requirements.rs index 6c8e76bff82b..d5ffaad6d872 100644 --- a/datafusion/physical-optimizer/src/output_requirements.rs +++ b/datafusion/physical-optimizer/src/output_requirements.rs @@ -121,7 +121,8 @@ impl OutputRequirementExec { PlanProperties::new( input.equivalence_properties().clone(), // Equivalence Properties input.output_partitioning().clone(), // Output Partitioning - input.execution_mode(), // Execution Mode + input.pipeline_behavior(), // Pipeline Behavior + input.boundedness(), // Boundedness ) } } diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index feca5eb1db38..2e0103defd9f 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -20,11 +20,12 @@ use std::any::Any; use std::sync::Arc; -use super::{DisplayAs, ExecutionMode, ExecutionPlanProperties, PlanProperties}; +use super::{DisplayAs, ExecutionPlanProperties, PlanProperties}; use crate::aggregates::{ no_grouping::AggregateStream, row_hash::GroupedHashAggregateStream, topk_stream::GroupedTopKAggregateStream, }; +use crate::execution_plan::{CardinalityEffect, EmissionType}; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::projection::get_field_metadata; use crate::windows::get_ordered_partition_by_indices; @@ -41,6 +42,7 @@ use datafusion_common::stats::Precision; use datafusion_common::{internal_err, not_impl_err, Result}; use datafusion_execution::TaskContext; use datafusion_expr::{Accumulator, Aggregate}; +use datafusion_physical_expr::aggregate::AggregateFunctionExpr; use datafusion_physical_expr::{ equivalence::{collapse_lex_req, ProjectionMapping}, expressions::Column, @@ -48,8 +50,6 @@ use datafusion_physical_expr::{ PhysicalExpr, PhysicalSortRequirement, }; -use crate::execution_plan::CardinalityEffect; -use datafusion_physical_expr::aggregate::AggregateFunctionExpr; use itertools::Itertools; pub(crate) mod 
group_values; @@ -663,16 +663,19 @@ impl AggregateExec { input_partitioning.clone() }; - // Determine execution mode: - let mut exec_mode = input.execution_mode(); - if exec_mode == ExecutionMode::Unbounded - && *input_order_mode == InputOrderMode::Linear - { - // Cannot run without breaking the pipeline - exec_mode = ExecutionMode::PipelineBreaking; - } + // TODO: Emission type and boundedness information can be enhanced here + let emission_type = if *input_order_mode == InputOrderMode::Linear { + EmissionType::Final + } else { + input.pipeline_behavior() + }; - PlanProperties::new(eq_properties, output_partitioning, exec_mode) + PlanProperties::new( + eq_properties, + output_partitioning, + emission_type, + input.boundedness(), + ) } pub fn input_order_mode(&self) -> &InputOrderMode { @@ -1298,6 +1301,7 @@ mod tests { use crate::coalesce_batches::CoalesceBatchesExec; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::common; + use crate::execution_plan::Boundedness; use crate::expressions::col; use crate::memory::MemoryExec; use crate::test::assert_is_pending; @@ -1730,13 +1734,11 @@ mod tests { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); PlanProperties::new( - eq_properties, - // Output Partitioning + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(1), - // Execution Mode - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index c8b329fabdaa..1fc3280ceb16 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -89,10 +89,12 @@ impl AnalyzeExec { input: &Arc, schema: SchemaRef, ) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); - let output_partitioning = Partitioning::UnknownPartitioning(1); - let exec_mode = input.execution_mode(); - PlanProperties::new(eq_properties, output_partitioning, exec_mode) + PlanProperties::new( + EquivalenceProperties::new(schema), + Partitioning::UnknownPartitioning(1), + input.pipeline_behavior(), + input.boundedness(), + ) } } diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 11678e7a4696..fa8d125d62d1 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -97,7 +97,8 @@ impl CoalesceBatchesExec { PlanProperties::new( input.equivalence_properties().clone(), // Equivalence Properties input.output_partitioning().clone(), // Output Partitioning - input.execution_mode(), // Execution Mode + input.pipeline_behavior(), + input.boundedness(), ) } } diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index 3da101d6092f..7c1bdba2f339 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -70,7 +70,8 @@ impl CoalescePartitionsExec { PlanProperties::new( eq_properties, // Equivalence Properties Partitioning::UnknownPartitioning(1), // Output Partitioning - input.execution_mode(), // Execution Mode + input.pipeline_behavior(), + input.boundedness(), ) } } diff --git a/datafusion/physical-plan/src/empty.rs b/datafusion/physical-plan/src/empty.rs 
index 192619f69f6a..5168c3cc101f 100644 --- a/datafusion/physical-plan/src/empty.rs +++ b/datafusion/physical-plan/src/empty.rs @@ -20,11 +20,12 @@ use std::any::Any; use std::sync::Arc; -use super::{ - common, DisplayAs, ExecutionMode, PlanProperties, SendableRecordBatchStream, - Statistics, +use super::{common, DisplayAs, PlanProperties, SendableRecordBatchStream, Statistics}; +use crate::{ + execution_plan::{Boundedness, EmissionType}, + memory::MemoryStream, + DisplayFormatType, ExecutionPlan, Partitioning, }; -use crate::{memory::MemoryStream, DisplayFormatType, ExecutionPlan, Partitioning}; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; @@ -74,14 +75,11 @@ impl EmptyExec { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef, n_partitions: usize) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); - let output_partitioning = Self::output_partitioning_helper(n_partitions); PlanProperties::new( - eq_properties, - // Output Partitioning - output_partitioning, - // Execution Mode - ExecutionMode::Bounded, + EquivalenceProperties::new(schema), + Self::output_partitioning_helper(n_partitions), + EmissionType::Incremental, + Boundedness::Bounded, ) } } diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index ba9e4b0697c1..09bb80734401 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -431,11 +431,6 @@ pub trait ExecutionPlanProperties { /// partitions. fn output_partitioning(&self) -> &Partitioning; - /// Specifies whether this plan generates an infinite stream of records. - /// If the plan does not support pipelining, but its input(s) are - /// infinite, returns [`ExecutionMode::PipelineBreaking`] to indicate this. - fn execution_mode(&self) -> ExecutionMode; - /// If the output of this `ExecutionPlan` within each partition is sorted, /// returns `Some(keys)` describing the ordering. A `None` return value /// indicates no assumptions should be made on the output ordering. @@ -445,6 +440,14 @@ pub trait ExecutionPlanProperties { /// output if its input is sorted as it does not reorder the input rows. fn output_ordering(&self) -> Option<&LexOrdering>; + /// Boundedness information of the stream corresponding to this `ExecutionPlan`. + /// For more details, see [`Boundedness`]. + fn boundedness(&self) -> Boundedness; + + /// Indicates how the stream of this `ExecutionPlan` emits its results. + /// For more details, see [`EmissionType`]. + fn pipeline_behavior(&self) -> EmissionType; + /// Get the [`EquivalenceProperties`] within the plan. 
     ///
     /// Equivalence properties tell DataFusion what columns are known to be
@@ -470,14 +473,18 @@ impl ExecutionPlanProperties for Arc<dyn ExecutionPlan> {
         self.properties().output_partitioning()
     }
 
-    fn execution_mode(&self) -> ExecutionMode {
-        self.properties().execution_mode()
-    }
-
     fn output_ordering(&self) -> Option<&LexOrdering> {
         self.properties().output_ordering()
     }
 
+    fn boundedness(&self) -> Boundedness {
+        self.properties().boundedness
+    }
+
+    fn pipeline_behavior(&self) -> EmissionType {
+        self.properties().emission_type
+    }
+
     fn equivalence_properties(&self) -> &EquivalenceProperties {
         self.properties().equivalence_properties()
     }
 }
@@ -488,95 +495,159 @@ impl ExecutionPlanProperties for &dyn ExecutionPlan {
         self.properties().output_partitioning()
     }
 
-    fn execution_mode(&self) -> ExecutionMode {
-        self.properties().execution_mode()
-    }
-
     fn output_ordering(&self) -> Option<&LexOrdering> {
         self.properties().output_ordering()
     }
 
+    fn boundedness(&self) -> Boundedness {
+        self.properties().boundedness
+    }
+
+    fn pipeline_behavior(&self) -> EmissionType {
+        self.properties().emission_type
+    }
+
     fn equivalence_properties(&self) -> &EquivalenceProperties {
         self.properties().equivalence_properties()
     }
 }
 
-/// Describes the execution mode of the result of calling
-/// [`ExecutionPlan::execute`] with respect to its size and behavior.
+/// Represents whether a stream of data **generated** by an operator is bounded (finite)
+/// or unbounded (infinite).
 ///
-/// The mode of the execution plan is determined by the mode of its input
-/// execution plans and the details of the operator itself. For example, a
-/// `FilterExec` operator will have the same execution mode as its input, but a
-/// `SortExec` operator may have a different execution mode than its input,
-/// depending on how the input stream is sorted.
+/// This is used to determine whether an execution plan will eventually complete
+/// processing all its data (bounded) or could potentially run forever (unbounded).
 ///
-/// There are three possible execution modes: `Bounded`, `Unbounded` and
-/// `PipelineBreaking`.
-#[derive(Clone, Copy, PartialEq, Debug)]
-pub enum ExecutionMode {
-    /// The stream is bounded / finite.
-    ///
-    /// In this case the stream will eventually return `None` to indicate that
-    /// there are no more records to process.
+/// For unbounded streams, it also tracks whether the operator requires finite memory
+/// to process the stream or if memory usage could grow unbounded.
+///
+/// Boundedness of the output stream is based on the boundedness of the input stream and the nature of
+/// the operator. For example, a limit or top-k operator with a fetch value can convert an unbounded stream to a bounded stream.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Boundedness {
+    /// The data stream is bounded (finite) and will eventually complete
     Bounded,
-    /// The stream is unbounded / infinite.
-    ///
-    /// In this case, the stream will never be done (never return `None`),
-    /// except in case of error.
-    ///
-    /// This mode is often used in "Steaming" use cases where data is
-    /// incrementally processed as it arrives.
-    ///
-    /// Note that even though the operator generates an unbounded stream of
-    /// results, it can execute with bounded memory and incrementally produces
-    /// output.
-    Unbounded,
-    /// Some of the operator's input stream(s) are unbounded, but the operator
-    /// cannot generate streaming results from these streaming inputs.
-    ///
-    /// In this case, the execution mode will be pipeline breaking, e.g. the
-    /// operator requires unbounded memory to generate results. This
-    /// information is used by the planner when performing sanity checks
-    /// on plans processings unbounded data sources.
-    PipelineBreaking,
+    /// The data stream is unbounded (infinite) and could run forever
+    Unbounded {
+        /// Whether this operator requires infinite memory to process the unbounded stream.
+        /// If false, the operator can process an infinite stream with bounded memory.
+        /// If true, memory usage may grow unbounded while processing the stream.
+        ///
+        /// For example, `Median` requires infinite memory to compute the median of an unbounded stream.
+        /// `Min/Max` requires infinite memory if the stream is unordered, but can be computed with bounded memory if the stream is ordered.
+        requires_infinite_memory: bool,
+    },
 }
 
-impl ExecutionMode {
-    /// Check whether the execution mode is unbounded or not.
+impl Boundedness {
     pub fn is_unbounded(&self) -> bool {
-        matches!(self, ExecutionMode::Unbounded)
+        matches!(self, Boundedness::Unbounded { .. })
     }
+}
 
-    /// Check whether the execution is pipeline friendly. If so, operator can
-    /// execute safely.
-    pub fn pipeline_friendly(&self) -> bool {
-        matches!(self, ExecutionMode::Bounded | ExecutionMode::Unbounded)
-    }
+/// Represents how an operator emits its output records.
+///
+/// This is used to determine whether an operator emits records incrementally as they arrive,
+/// only emits a final result at the end, or can do both. Note that the output is still produced as
+/// record batches with `batch_size` rows, but the operator may still buffer data internally until
+/// it has enough data to emit a record batch or the source is exhausted.
+///
+/// For example, in the following plan:
+/// ```text
+///   SortExec [EmissionType::Final]
+///     |_ on: [col1 ASC]
+///     FilterExec [EmissionType::Incremental]
+///       |_ pred: col2 > 100
+///       CsvExec [EmissionType::Incremental]
+///         |_ file: "data.csv"
+/// ```
+/// - CsvExec emits records incrementally as it reads from the file
+/// - FilterExec processes and emits filtered records incrementally as they arrive
+/// - SortExec must wait for all input records before it can emit the sorted result,
+///   since it needs to see all values to determine their final order
+///
+/// Left joins can emit both incrementally and finally:
+/// - Incrementally emit matches as they are found
+/// - Finally emit non-matches after all input is processed
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum EmissionType {
+    /// Records are emitted incrementally as they arrive and are processed
+    Incremental,
+    /// Records are only emitted once all input has been processed
+    Final,
+    /// Records can be emitted both incrementally and as a final result
+    Both,
 }
 
-/// Conservatively "combines" execution modes of a given collection of operators.
-pub(crate) fn execution_mode_from_children<'a>(
+/// Utility to determine an operator's boundedness based on its children's boundedness.
+///
+/// Assumes boundedness can be inferred from child operators:
+/// - Unbounded (requires_infinite_memory: true) takes precedence.
+/// - Unbounded (requires_infinite_memory: false) is considered next.
+/// - Otherwise, the operator is bounded.
+///
+/// **Note:** This is a general-purpose utility and may not apply to
+/// all multi-child operators. Ensure your operator's behavior aligns
+/// with these assumptions before using.
+pub(crate) fn boundedness_from_children<'a>( children: impl IntoIterator>, -) -> ExecutionMode { - let mut result = ExecutionMode::Bounded; - for mode in children.into_iter().map(|child| child.execution_mode()) { - match (mode, result) { - (ExecutionMode::PipelineBreaking, _) - | (_, ExecutionMode::PipelineBreaking) => { - // If any of the modes is `PipelineBreaking`, so is the result: - return ExecutionMode::PipelineBreaking; - } - (ExecutionMode::Unbounded, _) | (_, ExecutionMode::Unbounded) => { - // Unbounded mode eats up bounded mode: - result = ExecutionMode::Unbounded; +) -> Boundedness { + let mut unbounded_with_finite_mem = false; + + for child in children { + match child.boundedness() { + Boundedness::Unbounded { + requires_infinite_memory: true, + } => { + return Boundedness::Unbounded { + requires_infinite_memory: true, + } } - (ExecutionMode::Bounded, ExecutionMode::Bounded) => { - // When both modes are bounded, so is the result: - result = ExecutionMode::Bounded; + Boundedness::Unbounded { + requires_infinite_memory: false, + } => { + unbounded_with_finite_mem = true; } + Boundedness::Bounded => {} } } - result + + if unbounded_with_finite_mem { + Boundedness::Unbounded { + requires_infinite_memory: false, + } + } else { + Boundedness::Bounded + } +} + +/// Determines the emission type of an operator based on its children's pipeline behavior. +/// +/// The precedence of emission types is: +/// - `Final` has the highest precedence. +/// - `Both` is next: if any child emits both incremental and final results, the parent inherits this behavior unless a `Final` is present. +/// - `Incremental` is the default if all children emit incremental results. +/// +/// **Note:** This is a general-purpose utility and may not apply to +/// all multi-child operators. Verify your operator's behavior aligns +/// with these assumptions. +pub(crate) fn emission_type_from_children<'a>( + children: impl IntoIterator>, +) -> EmissionType { + let mut inc_and_final = false; + + for child in children { + match child.pipeline_behavior() { + EmissionType::Final => return EmissionType::Final, + EmissionType::Both => inc_and_final = true, + EmissionType::Incremental => continue, + } + } + + if inc_and_final { + EmissionType::Both + } else { + EmissionType::Incremental + } } /// Stores certain, often expensive to compute, plan properties used in query @@ -591,8 +662,10 @@ pub struct PlanProperties { pub eq_properties: EquivalenceProperties, /// See [ExecutionPlanProperties::output_partitioning] pub partitioning: Partitioning, - /// See [ExecutionPlanProperties::execution_mode] - pub execution_mode: ExecutionMode, + /// See [ExecutionPlanProperties::pipeline_behavior] + pub emission_type: EmissionType, + /// See [ExecutionPlanProperties::boundedness] + pub boundedness: Boundedness, /// See [ExecutionPlanProperties::output_ordering] output_ordering: Option, } @@ -602,14 +675,16 @@ impl PlanProperties { pub fn new( eq_properties: EquivalenceProperties, partitioning: Partitioning, - execution_mode: ExecutionMode, + emission_type: EmissionType, + boundedness: Boundedness, ) -> Self { // Output ordering can be derived from `eq_properties`. let output_ordering = eq_properties.output_ordering(); Self { eq_properties, partitioning, - execution_mode, + emission_type, + boundedness, output_ordering, } } @@ -620,12 +695,6 @@ impl PlanProperties { self } - /// Overwrite the execution Mode with its new value. 
- pub fn with_execution_mode(mut self, execution_mode: ExecutionMode) -> Self { - self.execution_mode = execution_mode; - self - } - /// Overwrite equivalence properties with its new value. pub fn with_eq_properties(mut self, eq_properties: EquivalenceProperties) -> Self { // Changing equivalence properties also changes output ordering, so @@ -635,6 +704,18 @@ impl PlanProperties { self } + /// Overwrite boundedness with its new value. + pub fn with_boundedness(mut self, boundedness: Boundedness) -> Self { + self.boundedness = boundedness; + self + } + + /// Overwrite emission type with its new value. + pub fn with_emission_type(mut self, emission_type: EmissionType) -> Self { + self.emission_type = emission_type; + self + } + pub fn equivalence_properties(&self) -> &EquivalenceProperties { &self.eq_properties } @@ -647,10 +728,6 @@ impl PlanProperties { self.output_ordering.as_ref() } - pub fn execution_mode(&self) -> ExecutionMode { - self.execution_mode - } - /// Get schema of the node. fn schema(&self) -> &SchemaRef { self.eq_properties.schema() diff --git a/datafusion/physical-plan/src/explain.rs b/datafusion/physical-plan/src/explain.rs index cc42e0587151..cb00958cec4c 100644 --- a/datafusion/physical-plan/src/explain.rs +++ b/datafusion/physical-plan/src/explain.rs @@ -20,7 +20,8 @@ use std::any::Any; use std::sync::Arc; -use super::{DisplayAs, ExecutionMode, PlanProperties, SendableRecordBatchStream}; +use super::{DisplayAs, PlanProperties, SendableRecordBatchStream}; +use crate::execution_plan::{Boundedness, EmissionType}; use crate::stream::RecordBatchStreamAdapter; use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; @@ -74,11 +75,11 @@ impl ExplainExec { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(schema: SchemaRef) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); PlanProperties::new( - eq_properties, + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(1), - ExecutionMode::Bounded, + EmissionType::Final, + Boundedness::Bounded, ) } } diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 07898e8d22d8..901907cf38fa 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -272,10 +272,12 @@ impl FilterExec { output_partitioning.project(&projection_mapping, &eq_properties); eq_properties = eq_properties.project(&projection_mapping, out_schema); } + Ok(PlanProperties::new( eq_properties, output_partitioning, - input.execution_mode(), + input.pipeline_behavior(), + input.boundedness(), )) } } diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs index ae8a2acce696..e8486403868b 100644 --- a/datafusion/physical-plan/src/insert.rs +++ b/datafusion/physical-plan/src/insert.rs @@ -23,11 +23,12 @@ use std::fmt::Debug; use std::sync::Arc; use super::{ - execute_input_stream, DisplayAs, DisplayFormatType, ExecutionPlan, - ExecutionPlanProperties, Partitioning, PlanProperties, SendableRecordBatchStream, + execute_input_stream, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, + PlanProperties, SendableRecordBatchStream, }; use crate::metrics::MetricsSet; use crate::stream::RecordBatchStreamAdapter; +use crate::ExecutionPlanProperties; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; @@ -140,7 +141,8 @@ impl DataSinkExec { PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(1), - input.execution_mode(), + input.pipeline_behavior(), + input.boundedness(), ) } } diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 8bf675e87362..b70eeb313b2a 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -24,11 +24,11 @@ use super::utils::{ StatefulStreamResult, }; use crate::coalesce_partitions::CoalescePartitionsExec; +use crate::execution_plan::{boundedness_from_children, EmissionType}; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::{ - execution_mode_from_children, handle_state, ColumnStatistics, DisplayAs, - DisplayFormatType, Distribution, ExecutionMode, ExecutionPlan, - ExecutionPlanProperties, PlanProperties, RecordBatchStream, + handle_state, ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, + ExecutionPlan, ExecutionPlanProperties, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; use arrow::compute::concat_batches; @@ -161,14 +161,12 @@ impl CrossJoinExec { left.schema().fields.len(), ); - // Determine the execution mode: - let mut mode = execution_mode_from_children([left, right]); - if mode.is_unbounded() { - // If any of the inputs is unbounded, cross join breaks the pipeline. 
- mode = ExecutionMode::PipelineBreaking; - } - - PlanProperties::new(eq_properties, output_partitioning, mode) + PlanProperties::new( + eq_properties, + output_partitioning, + EmissionType::Final, + boundedness_from_children([left, right]), + ) } } diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 532a91da75b7..ef70392a01b7 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -29,11 +29,12 @@ use super::{ utils::{OnceAsync, OnceFut}, PartitionMode, }; +use crate::execution_plan::{boundedness_from_children, EmissionType}; use crate::ExecutionPlanProperties; use crate::{ coalesce_partitions::CoalescePartitionsExec, common::can_project, - execution_mode_from_children, handle_state, + handle_state, hash_utils::create_hashes, joins::utils::{ adjust_indices_by_join_type, apply_join_filter_to_indices, @@ -44,9 +45,8 @@ use crate::{ JoinHashMapType, JoinOn, JoinOnRef, StatefulStreamResult, }, metrics::{ExecutionPlanMetricsSet, MetricsSet}, - DisplayAs, DisplayFormatType, Distribution, ExecutionMode, ExecutionPlan, - Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, - Statistics, + DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, + PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; use arrow::array::{ @@ -526,24 +526,26 @@ impl HashJoinExec { } }; - // Determine execution mode by checking whether this join is pipeline - // breaking. This happens when the left side is unbounded, or the right - // side is unbounded with `Left`, `Full`, `LeftAnti` or `LeftSemi` join types. - let pipeline_breaking = left.execution_mode().is_unbounded() - || (right.execution_mode().is_unbounded() - && matches!( - join_type, - JoinType::Left - | JoinType::Full - | JoinType::LeftAnti - | JoinType::LeftSemi - | JoinType::LeftMark - )); - - let mode = if pipeline_breaking { - ExecutionMode::PipelineBreaking + let emission_type = if left.boundedness().is_unbounded() { + EmissionType::Final + } else if right.pipeline_behavior() == EmissionType::Incremental { + match join_type { + // If we only need to generate matched rows from the probe side, + // we can emit rows incrementally. + JoinType::Inner + | JoinType::LeftSemi + | JoinType::RightSemi + | JoinType::Right + | JoinType::RightAnti => EmissionType::Incremental, + // If we need to generate unmatched rows from the *build side*, + // we need to emit them at the end. + JoinType::Left + | JoinType::LeftAnti + | JoinType::LeftMark + | JoinType::Full => EmissionType::Both, + } } else { - execution_mode_from_children([left, right]) + right.pipeline_behavior() }; // If contains projection, update the PlanProperties. 
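The same recipe applies to operators defined outside `datafusion-physical-plan`, where the `boundedness_from_children` and `emission_type_from_children` helpers are crate-private. The following is a minimal sketch, not part of this patch: the function, the partitioning choice, and the reuse of the left child's equivalence properties are illustrative assumptions for a hypothetical join-like operator.

```rust
use std::sync::Arc;

use datafusion_physical_expr::Partitioning;
use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties, PlanProperties};

/// Hypothetical join-like operator: streams probe-side matches as they are
/// found and emits build-side non-matches once all input has been seen.
fn compute_properties(
    left: &Arc<dyn ExecutionPlan>,
    right: &Arc<dyn ExecutionPlan>,
) -> PlanProperties {
    // Matches can be emitted incrementally, non-matches only at the end.
    let emission_type = if right.pipeline_behavior() == EmissionType::Incremental {
        EmissionType::Both
    } else {
        right.pipeline_behavior()
    };

    // Unbounded if any child is unbounded; this mirrors what the crate-private
    // `boundedness_from_children` helper does for the built-in operators.
    let boundedness = if left.boundedness().is_unbounded() || right.boundedness().is_unbounded() {
        Boundedness::Unbounded {
            requires_infinite_memory: false,
        }
    } else {
        Boundedness::Bounded
    };

    PlanProperties::new(
        // Simplification: a real join would project/combine the children's properties.
        left.equivalence_properties().clone(),
        Partitioning::UnknownPartitioning(1),
        emission_type,
        boundedness,
    )
}
```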
@@ -556,10 +558,12 @@ impl HashJoinExec { output_partitioning.project(&projection_mapping, &eq_properties); eq_properties = eq_properties.project(&projection_mapping, out_schema); } + Ok(PlanProperties::new( eq_properties, output_partitioning, - mode, + emission_type, + boundedness_from_children([left, right]), )) } } diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index d174564178df..8caf5d9b5de1 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -28,6 +28,7 @@ use super::utils::{ BatchTransformer, NoopBatchTransformer, StatefulStreamResult, }; use crate::coalesce_partitions::CoalescePartitionsExec; +use crate::execution_plan::{boundedness_from_children, EmissionType}; use crate::joins::utils::{ adjust_indices_by_join_type, apply_join_filter_to_indices, build_batch_from_indices, build_join_schema, check_join_is_valid, estimate_join_statistics, @@ -36,9 +37,9 @@ use crate::joins::utils::{ }; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::{ - execution_mode_from_children, handle_state, DisplayAs, DisplayFormatType, - Distribution, ExecutionMode, ExecutionPlan, ExecutionPlanProperties, PlanProperties, - RecordBatchStream, SendableRecordBatchStream, + handle_state, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, + ExecutionPlanProperties, PlanProperties, RecordBatchStream, + SendableRecordBatchStream, }; use arrow::array::{BooleanBufferBuilder, UInt32Array, UInt64Array}; @@ -241,14 +242,34 @@ impl NestedLoopJoinExec { let output_partitioning = asymmetric_join_output_partitioning(left, right, &join_type); - // Determine execution mode: - let mode = if left.execution_mode().is_unbounded() { - ExecutionMode::PipelineBreaking + let emission_type = if left.boundedness().is_unbounded() { + EmissionType::Final + } else if right.pipeline_behavior() == EmissionType::Incremental { + match join_type { + // If we only need to generate matched rows from the probe side, + // we can emit rows incrementally. + JoinType::Inner + | JoinType::LeftSemi + | JoinType::RightSemi + | JoinType::Right + | JoinType::RightAnti => EmissionType::Incremental, + // If we need to generate unmatched rows from the *build side*, + // we need to emit them at the end. + JoinType::Left + | JoinType::LeftAnti + | JoinType::LeftMark + | JoinType::Full => EmissionType::Both, + } } else { - execution_mode_from_children([left, right]) + right.pipeline_behavior() }; - PlanProperties::new(eq_properties, output_partitioning, mode) + PlanProperties::new( + eq_properties, + output_partitioning, + emission_type, + boundedness_from_children([left, right]), + ) } /// Returns a vector indicating whether the left and right inputs maintain their order. 
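On the consumer side, code that previously branched on `ExecutionMode` translates directly to the new pair of properties. As a minimal sketch, not part of this patch and with an illustrative helper name, the removed `pipeline_friendly()` predicate can be recovered by treating the old `PipelineBreaking` case as an unbounded stream that also needs unbounded memory:

```rust
use datafusion_physical_plan::execution_plan::Boundedness;
use datafusion_physical_plan::ExecutionPlan;

/// Returns true when the plan can run without accumulating unbounded state,
/// i.e. it is bounded, or unbounded but processable with finite memory.
fn is_pipeline_friendly(plan: &dyn ExecutionPlan) -> bool {
    !matches!(
        plan.properties().boundedness,
        Boundedness::Unbounded {
            requires_infinite_memory: true
        }
    )
}
```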
diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 5e387409da16..f17b99d81d7b 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -53,8 +53,8 @@ use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::join_equivalence_properties; use datafusion_physical_expr::PhysicalExprRef; use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; -use futures::{Stream, StreamExt}; +use crate::execution_plan::{boundedness_from_children, EmissionType}; use crate::expressions::PhysicalSortExpr; use crate::joins::utils::{ build_join_schema, check_join_is_valid, estimate_join_statistics, @@ -63,11 +63,13 @@ use crate::joins::utils::{ use crate::metrics::{Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; use crate::spill::spill_record_batches; use crate::{ - execution_mode_from_children, metrics, DisplayAs, DisplayFormatType, Distribution, - ExecutionPlan, ExecutionPlanProperties, PhysicalExpr, PlanProperties, - RecordBatchStream, SendableRecordBatchStream, Statistics, + metrics, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, + ExecutionPlanProperties, PhysicalExpr, PlanProperties, RecordBatchStream, + SendableRecordBatchStream, Statistics, }; +use futures::{Stream, StreamExt}; + /// Join execution plan that executes equi-join predicates on multiple partitions using Sort-Merge /// join algorithm and applies an optional filter post join. Can be used to join arbitrarily large /// inputs where one or both of the inputs don't fit in the available memory. @@ -302,10 +304,13 @@ impl SortMergeJoinExec { let output_partitioning = symmetric_join_output_partitioning(left, right, &join_type); - // Determine execution mode: - let mode = execution_mode_from_children([left, right]); - - PlanProperties::new(eq_properties, output_partitioning, mode) + // TODO: Emission type may be incremental if the input is sorted + PlanProperties::new( + eq_properties, + output_partitioning, + EmissionType::Final, + boundedness_from_children([left, right]), + ) } } diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 94ef4d5bc34c..72fd5a0feb1a 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -33,6 +33,7 @@ use std::task::{Context, Poll}; use std::vec; use crate::common::SharedMemoryReservation; +use crate::execution_plan::{boundedness_from_children, emission_type_from_children}; use crate::joins::hash_join::{equal_rows_arr, update_hash}; use crate::joins::stream_join_utils::{ calculate_filter_expr_intervals, combine_two_batches, @@ -47,7 +48,6 @@ use crate::joins::utils::{ NoopBatchTransformer, StatefulStreamResult, }; use crate::{ - execution_mode_from_children, joins::StreamJoinPartitionMode, metrics::{ExecutionPlanMetricsSet, MetricsSet}, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, @@ -275,10 +275,12 @@ impl SymmetricHashJoinExec { let output_partitioning = symmetric_join_output_partitioning(left, right, &join_type); - // Determine execution mode: - let mode = execution_mode_from_children([left, right]); - - PlanProperties::new(eq_properties, output_partitioning, mode) + PlanProperties::new( + eq_properties, + output_partitioning, + emission_type_from_children([left, right]), + 
boundedness_from_children([left, right]), + ) } /// left stream diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 845a74eaea48..5ad37f0b1ac0 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -35,11 +35,10 @@ pub use datafusion_physical_expr::{ }; pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay}; -pub(crate) use crate::execution_plan::execution_mode_from_children; pub use crate::execution_plan::{ collect, collect_partitioned, displayable, execute_input_stream, execute_stream, execute_stream_partitioned, get_plan_string, with_new_children_if_necessary, - ExecutionMode, ExecutionPlan, ExecutionPlanProperties, PlanProperties, + ExecutionPlan, ExecutionPlanProperties, PlanProperties, }; pub use crate::metrics::Metric; pub use crate::ordering::InputOrderMode; diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index ab1e6cb37bc8..9665a09e42c9 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -24,9 +24,10 @@ use std::task::{Context, Poll}; use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::{ - DisplayAs, ExecutionMode, ExecutionPlanProperties, PlanProperties, RecordBatchStream, + DisplayAs, ExecutionPlanProperties, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; +use crate::execution_plan::{Boundedness, CardinalityEffect}; use crate::{DisplayFormatType, Distribution, ExecutionPlan, Partitioning}; use arrow::datatypes::SchemaRef; @@ -34,7 +35,6 @@ use arrow::record_batch::RecordBatch; use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; -use crate::execution_plan::CardinalityEffect; use futures::stream::{Stream, StreamExt}; use log::trace; @@ -86,7 +86,9 @@ impl GlobalLimitExec { PlanProperties::new( input.equivalence_properties().clone(), // Equivalence Properties Partitioning::UnknownPartitioning(1), // Output Partitioning - ExecutionMode::Bounded, // Execution Mode + input.pipeline_behavior(), + // Limit operations are always bounded since they output a finite number of rows + Boundedness::Bounded, ) } } @@ -242,7 +244,9 @@ impl LocalLimitExec { PlanProperties::new( input.equivalence_properties().clone(), // Equivalence Properties input.output_partitioning().clone(), // Output Partitioning - ExecutionMode::Bounded, // Execution Mode + input.pipeline_behavior(), + // Limit operations are always bounded since they output a finite number of rows + Boundedness::Bounded, ) } } diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs index bf6294f5a55b..521008ce9b02 100644 --- a/datafusion/physical-plan/src/memory.rs +++ b/datafusion/physical-plan/src/memory.rs @@ -24,9 +24,10 @@ use std::sync::Arc; use std::task::{Context, Poll}; use super::{ - common, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, + common, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, + RecordBatchStream, SendableRecordBatchStream, Statistics, }; +use crate::execution_plan::{Boundedness, EmissionType}; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; @@ -285,11 +286,11 @@ impl MemoryExec { orderings: &[LexOrdering], partitions: &[Vec], ) -> PlanProperties { - let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings); 
PlanProperties::new( - eq_properties, // Equivalence Properties - Partitioning::UnknownPartitioning(partitions.len()), // Output Partitioning - ExecutionMode::Bounded, // Execution Mode + EquivalenceProperties::new_with_orderings(schema, orderings), + Partitioning::UnknownPartitioning(partitions.len()), + EmissionType::Incremental, + Boundedness::Bounded, ) } } @@ -393,7 +394,8 @@ impl LazyMemoryExec { let cache = PlanProperties::new( EquivalenceProperties::new(Arc::clone(&schema)), Partitioning::RoundRobinBatch(generators.len()), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ); Ok(Self { schema, diff --git a/datafusion/physical-plan/src/placeholder_row.rs b/datafusion/physical-plan/src/placeholder_row.rs index f9437f46f8a6..355e51070f1f 100644 --- a/datafusion/physical-plan/src/placeholder_row.rs +++ b/datafusion/physical-plan/src/placeholder_row.rs @@ -20,10 +20,8 @@ use std::any::Any; use std::sync::Arc; -use super::{ - common, DisplayAs, ExecutionMode, PlanProperties, SendableRecordBatchStream, - Statistics, -}; +use super::{common, DisplayAs, PlanProperties, SendableRecordBatchStream, Statistics}; +use crate::execution_plan::{Boundedness, EmissionType}; use crate::{memory::MemoryStream, DisplayFormatType, ExecutionPlan, Partitioning}; use arrow::array::{ArrayRef, NullArray}; @@ -96,11 +94,12 @@ impl PlaceholderRowExec { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef, n_partitions: usize) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); - // Get output partitioning: - let output_partitioning = Self::output_partitioning_helper(n_partitions); - - PlanProperties::new(eq_properties, output_partitioning, ExecutionMode::Bounded) + PlanProperties::new( + EquivalenceProperties::new(schema), + Self::output_partitioning_helper(n_partitions), + EmissionType::Incremental, + Boundedness::Bounded, + ) } } diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index c1d3f368366f..e37a6b0dfb85 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -132,7 +132,8 @@ impl ProjectionExec { Ok(PlanProperties::new( eq_properties, output_partitioning, - input.execution_mode(), + input.pipeline_behavior(), + input.boundedness(), )) } } diff --git a/datafusion/physical-plan/src/recursive_query.rs b/datafusion/physical-plan/src/recursive_query.rs index 0137e5d52fea..0e49a791cbae 100644 --- a/datafusion/physical-plan/src/recursive_query.rs +++ b/datafusion/physical-plan/src/recursive_query.rs @@ -26,7 +26,8 @@ use super::{ work_table::{ReservedBatches, WorkTable, WorkTableExec}, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; -use crate::{DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan}; +use crate::execution_plan::{Boundedness, EmissionType}; +use crate::{DisplayAs, DisplayFormatType, ExecutionPlan}; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; @@ -122,7 +123,8 @@ impl RecursiveQueryExec { PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 0a80dcd34e05..963ccc6fd809 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ 
b/datafusion/physical-plan/src/repartition/mod.rs @@ -726,13 +726,11 @@ impl RepartitionExec { partitioning: Partitioning, preserve_order: bool, ) -> PlanProperties { - // Equivalence Properties - let eq_properties = Self::eq_properties_helper(input, preserve_order); - PlanProperties::new( - eq_properties, // Equivalence Properties - partitioning, // Output Partitioning - input.execution_mode(), // Execution Mode + Self::eq_properties_helper(input, preserve_order), + partitioning, + input.pipeline_behavior(), + input.boundedness(), ) } diff --git a/datafusion/physical-plan/src/sorts/partial_sort.rs b/datafusion/physical-plan/src/sorts/partial_sort.rs index 77636f9e4991..f14ba6606e89 100644 --- a/datafusion/physical-plan/src/sorts/partial_sort.rs +++ b/datafusion/physical-plan/src/sorts/partial_sort.rs @@ -201,10 +201,12 @@ impl PartialSortExec { let output_partitioning = Self::output_partitioning_helper(input, preserve_partitioning); - // Determine execution mode: - let mode = input.execution_mode(); - - PlanProperties::new(eq_properties, output_partitioning, mode) + PlanProperties::new( + eq_properties, + output_partitioning, + input.pipeline_behavior(), + input.boundedness(), + ) } } diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index c2d8b093a923..8d8a5c5f7055 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -25,6 +25,7 @@ use std::fmt::{Debug, Formatter}; use std::sync::Arc; use crate::common::spawn_buffered; +use crate::execution_plan::{Boundedness, CardinalityEffect, EmissionType}; use crate::expressions::PhysicalSortExpr; use crate::limit::LimitStream; use crate::metrics::{ @@ -37,9 +38,9 @@ use crate::spill::{ use crate::stream::RecordBatchStreamAdapter; use crate::topk::TopK; use crate::{ - DisplayAs, DisplayFormatType, Distribution, EmptyRecordBatchStream, ExecutionMode, - ExecutionPlan, ExecutionPlanProperties, Partitioning, PlanProperties, - SendableRecordBatchStream, Statistics, + DisplayAs, DisplayFormatType, Distribution, EmptyRecordBatchStream, ExecutionPlan, + ExecutionPlanProperties, Partitioning, PlanProperties, SendableRecordBatchStream, + Statistics, }; use arrow::compute::{concat_batches, lexsort_to_indices, take_arrays, SortColumn}; @@ -56,7 +57,6 @@ use datafusion_execution::TaskContext; use datafusion_physical_expr::LexOrdering; use datafusion_physical_expr_common::sort_expr::LexRequirement; -use crate::execution_plan::CardinalityEffect; use futures::{StreamExt, TryStreamExt}; use log::{debug, trace}; @@ -763,11 +763,15 @@ impl SortExec { /// can be dropped. pub fn with_fetch(&self, fetch: Option) -> Self { let mut cache = self.cache.clone(); - if fetch.is_some() && self.cache.execution_mode == ExecutionMode::Unbounded { - // When a theoretically unnecessary sort becomes a top-K (which - // sometimes arises as an intermediate state before full removal), - // its execution mode should become `Bounded`. - cache.execution_mode = ExecutionMode::Bounded; + // If the SortExec can emit incrementally (that means the sort requirements + // and properties of the input match), the SortExec can generate its result + // without scanning the entire input when a fetch value exists. 
+ let is_pipeline_friendly = matches!( + self.cache.emission_type, + EmissionType::Incremental | EmissionType::Both + ); + if fetch.is_some() && is_pipeline_friendly { + cache = cache.with_boundedness(Boundedness::Bounded); } SortExec { input: Arc::clone(&self.input), @@ -817,10 +821,30 @@ impl SortExec { let sort_satisfied = input .equivalence_properties() .ordering_satisfy_requirement(&requirement); - let mode = match input.execution_mode() { - ExecutionMode::Unbounded if sort_satisfied => ExecutionMode::Unbounded, - ExecutionMode::Bounded => ExecutionMode::Bounded, - _ => ExecutionMode::PipelineBreaking, + + // The emission type depends on whether the input is already sorted: + // - If already sorted, we can emit results in the same way as the input + // - If not sorted, we must wait until all data is processed to emit results (Final) + let emission_type = if sort_satisfied { + input.pipeline_behavior() + } else { + EmissionType::Final + }; + + // The boundedness depends on whether the input is already sorted: + // - If already sorted, we have the same property as the input + // - If not sorted and input is unbounded, we require infinite memory and generates + // unbounded data (not practical). + // - If not sorted and input is bounded, then the SortExec is bounded, too. + let boundedness = if sort_satisfied { + input.boundedness() + } else { + match input.boundedness() { + Boundedness::Unbounded { .. } => Boundedness::Unbounded { + requires_infinite_memory: true, + }, + bounded => bounded, + } }; // Calculate equivalence properties; i.e. reset the ordering equivalence @@ -835,7 +859,12 @@ impl SortExec { let output_partitioning = Self::output_partitioning_helper(input, preserve_partitioning); - PlanProperties::new(eq_properties, output_partitioning, mode) + PlanProperties::new( + eq_properties, + output_partitioning, + emission_type, + boundedness, + ) } } @@ -1006,6 +1035,7 @@ mod tests { use super::*; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::collect; + use crate::execution_plan::Boundedness; use crate::expressions::col; use crate::memory::MemoryExec; use crate::test; @@ -1049,8 +1079,14 @@ mod tests { eq_properties.add_new_orderings(vec![LexOrdering::new(vec![ PhysicalSortExpr::new_default(Arc::new(Column::new("c1", 0))), ])]); - let mode = ExecutionMode::Unbounded; - PlanProperties::new(eq_properties, Partitioning::UnknownPartitioning(1), mode) + PlanProperties::new( + eq_properties, + Partitioning::UnknownPartitioning(1), + EmissionType::Final, + Boundedness::Unbounded { + requires_infinite_memory: false, + }, + ) } } diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 906164f21b8c..21597fb85662 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -139,7 +139,8 @@ impl SortPreservingMergeExec { PlanProperties::new( eq_properties, // Equivalence Properties Partitioning::UnknownPartitioning(1), // Output Partitioning - input.execution_mode(), // Execution Mode + input.pipeline_behavior(), // Pipeline Behavior + input.boundedness(), // Boundedness ) } } @@ -323,6 +324,7 @@ mod tests { use super::*; use crate::coalesce_batches::CoalesceBatchesExec; use crate::coalesce_partitions::CoalescePartitionsExec; + use crate::execution_plan::{Boundedness, EmissionType}; use crate::expressions::col; use crate::memory::MemoryExec; use crate::metrics::{MetricValue, Timestamp}; @@ -331,7 +333,7 @@ 
mod tests { use crate::stream::RecordBatchReceiverStream; use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; use crate::test::{self, assert_is_pending, make_partition}; - use crate::{collect, common, ExecutionMode}; + use crate::{collect, common}; use arrow::array::{ArrayRef, Int32Array, StringArray, TimestampNanosecondArray}; use arrow::compute::SortOptions; @@ -1268,8 +1270,14 @@ mod tests { .iter() .map(|expr| PhysicalSortExpr::new_default(Arc::clone(expr))) .collect::()]); - let mode = ExecutionMode::Unbounded; - PlanProperties::new(eq_properties, Partitioning::Hash(columns, 3), mode) + PlanProperties::new( + eq_properties, + Partitioning::Hash(columns, 3), + EmissionType::Incremental, + Boundedness::Unbounded { + requires_infinite_memory: false, + }, + ) } } diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index 7ccef3248069..da8b0e877dcc 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -21,8 +21,9 @@ use std::any::Any; use std::fmt::Debug; use std::sync::Arc; -use super::{DisplayAs, DisplayFormatType, ExecutionMode, PlanProperties}; +use super::{DisplayAs, DisplayFormatType, PlanProperties}; use crate::display::{display_orderings, ProjectSchemaDisplay}; +use crate::execution_plan::{Boundedness, EmissionType}; use crate::stream::RecordBatchStreamAdapter; use crate::{ExecutionPlan, Partitioning, SendableRecordBatchStream}; @@ -145,22 +146,26 @@ impl StreamingTableExec { schema: SchemaRef, orderings: &[LexOrdering], partitions: &[Arc], - is_infinite: bool, + infinite: bool, ) -> PlanProperties { // Calculate equivalence properties: let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings); // Get output partitioning: let output_partitioning = Partitioning::UnknownPartitioning(partitions.len()); - - // Determine execution mode: - let mode = if is_infinite { - ExecutionMode::Unbounded + let boundedness = if infinite { + Boundedness::Unbounded { + requires_infinite_memory: false, + } } else { - ExecutionMode::Bounded + Boundedness::Bounded }; - - PlanProperties::new(eq_properties, output_partitioning, mode) + PlanProperties::new( + eq_properties, + output_partitioning, + EmissionType::Incremental, + boundedness, + ) } } diff --git a/datafusion/physical-plan/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs index cc0a7cbd9b52..b31a53e55e88 100644 --- a/datafusion/physical-plan/src/test/exec.rs +++ b/datafusion/physical-plan/src/test/exec.rs @@ -24,10 +24,14 @@ use std::{ task::{Context, Poll}, }; -use crate::stream::{RecordBatchReceiverStream, RecordBatchStreamAdapter}; use crate::{ - common, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, + common, execution_plan::Boundedness, DisplayAs, DisplayFormatType, ExecutionPlan, + Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, + Statistics, +}; +use crate::{ + execution_plan::EmissionType, + stream::{RecordBatchReceiverStream, RecordBatchStreamAdapter}, }; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; @@ -152,12 +156,11 @@ impl MockExec { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(schema: SchemaRef) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); - PlanProperties::new( - eq_properties, + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } @@ -315,11 +318,11 @@ impl BarrierExec { schema: SchemaRef, data: &[Vec], ) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); PlanProperties::new( - eq_properties, + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(data.len()), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } @@ -427,12 +430,11 @@ impl ErrorExec { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); - PlanProperties::new( - eq_properties, + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } @@ -509,12 +511,11 @@ impl StatisticsExec { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); - PlanProperties::new( - eq_properties, + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(2), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } @@ -610,12 +611,11 @@ impl BlockingExec { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(schema: SchemaRef, n_partitions: usize) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); - PlanProperties::new( - eq_properties, + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(n_partitions), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } @@ -752,13 +752,12 @@ impl PanicExec { schema: SchemaRef, batches_until_panics: &[usize], ) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); let num_partitions = batches_until_panics.len(); - PlanProperties::new( - eq_properties, + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(num_partitions), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index bd36753880eb..6e768a3d87bc 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -27,12 +27,12 @@ use std::task::{Context, Poll}; use std::{any::Any, sync::Arc}; use super::{ - execution_mode_from_children, metrics::{ExecutionPlanMetricsSet, MetricsSet}, ColumnStatistics, DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; +use crate::execution_plan::{boundedness_from_children, emission_type_from_children}; use crate::metrics::BaselineMetrics; use crate::stream::ObservedStream; @@ -135,14 +135,11 @@ impl UnionExec { .map(|plan| plan.output_partitioning().partition_count()) .sum(); let output_partitioning = Partitioning::UnknownPartitioning(num_partitions); - - // Determine execution mode: - let mode = execution_mode_from_children(inputs.iter()); - Ok(PlanProperties::new( eq_properties, output_partitioning, - mode, + emission_type_from_children(inputs), + boundedness_from_children(inputs), )) } } @@ -335,10 +332,12 @@ impl InterleaveExec { let eq_properties = EquivalenceProperties::new(schema); // Get output partitioning: let output_partitioning = inputs[0].output_partitioning().clone(); - // Determine execution mode: - let mode = execution_mode_from_children(inputs.iter()); - - PlanProperties::new(eq_properties, output_partitioning, mode) + PlanProperties::new( + eq_properties, + output_partitioning, + emission_type_from_children(inputs), + boundedness_from_children(inputs), + ) } } diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index 19b1b46953d8..19a090ca284f 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -100,12 +100,11 @@ impl UnnestExec { input: &Arc, schema: SchemaRef, ) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); - PlanProperties::new( - eq_properties, - input.output_partitioning().clone(), - input.execution_mode(), + EquivalenceProperties::new(schema), + input.output_partitioning().to_owned(), + input.pipeline_behavior(), + input.boundedness(), ) } diff --git a/datafusion/physical-plan/src/values.rs b/datafusion/physical-plan/src/values.rs index edadf98cb10c..5089b1e626d4 100644 --- a/datafusion/physical-plan/src/values.rs +++ b/datafusion/physical-plan/src/values.rs @@ -20,10 +20,8 @@ use std::any::Any; use std::sync::Arc; -use super::{ - common, DisplayAs, ExecutionMode, PlanProperties, SendableRecordBatchStream, - Statistics, -}; +use super::{common, DisplayAs, PlanProperties, SendableRecordBatchStream, Statistics}; +use 
crate::execution_plan::{Boundedness, EmissionType}; use crate::{ memory::MemoryStream, ColumnarValue, DisplayFormatType, ExecutionPlan, Partitioning, PhysicalExpr, @@ -133,12 +131,11 @@ impl ValuesExec { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); - PlanProperties::new( - eq_properties, + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index c6003fe0a82f..b66147bf7439 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -202,9 +202,11 @@ impl BoundedWindowAggExec { // Construct properties cache PlanProperties::new( - eq_properties, // Equivalence Properties - output_partitioning, // Output Partitioning - input.execution_mode(), // Execution Mode + eq_properties, + output_partitioning, + // TODO: Emission type and boundedness information can be enhanced here + input.pipeline_behavior(), + input.boundedness(), ) } } diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 222a8bb71a02..36c4b9f18da9 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -383,7 +383,7 @@ pub fn get_best_fitting_window( } else { return Ok(None); }; - let is_unbounded = input.execution_mode().is_unbounded(); + let is_unbounded = input.boundedness().is_unbounded(); if !is_unbounded && input_order_mode != InputOrderMode::Sorted { // Executor has bounded input and `input_order_mode` is not `InputOrderMode::Sorted` // in this case removing the sort is not helpful, return: diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index c0ac96d22ef7..b132c3247072 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -23,15 +23,16 @@ use std::sync::Arc; use std::task::{Context, Poll}; use super::utils::create_schema; +use crate::execution_plan::EmissionType; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::windows::{ calc_requirements, get_ordered_partition_by_indices, get_partition_by_sort_exprs, window_equivalence_properties, }; use crate::{ - ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, ExecutionMode, - ExecutionPlan, ExecutionPlanProperties, PhysicalExpr, PlanProperties, - RecordBatchStream, SendableRecordBatchStream, Statistics, WindowExpr, + ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, + ExecutionPlanProperties, PhysicalExpr, PlanProperties, RecordBatchStream, + SendableRecordBatchStream, Statistics, WindowExpr, }; use arrow::array::ArrayRef; use arrow::compute::{concat, concat_batches}; @@ -127,16 +128,14 @@ impl WindowAggExec { // would be either 1 or more than 1 depending on the presence of repartitioning. 
let output_partitioning = input.output_partitioning().clone(); - // Determine execution mode: - let mode = match input.execution_mode() { - ExecutionMode::Bounded => ExecutionMode::Bounded, - ExecutionMode::Unbounded | ExecutionMode::PipelineBreaking => { - ExecutionMode::PipelineBreaking - } - }; - // Construct properties cache: - PlanProperties::new(eq_properties, output_partitioning, mode) + PlanProperties::new( + eq_properties, + output_partitioning, + // TODO: Emission type and boundedness information can be enhanced here + EmissionType::Final, + input.boundedness(), + ) } } diff --git a/datafusion/physical-plan/src/work_table.rs b/datafusion/physical-plan/src/work_table.rs index add386319253..b1dd4d9308f4 100644 --- a/datafusion/physical-plan/src/work_table.rs +++ b/datafusion/physical-plan/src/work_table.rs @@ -24,8 +24,9 @@ use super::{ metrics::{ExecutionPlanMetricsSet, MetricsSet}, SendableRecordBatchStream, Statistics, }; +use crate::execution_plan::{Boundedness, EmissionType}; use crate::memory::MemoryStream; -use crate::{DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties}; +use crate::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties}; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; @@ -142,12 +143,11 @@ impl WorkTableExec { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef) -> PlanProperties { - let eq_properties = EquivalenceProperties::new(schema); - PlanProperties::new( - eq_properties, + EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ) } } From b0d7cd0e98751a3d11a9d6c09bacb54813119d25 Mon Sep 17 00:00:00 2001 From: Goksel Kabadayi <45314116+gokselk@users.noreply.github.com> Date: Fri, 20 Dec 2024 12:51:18 +0300 Subject: [PATCH 3/9] Preserve ordering equivalencies on `with_reorder` (#13770) * Preserve ordering equivalencies on `with_reorder` * Add assertions * Return early if filtered_exprs is empty * Add clarify comment * Refactor * Add comprehensive test case * Add comment for exprs_equal * Cargo fmt * Clippy fix * Update properties.rs * Update exprs_equal and add tests * Update properties.rs --------- Co-authored-by: berkaysynnada --- .../physical-expr/src/equivalence/class.rs | 211 ++++++++++++- .../src/equivalence/properties.rs | 287 +++++++++++++++++- 2 files changed, 494 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-expr/src/equivalence/class.rs b/datafusion/physical-expr/src/equivalence/class.rs index cc26d12fb029..03b3c7761ac6 100644 --- a/datafusion/physical-expr/src/equivalence/class.rs +++ b/datafusion/physical-expr/src/equivalence/class.rs @@ -626,6 +626,59 @@ impl EquivalenceGroup { JoinType::RightSemi | JoinType::RightAnti => right_equivalences.clone(), } } + + /// Checks if two expressions are equal either directly or through equivalence classes. + /// For complex expressions (e.g. a + b), checks that the expression trees are structurally + /// identical and their leaf nodes are equivalent either directly or through equivalence classes. 
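As a quick illustration of the equivalence-aware comparison described above, here is a minimal usage sketch. It is not one of the hunks in this patch; it assumes the same `Column`, `BinaryExpr`, `Operator`, and `Arc<dyn PhysicalExpr>` items that the test module added later in this file uses, and it runs inside that module's scope. The full coverage lives in `test_exprs_equal` below.

// Sketch only: one equivalence class (a = x); b has no equivalent column.
let col_a = Arc::new(Column::new("a", 0)) as Arc<dyn PhysicalExpr>;
let col_b = Arc::new(Column::new("b", 1)) as Arc<dyn PhysicalExpr>;
let col_x = Arc::new(Column::new("x", 2)) as Arc<dyn PhysicalExpr>;
let eq_group = EquivalenceGroup::new(vec![EquivalenceClass::new(vec![
    Arc::clone(&col_a),
    Arc::clone(&col_x),
])]);

// a + b vs. x + b: identical tree shape, leaves pairwise equivalent => equal.
let a_plus_b: Arc<dyn PhysicalExpr> = Arc::new(BinaryExpr::new(
    Arc::clone(&col_a),
    Operator::Plus,
    Arc::clone(&col_b),
));
let x_plus_b: Arc<dyn PhysicalExpr> = Arc::new(BinaryExpr::new(
    Arc::clone(&col_x),
    Operator::Plus,
    Arc::clone(&col_b),
));
assert!(eq_group.exprs_equal(&a_plus_b, &x_plus_b));

// b + a swaps the operands; a and b are not equivalent, so this is not equal.
let b_plus_a: Arc<dyn PhysicalExpr> = Arc::new(BinaryExpr::new(
    Arc::clone(&col_b),
    Operator::Plus,
    Arc::clone(&col_a),
));
assert!(!eq_group.exprs_equal(&a_plus_b, &b_plus_a));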
+ pub fn exprs_equal( + &self, + left: &Arc, + right: &Arc, + ) -> bool { + // Direct equality check + if left.eq(right) { + return true; + } + + // Check if expressions are equivalent through equivalence classes + // We need to check both directions since expressions might be in different classes + if let Some(left_class) = self.get_equivalence_class(left) { + if left_class.contains(right) { + return true; + } + } + if let Some(right_class) = self.get_equivalence_class(right) { + if right_class.contains(left) { + return true; + } + } + + // For non-leaf nodes, check structural equality + let left_children = left.children(); + let right_children = right.children(); + + // If either expression is a leaf node and we haven't found equality yet, + // they must be different + if left_children.is_empty() || right_children.is_empty() { + return false; + } + + // Type equality check through reflection + if left.as_any().type_id() != right.as_any().type_id() { + return false; + } + + // Check if the number of children is the same + if left_children.len() != right_children.len() { + return false; + } + + // Check if all children are equal + left_children + .into_iter() + .zip(right_children) + .all(|(left_child, right_child)| self.exprs_equal(left_child, right_child)) + } } impl Display for EquivalenceGroup { @@ -647,9 +700,10 @@ mod tests { use super::*; use crate::equivalence::tests::create_test_params; - use crate::expressions::{lit, Literal}; + use crate::expressions::{lit, BinaryExpr, Literal}; use datafusion_common::{Result, ScalarValue}; + use datafusion_expr::Operator; #[test] fn test_bridge_groups() -> Result<()> { @@ -777,4 +831,159 @@ mod tests { assert!(!cls1.contains_any(&cls3)); assert!(!cls2.contains_any(&cls3)); } + + #[test] + fn test_exprs_equal() -> Result<()> { + struct TestCase { + left: Arc, + right: Arc, + expected: bool, + description: &'static str, + } + + // Create test columns + let col_a = Arc::new(Column::new("a", 0)) as Arc; + let col_b = Arc::new(Column::new("b", 1)) as Arc; + let col_x = Arc::new(Column::new("x", 2)) as Arc; + let col_y = Arc::new(Column::new("y", 3)) as Arc; + + // Create test literals + let lit_1 = + Arc::new(Literal::new(ScalarValue::Int32(Some(1)))) as Arc; + let lit_2 = + Arc::new(Literal::new(ScalarValue::Int32(Some(2)))) as Arc; + + // Create equivalence group with classes (a = x) and (b = y) + let eq_group = EquivalenceGroup::new(vec![ + EquivalenceClass::new(vec![Arc::clone(&col_a), Arc::clone(&col_x)]), + EquivalenceClass::new(vec![Arc::clone(&col_b), Arc::clone(&col_y)]), + ]); + + let test_cases = vec![ + // Basic equality tests + TestCase { + left: Arc::clone(&col_a), + right: Arc::clone(&col_a), + expected: true, + description: "Same column should be equal", + }, + // Equivalence class tests + TestCase { + left: Arc::clone(&col_a), + right: Arc::clone(&col_x), + expected: true, + description: "Columns in same equivalence class should be equal", + }, + TestCase { + left: Arc::clone(&col_b), + right: Arc::clone(&col_y), + expected: true, + description: "Columns in same equivalence class should be equal", + }, + TestCase { + left: Arc::clone(&col_a), + right: Arc::clone(&col_b), + expected: false, + description: + "Columns in different equivalence classes should not be equal", + }, + // Literal tests + TestCase { + left: Arc::clone(&lit_1), + right: Arc::clone(&lit_1), + expected: true, + description: "Same literal should be equal", + }, + TestCase { + left: Arc::clone(&lit_1), + right: Arc::clone(&lit_2), + expected: false, + description: 
"Different literals should not be equal", + }, + // Complex expression tests + TestCase { + left: Arc::new(BinaryExpr::new( + Arc::clone(&col_a), + Operator::Plus, + Arc::clone(&col_b), + )) as Arc, + right: Arc::new(BinaryExpr::new( + Arc::clone(&col_x), + Operator::Plus, + Arc::clone(&col_y), + )) as Arc, + expected: true, + description: + "Binary expressions with equivalent operands should be equal", + }, + TestCase { + left: Arc::new(BinaryExpr::new( + Arc::clone(&col_a), + Operator::Plus, + Arc::clone(&col_b), + )) as Arc, + right: Arc::new(BinaryExpr::new( + Arc::clone(&col_x), + Operator::Plus, + Arc::clone(&col_a), + )) as Arc, + expected: false, + description: + "Binary expressions with non-equivalent operands should not be equal", + }, + TestCase { + left: Arc::new(BinaryExpr::new( + Arc::clone(&col_a), + Operator::Plus, + Arc::clone(&lit_1), + )) as Arc, + right: Arc::new(BinaryExpr::new( + Arc::clone(&col_x), + Operator::Plus, + Arc::clone(&lit_1), + )) as Arc, + expected: true, + description: "Binary expressions with equivalent column and same literal should be equal", + }, + TestCase { + left: Arc::new(BinaryExpr::new( + Arc::new(BinaryExpr::new( + Arc::clone(&col_a), + Operator::Plus, + Arc::clone(&col_b), + )), + Operator::Multiply, + Arc::clone(&lit_1), + )) as Arc, + right: Arc::new(BinaryExpr::new( + Arc::new(BinaryExpr::new( + Arc::clone(&col_x), + Operator::Plus, + Arc::clone(&col_y), + )), + Operator::Multiply, + Arc::clone(&lit_1), + )) as Arc, + expected: true, + description: "Nested binary expressions with equivalent operands should be equal", + }, + ]; + + for TestCase { + left, + right, + expected, + description, + } in test_cases + { + let actual = eq_group.exprs_equal(&left, &right); + assert_eq!( + actual, expected, + "{}: Failed comparing {:?} and {:?}, expected {}, got {}", + description, left, right, expected, actual + ); + } + + Ok(()) + } } diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index d4814fb4d780..f019b2e570ff 100755 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. -use std::fmt; use std::fmt::Display; use std::hash::{Hash, Hasher}; use std::iter::Peekable; use std::slice::Iter; use std::sync::Arc; +use std::{fmt, mem}; use super::ordering::collapse_lex_ordering; use crate::equivalence::class::const_exprs_contains; @@ -412,12 +412,51 @@ impl EquivalenceProperties { /// Updates the ordering equivalence group within assuming that the table /// is re-sorted according to the argument `sort_exprs`. Note that constants /// and equivalence classes are unchanged as they are unaffected by a re-sort. + /// If the given ordering is already satisfied, the function does nothing. pub fn with_reorder(mut self, sort_exprs: LexOrdering) -> Self { - // TODO: In some cases, existing ordering equivalences may still be valid add this analysis. 
- self.oeq_class = OrderingEquivalenceClass::new(vec![sort_exprs]); + // Filter out constant expressions as they don't affect ordering + let filtered_exprs = LexOrdering::new( + sort_exprs + .into_iter() + .filter(|expr| !self.is_expr_constant(&expr.expr)) + .collect(), + ); + + if filtered_exprs.is_empty() { + return self; + } + + let mut new_orderings = vec![filtered_exprs.clone()]; + + // Preserve valid suffixes from existing orderings + let orderings = mem::take(&mut self.oeq_class.orderings); + for existing in orderings { + if self.is_prefix_of(&filtered_exprs, &existing) { + let mut extended = filtered_exprs.clone(); + extended.extend(existing.into_iter().skip(filtered_exprs.len())); + new_orderings.push(extended); + } + } + + self.oeq_class = OrderingEquivalenceClass::new(new_orderings); self } + /// Checks if the new ordering matches a prefix of the existing ordering + /// (considering expression equivalences) + fn is_prefix_of(&self, new_order: &LexOrdering, existing: &LexOrdering) -> bool { + // Check if new order is longer than existing - can't be a prefix + if new_order.len() > existing.len() { + return false; + } + + // Check if new order matches existing prefix (considering equivalences) + new_order.iter().zip(existing).all(|(new, existing)| { + self.eq_group.exprs_equal(&new.expr, &existing.expr) + && new.options == existing.options + }) + } + /// Normalizes the given sort expressions (i.e. `sort_exprs`) using the /// equivalence group and the ordering equivalence class within. /// @@ -3852,4 +3891,246 @@ mod tests { Ok(()) } + + #[test] + fn test_with_reorder_constant_filtering() -> Result<()> { + let schema = create_test_schema()?; + let mut eq_properties = EquivalenceProperties::new(Arc::clone(&schema)); + + // Setup constant columns + let col_a = col("a", &schema)?; + let col_b = col("b", &schema)?; + eq_properties = eq_properties.with_constants([ConstExpr::from(&col_a)]); + + let sort_exprs = LexOrdering::new(vec![ + PhysicalSortExpr { + expr: Arc::clone(&col_a), + options: SortOptions::default(), + }, + PhysicalSortExpr { + expr: Arc::clone(&col_b), + options: SortOptions::default(), + }, + ]); + + let result = eq_properties.with_reorder(sort_exprs); + + // Should only contain b since a is constant + assert_eq!(result.oeq_class().len(), 1); + assert_eq!(result.oeq_class().orderings[0].len(), 1); + assert!(result.oeq_class().orderings[0][0].expr.eq(&col_b)); + + Ok(()) + } + + #[test] + fn test_with_reorder_preserve_suffix() -> Result<()> { + let schema = create_test_schema()?; + let mut eq_properties = EquivalenceProperties::new(Arc::clone(&schema)); + + let col_a = col("a", &schema)?; + let col_b = col("b", &schema)?; + let col_c = col("c", &schema)?; + + let asc = SortOptions::default(); + let desc = SortOptions { + descending: true, + nulls_first: true, + }; + + // Initial ordering: [a ASC, b DESC, c ASC] + eq_properties.add_new_orderings([LexOrdering::new(vec![ + PhysicalSortExpr { + expr: Arc::clone(&col_a), + options: asc, + }, + PhysicalSortExpr { + expr: Arc::clone(&col_b), + options: desc, + }, + PhysicalSortExpr { + expr: Arc::clone(&col_c), + options: asc, + }, + ])]); + + // New ordering: [a ASC] + let new_order = LexOrdering::new(vec![PhysicalSortExpr { + expr: Arc::clone(&col_a), + options: asc, + }]); + + let result = eq_properties.with_reorder(new_order); + + // Should only contain [a ASC, b DESC, c ASC] + assert_eq!(result.oeq_class().len(), 1); + assert_eq!(result.oeq_class().orderings[0].len(), 3); + 
assert!(result.oeq_class().orderings[0][0].expr.eq(&col_a)); + assert!(result.oeq_class().orderings[0][0].options.eq(&asc)); + assert!(result.oeq_class().orderings[0][1].expr.eq(&col_b)); + assert!(result.oeq_class().orderings[0][1].options.eq(&desc)); + assert!(result.oeq_class().orderings[0][2].expr.eq(&col_c)); + assert!(result.oeq_class().orderings[0][2].options.eq(&asc)); + + Ok(()) + } + + #[test] + fn test_with_reorder_equivalent_expressions() -> Result<()> { + let schema = create_test_schema()?; + let mut eq_properties = EquivalenceProperties::new(Arc::clone(&schema)); + + let col_a = col("a", &schema)?; + let col_b = col("b", &schema)?; + let col_c = col("c", &schema)?; + + // Make a and b equivalent + eq_properties.add_equal_conditions(&col_a, &col_b)?; + + let asc = SortOptions::default(); + + // Initial ordering: [a ASC, c ASC] + eq_properties.add_new_orderings([LexOrdering::new(vec![ + PhysicalSortExpr { + expr: Arc::clone(&col_a), + options: asc, + }, + PhysicalSortExpr { + expr: Arc::clone(&col_c), + options: asc, + }, + ])]); + + // New ordering: [b ASC] + let new_order = LexOrdering::new(vec![PhysicalSortExpr { + expr: Arc::clone(&col_b), + options: asc, + }]); + + let result = eq_properties.with_reorder(new_order); + + // Should only contain [b ASC, c ASC] + assert_eq!(result.oeq_class().len(), 1); + + // Verify orderings + assert_eq!(result.oeq_class().orderings[0].len(), 2); + assert!(result.oeq_class().orderings[0][0].expr.eq(&col_b)); + assert!(result.oeq_class().orderings[0][0].options.eq(&asc)); + assert!(result.oeq_class().orderings[0][1].expr.eq(&col_c)); + assert!(result.oeq_class().orderings[0][1].options.eq(&asc)); + + Ok(()) + } + + #[test] + fn test_with_reorder_incompatible_prefix() -> Result<()> { + let schema = create_test_schema()?; + let mut eq_properties = EquivalenceProperties::new(Arc::clone(&schema)); + + let col_a = col("a", &schema)?; + let col_b = col("b", &schema)?; + + let asc = SortOptions::default(); + let desc = SortOptions { + descending: true, + nulls_first: true, + }; + + // Initial ordering: [a ASC, b DESC] + eq_properties.add_new_orderings([LexOrdering::new(vec![ + PhysicalSortExpr { + expr: Arc::clone(&col_a), + options: asc, + }, + PhysicalSortExpr { + expr: Arc::clone(&col_b), + options: desc, + }, + ])]); + + // New ordering: [a DESC] + let new_order = LexOrdering::new(vec![PhysicalSortExpr { + expr: Arc::clone(&col_a), + options: desc, + }]); + + let result = eq_properties.with_reorder(new_order.clone()); + + // Should only contain the new ordering since options don't match + assert_eq!(result.oeq_class().len(), 1); + assert_eq!(result.oeq_class().orderings[0], new_order); + + Ok(()) + } + + #[test] + fn test_with_reorder_comprehensive() -> Result<()> { + let schema = create_test_schema()?; + let mut eq_properties = EquivalenceProperties::new(Arc::clone(&schema)); + + let col_a = col("a", &schema)?; + let col_b = col("b", &schema)?; + let col_c = col("c", &schema)?; + let col_d = col("d", &schema)?; + let col_e = col("e", &schema)?; + + let asc = SortOptions::default(); + + // Constants: c is constant + eq_properties = eq_properties.with_constants([ConstExpr::from(&col_c)]); + + // Equality: b = d + eq_properties.add_equal_conditions(&col_b, &col_d)?; + + // Orderings: [d ASC, a ASC], [e ASC] + eq_properties.add_new_orderings([ + LexOrdering::new(vec![ + PhysicalSortExpr { + expr: Arc::clone(&col_d), + options: asc, + }, + PhysicalSortExpr { + expr: Arc::clone(&col_a), + options: asc, + }, + ]), + 
LexOrdering::new(vec![PhysicalSortExpr { + expr: Arc::clone(&col_e), + options: asc, + }]), + ]); + + // Initial ordering: [b ASC, c ASC] + let new_order = LexOrdering::new(vec![ + PhysicalSortExpr { + expr: Arc::clone(&col_b), + options: asc, + }, + PhysicalSortExpr { + expr: Arc::clone(&col_c), + options: asc, + }, + ]); + + let result = eq_properties.with_reorder(new_order); + + // Should preserve the original [d ASC, a ASC] ordering + assert_eq!(result.oeq_class().len(), 1); + let ordering = &result.oeq_class().orderings[0]; + assert_eq!(ordering.len(), 2); + + // First expression should be either b or d (they're equivalent) + assert!( + ordering[0].expr.eq(&col_b) || ordering[0].expr.eq(&col_d), + "Expected b or d as first expression, got {:?}", + ordering[0].expr + ); + assert!(ordering[0].options.eq(&asc)); + + // Second expression should be a + assert!(ordering[1].expr.eq(&col_a)); + assert!(ordering[1].options.eq(&asc)); + + Ok(()) + } } From 079f6219fed8eb8812b028257c031dfdfba96cb4 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Fri, 20 Dec 2024 07:40:48 -0600 Subject: [PATCH 4/9] replace CASE expressions in predicate pruning with boolean algebra (#13795) * replace CASE expressions in predicate pruning with boolean algebra * fix merge * update tests * add some more tests * add some more tests * remove duplicate test case * Update datafusion/physical-optimizer/src/pruning.rs * swap NOT for != * replace comments, update docstrings * fix example * update tests * update tests * Apply suggestions from code review Co-authored-by: Andrew Lamb * Update pruning.rs Co-authored-by: Chunchun Ye <14298407+appletreeisyellow@users.noreply.github.com> * Update pruning.rs Co-authored-by: Chunchun Ye <14298407+appletreeisyellow@users.noreply.github.com> --------- Co-authored-by: Andrew Lamb Co-authored-by: Chunchun Ye <14298407+appletreeisyellow@users.noreply.github.com> --- .../datasource/physical_plan/parquet/mod.rs | 2 +- datafusion/physical-optimizer/src/pruning.rs | 325 ++++++++++-------- .../test_files/parquet_filter_pushdown.slt | 8 +- .../test_files/repartition_scan.slt | 8 +- datafusion/sqllogictest/test_files/window.slt | 2 +- 5 files changed, 184 insertions(+), 161 deletions(-) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index cb79055ce301..7573e32f8652 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -2001,7 +2001,7 @@ mod tests { assert_contains!( &display, - "pruning_predicate=CASE WHEN c1_null_count@2 = c1_row_count@3 THEN false ELSE c1_min@0 != bar OR bar != c1_max@1 END" + "pruning_predicate=c1_null_count@2 != c1_row_count@3 AND (c1_min@0 != bar OR bar != c1_max@1)" ); assert_contains!(&display, r#"predicate=c1@0 != bar"#); diff --git a/datafusion/physical-optimizer/src/pruning.rs b/datafusion/physical-optimizer/src/pruning.rs index 3cfb03b7205a..77fc76c35352 100644 --- a/datafusion/physical-optimizer/src/pruning.rs +++ b/datafusion/physical-optimizer/src/pruning.rs @@ -287,7 +287,12 @@ pub trait PruningStatistics { /// predicate can never possibly be true). The container can be pruned (skipped) /// entirely. 
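To see this end to end, here is a minimal sketch (not one of this patch's hunks) that mirrors "Example 1" discussed further below. It assumes the `TestStatistics`, `ContainerStats`, and `prune_with_expr` helpers from the test module later in this file, applied to a single hypothetical container where `x` is in `[1, 100]` and `y` is in `[4, 7]`:

// Sketch only: one container, x in [1, 100], y in [4, 7]; null/row counts unknown.
let schema = Arc::new(Schema::new(vec![
    Field::new("x", DataType::Int32, true),
    Field::new("y", DataType::Int32, true),
]));
let statistics = TestStatistics::new()
    .with("x", ContainerStats::new_i32(vec![Some(1)], vec![Some(100)]))
    .with("y", ContainerStats::new_i32(vec![Some(4)], vec![Some(7)]));

// `x = 5 AND y = 10` can never hold when y <= 7, so the container is pruned
// (the expected per-container result is `false`, meaning "skip it").
prune_with_expr(
    col("x").eq(lit(5)).and(col("y").eq(lit(10))),
    &schema,
    &statistics,
    &[false],
);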
/// -/// Note that in order to be correct, `PruningPredicate` must return false +/// While `PruningPredicate` will never return a `NULL` value, the +/// rewritten predicate (as returned by `build_predicate_expression` and used internally +/// by `PruningPredicate`) may evaluate to `NULL` when some of the min/max values +/// or null / row counts are not known. +/// +/// In order to be correct, `PruningPredicate` must return false /// **only** if it can determine that for all rows in the container, the /// predicate could never evaluate to `true` (always evaluates to either `NULL` /// or `false`). @@ -327,12 +332,12 @@ pub trait PruningStatistics { /// /// Original Predicate | Rewritten Predicate /// ------------------ | -------------------- -/// `x = 5` | `CASE WHEN x_null_count = x_row_count THEN false ELSE x_min <= 5 AND 5 <= x_max END` -/// `x < 5` | `CASE WHEN x_null_count = x_row_count THEN false ELSE x_max < 5 END` -/// `x = 5 AND y = 10` | `CASE WHEN x_null_count = x_row_count THEN false ELSE x_min <= 5 AND 5 <= x_max END AND CASE WHEN y_null_count = y_row_count THEN false ELSE y_min <= 10 AND 10 <= y_max END` +/// `x = 5` | `x_null_count != x_row_count AND (x_min <= 5 AND 5 <= x_max)` +/// `x < 5` | `x_null_count != x_row_count THEN false (x_max < 5)` +/// `x = 5 AND y = 10` | `x_null_count != x_row_count AND (x_min <= 5 AND 5 <= x_max) AND y_null_count != y_row_count (y_min <= 10 AND 10 <= y_max)` /// `x IS NULL` | `x_null_count > 0` /// `x IS NOT NULL` | `x_null_count != row_count` -/// `CAST(x as int) = 5` | `CASE WHEN x_null_count = x_row_count THEN false ELSE CAST(x_min as int) <= 5 AND 5 <= CAST(x_max as int) END` +/// `CAST(x as int) = 5` | `x_null_count != x_row_count (CAST(x_min as int) <= 5 AND 5 <= CAST(x_max as int))` /// /// ## Predicate Evaluation /// The PruningPredicate works in two passes @@ -352,15 +357,9 @@ pub trait PruningStatistics { /// Given the predicate, `x = 5 AND y = 10`, the rewritten predicate would look like: /// /// ```sql -/// CASE -/// WHEN x_null_count = x_row_count THEN false -/// ELSE x_min <= 5 AND 5 <= x_max -/// END +/// x_null_count != x_row_count AND (x_min <= 5 AND 5 <= x_max) /// AND -/// CASE -/// WHEN y_null_count = y_row_count THEN false -/// ELSE y_min <= 10 AND 10 <= y_max -/// END +/// y_null_count != y_row_count AND (y_min <= 10 AND 10 <= y_max) /// ``` /// /// If we know that for a given container, `x` is between `1 and 100` and we know that @@ -381,16 +380,22 @@ pub trait PruningStatistics { /// When these statistics values are substituted in to the rewritten predicate and /// simplified, the result is `false`: /// -/// * `CASE WHEN null = null THEN false ELSE 1 <= 5 AND 5 <= 100 END AND CASE WHEN null = null THEN false ELSE 4 <= 10 AND 10 <= 7 END` -/// * `null = null` is `null` which is not true, so the `CASE` expression will use the `ELSE` clause -/// * `1 <= 5 AND 5 <= 100 AND 4 <= 10 AND 10 <= 7` -/// * `true AND true AND true AND false` +/// * `null != null AND (1 <= 5 AND 5 <= 100) AND null != null AND (4 <= 10 AND 10 <= 7)` +/// * `null = null` is `null` which is not true, so the AND moves on to the next clause +/// * `null and (1 <= 5 AND 5 <= 100) AND null AND (4 <= 10 AND 10 <= 7)` +/// * evaluating the clauses further we get: +/// * `null and true and null and false` +/// * `null and false` /// * `false` /// /// Returning `false` means the container can be pruned, which matches the /// intuition that `x = 5 AND y = 10` can’t be true for any row if all values of `y` /// are `7` or less. 
/// +/// Note that if we had ended up with `null AND true AND null AND true` the result +/// would have been `null`. +/// `null` is treated the same as`true`, because we can't prove that the predicate is `false.` +/// /// If, for some other container, we knew `y` was between the values `4` and /// `15`, then the rewritten predicate evaluates to `true` (verifying this is /// left as an exercise to the reader -- are you still here?), and the container @@ -405,15 +410,9 @@ pub trait PruningStatistics { /// look like the same as example 1: /// /// ```sql -/// CASE -/// WHEN x_null_count = x_row_count THEN false -/// ELSE x_min <= 5 AND 5 <= x_max -/// END +/// x_null_count != x_row_count AND (x_min <= 5 AND 5 <= x_max) /// AND -/// CASE -/// WHEN y_null_count = y_row_count THEN false -/// ELSE y_min <= 10 AND 10 <= y_max -/// END +/// y_null_count != y_row_count AND (y_min <= 10 AND 10 <= y_max) /// ``` /// /// If we know that for another given container, `x_min` is NULL and `x_max` is @@ -435,14 +434,13 @@ pub trait PruningStatistics { /// When these statistics values are substituted in to the rewritten predicate and /// simplified, the result is `false`: /// -/// * `CASE WHEN 100 = 100 THEN false ELSE null <= 5 AND 5 <= null END AND CASE WHEN null = null THEN false ELSE 4 <= 10 AND 10 <= 7 END` -/// * Since `100 = 100` is `true`, the `CASE` expression will use the `THEN` clause, i.e. `false` -/// * The other `CASE` expression will use the `ELSE` clause, i.e. `4 <= 10 AND 10 <= 7` -/// * `false AND true` +/// * `100 != 100 AND (null <= 5 AND 5 <= null) AND null = null AND (4 <= 10 AND 10 <= 7)` +/// * `false AND null AND null AND false` +/// * `false AND false` /// * `false` /// /// Returning `false` means the container can be pruned, which matches the -/// intuition that `x = 5 AND y = 10` can’t be true for all values in `x` +/// intuition that `x = 5 AND y = 10` can’t be true because all values in `x` /// are known to be NULL. /// /// # Related Work @@ -1603,13 +1601,15 @@ fn build_statistics_expr( ); } }; - let statistics_expr = wrap_case_expr(statistics_expr, expr_builder)?; + let statistics_expr = wrap_null_count_check_expr(statistics_expr, expr_builder)?; Ok(statistics_expr) } -/// Wrap the statistics expression in a case expression. -/// This is necessary to handle the case where the column is known -/// to be all nulls. +/// Wrap the statistics expression in a check that skips the expression if the column is all nulls. +/// This is important not only as an optimization but also because statistics may not be +/// accurate for columns that are all nulls. +/// For example, for an `int` column `x` with all nulls, the min/max/null_count statistics +/// might be set to 0 and evaluating `x = 0` would incorrectly include the column. /// /// For example: /// @@ -1618,33 +1618,29 @@ fn build_statistics_expr( /// will become /// /// ```sql -/// CASE -/// WHEN x_null_count = x_row_count THEN false -/// ELSE x_min <= 10 AND 10 <= x_max -/// END +/// x_null_count != x_row_count AND (x_min <= 10 AND 10 <= x_max) /// ```` /// /// If the column is known to be all nulls, then the expression /// `x_null_count = x_row_count` will be true, which will cause the -/// case expression to return false. Therefore, prune out the container. -fn wrap_case_expr( +/// boolean expression to return false. Therefore, prune out the container. 
+fn wrap_null_count_check_expr( statistics_expr: Arc, expr_builder: &mut PruningExpressionBuilder, ) -> Result> { - // x_null_count = x_row_count - let when_null_count_eq_row_count = Arc::new(phys_expr::BinaryExpr::new( + // x_null_count != x_row_count + let not_when_null_count_eq_row_count = Arc::new(phys_expr::BinaryExpr::new( expr_builder.null_count_column_expr()?, - Operator::Eq, + Operator::NotEq, expr_builder.row_count_column_expr()?, )); - let then = Arc::new(phys_expr::Literal::new(ScalarValue::Boolean(Some(false)))); - - // CASE WHEN x_null_count = x_row_count THEN false ELSE END - Ok(Arc::new(phys_expr::CaseExpr::try_new( - None, - vec![(when_null_count_eq_row_count, then)], - Some(statistics_expr), - )?)) + + // (x_null_count != x_row_count) AND () + Ok(Arc::new(phys_expr::BinaryExpr::new( + not_when_null_count_eq_row_count, + Operator::And, + statistics_expr, + ))) } #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -2052,6 +2048,110 @@ mod tests { } } + #[test] + fn prune_all_rows_null_counts() { + // if null_count = row_count then we should prune the container for i = 0 + // regardless of the statistics + let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)])); + let statistics = TestStatistics::new().with( + "i", + ContainerStats::new_i32( + vec![Some(0)], // min + vec![Some(0)], // max + ) + .with_null_counts(vec![Some(1)]) + .with_row_counts(vec![Some(1)]), + ); + let expected_ret = &[false]; + prune_with_expr(col("i").eq(lit(0)), &schema, &statistics, expected_ret); + + // this should be true even if the container stats are missing + let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)])); + let container_stats = ContainerStats { + min: Some(Arc::new(Int32Array::from(vec![None]))), + max: Some(Arc::new(Int32Array::from(vec![None]))), + null_counts: Some(Arc::new(UInt64Array::from(vec![Some(1)]))), + row_counts: Some(Arc::new(UInt64Array::from(vec![Some(1)]))), + ..ContainerStats::default() + }; + let statistics = TestStatistics::new().with("i", container_stats); + let expected_ret = &[false]; + prune_with_expr(col("i").eq(lit(0)), &schema, &statistics, expected_ret); + + // If the null counts themselves are missing we should be able to fall back to the stats + let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)])); + let container_stats = ContainerStats { + min: Some(Arc::new(Int32Array::from(vec![Some(0)]))), + max: Some(Arc::new(Int32Array::from(vec![Some(0)]))), + null_counts: Some(Arc::new(UInt64Array::from(vec![None]))), + row_counts: Some(Arc::new(UInt64Array::from(vec![Some(1)]))), + ..ContainerStats::default() + }; + let statistics = TestStatistics::new().with("i", container_stats); + let expected_ret = &[true]; + prune_with_expr(col("i").eq(lit(0)), &schema, &statistics, expected_ret); + let expected_ret = &[false]; + prune_with_expr(col("i").gt(lit(0)), &schema, &statistics, expected_ret); + + // Same for the row counts + let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)])); + let container_stats = ContainerStats { + min: Some(Arc::new(Int32Array::from(vec![Some(0)]))), + max: Some(Arc::new(Int32Array::from(vec![Some(0)]))), + null_counts: Some(Arc::new(UInt64Array::from(vec![Some(1)]))), + row_counts: Some(Arc::new(UInt64Array::from(vec![None]))), + ..ContainerStats::default() + }; + let statistics = TestStatistics::new().with("i", container_stats); + let expected_ret = &[true]; + prune_with_expr(col("i").eq(lit(0)), &schema, &statistics, expected_ret); + let 
expected_ret = &[false]; + prune_with_expr(col("i").gt(lit(0)), &schema, &statistics, expected_ret); + } + + #[test] + fn prune_missing_statistics() { + // If the min or max stats are missing we should not prune + // (unless we know all rows are null, see `prune_all_rows_null_counts`) + let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)])); + let container_stats = ContainerStats { + min: Some(Arc::new(Int32Array::from(vec![None, Some(0)]))), + max: Some(Arc::new(Int32Array::from(vec![Some(0), None]))), + null_counts: Some(Arc::new(UInt64Array::from(vec![Some(0), Some(0)]))), + row_counts: Some(Arc::new(UInt64Array::from(vec![Some(1), Some(1)]))), + ..ContainerStats::default() + }; + let statistics = TestStatistics::new().with("i", container_stats); + let expected_ret = &[true, true]; + prune_with_expr(col("i").eq(lit(0)), &schema, &statistics, expected_ret); + let expected_ret = &[false, true]; + prune_with_expr(col("i").gt(lit(0)), &schema, &statistics, expected_ret); + let expected_ret = &[true, false]; + prune_with_expr(col("i").lt(lit(0)), &schema, &statistics, expected_ret); + } + + #[test] + fn prune_null_stats() { + // if null_count = row_count then we should prune the container for i = 0 + // regardless of the statistics + let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)])); + + let statistics = TestStatistics::new().with( + "i", + ContainerStats::new_i32( + vec![Some(0)], // min + vec![Some(0)], // max + ) + .with_null_counts(vec![Some(1)]) + .with_row_counts(vec![Some(1)]), + ); + + let expected_ret = &[false]; + + // i = 0 + prune_with_expr(col("i").eq(lit(0)), &schema, &statistics, expected_ret); + } + #[test] fn test_build_statistics_record_batch() { // Request a record batch with of s1_min, s2_max, s3_max, s3_min @@ -2233,7 +2333,8 @@ mod tests { #[test] fn row_group_predicate_eq() -> Result<()> { let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]); - let expected_expr = "CASE WHEN c1_null_count@2 = c1_row_count@3 THEN false ELSE c1_min@0 <= 1 AND 1 <= c1_max@1 END"; + let expected_expr = + "c1_null_count@2 != c1_row_count@3 AND c1_min@0 <= 1 AND 1 <= c1_max@1"; // test column on the left let expr = col("c1").eq(lit(1)); @@ -2253,7 +2354,8 @@ mod tests { #[test] fn row_group_predicate_not_eq() -> Result<()> { let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]); - let expected_expr = "CASE WHEN c1_null_count@2 = c1_row_count@3 THEN false ELSE c1_min@0 != 1 OR 1 != c1_max@1 END"; + let expected_expr = + "c1_null_count@2 != c1_row_count@3 AND (c1_min@0 != 1 OR 1 != c1_max@1)"; // test column on the left let expr = col("c1").not_eq(lit(1)); @@ -2273,8 +2375,7 @@ mod tests { #[test] fn row_group_predicate_gt() -> Result<()> { let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]); - let expected_expr = - "CASE WHEN c1_null_count@1 = c1_row_count@2 THEN false ELSE c1_max@0 > 1 END"; + let expected_expr = "c1_null_count@1 != c1_row_count@2 AND c1_max@0 > 1"; // test column on the left let expr = col("c1").gt(lit(1)); @@ -2294,7 +2395,7 @@ mod tests { #[test] fn row_group_predicate_gt_eq() -> Result<()> { let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]); - let expected_expr = "CASE WHEN c1_null_count@1 = c1_row_count@2 THEN false ELSE c1_max@0 >= 1 END"; + let expected_expr = "c1_null_count@1 != c1_row_count@2 AND c1_max@0 >= 1"; // test column on the left let expr = col("c1").gt_eq(lit(1)); @@ -2313,8 +2414,7 @@ mod tests { #[test] fn 
row_group_predicate_lt() -> Result<()> { let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]); - let expected_expr = - "CASE WHEN c1_null_count@1 = c1_row_count@2 THEN false ELSE c1_min@0 < 1 END"; + let expected_expr = "c1_null_count@1 != c1_row_count@2 AND c1_min@0 < 1"; // test column on the left let expr = col("c1").lt(lit(1)); @@ -2334,7 +2434,7 @@ mod tests { #[test] fn row_group_predicate_lt_eq() -> Result<()> { let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]); - let expected_expr = "CASE WHEN c1_null_count@1 = c1_row_count@2 THEN false ELSE c1_min@0 <= 1 END"; + let expected_expr = "c1_null_count@1 != c1_row_count@2 AND c1_min@0 <= 1"; // test column on the left let expr = col("c1").lt_eq(lit(1)); @@ -2359,8 +2459,7 @@ mod tests { ]); // test AND operator joining supported c1 < 1 expression and unsupported c2 > c3 expression let expr = col("c1").lt(lit(1)).and(col("c2").lt(col("c3"))); - let expected_expr = - "CASE WHEN c1_null_count@1 = c1_row_count@2 THEN false ELSE c1_min@0 < 1 END"; + let expected_expr = "c1_null_count@1 != c1_row_count@2 AND c1_min@0 < 1"; let predicate_expr = test_build_predicate_expression(&expr, &schema, &mut RequiredColumns::new()); assert_eq!(predicate_expr.to_string(), expected_expr); @@ -2426,7 +2525,7 @@ mod tests { #[test] fn row_group_predicate_lt_bool() -> Result<()> { let schema = Schema::new(vec![Field::new("c1", DataType::Boolean, false)]); - let expected_expr = "CASE WHEN c1_null_count@1 = c1_row_count@2 THEN false ELSE c1_min@0 < true END"; + let expected_expr = "c1_null_count@1 != c1_row_count@2 AND c1_min@0 < true"; // DF doesn't support arithmetic on boolean columns so // this predicate will error when evaluated @@ -2449,20 +2548,11 @@ mod tests { let expr = col("c1") .lt(lit(1)) .and(col("c2").eq(lit(2)).or(col("c2").eq(lit(3)))); - let expected_expr = "\ - CASE \ - WHEN c1_null_count@1 = c1_row_count@2 THEN false \ - ELSE c1_min@0 < 1 \ - END \ - AND (\ - CASE \ - WHEN c2_null_count@5 = c2_row_count@6 THEN false \ - ELSE c2_min@3 <= 2 AND 2 <= c2_max@4 \ - END \ - OR CASE \ - WHEN c2_null_count@5 = c2_row_count@6 THEN false \ - ELSE c2_min@3 <= 3 AND 3 <= c2_max@4 \ - END\ + let expected_expr = "c1_null_count@1 != c1_row_count@2 \ + AND c1_min@0 < 1 AND (\ + c2_null_count@5 != c2_row_count@6 \ + AND c2_min@3 <= 2 AND 2 <= c2_max@4 OR \ + c2_null_count@5 != c2_row_count@6 AND c2_min@3 <= 3 AND 3 <= c2_max@4\ )"; let predicate_expr = test_build_predicate_expression(&expr, &schema, &mut required_columns); @@ -2554,18 +2644,7 @@ mod tests { vec![lit(1), lit(2), lit(3)], false, )); - let expected_expr = "CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE c1_min@0 <= 1 AND 1 <= c1_max@1 \ - END \ - OR CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE c1_min@0 <= 2 AND 2 <= c1_max@1 \ - END \ - OR CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE c1_min@0 <= 3 AND 3 <= c1_max@1 \ - END"; + let expected_expr = "c1_null_count@2 != c1_row_count@3 AND c1_min@0 <= 1 AND 1 <= c1_max@1 OR c1_null_count@2 != c1_row_count@3 AND c1_min@0 <= 2 AND 2 <= c1_max@1 OR c1_null_count@2 != c1_row_count@3 AND c1_min@0 <= 3 AND 3 <= c1_max@1"; let predicate_expr = test_build_predicate_expression(&expr, &schema, &mut RequiredColumns::new()); assert_eq!(predicate_expr.to_string(), expected_expr); @@ -2601,19 +2680,7 @@ mod tests { vec![lit(1), lit(2), lit(3)], true, )); - let expected_expr = "\ - CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE c1_min@0 != 1 
OR 1 != c1_max@1 \ - END \ - AND CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE c1_min@0 != 2 OR 2 != c1_max@1 \ - END \ - AND CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE c1_min@0 != 3 OR 3 != c1_max@1 \ - END"; + let expected_expr = "c1_null_count@2 != c1_row_count@3 AND (c1_min@0 != 1 OR 1 != c1_max@1) AND c1_null_count@2 != c1_row_count@3 AND (c1_min@0 != 2 OR 2 != c1_max@1) AND c1_null_count@2 != c1_row_count@3 AND (c1_min@0 != 3 OR 3 != c1_max@1)"; let predicate_expr = test_build_predicate_expression(&expr, &schema, &mut RequiredColumns::new()); assert_eq!(predicate_expr.to_string(), expected_expr); @@ -2659,24 +2726,7 @@ mod tests { // test c1 in(1, 2) and c2 BETWEEN 4 AND 5 let expr3 = expr1.and(expr2); - let expected_expr = "\ - (\ - CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE c1_min@0 <= 1 AND 1 <= c1_max@1 \ - END \ - OR CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE c1_min@0 <= 2 AND 2 <= c1_max@1 \ - END\ - ) AND CASE \ - WHEN c2_null_count@5 = c2_row_count@6 THEN false \ - ELSE c2_max@4 >= 4 \ - END \ - AND CASE \ - WHEN c2_null_count@5 = c2_row_count@6 THEN false \ - ELSE c2_min@7 <= 5 \ - END"; + let expected_expr = "(c1_null_count@2 != c1_row_count@3 AND c1_min@0 <= 1 AND 1 <= c1_max@1 OR c1_null_count@2 != c1_row_count@3 AND c1_min@0 <= 2 AND 2 <= c1_max@1) AND c2_null_count@5 != c2_row_count@6 AND c2_max@4 >= 4 AND c2_null_count@5 != c2_row_count@6 AND c2_min@7 <= 5"; let predicate_expr = test_build_predicate_expression(&expr3, &schema, &mut RequiredColumns::new()); assert_eq!(predicate_expr.to_string(), expected_expr); @@ -2703,10 +2753,7 @@ mod tests { #[test] fn row_group_predicate_cast() -> Result<()> { let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]); - let expected_expr = "CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE CAST(c1_min@0 AS Int64) <= 1 AND 1 <= CAST(c1_max@1 AS Int64) \ - END"; + let expected_expr = "c1_null_count@2 != c1_row_count@3 AND CAST(c1_min@0 AS Int64) <= 1 AND 1 <= CAST(c1_max@1 AS Int64)"; // test cast(c1 as int64) = 1 // test column on the left @@ -2721,10 +2768,8 @@ mod tests { test_build_predicate_expression(&expr, &schema, &mut RequiredColumns::new()); assert_eq!(predicate_expr.to_string(), expected_expr); - let expected_expr = "CASE \ - WHEN c1_null_count@1 = c1_row_count@2 THEN false \ - ELSE TRY_CAST(c1_max@0 AS Int64) > 1 \ - END"; + let expected_expr = + "c1_null_count@1 != c1_row_count@2 AND TRY_CAST(c1_max@0 AS Int64) > 1"; // test column on the left let expr = @@ -2756,18 +2801,7 @@ mod tests { ], false, )); - let expected_expr = "CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE CAST(c1_min@0 AS Int64) <= 1 AND 1 <= CAST(c1_max@1 AS Int64) \ - END \ - OR CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE CAST(c1_min@0 AS Int64) <= 2 AND 2 <= CAST(c1_max@1 AS Int64) \ - END \ - OR CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE CAST(c1_min@0 AS Int64) <= 3 AND 3 <= CAST(c1_max@1 AS Int64) \ - END"; + let expected_expr = "c1_null_count@2 != c1_row_count@3 AND CAST(c1_min@0 AS Int64) <= 1 AND 1 <= CAST(c1_max@1 AS Int64) OR c1_null_count@2 != c1_row_count@3 AND CAST(c1_min@0 AS Int64) <= 2 AND 2 <= CAST(c1_max@1 AS Int64) OR c1_null_count@2 != c1_row_count@3 AND CAST(c1_min@0 AS Int64) <= 3 AND 3 <= CAST(c1_max@1 AS Int64)"; let predicate_expr = test_build_predicate_expression(&expr, &schema, &mut RequiredColumns::new()); 
assert_eq!(predicate_expr.to_string(), expected_expr); @@ -2781,18 +2815,7 @@ mod tests { ], true, )); - let expected_expr = "CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE CAST(c1_min@0 AS Int64) != 1 OR 1 != CAST(c1_max@1 AS Int64) \ - END \ - AND CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE CAST(c1_min@0 AS Int64) != 2 OR 2 != CAST(c1_max@1 AS Int64) \ - END \ - AND CASE \ - WHEN c1_null_count@2 = c1_row_count@3 THEN false \ - ELSE CAST(c1_min@0 AS Int64) != 3 OR 3 != CAST(c1_max@1 AS Int64) \ - END"; + let expected_expr = "c1_null_count@2 != c1_row_count@3 AND (CAST(c1_min@0 AS Int64) != 1 OR 1 != CAST(c1_max@1 AS Int64)) AND c1_null_count@2 != c1_row_count@3 AND (CAST(c1_min@0 AS Int64) != 2 OR 2 != CAST(c1_max@1 AS Int64)) AND c1_null_count@2 != c1_row_count@3 AND (CAST(c1_min@0 AS Int64) != 3 OR 3 != CAST(c1_max@1 AS Int64))"; let predicate_expr = test_build_predicate_expression(&expr, &schema, &mut RequiredColumns::new()); assert_eq!(predicate_expr.to_string(), expected_expr); diff --git a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt index 24ffb963bbe2..806886b07170 100644 --- a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt +++ b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt @@ -85,7 +85,7 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [a@0 ASC NULLS LAST] 02)--SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a], predicate=b@1 > 2, pruning_predicate=CASE WHEN b_null_count@1 = b_row_count@2 THEN false ELSE b_max@0 > 2 END, required_guarantees=[] +03)----ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a], predicate=b@1 > 2, pruning_predicate=b_null_count@1 != b_row_count@2 AND b_max@0 > 2, required_guarantees=[] # When filter pushdown *is* enabled, ParquetExec can filter exactly, @@ -113,7 +113,7 @@ physical_plan 03)----CoalesceBatchesExec: target_batch_size=8192 04)------FilterExec: b@1 > 2, projection=[a@0] 05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 -06)----------ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a, b], predicate=b@1 > 2, pruning_predicate=CASE WHEN b_null_count@1 = b_row_count@2 THEN false ELSE b_max@0 > 2 END, required_guarantees=[] +06)----------ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a, b], predicate=b@1 > 2, pruning_predicate=b_null_count@1 != b_row_count@2 AND b_max@0 > 2, required_guarantees=[] # also test querying on columns that are not in all the files query T @@ -131,7 +131,7 @@ logical_plan physical_plan 
01)SortPreservingMergeExec: [a@0 ASC NULLS LAST] 02)--SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a], predicate=b@1 > 2 AND a@0 IS NOT NULL, pruning_predicate=CASE WHEN b_null_count@1 = b_row_count@2 THEN false ELSE b_max@0 > 2 END AND a_null_count@4 != a_row_count@3, required_guarantees=[] +03)----ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[a], predicate=b@1 > 2 AND a@0 IS NOT NULL, pruning_predicate=b_null_count@1 != b_row_count@2 AND b_max@0 > 2 AND a_null_count@4 != a_row_count@3, required_guarantees=[] query I @@ -148,7 +148,7 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [b@0 ASC NULLS LAST] 02)--SortExec: expr=[b@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[b], predicate=a@0 = bar, pruning_predicate=CASE WHEN a_null_count@2 = a_row_count@3 THEN false ELSE a_min@0 <= bar AND bar <= a_max@1 END, required_guarantees=[a in (bar)] +03)----ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet]]}, projection=[b], predicate=a@0 = bar, pruning_predicate=a_null_count@2 != a_row_count@3 AND a_min@0 <= bar AND bar <= a_max@1, required_guarantees=[a in (bar)] ## cleanup statement ok diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index 858e42106221..a1db84b87850 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -61,7 +61,7 @@ logical_plan physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 02)--FilterExec: column1@0 != 42 -03)----ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..88], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:88..176], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:176..264], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:264..351]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=CASE WHEN column1_null_count@2 = column1_row_count@3 THEN false ELSE column1_min@0 != 42 OR 42 != column1_max@1 END, required_guarantees=[column1 not in (42)] +03)----ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..88], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:88..176], 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:176..264], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:264..351]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=column1_null_count@2 != column1_row_count@3 AND (column1_min@0 != 42 OR 42 != column1_max@1), required_guarantees=[column1 not in (42)] # disable round robin repartitioning statement ok @@ -77,7 +77,7 @@ logical_plan physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 02)--FilterExec: column1@0 != 42 -03)----ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..88], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:88..176], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:176..264], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:264..351]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=CASE WHEN column1_null_count@2 = column1_row_count@3 THEN false ELSE column1_min@0 != 42 OR 42 != column1_max@1 END, required_guarantees=[column1 not in (42)] +03)----ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..88], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:88..176], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:176..264], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:264..351]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=column1_null_count@2 != column1_row_count@3 AND (column1_min@0 != 42 OR 42 != column1_max@1), required_guarantees=[column1 not in (42)] # enable round robin repartitioning again statement ok @@ -102,7 +102,7 @@ physical_plan 02)--SortExec: expr=[column1@0 ASC NULLS LAST], preserve_partitioning=[true] 03)----CoalesceBatchesExec: target_batch_size=8192 04)------FilterExec: column1@0 != 42 -05)--------ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..174], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:174..342, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..6], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:6..180], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:180..351]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=CASE WHEN column1_null_count@2 = column1_row_count@3 THEN false ELSE column1_min@0 != 42 OR 42 != column1_max@1 END, required_guarantees=[column1 not in (42)] +05)--------ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..174], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:174..342, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..6], 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:6..180], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:180..351]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=column1_null_count@2 != column1_row_count@3 AND (column1_min@0 != 42 OR 42 != column1_max@1), required_guarantees=[column1 not in (42)] ## Read the files as though they are ordered @@ -138,7 +138,7 @@ physical_plan 01)SortPreservingMergeExec: [column1@0 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: column1@0 != 42 -04)------ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..171], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..175], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:175..351], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:171..342]]}, projection=[column1], output_ordering=[column1@0 ASC NULLS LAST], predicate=column1@0 != 42, pruning_predicate=CASE WHEN column1_null_count@2 = column1_row_count@3 THEN false ELSE column1_min@0 != 42 OR 42 != column1_max@1 END, required_guarantees=[column1 not in (42)] +04)------ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..171], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..175], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:175..351], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:171..342]]}, projection=[column1], output_ordering=[column1@0 ASC NULLS LAST], predicate=column1@0 != 42, pruning_predicate=column1_null_count@2 != column1_row_count@3 AND (column1_min@0 != 42 OR 42 != column1_max@1), required_guarantees=[column1 not in (42)] # Cleanup statement ok diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 188e2ae0915f..56f088dfd10f 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -5054,7 +5054,7 @@ select b, row_number() over (order by a) from (select TRUE as a, 1 as b); 1 1 # test window functions on boolean columns -query T +statement count 0 create table t1 (id int, bool_col boolean) as values (1, true), (2, false), From d7aeb1a2571708582422366896a6e9f749816afa Mon Sep 17 00:00:00 2001 From: Arttu Date: Fri, 20 Dec 2024 15:03:41 +0100 Subject: [PATCH 5/9] enable DF's nested_expressions feature by in datafusion-substrait tests to make them pass (#13857) fixes #13854 Co-authored-by: Arttu Voutilainen --- datafusion/substrait/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 09c6e0351ed3..5e056d040e4c 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -46,6 +46,7 @@ substrait = { version = "0.50", features = ["serde"] } url = { workspace = true } [dev-dependencies] +datafusion = { workspace = true, features = ["nested_expressions"] } datafusion-functions-aggregate = { workspace = true } serde_json = "1.0" tokio = { workspace = true } From 31acf452d92c790234de385fc76b7347e8b9e7c6 
Mon Sep 17 00:00:00 2001 From: Dmitrii Blaginin Date: Fri, 20 Dec 2024 17:28:08 +0300 Subject: [PATCH 6/9] Add configurable normalization for configuration options and preserve case for S3 paths (#13576) * Do not normalize values * Fix tests & update docs * Prettier * Lowercase config params * Unify transform and parse * Fix tests * Rename `default_transform` and relax boundaries * Make `compression` case-insensitive * Comment to new line * Deprecate and ignore `enable_options_value_normalization` * Update datafusion/common/src/config.rs * fix typo --------- Co-authored-by: Oleks V --- datafusion-cli/Cargo.lock | 1 + datafusion-cli/src/object_storage.rs | 9 ++- datafusion/common/Cargo.toml | 1 + datafusion/common/src/config.rs | 80 +++++++++++++------ datafusion/core/src/datasource/stream.rs | 2 +- datafusion/core/tests/config_from_env.rs | 17 +++- datafusion/sql/src/planner.rs | 35 +------- datafusion/sql/src/statement.rs | 3 +- datafusion/sql/tests/sql_integration.rs | 69 +--------------- .../test_files/create_external_table.slt | 14 ++++ .../test_files/information_schema.slt | 4 +- .../sqllogictest/test_files/set_variable.slt | 8 +- docs/source/user-guide/configs.md | 2 +- 13 files changed, 104 insertions(+), 141 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index d33cbf396470..2ffc64114ef7 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1331,6 +1331,7 @@ dependencies = [ "hashbrown 0.14.5", "indexmap", "libc", + "log", "object_store", "parquet", "paste", diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs index de66b60fe449..045c924e5037 100644 --- a/datafusion-cli/src/object_storage.rs +++ b/datafusion-cli/src/object_storage.rs @@ -472,12 +472,13 @@ mod tests { #[tokio::test] async fn s3_object_store_builder() -> Result<()> { - let access_key_id = "fake_access_key_id"; - let secret_access_key = "fake_secret_access_key"; + // "fake" is uppercase to ensure the values are not lowercased when parsed + let access_key_id = "FAKE_access_key_id"; + let secret_access_key = "FAKE_secret_access_key"; let region = "fake_us-east-2"; let endpoint = "endpoint33"; - let session_token = "fake_session_token"; - let location = "s3://bucket/path/file.parquet"; + let session_token = "FAKE_session_token"; + let location = "s3://bucket/path/FAKE/file.parquet"; let table_url = ListingTableUrl::parse(location)?; let scheme = table_url.scheme(); diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 82909404e455..a81ec724dd66 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -57,6 +57,7 @@ half = { workspace = true } hashbrown = { workspace = true } indexmap = { workspace = true } libc = "0.2.140" +log = { workspace = true } object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true, default-features = true } paste = "1.0.15" diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 4948833b1f5f..6e64700bd2e0 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -19,6 +19,7 @@ use std::any::Any; use std::collections::{BTreeMap, HashMap}; +use std::error::Error; use std::fmt::{self, Display}; use std::str::FromStr; @@ -29,7 +30,9 @@ use crate::{DataFusionError, Result}; /// A macro that wraps a configuration struct and automatically derives /// [`Default`] and [`ConfigField`] for it, allowing it to be used -/// in the [`ConfigOptions`] configuration tree +/// in 
the [`ConfigOptions`] configuration tree. +/// +/// `transform` is used to normalize values before parsing. /// /// For example, /// @@ -38,7 +41,7 @@ use crate::{DataFusionError, Result}; /// /// Amazing config /// pub struct MyConfig { /// /// Field 1 doc -/// field1: String, default = "".to_string() +/// field1: String, transform = str::to_lowercase, default = "".to_string() /// /// /// Field 2 doc /// field2: usize, default = 232 @@ -67,9 +70,12 @@ use crate::{DataFusionError, Result}; /// fn set(&mut self, key: &str, value: &str) -> Result<()> { /// let (key, rem) = key.split_once('.').unwrap_or((key, "")); /// match key { -/// "field1" => self.field1.set(rem, value), -/// "field2" => self.field2.set(rem, value), -/// "field3" => self.field3.set(rem, value), +/// "field1" => { +/// let value = str::to_lowercase(value); +/// self.field1.set(rem, value.as_ref()) +/// }, +/// "field2" => self.field2.set(rem, value.as_ref()), +/// "field3" => self.field3.set(rem, value.as_ref()), /// _ => _internal_err!( /// "Config value \"{}\" not found on MyConfig", /// key @@ -102,7 +108,6 @@ use crate::{DataFusionError, Result}; /// ``` /// /// NB: Misplaced commas may result in nonsensical errors -/// #[macro_export] macro_rules! config_namespace { ( @@ -110,7 +115,7 @@ macro_rules! config_namespace { $vis:vis struct $struct_name:ident { $( $(#[doc = $d:tt])* - $field_vis:vis $field_name:ident : $field_type:ty, default = $default:expr + $field_vis:vis $field_name:ident : $field_type:ty, $(warn = $warn: expr,)? $(transform = $transform:expr,)? default = $default:expr )*$(,)* } ) => { @@ -127,9 +132,14 @@ macro_rules! config_namespace { impl ConfigField for $struct_name { fn set(&mut self, key: &str, value: &str) -> Result<()> { let (key, rem) = key.split_once('.').unwrap_or((key, "")); + match key { $( - stringify!($field_name) => self.$field_name.set(rem, value), + stringify!($field_name) => { + $(let value = $transform(value);)? + $(log::warn!($warn);)? + self.$field_name.set(rem, value.as_ref()) + }, )* _ => return _config_err!( "Config value \"{}\" not found on {}", key, stringify!($struct_name) @@ -211,12 +221,15 @@ config_namespace! { /// When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) pub enable_ident_normalization: bool, default = true - /// When set to true, SQL parser will normalize options value (convert value to lowercase) - pub enable_options_value_normalization: bool, default = true + /// When set to true, SQL parser will normalize options value (convert value to lowercase). + /// Note that this option is ignored and will be removed in the future. All case-insensitive values + /// are normalized automatically. + pub enable_options_value_normalization: bool, warn = "`enable_options_value_normalization` is deprecated and ignored", default = false /// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, /// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. pub dialect: String, default = "generic".to_string() + // no need to lowercase because `sqlparser::dialect_from_str`] is case-insensitive /// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but /// ignore the length. If false, error if a `VARCHAR` with a length is @@ -431,7 +444,7 @@ config_namespace! { /// /// Note that this default setting is not the same as /// the default parquet writer setting. 
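The `transform` hook introduced above is what makes these option values case-insensitive: the raw string is normalized before it ever reaches the field's `FromStr`-based parser. A minimal, self-contained sketch of that ordering (plain std and a hypothetical helper name, not the DataFusion macro itself):

```rust
use std::str::FromStr;

// Lowercase first, then parse: the same sequencing the generated setters use
// for fields annotated with `transform = str::to_lowercase`.
fn parse_case_insensitive<T: FromStr>(raw: &str) -> Result<T, T::Err> {
    T::from_str(raw.to_lowercase().as_str())
}

fn main() {
    // Any casing of a boolean option value is accepted...
    assert!(parse_case_insensitive::<bool>("TRUE").unwrap());
    assert!(!parse_case_insensitive::<bool>("False").unwrap());
    // ...while genuinely invalid input still fails to parse.
    assert!(parse_case_insensitive::<bool>("ttruee").is_err());
}
```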
- pub compression: Option, default = Some("zstd(3)".into()) + pub compression: Option, transform = str::to_lowercase, default = Some("zstd(3)".into()) /// (writing) Sets if dictionary encoding is enabled. If NULL, uses /// default parquet writer setting @@ -444,7 +457,7 @@ config_namespace! { /// Valid values are: "none", "chunk", and "page" /// These values are not case sensitive. If NULL, uses /// default parquet writer setting - pub statistics_enabled: Option, default = Some("page".into()) + pub statistics_enabled: Option, transform = str::to_lowercase, default = Some("page".into()) /// (writing) Sets max statistics size for any column. If NULL, uses /// default parquet writer setting @@ -470,7 +483,7 @@ config_namespace! { /// delta_byte_array, rle_dictionary, and byte_stream_split. /// These values are not case sensitive. If NULL, uses /// default parquet writer setting - pub encoding: Option, default = None + pub encoding: Option, transform = str::to_lowercase, default = None /// (writing) Use any available bloom filters when reading parquet files pub bloom_filter_on_read: bool, default = true @@ -971,21 +984,37 @@ impl ConfigField for Option { } } +fn default_transform(input: &str) -> Result +where + T: FromStr, + ::Err: Sync + Send + Error + 'static, +{ + input.parse().map_err(|e| { + DataFusionError::Context( + format!( + "Error parsing '{}' as {}", + input, + std::any::type_name::() + ), + Box::new(DataFusionError::External(Box::new(e))), + ) + }) +} + #[macro_export] macro_rules! config_field { ($t:ty) => { + config_field!($t, value => default_transform(value)?); + }; + + ($t:ty, $arg:ident => $transform:expr) => { impl ConfigField for $t { fn visit(&self, v: &mut V, key: &str, description: &'static str) { v.some(key, self, description) } - fn set(&mut self, _: &str, value: &str) -> Result<()> { - *self = value.parse().map_err(|e| { - DataFusionError::Context( - format!(concat!("Error parsing {} as ", stringify!($t),), value), - Box::new(DataFusionError::External(Box::new(e))), - ) - })?; + fn set(&mut self, _: &str, $arg: &str) -> Result<()> { + *self = $transform; Ok(()) } } @@ -993,7 +1022,7 @@ macro_rules! config_field { } config_field!(String); -config_field!(bool); +config_field!(bool, value => default_transform(value.to_lowercase().as_str())?); config_field!(usize); config_field!(f64); config_field!(u64); @@ -1508,7 +1537,7 @@ macro_rules! config_namespace_with_hashmap { $vis:vis struct $struct_name:ident { $( $(#[doc = $d:tt])* - $field_vis:vis $field_name:ident : $field_type:ty, default = $default:expr + $field_vis:vis $field_name:ident : $field_type:ty, $(transform = $transform:expr,)? default = $default:expr )*$(,)* } ) => { @@ -1527,7 +1556,10 @@ macro_rules! config_namespace_with_hashmap { let (key, rem) = key.split_once('.').unwrap_or((key, "")); match key { $( - stringify!($field_name) => self.$field_name.set(rem, value), + stringify!($field_name) => { + $(let value = $transform(value);)? + self.$field_name.set(rem, value.as_ref()) + }, )* _ => _config_err!( "Config value \"{}\" not found on {}", key, stringify!($struct_name) @@ -1606,7 +1638,7 @@ config_namespace_with_hashmap! { /// lzo, brotli(level), lz4, zstd(level), and lz4_raw. /// These values are not case-sensitive. 
If NULL, uses /// default parquet options - pub compression: Option, default = None + pub compression: Option, transform = str::to_lowercase, default = None /// Sets if statistics are enabled for the column /// Valid values are: "none", "chunk", and "page" diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs index d8fad5b6cd37..2cea37fe17e2 100644 --- a/datafusion/core/src/datasource/stream.rs +++ b/datafusion/core/src/datasource/stream.rs @@ -62,7 +62,7 @@ impl TableProviderFactory for StreamTableFactory { let header = if let Ok(opt) = cmd .options .get("format.has_header") - .map(|has_header| bool::from_str(has_header)) + .map(|has_header| bool::from_str(has_header.to_lowercase().as_str())) .transpose() { opt.unwrap_or(false) diff --git a/datafusion/core/tests/config_from_env.rs b/datafusion/core/tests/config_from_env.rs index a5a5a4524e60..976597c8a9ac 100644 --- a/datafusion/core/tests/config_from_env.rs +++ b/datafusion/core/tests/config_from_env.rs @@ -22,10 +22,19 @@ use std::env; fn from_env() { // Note: these must be a single test to avoid interference from concurrent execution let env_key = "DATAFUSION_OPTIMIZER_FILTER_NULL_JOIN_KEYS"; - env::set_var(env_key, "true"); - let config = ConfigOptions::from_env().unwrap(); + // valid testing in different cases + for bool_option in ["true", "TRUE", "True", "tRUe"] { + env::set_var(env_key, bool_option); + let config = ConfigOptions::from_env().unwrap(); + env::remove_var(env_key); + assert!(config.optimizer.filter_null_join_keys); + } + + // invalid testing + env::set_var(env_key, "ttruee"); + let err = ConfigOptions::from_env().unwrap_err().strip_backtrace(); + assert_eq!(err, "Error parsing 'ttruee' as bool\ncaused by\nExternal error: provided string was not `true` or `false`"); env::remove_var(env_key); - assert!(config.optimizer.filter_null_join_keys); let env_key = "DATAFUSION_EXECUTION_BATCH_SIZE"; @@ -37,7 +46,7 @@ fn from_env() { // for invalid testing env::set_var(env_key, "abc"); let err = ConfigOptions::from_env().unwrap_err().strip_backtrace(); - assert_eq!(err, "Error parsing abc as usize\ncaused by\nExternal error: invalid digit found in string"); + assert_eq!(err, "Error parsing 'abc' as usize\ncaused by\nExternal error: invalid digit found in string"); env::remove_var(env_key); let config = ConfigOptions::from_env().unwrap(); diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 59fa4ca5f1f6..2d0ba8f8d994 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -24,10 +24,10 @@ use arrow_schema::*; use datafusion_common::{ field_not_found, internal_err, plan_datafusion_err, DFSchemaRef, SchemaError, }; +use sqlparser::ast::TimezoneInfo; use sqlparser::ast::{ArrayElemTypeDef, ExactNumberInfo}; use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption}; use sqlparser::ast::{DataType as SQLDataType, Ident, ObjectName, TableAlias}; -use sqlparser::ast::{TimezoneInfo, Value}; use datafusion_common::TableReference; use datafusion_common::{ @@ -38,7 +38,7 @@ use datafusion_expr::logical_plan::{LogicalPlan, LogicalPlanBuilder}; use datafusion_expr::utils::find_column_exprs; use datafusion_expr::{col, Expr}; -use crate::utils::{make_decimal_type, value_to_string}; +use crate::utils::make_decimal_type; pub use datafusion_expr::planner::ContextProvider; /// SQL parser options @@ -56,7 +56,7 @@ impl Default for ParserOptions { parse_float_as_decimal: false, enable_ident_normalization: true, support_varchar_with_length: true, - 
enable_options_value_normalization: true, + enable_options_value_normalization: false, } } } @@ -87,32 +87,6 @@ impl IdentNormalizer { } } -/// Value Normalizer -#[derive(Debug)] -pub struct ValueNormalizer { - normalize: bool, -} - -impl Default for ValueNormalizer { - fn default() -> Self { - Self { normalize: true } - } -} - -impl ValueNormalizer { - pub fn new(normalize: bool) -> Self { - Self { normalize } - } - - pub fn normalize(&self, value: Value) -> Option { - match (value_to_string(&value), self.normalize) { - (Some(s), true) => Some(s.to_ascii_lowercase()), - (Some(s), false) => Some(s), - (None, _) => None, - } - } -} - /// Struct to store the states used by the Planner. The Planner will leverage the states to resolve /// CTEs, Views, subqueries and PREPARE statements. The states include /// Common Table Expression (CTE) provided with WITH clause and @@ -254,7 +228,6 @@ pub struct SqlToRel<'a, S: ContextProvider> { pub(crate) context_provider: &'a S, pub(crate) options: ParserOptions, pub(crate) ident_normalizer: IdentNormalizer, - pub(crate) value_normalizer: ValueNormalizer, } impl<'a, S: ContextProvider> SqlToRel<'a, S> { @@ -266,13 +239,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { /// Create a new query planner pub fn new_with_options(context_provider: &'a S, options: ParserOptions) -> Self { let ident_normalize = options.enable_ident_normalization; - let options_value_normalize = options.enable_options_value_normalization; SqlToRel { context_provider, options, ident_normalizer: IdentNormalizer::new(ident_normalize), - value_normalizer: ValueNormalizer::new(options_value_normalize), } } diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 38695f98b5fe..f750afbc4a53 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1386,8 +1386,7 @@ impl SqlToRel<'_, S> { return plan_err!("Option {key} is specified multiple times"); } - let Some(value_string) = self.value_normalizer.normalize(value.clone()) - else { + let Some(value_string) = crate::utils::value_to_string(&value) else { return plan_err!("Unsupported Value {}", value); }; diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 9363d16c9fc9..786f72741282 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -29,11 +29,10 @@ use datafusion_common::{ }; use datafusion_expr::{ col, - dml::CopyTo, logical_plan::{LogicalPlan, Prepare}, test::function_stub::sum_udaf, - ColumnarValue, CreateExternalTable, CreateIndex, DdlStatement, ScalarUDF, - ScalarUDFImpl, Signature, Statement, Volatility, + ColumnarValue, CreateIndex, DdlStatement, ScalarUDF, ScalarUDFImpl, Signature, + Statement, Volatility, }; use datafusion_functions::{string, unicode}; use datafusion_sql::{ @@ -161,70 +160,6 @@ fn parse_ident_normalization() { } } -#[test] -fn test_parse_options_value_normalization() { - let test_data = [ - ( - "CREATE EXTERNAL TABLE test OPTIONS ('location' 'LoCaTiOn') STORED AS PARQUET LOCATION 'fake_location'", - "CreateExternalTable: Bare { table: \"test\" }", - HashMap::from([("format.location", "LoCaTiOn")]), - false, - ), - ( - "CREATE EXTERNAL TABLE test OPTIONS ('location' 'LoCaTiOn') STORED AS PARQUET LOCATION 'fake_location'", - "CreateExternalTable: Bare { table: \"test\" }", - HashMap::from([("format.location", "location")]), - true, - ), - ( - "COPY test TO 'fake_location' STORED AS PARQUET OPTIONS ('location' 'LoCaTiOn')", - "CopyTo: format=csv 
output_url=fake_location options: (format.location LoCaTiOn)\n TableScan: test", - HashMap::from([("format.location", "LoCaTiOn")]), - false, - ), - ( - "COPY test TO 'fake_location' STORED AS PARQUET OPTIONS ('location' 'LoCaTiOn')", - "CopyTo: format=csv output_url=fake_location options: (format.location location)\n TableScan: test", - HashMap::from([("format.location", "location")]), - true, - ), - ]; - - for (sql, expected_plan, expected_options, enable_options_value_normalization) in - test_data - { - let plan = logical_plan_with_options( - sql, - ParserOptions { - parse_float_as_decimal: false, - enable_ident_normalization: false, - support_varchar_with_length: false, - enable_options_value_normalization, - }, - ); - if let Ok(plan) = plan { - assert_eq!(expected_plan, format!("{plan}")); - - match plan { - LogicalPlan::Ddl(DdlStatement::CreateExternalTable( - CreateExternalTable { options, .. }, - )) - | LogicalPlan::Copy(CopyTo { options, .. }) => { - expected_options.iter().for_each(|(k, v)| { - assert_eq!(Some(&v.to_string()), options.get(*k)); - }); - } - _ => panic!( - "Expected Ddl(CreateExternalTable) or Copy(CopyTo) but got {:?}", - plan - ), - } - } else { - assert_eq!(expected_plan, plan.unwrap_err().strip_backtrace()); - } - } -} - #[test] fn select_no_relation() { quick_test( diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt index ed001cf9f84c..6a63ea1cd3e4 100644 --- a/datafusion/sqllogictest/test_files/create_external_table.slt +++ b/datafusion/sqllogictest/test_files/create_external_table.slt @@ -226,6 +226,20 @@ OPTIONS ( has_header false, compression gzip); +# Verify that some options are case insensitive +statement ok +CREATE EXTERNAL TABLE IF NOT EXISTS region ( + r_regionkey BIGINT, + r_name VARCHAR, + r_comment VARCHAR, + r_rev VARCHAR, +) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' +OPTIONS ( + format.delimiter '|', + has_header FALSE, + compression GZIP); + + # Create an external parquet table and infer schema to order by # query should succeed diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 4d51a61c8a52..1f6b5f9852ec 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -258,7 +258,7 @@ datafusion.optimizer.skip_failed_rules false datafusion.optimizer.top_down_join_key_reordering true datafusion.sql_parser.dialect generic datafusion.sql_parser.enable_ident_normalization true -datafusion.sql_parser.enable_options_value_normalization true +datafusion.sql_parser.enable_options_value_normalization false datafusion.sql_parser.parse_float_as_decimal false datafusion.sql_parser.support_varchar_with_length true @@ -351,7 +351,7 @@ datafusion.optimizer.skip_failed_rules false When set to true, the logical plan datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. 
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) -datafusion.sql_parser.enable_options_value_normalization true When set to true, SQL parser will normalize options value (convert value to lowercase) +datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. diff --git a/datafusion/sqllogictest/test_files/set_variable.slt b/datafusion/sqllogictest/test_files/set_variable.slt index 6f19c9f4d42f..bb4ac920d032 100644 --- a/datafusion/sqllogictest/test_files/set_variable.slt +++ b/datafusion/sqllogictest/test_files/set_variable.slt @@ -93,10 +93,10 @@ datafusion.execution.coalesce_batches false statement ok set datafusion.catalog.information_schema = true -statement error DataFusion error: Error parsing 1 as bool +statement error DataFusion error: Error parsing '1' as bool SET datafusion.execution.coalesce_batches to 1 -statement error DataFusion error: Error parsing abc as bool +statement error DataFusion error: Error parsing 'abc' as bool SET datafusion.execution.coalesce_batches to abc # set u64 variable @@ -132,10 +132,10 @@ datafusion.execution.batch_size 2 statement ok set datafusion.catalog.information_schema = true -statement error DataFusion error: Error parsing -1 as usize +statement error DataFusion error: Error parsing '-1' as usize SET datafusion.execution.batch_size to -1 -statement error DataFusion error: Error parsing abc as usize +statement error DataFusion error: Error parsing 'abc' as usize SET datafusion.execution.batch_size to abc statement error External error: invalid digit found in string diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 6a49fda668a9..77433c85cb66 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -122,6 +122,6 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.explain.show_schema | false | When set to true, the explain statement will print schema information | | datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type | | datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) | -| datafusion.sql_parser.enable_options_value_normalization | true | When set to true, SQL parser will normalize options value (convert value to lowercase) | +| datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. 
| | datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. | | datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. | From 87b77bba4fb47980245c19b8dd289a3ca83cf7e5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 20 Dec 2024 09:32:48 -0500 Subject: [PATCH 7/9] Improve`Signature` and `comparison_coercion` documentation (#13840) * Improve Signature documentation more * Apply suggestions from code review Co-authored-by: Piotr Findeisen --------- Co-authored-by: Piotr Findeisen --- datafusion/expr-common/src/signature.rs | 33 ++++++++++++------- .../expr-common/src/type_coercion/binary.rs | 22 ++++++++++++- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index b5d25b4338c7..77ba1858e35b 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -103,9 +103,13 @@ pub enum TypeSignature { /// A function such as `concat` is `Variadic(vec![DataType::Utf8, /// DataType::LargeUtf8])` Variadic(Vec), - /// The acceptable signature and coercions rules to coerce arguments to this - /// signature are special for this function. If this signature is specified, - /// DataFusion will call `ScalarUDFImpl::coerce_types` to prepare argument types. + /// The acceptable signature and coercions rules are special for this + /// function. + /// + /// If this signature is specified, + /// DataFusion will call [`ScalarUDFImpl::coerce_types`] to prepare argument types. + /// + /// [`ScalarUDFImpl::coerce_types`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/trait.ScalarUDFImpl.html#method.coerce_types UserDefined, /// One or more arguments with arbitrary types VariadicAny, @@ -123,24 +127,29 @@ pub enum TypeSignature { /// One or more arguments belonging to the [`TypeSignatureClass`], in order. /// /// For example, `Coercible(vec![logical_float64()])` accepts - /// arguments like `vec![DataType::Int32]` or `vec![DataType::Float32]` + /// arguments like `vec![Int32]` or `vec![Float32]` /// since i32 and f32 can be cast to f64 /// /// For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`]. Coercible(Vec), - /// One or more arguments that can be "compared" + /// One or more arguments coercible to a single, comparable type. + /// + /// Each argument will be coerced to a single type using the + /// coercion rules described in [`comparison_coercion_numeric`]. + /// + /// # Examples + /// + /// If the `nullif(1, 2)` function is called with `i32` and `i64` arguments + /// the types will both be coerced to `i64` before the function is invoked. /// - /// Each argument will be coerced to a single type based on comparison rules. - /// For example a function called with `i32` and `i64` has coerced type `Int64` so - /// each argument will be coerced to `Int64` before the function is invoked. + /// If the `nullif('1', 2)` function is called with `Utf8` and `i64` arguments + /// the types will both be coerced to `Utf8` before the function is invoked. 
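The coercion rules referenced here are easiest to see on concrete types. A short sketch, assuming the usual `datafusion_expr::type_coercion::binary` re-export of `comparison_coercion` (adjust the path to your build if it differs); the expected results follow the documentation added in this patch:

```rust
use arrow::datatypes::DataType;
use datafusion_expr::type_coercion::binary::comparison_coercion;

fn main() {
    // Numeric comparison: the lower precision side widens to the higher one.
    assert_eq!(
        comparison_coercion(&DataType::Int32, &DataType::Int64),
        Some(DataType::Int64)
    );
    // Numeric / string comparison: both sides are coerced to the string type.
    assert_eq!(
        comparison_coercion(&DataType::Utf8, &DataType::Int64),
        Some(DataType::Utf8)
    );
}
```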
/// /// Note: - /// - If compares with numeric and string, numeric is preferred for numeric string cases. For example, `nullif('2', 1)` has coerced types `Int64`. - /// - If the result is Null, it will be coerced to String (Utf8View). - /// - See [`comparison_coercion`] for more details. /// - For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]). + /// - If all arguments have type [`DataType::Null`], they are coerced to `Utf8` /// - /// [`comparison_coercion`]: crate::type_coercion::binary::comparison_coercion + /// [`comparison_coercion_numeric`]: crate::type_coercion::binary::comparison_coercion_numeric Comparable(usize), /// One or more arguments of arbitrary types. /// diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 49c1ccff3814..c775d3131692 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -625,6 +625,19 @@ pub fn try_type_union_resolution_with_struct( /// data type. However, users can write queries where the two arguments are /// different data types. In such cases, the data types are automatically cast /// (coerced) to a single data type to pass to the kernels. +/// +/// # Numeric comparisons +/// +/// When comparing numeric values, the lower precision type is coerced to the +/// higher precision type to avoid losing data. For example when comparing +/// `Int32` to `Int64` the coerced type is `Int64` so the `Int32` argument will +/// be cast. +/// +/// # Numeric / String comparisons +/// +/// When comparing numeric values and strings, both values will be coerced to +/// strings. For example when comparing `'2' > 1`, the arguments will be +/// coerced to `Utf8` for comparison pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { if lhs_type == rhs_type { // same type => equality is possible @@ -642,7 +655,14 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option 1` if `1` is an `Int32`, the arguments +/// will be coerced to `Int32`. pub fn comparison_coercion_numeric( lhs_type: &DataType, rhs_type: &DataType, From 74480ac58ee5658a275b1e8b0ebd9764d0e48844 Mon Sep 17 00:00:00 2001 From: zhuliquan Date: Fri, 20 Dec 2024 22:35:53 +0800 Subject: [PATCH 8/9] feat: support normalized expr in CSE (#13315) * feat: support normalized expr in CSE * feat: support normalize_eq in cse optimization * feat: support cumulative binary expr result in normalize_eq --------- Co-authored-by: Andrew Lamb --- datafusion/common/src/cse.rs | 150 +++++-- datafusion/expr/src/expr.rs | 389 +++++++++++++++++- datafusion/expr/src/logical_plan/plan.rs | 20 + .../optimizer/src/common_subexpr_eliminate.rs | 263 +++++++++++- 4 files changed, 790 insertions(+), 32 deletions(-) diff --git a/datafusion/common/src/cse.rs b/datafusion/common/src/cse.rs index ab02915858cd..f64571b8471e 100644 --- a/datafusion/common/src/cse.rs +++ b/datafusion/common/src/cse.rs @@ -50,12 +50,33 @@ impl HashNode for Arc { } } +/// The `Normalizeable` trait defines a method to determine whether a node can be normalized. +/// +/// Normalization is the process of converting a node into a canonical form that can be used +/// to compare nodes for equality. This is useful in optimizations like Common Subexpression Elimination (CSE), +/// where semantically equivalent nodes (e.g., `a + b` and `b + a`) should be treated as equal. 
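A hedged sketch of what these traits provide once the changes below land, using paths taken from this diff (`datafusion_common::cse::NormalizeEq`, `datafusion_expr::col`): two expressions that differ only in operand order are not `==`-equal, but they are equal under normalization, which is what lets CSE share them:

```rust
use datafusion_common::cse::NormalizeEq;
use datafusion_expr::{col, Expr};

fn main() {
    let e1: Expr = col("a") + col("b");
    let e2: Expr = col("b") + col("a");
    assert_ne!(e1, e2);            // structural equality still distinguishes them
    assert!(e1.normalize_eq(&e2)); // normalized equality treats them as the same
}
```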
+pub trait Normalizeable { + fn can_normalize(&self) -> bool; +} + +/// The `NormalizeEq` trait extends `Eq` and `Normalizeable` to provide a method for comparing +/// normlized nodes in optimizations like Common Subexpression Elimination (CSE). +/// +/// The `normalize_eq` method ensures that two nodes that are semantically equivalent (after normalization) +/// are considered equal in CSE optimization, even if their original forms differ. +/// +/// This trait allows for equality comparisons between nodes with equivalent semantics, regardless of their +/// internal representations. +pub trait NormalizeEq: Eq + Normalizeable { + fn normalize_eq(&self, other: &Self) -> bool; +} + /// Identifier that represents a [`TreeNode`] tree. /// /// This identifier is designed to be efficient and "hash", "accumulate", "equal" and /// "have no collision (as low as possible)" -#[derive(Debug, Eq, PartialEq)] -struct Identifier<'n, N> { +#[derive(Debug, Eq)] +struct Identifier<'n, N: NormalizeEq> { // Hash of `node` built up incrementally during the first, visiting traversal. // Its value is not necessarily equal to default hash of the node. E.g. it is not // equal to `expr.hash()` if the node is `Expr`. @@ -63,20 +84,29 @@ struct Identifier<'n, N> { node: &'n N, } -impl Clone for Identifier<'_, N> { +impl Clone for Identifier<'_, N> { fn clone(&self) -> Self { *self } } -impl Copy for Identifier<'_, N> {} +impl Copy for Identifier<'_, N> {} -impl Hash for Identifier<'_, N> { +impl Hash for Identifier<'_, N> { fn hash(&self, state: &mut H) { state.write_u64(self.hash); } } -impl<'n, N: HashNode> Identifier<'n, N> { +impl PartialEq for Identifier<'_, N> { + fn eq(&self, other: &Self) -> bool { + self.hash == other.hash && self.node.normalize_eq(other.node) + } +} + +impl<'n, N> Identifier<'n, N> +where + N: HashNode + NormalizeEq, +{ fn new(node: &'n N, random_state: &RandomState) -> Self { let mut hasher = random_state.build_hasher(); node.hash_node(&mut hasher); @@ -213,7 +243,11 @@ pub enum FoundCommonNodes { /// /// A [`TreeNode`] without any children (column, literal etc.) will not have identifier /// because they should not be recognized as common subtree. -struct CSEVisitor<'a, 'n, N, C: CSEController> { +struct CSEVisitor<'a, 'n, N, C> +where + N: NormalizeEq, + C: CSEController, +{ /// statistics of [`TreeNode`]s node_stats: &'a mut NodeStats<'n, N>, @@ -244,7 +278,10 @@ struct CSEVisitor<'a, 'n, N, C: CSEController> { } /// Record item that used when traversing a [`TreeNode`] tree. -enum VisitRecord<'n, N> { +enum VisitRecord<'n, N> +where + N: NormalizeEq, +{ /// Marks the beginning of [`TreeNode`]. It contains: /// - The post-order index assigned during the first, visiting traversal. EnterMark(usize), @@ -258,7 +295,11 @@ enum VisitRecord<'n, N> { NodeItem(Identifier<'n, N>, bool), } -impl<'n, N: TreeNode + HashNode, C: CSEController> CSEVisitor<'_, 'n, N, C> { +impl<'n, N, C> CSEVisitor<'_, 'n, N, C> +where + N: TreeNode + HashNode + NormalizeEq, + C: CSEController, +{ /// Find the first `EnterMark` in the stack, and accumulates every `NodeItem` before /// it. Returns a tuple that contains: /// - The pre-order index of the [`TreeNode`] we marked. @@ -271,17 +312,26 @@ impl<'n, N: TreeNode + HashNode, C: CSEController> CSEVisitor<'_, 'n, /// information up from children to parents via `visit_stack` during the first, /// visiting traversal and no need to test the expression's validity beforehand with /// an extra traversal). 
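To make the identifier of a commutative node independent of operand order, the child identifiers are now combined in hash-sorted order whenever the parent can normalize. A minimal, self-contained sketch of that idea (plain std hashing, not the DataFusion `Identifier` type):

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

fn child_hash(name: &str) -> u64 {
    let mut h = DefaultHasher::new();
    name.hash(&mut h);
    h.finish()
}

// Combine child hashes into a parent id; sorting first makes the result
// order-insensitive, which is only done for normalizable (commutative) nodes.
fn parent_id(mut children: Vec<u64>, can_normalize: bool) -> u64 {
    if can_normalize {
        children.sort_unstable();
    }
    let mut h = DefaultHasher::new();
    children.hash(&mut h);
    h.finish()
}

fn main() {
    let ab = parent_id(vec![child_hash("a"), child_hash("b")], true);
    let ba = parent_id(vec![child_hash("b"), child_hash("a")], true);
    assert_eq!(ab, ba);
}
```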
- fn pop_enter_mark(&mut self) -> (usize, Option>, bool) { - let mut node_id = None; + fn pop_enter_mark( + &mut self, + can_normalize: bool, + ) -> (usize, Option>, bool) { + let mut node_ids: Vec> = vec![]; let mut is_valid = true; while let Some(item) = self.visit_stack.pop() { match item { VisitRecord::EnterMark(down_index) => { + if can_normalize { + node_ids.sort_by_key(|i| i.hash); + } + let node_id = node_ids + .into_iter() + .fold(None, |accum, item| Some(item.combine(accum))); return (down_index, node_id, is_valid); } VisitRecord::NodeItem(sub_node_id, sub_node_is_valid) => { - node_id = Some(sub_node_id.combine(node_id)); + node_ids.push(sub_node_id); is_valid &= sub_node_is_valid; } } @@ -290,8 +340,10 @@ impl<'n, N: TreeNode + HashNode, C: CSEController> CSEVisitor<'_, 'n, } } -impl<'n, N: TreeNode + HashNode + Eq, C: CSEController> TreeNodeVisitor<'n> - for CSEVisitor<'_, 'n, N, C> +impl<'n, N, C> TreeNodeVisitor<'n> for CSEVisitor<'_, 'n, N, C> +where + N: TreeNode + HashNode + NormalizeEq, + C: CSEController, { type Node = N; @@ -331,7 +383,8 @@ impl<'n, N: TreeNode + HashNode + Eq, C: CSEController> TreeNodeVisito } fn f_up(&mut self, node: &'n Self::Node) -> Result { - let (down_index, sub_node_id, sub_node_is_valid) = self.pop_enter_mark(); + let (down_index, sub_node_id, sub_node_is_valid) = + self.pop_enter_mark(node.can_normalize()); let node_id = Identifier::new(node, self.random_state).combine(sub_node_id); let is_valid = C::is_valid(node) && sub_node_is_valid; @@ -369,7 +422,11 @@ impl<'n, N: TreeNode + HashNode + Eq, C: CSEController> TreeNodeVisito /// Rewrite a [`TreeNode`] tree by replacing detected common subtrees with the /// corresponding temporary [`TreeNode`], that column contains the evaluate result of /// replaced [`TreeNode`] tree. -struct CSERewriter<'a, 'n, N, C: CSEController> { +struct CSERewriter<'a, 'n, N, C> +where + N: NormalizeEq, + C: CSEController, +{ /// statistics of [`TreeNode`]s node_stats: &'a NodeStats<'n, N>, @@ -386,8 +443,10 @@ struct CSERewriter<'a, 'n, N, C: CSEController> { controller: &'a mut C, } -impl> TreeNodeRewriter - for CSERewriter<'_, '_, N, C> +impl TreeNodeRewriter for CSERewriter<'_, '_, N, C> +where + N: TreeNode + NormalizeEq, + C: CSEController, { type Node = N; @@ -408,13 +467,30 @@ impl> TreeNodeRewriter self.down_index += 1; } - let (node, alias) = - self.common_nodes.entry(node_id).or_insert_with(|| { - let node_alias = self.controller.generate_alias(); - (node, node_alias) - }); - - let rewritten = self.controller.rewrite(node, alias); + // We *must* replace all original nodes with same `node_id`, not just the first + // node which is inserted into the common_nodes. This is because nodes with the same + // `node_id` are semantically equivalent, but not exactly the same. + // + // For example, `a + 1` and `1 + a` are semantically equivalent but not identical. + // In this case, we should replace the common expression `1 + a` with a new variable + // (e.g., `__common_cse_1`). So, `a + 1` and `1 + a` would both be replaced by + // `__common_cse_1`. + // + // The final result would be: + // - `__common_cse_1 as a + 1` + // - `__common_cse_1 as 1 + a` + // + // This way, we can efficiently handle semantically equivalent expressions without + // incorrectly treating them as identical. 
+ let rewritten = if let Some((_, alias)) = self.common_nodes.get(&node_id) + { + self.controller.rewrite(&node, alias) + } else { + let node_alias = self.controller.generate_alias(); + let rewritten = self.controller.rewrite(&node, &node_alias); + self.common_nodes.insert(node_id, (node, node_alias)); + rewritten + }; return Ok(Transformed::new(rewritten, true, TreeNodeRecursion::Jump)); } @@ -441,7 +517,11 @@ pub struct CSE> { controller: C, } -impl> CSE { +impl CSE +where + N: TreeNode + HashNode + Clone + NormalizeEq, + C: CSEController, +{ pub fn new(controller: C) -> Self { Self { random_state: RandomState::new(), @@ -557,6 +637,7 @@ impl> CSE ) -> Result> { let mut found_common = false; let mut node_stats = NodeStats::new(); + let id_arrays_list = nodes_list .iter() .map(|nodes| { @@ -596,7 +677,10 @@ impl> CSE #[cfg(test)] mod test { use crate::alias::AliasGenerator; - use crate::cse::{CSEController, HashNode, IdArray, Identifier, NodeStats, CSE}; + use crate::cse::{ + CSEController, HashNode, IdArray, Identifier, NodeStats, NormalizeEq, + Normalizeable, CSE, + }; use crate::tree_node::tests::TestTreeNode; use crate::Result; use std::collections::HashSet; @@ -662,6 +746,18 @@ mod test { } } + impl Normalizeable for TestTreeNode { + fn can_normalize(&self) -> bool { + false + } + } + + impl NormalizeEq for TestTreeNode { + fn normalize_eq(&self, other: &Self) -> bool { + self == other + } + } + #[test] fn id_array_visitor() -> Result<()> { let alias_generator = AliasGenerator::new(); diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index c495b5396f53..af54dad79d2e 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -30,7 +30,7 @@ use crate::Volatility; use crate::{udaf, ExprSchemable, Operator, Signature, WindowFrame, WindowUDF}; use arrow::datatypes::{DataType, FieldRef}; -use datafusion_common::cse::HashNode; +use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable}; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion, }; @@ -1665,6 +1665,393 @@ impl Expr { } } +impl Normalizeable for Expr { + fn can_normalize(&self) -> bool { + #[allow(clippy::match_like_matches_macro)] + match self { + Expr::BinaryExpr(BinaryExpr { + op: + _op @ (Operator::Plus + | Operator::Multiply + | Operator::BitwiseAnd + | Operator::BitwiseOr + | Operator::BitwiseXor + | Operator::Eq + | Operator::NotEq), + .. 
+ }) => true, + _ => false, + } + } +} + +impl NormalizeEq for Expr { + fn normalize_eq(&self, other: &Self) -> bool { + match (self, other) { + ( + Expr::BinaryExpr(BinaryExpr { + left: self_left, + op: self_op, + right: self_right, + }), + Expr::BinaryExpr(BinaryExpr { + left: other_left, + op: other_op, + right: other_right, + }), + ) => { + if self_op != other_op { + return false; + } + + if matches!( + self_op, + Operator::Plus + | Operator::Multiply + | Operator::BitwiseAnd + | Operator::BitwiseOr + | Operator::BitwiseXor + | Operator::Eq + | Operator::NotEq + ) { + (self_left.normalize_eq(other_left) + && self_right.normalize_eq(other_right)) + || (self_left.normalize_eq(other_right) + && self_right.normalize_eq(other_left)) + } else { + self_left.normalize_eq(other_left) + && self_right.normalize_eq(other_right) + } + } + ( + Expr::Alias(Alias { + expr: self_expr, + relation: self_relation, + name: self_name, + }), + Expr::Alias(Alias { + expr: other_expr, + relation: other_relation, + name: other_name, + }), + ) => { + self_name == other_name + && self_relation == other_relation + && self_expr.normalize_eq(other_expr) + } + ( + Expr::Like(Like { + negated: self_negated, + expr: self_expr, + pattern: self_pattern, + escape_char: self_escape_char, + case_insensitive: self_case_insensitive, + }), + Expr::Like(Like { + negated: other_negated, + expr: other_expr, + pattern: other_pattern, + escape_char: other_escape_char, + case_insensitive: other_case_insensitive, + }), + ) + | ( + Expr::SimilarTo(Like { + negated: self_negated, + expr: self_expr, + pattern: self_pattern, + escape_char: self_escape_char, + case_insensitive: self_case_insensitive, + }), + Expr::SimilarTo(Like { + negated: other_negated, + expr: other_expr, + pattern: other_pattern, + escape_char: other_escape_char, + case_insensitive: other_case_insensitive, + }), + ) => { + self_negated == other_negated + && self_escape_char == other_escape_char + && self_case_insensitive == other_case_insensitive + && self_expr.normalize_eq(other_expr) + && self_pattern.normalize_eq(other_pattern) + } + (Expr::Not(self_expr), Expr::Not(other_expr)) + | (Expr::IsNull(self_expr), Expr::IsNull(other_expr)) + | (Expr::IsTrue(self_expr), Expr::IsTrue(other_expr)) + | (Expr::IsFalse(self_expr), Expr::IsFalse(other_expr)) + | (Expr::IsUnknown(self_expr), Expr::IsUnknown(other_expr)) + | (Expr::IsNotNull(self_expr), Expr::IsNotNull(other_expr)) + | (Expr::IsNotTrue(self_expr), Expr::IsNotTrue(other_expr)) + | (Expr::IsNotFalse(self_expr), Expr::IsNotFalse(other_expr)) + | (Expr::IsNotUnknown(self_expr), Expr::IsNotUnknown(other_expr)) + | (Expr::Negative(self_expr), Expr::Negative(other_expr)) + | ( + Expr::Unnest(Unnest { expr: self_expr }), + Expr::Unnest(Unnest { expr: other_expr }), + ) => self_expr.normalize_eq(other_expr), + ( + Expr::Between(Between { + expr: self_expr, + negated: self_negated, + low: self_low, + high: self_high, + }), + Expr::Between(Between { + expr: other_expr, + negated: other_negated, + low: other_low, + high: other_high, + }), + ) => { + self_negated == other_negated + && self_expr.normalize_eq(other_expr) + && self_low.normalize_eq(other_low) + && self_high.normalize_eq(other_high) + } + ( + Expr::Cast(Cast { + expr: self_expr, + data_type: self_data_type, + }), + Expr::Cast(Cast { + expr: other_expr, + data_type: other_data_type, + }), + ) + | ( + Expr::TryCast(TryCast { + expr: self_expr, + data_type: self_data_type, + }), + Expr::TryCast(TryCast { + expr: other_expr, + data_type: other_data_type, + }), + ) 
=> self_data_type == other_data_type && self_expr.normalize_eq(other_expr), + ( + Expr::ScalarFunction(ScalarFunction { + func: self_func, + args: self_args, + }), + Expr::ScalarFunction(ScalarFunction { + func: other_func, + args: other_args, + }), + ) => { + self_func.name() == other_func.name() + && self_args.len() == other_args.len() + && self_args + .iter() + .zip(other_args.iter()) + .all(|(a, b)| a.normalize_eq(b)) + } + ( + Expr::AggregateFunction(AggregateFunction { + func: self_func, + args: self_args, + distinct: self_distinct, + filter: self_filter, + order_by: self_order_by, + null_treatment: self_null_treatment, + }), + Expr::AggregateFunction(AggregateFunction { + func: other_func, + args: other_args, + distinct: other_distinct, + filter: other_filter, + order_by: other_order_by, + null_treatment: other_null_treatment, + }), + ) => { + self_func.name() == other_func.name() + && self_distinct == other_distinct + && self_null_treatment == other_null_treatment + && self_args.len() == other_args.len() + && self_args + .iter() + .zip(other_args.iter()) + .all(|(a, b)| a.normalize_eq(b)) + && match (self_filter, other_filter) { + (Some(self_filter), Some(other_filter)) => { + self_filter.normalize_eq(other_filter) + } + (None, None) => true, + _ => false, + } + && match (self_order_by, other_order_by) { + (Some(self_order_by), Some(other_order_by)) => self_order_by + .iter() + .zip(other_order_by.iter()) + .all(|(a, b)| { + a.asc == b.asc + && a.nulls_first == b.nulls_first + && a.expr.normalize_eq(&b.expr) + }), + (None, None) => true, + _ => false, + } + } + ( + Expr::WindowFunction(WindowFunction { + fun: self_fun, + args: self_args, + partition_by: self_partition_by, + order_by: self_order_by, + window_frame: self_window_frame, + null_treatment: self_null_treatment, + }), + Expr::WindowFunction(WindowFunction { + fun: other_fun, + args: other_args, + partition_by: other_partition_by, + order_by: other_order_by, + window_frame: other_window_frame, + null_treatment: other_null_treatment, + }), + ) => { + self_fun.name() == other_fun.name() + && self_window_frame == other_window_frame + && self_null_treatment == other_null_treatment + && self_args.len() == other_args.len() + && self_args + .iter() + .zip(other_args.iter()) + .all(|(a, b)| a.normalize_eq(b)) + && self_partition_by + .iter() + .zip(other_partition_by.iter()) + .all(|(a, b)| a.normalize_eq(b)) + && self_order_by + .iter() + .zip(other_order_by.iter()) + .all(|(a, b)| { + a.asc == b.asc + && a.nulls_first == b.nulls_first + && a.expr.normalize_eq(&b.expr) + }) + } + ( + Expr::Exists(Exists { + subquery: self_subquery, + negated: self_negated, + }), + Expr::Exists(Exists { + subquery: other_subquery, + negated: other_negated, + }), + ) => { + self_negated == other_negated + && self_subquery.normalize_eq(other_subquery) + } + ( + Expr::InSubquery(InSubquery { + expr: self_expr, + subquery: self_subquery, + negated: self_negated, + }), + Expr::InSubquery(InSubquery { + expr: other_expr, + subquery: other_subquery, + negated: other_negated, + }), + ) => { + self_negated == other_negated + && self_expr.normalize_eq(other_expr) + && self_subquery.normalize_eq(other_subquery) + } + ( + Expr::ScalarSubquery(self_subquery), + Expr::ScalarSubquery(other_subquery), + ) => self_subquery.normalize_eq(other_subquery), + ( + Expr::GroupingSet(GroupingSet::Rollup(self_exprs)), + Expr::GroupingSet(GroupingSet::Rollup(other_exprs)), + ) + | ( + Expr::GroupingSet(GroupingSet::Cube(self_exprs)), + 
Expr::GroupingSet(GroupingSet::Cube(other_exprs)), + ) => { + self_exprs.len() == other_exprs.len() + && self_exprs + .iter() + .zip(other_exprs.iter()) + .all(|(a, b)| a.normalize_eq(b)) + } + ( + Expr::GroupingSet(GroupingSet::GroupingSets(self_exprs)), + Expr::GroupingSet(GroupingSet::GroupingSets(other_exprs)), + ) => { + self_exprs.len() == other_exprs.len() + && self_exprs.iter().zip(other_exprs.iter()).all(|(a, b)| { + a.len() == b.len() + && a.iter().zip(b.iter()).all(|(x, y)| x.normalize_eq(y)) + }) + } + ( + Expr::InList(InList { + expr: self_expr, + list: self_list, + negated: self_negated, + }), + Expr::InList(InList { + expr: other_expr, + list: other_list, + negated: other_negated, + }), + ) => { + // TODO: normalize_eq for lists, for example `a IN (c1 + c3, c3)` is equal to `a IN (c3, c1 + c3)` + self_negated == other_negated + && self_expr.normalize_eq(other_expr) + && self_list.len() == other_list.len() + && self_list + .iter() + .zip(other_list.iter()) + .all(|(a, b)| a.normalize_eq(b)) + } + ( + Expr::Case(Case { + expr: self_expr, + when_then_expr: self_when_then_expr, + else_expr: self_else_expr, + }), + Expr::Case(Case { + expr: other_expr, + when_then_expr: other_when_then_expr, + else_expr: other_else_expr, + }), + ) => { + // TODO: normalize_eq for when_then_expr + // for example `CASE a WHEN 1 THEN 2 WHEN 3 THEN 4 ELSE 5 END` is equal to `CASE a WHEN 3 THEN 4 WHEN 1 THEN 2 ELSE 5 END` + self_when_then_expr.len() == other_when_then_expr.len() + && self_when_then_expr + .iter() + .zip(other_when_then_expr.iter()) + .all(|((self_when, self_then), (other_when, other_then))| { + self_when.normalize_eq(other_when) + && self_then.normalize_eq(other_then) + }) + && match (self_expr, other_expr) { + (Some(self_expr), Some(other_expr)) => { + self_expr.normalize_eq(other_expr) + } + (None, None) => true, + (_, _) => false, + } + && match (self_else_expr, other_else_expr) { + (Some(self_else_expr), Some(other_else_expr)) => { + self_else_expr.normalize_eq(other_else_expr) + } + (None, None) => true, + (_, _) => false, + } + } + (_, _) => self == other, + } + } +} + impl HashNode for Expr { /// As it is pretty easy to forget changing this method when `Expr` changes the /// implementation doesn't use wildcard patterns (`..`, `_`) to catch changes diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 31bf4c573444..6c2b923cf6ad 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -45,6 +45,7 @@ use crate::{ }; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion_common::cse::{NormalizeEq, Normalizeable}; use datafusion_common::tree_node::{ Transformed, TreeNode, TreeNodeContainer, TreeNodeRecursion, }; @@ -3354,6 +3355,25 @@ pub struct Subquery { pub outer_ref_columns: Vec, } +impl Normalizeable for Subquery { + fn can_normalize(&self) -> bool { + false + } +} + +impl NormalizeEq for Subquery { + fn normalize_eq(&self, other: &Self) -> bool { + // TODO: may be implement NormalizeEq for LogicalPlan? 
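A minimal sketch of what the `NormalizeEq` implementation above buys for common-subexpression detection; this is illustrative only and not part of the patch. It assumes the `NormalizeEq` trait from `datafusion_common::cse` and the `col` helper from `datafusion_expr`, both of which appear in the hunks above.

    use datafusion_common::cse::NormalizeEq;
    use datafusion_expr::col;

    fn main() {
        // Plus is in the commutative operator set, so operand order is ignored.
        assert!((col("a") + col("b")).normalize_eq(&(col("b") + col("a"))));
        // Minus is not, so the comparison stays positional and fails here.
        assert!(!(col("a") - col("b")).normalize_eq(&(col("b") - col("a"))));
    }
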
+ *self.subquery == *other.subquery + && self.outer_ref_columns.len() == other.outer_ref_columns.len() + && self + .outer_ref_columns + .iter() + .zip(other.outer_ref_columns.iter()) + .all(|(a, b)| a.normalize_eq(b)) + } +} + impl Subquery { pub fn try_from_expr(plan: &Expr) -> Result<&Subquery> { match plan { diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 0ea2d24effbb..e7c9a198f3ad 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -795,8 +795,9 @@ mod test { use arrow::datatypes::{DataType, Field, Schema}; use datafusion_expr::logical_plan::{table_scan, JoinType}; use datafusion_expr::{ - grouping_set, AccumulatorFactoryFunction, AggregateUDF, ColumnarValue, ScalarUDF, - ScalarUDFImpl, Signature, SimpleAggregateUDF, Volatility, + grouping_set, is_null, not, AccumulatorFactoryFunction, AggregateUDF, + ColumnarValue, ScalarUDF, ScalarUDFImpl, Signature, SimpleAggregateUDF, + Volatility, }; use datafusion_expr::{lit, logical_plan::builder::LogicalPlanBuilder}; @@ -1054,8 +1055,9 @@ mod test { .project(vec![lit(1) + col("a"), col("a") + lit(1)])? .build()?; - let expected = "Projection: Int32(1) + test.a, test.a + Int32(1)\ - \n TableScan: test"; + let expected = "Projection: __common_expr_1 AS Int32(1) + test.a, __common_expr_1 AS test.a + Int32(1)\ + \n Projection: Int32(1) + test.a AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; assert_optimized_plan_eq(expected, plan, None); @@ -1412,6 +1414,259 @@ mod test { Ok(()) } + #[test] + fn test_normalize_add_expression() -> Result<()> { + // a + b <=> b + a + let table_scan = test_table_scan()?; + let expr = ((col("a") + col("b")) * (col("b") + col("a"))).eq(lit(30)); + let plan = LogicalPlanBuilder::from(table_scan).filter(expr)?.build()?; + + let expected = "Projection: test.a, test.b, test.c\ + \n Filter: __common_expr_1 * __common_expr_1 = Int32(30)\ + \n Projection: test.a + test.b AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + + Ok(()) + } + + #[test] + fn test_normalize_multi_expression() -> Result<()> { + // a * b <=> b * a + let table_scan = test_table_scan()?; + let expr = ((col("a") * col("b")) + (col("b") * col("a"))).eq(lit(30)); + let plan = LogicalPlanBuilder::from(table_scan).filter(expr)?.build()?; + + let expected = "Projection: test.a, test.b, test.c\ + \n Filter: __common_expr_1 + __common_expr_1 = Int32(30)\ + \n Projection: test.a * test.b AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + + Ok(()) + } + + #[test] + fn test_normalize_bitset_and_expression() -> Result<()> { + // a & b <=> b & a + let table_scan = test_table_scan()?; + let expr = ((col("a") & col("b")) + (col("b") & col("a"))).eq(lit(30)); + let plan = LogicalPlanBuilder::from(table_scan).filter(expr)?.build()?; + + let expected = "Projection: test.a, test.b, test.c\ + \n Filter: __common_expr_1 + __common_expr_1 = Int32(30)\ + \n Projection: test.a & test.b AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + + Ok(()) + } + + #[test] + fn test_normalize_bitset_or_expression() -> Result<()> { + // a | b <=> b | a + let table_scan = test_table_scan()?; + let expr = ((col("a") | col("b")) + (col("b") | col("a"))).eq(lit(30)); + let plan = 
LogicalPlanBuilder::from(table_scan).filter(expr)?.build()?; + + let expected = "Projection: test.a, test.b, test.c\ + \n Filter: __common_expr_1 + __common_expr_1 = Int32(30)\ + \n Projection: test.a | test.b AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + + Ok(()) + } + + #[test] + fn test_normalize_bitset_xor_expression() -> Result<()> { + // a # b <=> b # a + let table_scan = test_table_scan()?; + let expr = ((col("a") ^ col("b")) + (col("b") ^ col("a"))).eq(lit(30)); + let plan = LogicalPlanBuilder::from(table_scan).filter(expr)?.build()?; + + let expected = "Projection: test.a, test.b, test.c\ + \n Filter: __common_expr_1 + __common_expr_1 = Int32(30)\ + \n Projection: test.a BIT_XOR test.b AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + + Ok(()) + } + + #[test] + fn test_normalize_eq_expression() -> Result<()> { + // a = b <=> b = a + let table_scan = test_table_scan()?; + let expr = (col("a").eq(col("b"))).and(col("b").eq(col("a"))); + let plan = LogicalPlanBuilder::from(table_scan).filter(expr)?.build()?; + + let expected = "Projection: test.a, test.b, test.c\ + \n Filter: __common_expr_1 AND __common_expr_1\ + \n Projection: test.a = test.b AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + + Ok(()) + } + + #[test] + fn test_normalize_ne_expression() -> Result<()> { + // a != b <=> b != a + let table_scan = test_table_scan()?; + let expr = (col("a").not_eq(col("b"))).and(col("b").not_eq(col("a"))); + let plan = LogicalPlanBuilder::from(table_scan).filter(expr)?.build()?; + + let expected = "Projection: test.a, test.b, test.c\ + \n Filter: __common_expr_1 AND __common_expr_1\ + \n Projection: test.a != test.b AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + + Ok(()) + } + + #[test] + fn test_normalize_complex_expression() -> Result<()> { + // case1: a + b * c <=> b * c + a + let table_scan = test_table_scan()?; + let expr = ((col("a") + col("b") * col("c")) - (col("b") * col("c") + col("a"))) + .eq(lit(30)); + let plan = LogicalPlanBuilder::from(table_scan).filter(expr)?.build()?; + + let expected = "Projection: test.a, test.b, test.c\ + \n Filter: __common_expr_1 - __common_expr_1 = Int32(30)\ + \n Projection: test.a + test.b * test.c AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + + // ((c1 + c2 / c3) * c3 <=> c3 * (c2 / c3 + c1)) + let table_scan = test_table_scan()?; + let expr = (((col("a") + col("b") / col("c")) * col("c")) + / (col("c") * (col("b") / col("c") + col("a"))) + + col("a")) + .eq(lit(30)); + let plan = LogicalPlanBuilder::from(table_scan).filter(expr)?.build()?; + let expected = "Projection: test.a, test.b, test.c\ + \n Filter: __common_expr_1 / __common_expr_1 + test.a = Int32(30)\ + \n Projection: (test.a + test.b / test.c) * test.c AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + + // c2 / (c1 + c3) <=> c2 / (c3 + c1) + let table_scan = test_table_scan()?; + let expr = ((col("b") / (col("a") + col("c"))) + * (col("b") / (col("c") + col("a")))) + .eq(lit(30)); + let plan = LogicalPlanBuilder::from(table_scan).filter(expr)?.build()?; + let expected = "Projection: test.a, test.b, test.c\ + \n Filter: __common_expr_1 * __common_expr_1 = Int32(30)\ + 
\n Projection: test.b / (test.a + test.c) AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + + Ok(()) + } + + #[derive(Debug)] + pub struct TestUdf { + signature: Signature, + } + + impl TestUdf { + pub fn new() -> Self { + Self { + signature: Signature::numeric(1, Volatility::Immutable), + } + } + } + + impl ScalarUDFImpl for TestUdf { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "my_udf" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _: &[DataType]) -> Result { + Ok(DataType::Int32) + } + + fn invoke(&self, _: &[ColumnarValue]) -> Result { + panic!("not implemented") + } + } + + #[test] + fn test_normalize_inner_binary_expression() -> Result<()> { + // Not(a == b) <=> Not(b == a) + let table_scan = test_table_scan()?; + let expr1 = not(col("a").eq(col("b"))); + let expr2 = not(col("b").eq(col("a"))); + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![expr1, expr2])? + .build()?; + let expected = "Projection: __common_expr_1 AS NOT test.a = test.b, __common_expr_1 AS NOT test.b = test.a\ + \n Projection: NOT test.a = test.b AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + + // is_null(a == b) <=> is_null(b == a) + let table_scan = test_table_scan()?; + let expr1 = is_null(col("a").eq(col("b"))); + let expr2 = is_null(col("b").eq(col("a"))); + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![expr1, expr2])? + .build()?; + let expected = "Projection: __common_expr_1 AS test.a = test.b IS NULL, __common_expr_1 AS test.b = test.a IS NULL\ + \n Projection: test.a = test.b IS NULL AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + + // a + b between 0 and 10 <=> b + a between 0 and 10 + let table_scan = test_table_scan()?; + let expr1 = (col("a") + col("b")).between(lit(0), lit(10)); + let expr2 = (col("b") + col("a")).between(lit(0), lit(10)); + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![expr1, expr2])? + .build()?; + let expected = "Projection: __common_expr_1 AS test.a + test.b BETWEEN Int32(0) AND Int32(10), __common_expr_1 AS test.b + test.a BETWEEN Int32(0) AND Int32(10)\ + \n Projection: test.a + test.b BETWEEN Int32(0) AND Int32(10) AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + + // c between a + b and 10 <=> c between b + a and 10 + let table_scan = test_table_scan()?; + let expr1 = col("c").between(col("a") + col("b"), lit(10)); + let expr2 = col("c").between(col("b") + col("a"), lit(10)); + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![expr1, expr2])? + .build()?; + let expected = "Projection: __common_expr_1 AS test.c BETWEEN test.a + test.b AND Int32(10), __common_expr_1 AS test.c BETWEEN test.b + test.a AND Int32(10)\ + \n Projection: test.c BETWEEN test.a + test.b AND Int32(10) AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + + // function call with argument <=> function call with argument + let udf = ScalarUDF::from(TestUdf::new()); + let table_scan = test_table_scan()?; + let expr1 = udf.call(vec![col("a") + col("b")]); + let expr2 = udf.call(vec![col("b") + col("a")]); + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![expr1, expr2])? 
+ .build()?; + let expected = "Projection: __common_expr_1 AS my_udf(test.a + test.b), __common_expr_1 AS my_udf(test.b + test.a)\ + \n Projection: my_udf(test.a + test.b) AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; + assert_optimized_plan_eq(expected, plan, None); + Ok(()) + } + /// returns a "random" function that is marked volatile (aka each invocation /// returns a different value) /// From 75202b587108cd8de6c702a84463ac0ca0ca4d4b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 20 Dec 2024 09:37:35 -0500 Subject: [PATCH 9/9] Upgrade to sqlparser `0.53.0` (#13767) * chore: Udpate to sqlparser 0.53.0 * Update for new sqlparser API * more api updates * Avoid serializing query to SQL string unless it is necessary * Box wildcard options * chore: update datafusion-cli Cargo.lock --- Cargo.toml | 2 +- datafusion-cli/Cargo.lock | 8 +- datafusion/common/src/utils/mod.rs | 5 +- .../user_defined_scalar_functions.rs | 10 +- datafusion/expr/src/expr.rs | 2 +- datafusion/expr/src/expr_fn.rs | 8 +- .../src/analyzer/inline_table_scan.rs | 8 +- .../proto/src/logical_plan/from_proto.rs | 2 +- .../tests/cases/roundtrip_logical_plan.rs | 12 +- datafusion/sql/src/expr/function.rs | 24 +- datafusion/sql/src/expr/mod.rs | 8 +- datafusion/sql/src/parser.rs | 14 +- datafusion/sql/src/planner.rs | 14 +- datafusion/sql/src/select.rs | 1 + datafusion/sql/src/statement.rs | 207 ++++++++++++++---- datafusion/sql/src/unparser/ast.rs | 3 + datafusion/sql/src/unparser/dialect.rs | 6 +- datafusion/sql/src/unparser/expr.rs | 32 ++- datafusion/sql/src/unparser/plan.rs | 9 +- datafusion/sql/src/unparser/utils.rs | 12 +- .../test_files/information_schema.slt | 4 +- 21 files changed, 271 insertions(+), 120 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cc94b4292a50..b7c8c09a8537 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -147,7 +147,7 @@ recursive = "0.1.1" regex = "1.8" rstest = "0.23.0" serde_json = "1" -sqlparser = { version = "0.52.0", features = ["visitor"] } +sqlparser = { version = "0.53.0", features = ["visitor"] } tempfile = "3" tokio = { version = "1.36", features = ["macros", "rt", "sync"] } url = "2.2" diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 2ffc64114ef7..a435869dbece 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -3755,9 +3755,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a875d8cd437cc8a97e9aeaeea352ec9a19aea99c23e9effb17757291de80b08" +checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" dependencies = [ "log", "sqlparser_derive", @@ -3765,9 +3765,9 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.2.2" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 5e840f859400..29d33fec14ab 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -887,10 +887,10 @@ pub fn get_available_parallelism() -> usize { #[cfg(test)] mod tests { + use super::*; use crate::ScalarValue::Null; use arrow::array::Float64Array; - - use 
super::*; + use sqlparser::tokenizer::Span; #[test] fn test_bisect_linear_left_and_right() -> Result<()> { @@ -1118,6 +1118,7 @@ mod tests { let expected_parsed = vec![Ident { value: identifier.to_string(), quote_style, + span: Span::empty(), }]; assert_eq!( diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 4d2a536c4920..30b3c6e2bbeb 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -27,10 +27,6 @@ use arrow_array::{ Array, ArrayRef, Float32Array, Float64Array, Int32Array, RecordBatch, StringArray, }; use arrow_schema::{DataType, Field, Schema}; -use parking_lot::Mutex; -use regex::Regex; -use sqlparser::ast::Ident; - use datafusion::execution::context::{FunctionFactory, RegisterFunction, SessionState}; use datafusion::prelude::*; use datafusion::{execution::registry::FunctionRegistry, test_util}; @@ -48,6 +44,10 @@ use datafusion_expr::{ Volatility, }; use datafusion_functions_nested::range::range_udf; +use parking_lot::Mutex; +use regex::Regex; +use sqlparser::ast::Ident; +use sqlparser::tokenizer::Span; /// test that casting happens on udfs. /// c11 is f32, but `custom_sqrt` requires f64. Casting happens but the logical plan and @@ -1187,6 +1187,7 @@ async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<( name: Some(Ident { value: "name".into(), quote_style: None, + span: Span::empty(), }), data_type: DataType::Utf8, default_expr: None, @@ -1196,6 +1197,7 @@ async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<( language: Some(Ident { value: "plrust".into(), quote_style: None, + span: Span::empty(), }), behavior: None, function_body: Some(lit(body)), diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index af54dad79d2e..c82572ebd5f1 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -313,7 +313,7 @@ pub enum Expr { /// plan into physical plan. Wildcard { qualifier: Option, - options: WildcardOptions, + options: Box, }, /// List of grouping set expressions. 
Only valid in the context of an aggregate /// GROUP BY expression list diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index a44dd24039dc..a2de5e7b259f 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -123,7 +123,7 @@ pub fn placeholder(id: impl Into) -> Expr { pub fn wildcard() -> Expr { Expr::Wildcard { qualifier: None, - options: WildcardOptions::default(), + options: Box::new(WildcardOptions::default()), } } @@ -131,7 +131,7 @@ pub fn wildcard() -> Expr { pub fn wildcard_with_options(options: WildcardOptions) -> Expr { Expr::Wildcard { qualifier: None, - options, + options: Box::new(options), } } @@ -148,7 +148,7 @@ pub fn wildcard_with_options(options: WildcardOptions) -> Expr { pub fn qualified_wildcard(qualifier: impl Into) -> Expr { Expr::Wildcard { qualifier: Some(qualifier.into()), - options: WildcardOptions::default(), + options: Box::new(WildcardOptions::default()), } } @@ -159,7 +159,7 @@ pub fn qualified_wildcard_with_options( ) -> Expr { Expr::Wildcard { qualifier: Some(qualifier.into()), - options, + options: Box::new(options), } } diff --git a/datafusion/optimizer/src/analyzer/inline_table_scan.rs b/datafusion/optimizer/src/analyzer/inline_table_scan.rs index 342d85a915b4..95781b395f3c 100644 --- a/datafusion/optimizer/src/analyzer/inline_table_scan.rs +++ b/datafusion/optimizer/src/analyzer/inline_table_scan.rs @@ -23,8 +23,7 @@ use crate::analyzer::AnalyzerRule; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{Column, Result}; -use datafusion_expr::expr::WildcardOptions; -use datafusion_expr::{logical_plan::LogicalPlan, Expr, LogicalPlanBuilder}; +use datafusion_expr::{logical_plan::LogicalPlan, wildcard, Expr, LogicalPlanBuilder}; /// Analyzed rule that inlines TableScan that provide a [`LogicalPlan`] /// (DataFrame / ViewTable) @@ -93,10 +92,7 @@ fn generate_projection_expr( ))); } } else { - exprs.push(Expr::Wildcard { - qualifier: None, - options: WildcardOptions::default(), - }); + exprs.push(wildcard()); } Ok(exprs) } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 301efc42a7c4..6ab3e0c9096c 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -513,7 +513,7 @@ pub fn parse_expr( let qualifier = qualifier.to_owned().map(|x| x.try_into()).transpose()?; Ok(Expr::Wildcard { qualifier, - options: WildcardOptions::default(), + options: Box::new(WildcardOptions::default()), }) } ExprType::ScalarUdfExpr(protobuf::ScalarUdfExprNode { diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index f793e96f612b..c0885ece08bc 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -67,7 +67,7 @@ use datafusion_common::{ use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ self, Between, BinaryExpr, Case, Cast, GroupingSet, InList, Like, ScalarFunction, - Unnest, WildcardOptions, + Unnest, }; use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNodeCore}; use datafusion_expr::{ @@ -2061,10 +2061,7 @@ fn roundtrip_unnest() { #[test] fn roundtrip_wildcard() { - let test_expr = Expr::Wildcard { - qualifier: None, - options: WildcardOptions::default(), - }; + let test_expr = wildcard(); let ctx = SessionContext::new(); 
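A minimal sketch of the helper-based construction these roundtrip tests switch to, assuming only the `wildcard` and `qualified_wildcard` functions shown in the expr_fn.rs hunk above; illustrative usage, not code from the patch.

    use datafusion_expr::{qualified_wildcard, wildcard, Expr};

    fn main() {
        // Equivalent to Expr::Wildcard { qualifier: None, options: Box::new(WildcardOptions::default()) }
        let all = wildcard();
        // Attaches a table qualifier, mirroring `SELECT foo.*`
        let foo_all = qualified_wildcard("foo");
        assert!(matches!(all, Expr::Wildcard { qualifier: None, .. }));
        assert!(matches!(foo_all, Expr::Wildcard { qualifier: Some(_), .. }));
    }
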
roundtrip_expr_test(test_expr, ctx); @@ -2072,10 +2069,7 @@ fn roundtrip_wildcard() { #[test] fn roundtrip_qualified_wildcard() { - let test_expr = Expr::Wildcard { - qualifier: Some("foo".into()), - options: WildcardOptions::default(), - }; + let test_expr = qualified_wildcard("foo"); let ctx = SessionContext::new(); roundtrip_expr_test(test_expr, ctx); diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 67fa23b86990..da1a4ba81f5a 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -22,11 +22,11 @@ use datafusion_common::{ internal_datafusion_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, Dependency, Result, }; -use datafusion_expr::expr::WildcardOptions; use datafusion_expr::expr::{ScalarFunction, Unnest}; use datafusion_expr::planner::PlannerResult; use datafusion_expr::{ - expr, Expr, ExprFunctionExt, ExprSchemable, WindowFrame, WindowFunctionDefinition, + expr, qualified_wildcard, wildcard, Expr, ExprFunctionExt, ExprSchemable, + WindowFrame, WindowFunctionDefinition, }; use sqlparser::ast::{ DuplicateTreatment, Expr as SQLExpr, Function as SQLFunction, FunctionArg, @@ -169,6 +169,11 @@ impl FunctionArgs { "Calling {name}: SEPARATOR not supported in function arguments: {sep}" ) } + FunctionArgumentClause::JsonNullClause(jn) => { + return not_impl_err!( + "Calling {name}: JSON NULL clause not supported in function arguments: {jn}" + ) + } } } @@ -413,17 +418,11 @@ impl SqlToRel<'_, S> { name: _, arg: FunctionArgExpr::Wildcard, operator: _, - } => Ok(Expr::Wildcard { - qualifier: None, - options: WildcardOptions::default(), - }), + } => Ok(wildcard()), FunctionArg::Unnamed(FunctionArgExpr::Expr(arg)) => { self.sql_expr_to_logical_expr(arg, schema, planner_context) } - FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => Ok(Expr::Wildcard { - qualifier: None, - options: WildcardOptions::default(), - }), + FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => Ok(wildcard()), FunctionArg::Unnamed(FunctionArgExpr::QualifiedWildcard(object_name)) => { let qualifier = self.object_name_to_table_reference(object_name)?; // Sanity check on qualifier with schema @@ -431,10 +430,7 @@ impl SqlToRel<'_, S> { if qualified_indices.is_empty() { return plan_err!("Invalid qualifier {qualifier}"); } - Ok(Expr::Wildcard { - qualifier: Some(qualifier), - options: WildcardOptions::default(), - }) + Ok(qualified_wildcard(qualifier)) } _ => not_impl_err!("Unsupported qualified wildcard argument: {sql:?}"), } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index e8ec8d7b7d1c..a651d8fa5d35 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -593,13 +593,13 @@ impl SqlToRel<'_, S> { } not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}") } - SQLExpr::Wildcard => Ok(Expr::Wildcard { + SQLExpr::Wildcard(_token) => Ok(Expr::Wildcard { qualifier: None, - options: WildcardOptions::default(), + options: Box::new(WildcardOptions::default()), }), - SQLExpr::QualifiedWildcard(object_name) => Ok(Expr::Wildcard { + SQLExpr::QualifiedWildcard(object_name, _token) => Ok(Expr::Wildcard { qualifier: Some(self.object_name_to_table_reference(object_name)?), - options: WildcardOptions::default(), + options: Box::new(WildcardOptions::default()), }), SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values), _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), diff --git a/datafusion/sql/src/parser.rs 
b/datafusion/sql/src/parser.rs index efec6020641c..f185d65fa194 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -21,6 +21,7 @@ use std::collections::VecDeque; use std::fmt; use sqlparser::ast::ExprWithAlias; +use sqlparser::tokenizer::TokenWithSpan; use sqlparser::{ ast::{ ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query, @@ -28,7 +29,7 @@ use sqlparser::{ }, dialect::{keywords::Keyword, Dialect, GenericDialect}, parser::{Parser, ParserError}, - tokenizer::{Token, TokenWithLocation, Tokenizer, Word}, + tokenizer::{Token, Tokenizer, Word}, }; // Use `Parser::expected` instead, if possible @@ -338,7 +339,7 @@ impl<'a> DFParser<'a> { fn expected( &self, expected: &str, - found: TokenWithLocation, + found: TokenWithSpan, ) -> Result { parser_err!(format!("Expected {expected}, found: {found}")) } @@ -876,6 +877,7 @@ mod tests { use super::*; use sqlparser::ast::Expr::Identifier; use sqlparser::ast::{BinaryOperator, DataType, Expr, Ident}; + use sqlparser::tokenizer::Span; fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), ParserError> { let statements = DFParser::parse_sql(sql)?; @@ -911,6 +913,7 @@ mod tests { name: Ident { value: name.into(), quote_style: None, + span: Span::empty(), }, data_type, collation: None, @@ -1219,6 +1222,7 @@ mod tests { expr: Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), }), asc, nulls_first, @@ -1250,6 +1254,7 @@ mod tests { expr: Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), }), asc: Some(true), nulls_first: None, @@ -1259,6 +1264,7 @@ mod tests { expr: Identifier(Ident { value: "c2".to_owned(), quote_style: None, + span: Span::empty(), }), asc: Some(false), nulls_first: Some(true), @@ -1290,11 +1296,13 @@ mod tests { left: Box::new(Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), })), op: BinaryOperator::Minus, right: Box::new(Identifier(Ident { value: "c2".to_owned(), quote_style: None, + span: Span::empty(), })), }, asc: Some(true), @@ -1335,11 +1343,13 @@ mod tests { left: Box::new(Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), })), op: BinaryOperator::Minus, right: Box::new(Identifier(Ident { value: "c2".to_owned(), quote_style: None, + span: Span::empty(), })), }, asc: Some(true), diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 2d0ba8f8d994..d917a707ca20 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -310,7 +310,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { plan: LogicalPlan, alias: TableAlias, ) -> Result { - let plan = self.apply_expr_alias(plan, alias.columns)?; + let idents = alias.columns.into_iter().map(|c| c.name).collect(); + let plan = self.apply_expr_alias(plan, idents)?; LogicalPlanBuilder::from(plan) .alias(TableReference::bare( @@ -513,7 +514,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Regclass | SQLDataType::Custom(_, _) | SQLDataType::Array(_) - | SQLDataType::Enum(_) + | SQLDataType::Enum(_, _) | SQLDataType::Set(_) | SQLDataType::MediumInt(_) | SQLDataType::UnsignedMediumInt(_) @@ -557,6 +558,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Nullable(_) | SQLDataType::LowCardinality(_) | SQLDataType::Trigger + // MySQL datatypes + | SQLDataType::TinyBlob + | SQLDataType::MediumBlob + | SQLDataType::LongBlob + | SQLDataType::TinyText + | SQLDataType::MediumText + | SQLDataType::LongText + | SQLDataType::Bit(_) + 
|SQLDataType::BitVarying(_) => not_impl_err!( "Unsupported SQL type {sql_type:?}" ), diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index c2a9bac24e66..9a84c00a8044 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -655,6 +655,7 @@ impl SqlToRel<'_, S> { opt_rename, opt_replace: _opt_replace, opt_ilike: _opt_ilike, + wildcard_token: _wildcard_token, } = options; if opt_rename.is_some() { diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index f750afbc4a53..e264b9083cc0 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -33,7 +33,7 @@ use arrow_schema::{DataType, Fields}; use datafusion_common::error::_plan_err; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ - exec_err, not_impl_err, plan_datafusion_err, plan_err, schema_err, + exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, schema_err, unqualified_field_not_found, Column, Constraint, Constraints, DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, SchemaError, SchemaReference, TableReference, ToDFSchema, @@ -54,13 +54,16 @@ use datafusion_expr::{ TransactionConclusion, TransactionEnd, TransactionIsolationLevel, TransactionStart, Volatility, WriteOp, }; -use sqlparser::ast::{self, SqliteOnConflict}; +use sqlparser::ast::{ + self, BeginTransactionKind, NullsDistinctOption, ShowStatementIn, + ShowStatementOptions, SqliteOnConflict, +}; use sqlparser::ast::{ Assignment, AssignmentTarget, ColumnDef, CreateIndex, CreateTable, CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, FromTable, Ident, Insert, ObjectName, ObjectType, OneOrManyWithParens, Query, SchemaName, SetExpr, - ShowCreateObject, ShowStatementFilter, Statement, TableConstraint, TableFactor, - TableWithJoins, TransactionMode, UnaryOperator, Value, + ShowCreateObject, Statement, TableConstraint, TableFactor, TableWithJoins, + TransactionMode, UnaryOperator, Value, }; use sqlparser::parser::ParserError::ParserError; @@ -107,6 +110,7 @@ fn calc_inline_constraints_from_columns(columns: &[ColumnDef]) -> Vec SqlToRel<'_, S> { statement: Statement, planner_context: &mut PlannerContext, ) -> Result { - let sql = Some(statement.to_string()); match statement { Statement::ExplainTable { describe_alias: DescribeAlias::Describe, // only parse 'DESCRIBE table_name' and not 'EXPLAIN table_name' @@ -514,6 +517,35 @@ impl SqlToRel<'_, S> { return not_impl_err!("To not supported")?; } + // put the statement back together temporarily to get the SQL + // string representation + let stmt = Statement::CreateView { + or_replace, + materialized, + name, + columns, + query, + options: CreateTableOptions::None, + cluster_by, + comment, + with_no_schema_binding, + if_not_exists, + temporary, + to, + }; + let sql = stmt.to_string(); + let Statement::CreateView { + name, + columns, + query, + or_replace, + temporary, + .. 
+ } = stmt + else { + return internal_err!("Unreachable code in create view"); + }; + let columns = columns .into_iter() .map(|view_column_def| { @@ -534,7 +566,7 @@ impl SqlToRel<'_, S> { name: self.object_name_to_table_reference(name)?, input: Arc::new(plan), or_replace, - definition: sql, + definition: Some(sql), temporary, }))) } @@ -685,19 +717,99 @@ impl SqlToRel<'_, S> { Statement::ShowTables { extended, full, - db_name, - filter, - // SHOW TABLES IN/FROM are equivalent, this field specifies which the user - // specified, but it doesn't affect the plan so ignore the field - clause: _, - } => self.show_tables_to_plan(extended, full, db_name, filter), + terse, + history, + external, + show_options, + } => { + // We only support the basic "SHOW TABLES" + // https://github.com/apache/datafusion/issues/3188 + if extended { + return not_impl_err!("SHOW TABLES EXTENDED not supported")?; + } + if full { + return not_impl_err!("SHOW FULL TABLES not supported")?; + } + if terse { + return not_impl_err!("SHOW TERSE TABLES not supported")?; + } + if history { + return not_impl_err!("SHOW TABLES HISTORY not supported")?; + } + if external { + return not_impl_err!("SHOW EXTERNAL TABLES not supported")?; + } + let ShowStatementOptions { + show_in, + starts_with, + limit, + limit_from, + filter_position, + } = show_options; + if show_in.is_some() { + return not_impl_err!("SHOW TABLES IN not supported")?; + } + if starts_with.is_some() { + return not_impl_err!("SHOW TABLES LIKE not supported")?; + } + if limit.is_some() { + return not_impl_err!("SHOW TABLES LIMIT not supported")?; + } + if limit_from.is_some() { + return not_impl_err!("SHOW TABLES LIMIT FROM not supported")?; + } + if filter_position.is_some() { + return not_impl_err!("SHOW TABLES FILTER not supported")?; + } + self.show_tables_to_plan() + } Statement::ShowColumns { extended, full, - table_name, - filter, - } => self.show_columns_to_plan(extended, full, table_name, filter), + show_options, + } => { + let ShowStatementOptions { + show_in, + starts_with, + limit, + limit_from, + filter_position, + } = show_options; + if starts_with.is_some() { + return not_impl_err!("SHOW COLUMNS LIKE not supported")?; + } + if limit.is_some() { + return not_impl_err!("SHOW COLUMNS LIMIT not supported")?; + } + if limit_from.is_some() { + return not_impl_err!("SHOW COLUMNS LIMIT FROM not supported")?; + } + if filter_position.is_some() { + return not_impl_err!( + "SHOW COLUMNS with WHERE or LIKE is not supported" + )?; + } + let Some(ShowStatementIn { + // specifies if the syntax was `SHOW COLUMNS IN` or `SHOW + // COLUMNS FROM` which is not different in DataFusion + clause: _, + parent_type, + parent_name, + }) = show_in + else { + return plan_err!("SHOW COLUMNS requires a table name"); + }; + + if let Some(parent_type) = parent_type { + return not_impl_err!("SHOW COLUMNS IN {parent_type} not supported"); + } + let Some(table_name) = parent_name else { + return plan_err!("SHOW COLUMNS requires a table name"); + }; + + self.show_columns_to_plan(extended, full, table_name) + } Statement::Insert(Insert { or, @@ -766,10 +878,14 @@ impl SqlToRel<'_, S> { from, selection, returning, + or, } => { if returning.is_some() { plan_err!("Update-returning clause not yet supported")?; } + if or.is_some() { + plan_err!("ON conflict not supported")?; + } self.update_to_plan(table, assignments, from, selection) } @@ -810,12 +926,14 @@ impl SqlToRel<'_, S> { modes, begin: false, modifier, + transaction, } => { if let Some(modifier) = modifier { return 
not_impl_err!( "Transaction modifier not supported: {modifier}" ); } + self.validate_transaction_kind(transaction)?; let isolation_level: ast::TransactionIsolationLevel = modes .iter() .filter_map(|m: &TransactionMode| match m { @@ -879,7 +997,7 @@ impl SqlToRel<'_, S> { }); Ok(LogicalPlan::Statement(statement)) } - Statement::CreateFunction { + Statement::CreateFunction(ast::CreateFunction { or_replace, temporary, name, @@ -889,7 +1007,7 @@ impl SqlToRel<'_, S> { behavior, language, .. - } => { + }) => { let return_type = match return_type { Some(t) => Some(self.convert_data_type(&t)?), None => None, @@ -1033,8 +1151,8 @@ impl SqlToRel<'_, S> { }, ))) } - _ => { - not_impl_err!("Unsupported SQL statement: {sql:?}") + stmt => { + not_impl_err!("Unsupported SQL statement: {stmt}") } } } @@ -1065,24 +1183,12 @@ impl SqlToRel<'_, S> { } /// Generate a logical plan from a "SHOW TABLES" query - fn show_tables_to_plan( - &self, - extended: bool, - full: bool, - db_name: Option, - filter: Option, - ) -> Result { + fn show_tables_to_plan(&self) -> Result { if self.has_table("information_schema", "tables") { - // We only support the basic "SHOW TABLES" - // https://github.com/apache/datafusion/issues/3188 - if db_name.is_some() || filter.is_some() || full || extended { - plan_err!("Unsupported parameters to SHOW TABLES") - } else { - let query = "SELECT * FROM information_schema.tables;"; - let mut rewrite = DFParser::parse_sql(query)?; - assert_eq!(rewrite.len(), 1); - self.statement_to_plan(rewrite.pop_front().unwrap()) // length of rewrite is 1 - } + let query = "SELECT * FROM information_schema.tables;"; + let mut rewrite = DFParser::parse_sql(query)?; + assert_eq!(rewrite.len(), 1); + self.statement_to_plan(rewrite.pop_front().unwrap()) // length of rewrite is 1 } else { plan_err!("SHOW TABLES is not supported unless information_schema is enabled") } @@ -1841,22 +1947,18 @@ impl SqlToRel<'_, S> { extended: bool, full: bool, sql_table_name: ObjectName, - filter: Option, ) -> Result { - if filter.is_some() { - return plan_err!("SHOW COLUMNS with WHERE or LIKE is not supported"); - } + // Figure out the where clause + let where_clause = object_name_to_qualifier( + &sql_table_name, + self.options.enable_ident_normalization, + ); if !self.has_table("information_schema", "columns") { return plan_err!( "SHOW COLUMNS is not supported unless information_schema is enabled" ); } - // Figure out the where clause - let where_clause = object_name_to_qualifier( - &sql_table_name, - self.options.enable_ident_normalization, - ); // Do a table lookup to verify the table exists let table_ref = self.object_name_to_table_reference(sql_table_name)?; @@ -1916,4 +2018,19 @@ impl SqlToRel<'_, S> { .get_table_source(tables_reference) .is_ok() } + + fn validate_transaction_kind( + &self, + kind: Option, + ) -> Result<()> { + match kind { + // BEGIN + None => Ok(()), + // BEGIN TRANSACTION + Some(BeginTransactionKind::Transaction) => Ok(()), + Some(BeginTransactionKind::Work) => { + not_impl_err!("Transaction kind not supported: {kind:?}") + } + } + } } diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index ad0b5f16b283..345d16adef29 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -24,6 +24,7 @@ use core::fmt; use sqlparser::ast; +use sqlparser::ast::helpers::attached_token::AttachedToken; #[derive(Clone)] pub(super) struct QueryBuilder { @@ -268,6 +269,7 @@ impl SelectBuilder { connect_by: None, window_before_qualify: false, 
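The pattern repeated throughout this upgrade is that sqlparser 0.53 attaches source locations to AST nodes, so nodes that DataFusion synthesizes get empty spans and tokens. A minimal sketch of the new constructor shape, mirroring the `Ident` construction used in the hunks above; illustrative only.

    use sqlparser::ast::Ident;
    use sqlparser::tokenizer::Span;

    fn main() {
        // sqlparser 0.53 adds a `span` field to `Ident`; synthesized identifiers use an empty span.
        let ident = Ident {
            value: "c1".to_string(),
            quote_style: None,
            span: Span::empty(),
        };
        assert_eq!(ident.value, "c1");
    }
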
prewhere: None, + select_token: AttachedToken::empty(), }) } fn create_empty() -> Self { @@ -469,6 +471,7 @@ impl TableRelationBuilder { version: self.version.clone(), partitions: self.partitions.clone(), with_ordinality: false, + json_path: None, }) } fn create_empty() -> Self { diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index ae387d441fa2..a82687533e31 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -18,15 +18,15 @@ use std::sync::Arc; use arrow_schema::TimeUnit; +use datafusion_common::Result; use datafusion_expr::Expr; use regex::Regex; +use sqlparser::tokenizer::Span; use sqlparser::{ ast::{self, BinaryOperator, Function, Ident, ObjectName, TimezoneInfo}, keywords::ALL_KEYWORDS, }; -use datafusion_common::Result; - use super::{utils::character_length_to_sql, utils::date_part_to_sql, Unparser}; /// `Dialect` to use for Unparsing @@ -288,6 +288,7 @@ impl PostgreSqlDialect { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, @@ -299,6 +300,7 @@ impl PostgreSqlDialect { over: None, within_group: vec![], parameters: ast::FunctionArguments::None, + uses_odbc_syntax: false, })) } } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 48dfc425ee63..f09de133b571 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -43,6 +43,8 @@ use datafusion_expr::{ expr::{Alias, Exists, InList, ScalarFunction, Sort, WindowFunction}, Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, TryCast, }; +use sqlparser::ast::helpers::attached_token::AttachedToken; +use sqlparser::tokenizer::Span; /// Convert a DataFusion [`Expr`] to [`ast::Expr`] /// @@ -233,6 +235,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, @@ -244,6 +247,7 @@ impl Unparser<'_> { over, within_group: vec![], parameters: ast::FunctionArguments::None, + uses_odbc_syntax: false, })) } Expr::SimilarTo(Like { @@ -278,6 +282,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: agg @@ -291,6 +296,7 @@ impl Unparser<'_> { over: None, within_group: vec![], parameters: ast::FunctionArguments::None, + uses_odbc_syntax: false, })) } Expr::ScalarSubquery(subq) => { @@ -404,12 +410,16 @@ impl Unparser<'_> { } // TODO: unparsing wildcard addition options Expr::Wildcard { qualifier, .. 
} => { + let attached_token = AttachedToken::empty(); if let Some(qualifier) = qualifier { let idents: Vec = qualifier.to_vec().into_iter().map(Ident::new).collect(); - Ok(ast::Expr::QualifiedWildcard(ObjectName(idents))) + Ok(ast::Expr::QualifiedWildcard( + ObjectName(idents), + attached_token, + )) } else { - Ok(ast::Expr::Wildcard) + Ok(ast::Expr::Wildcard(attached_token)) } } Expr::GroupingSet(grouping_set) => match grouping_set { @@ -480,6 +490,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, @@ -491,6 +502,7 @@ impl Unparser<'_> { over: None, within_group: vec![], parameters: ast::FunctionArguments::None, + uses_odbc_syntax: false, })) } @@ -709,6 +721,7 @@ impl Unparser<'_> { Ident { value: ident, quote_style, + span: Span::empty(), } } @@ -716,6 +729,7 @@ impl Unparser<'_> { Ident { value: str, quote_style: None, + span: Span::empty(), } } @@ -1481,6 +1495,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: "UNNEST".to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, @@ -1492,6 +1507,7 @@ impl Unparser<'_> { over: None, within_group: vec![], parameters: ast::FunctionArguments::None, + uses_odbc_syntax: false, })) } @@ -1672,7 +1688,7 @@ mod tests { let dummy_logical_plan = table_scan(Some("t"), &dummy_schema, None)? .project(vec![Expr::Wildcard { qualifier: None, - options: WildcardOptions::default(), + options: Box::new(WildcardOptions::default()), }])? .filter(col("a").eq(lit(1)))? .build()?; @@ -1864,10 +1880,7 @@ mod tests { (sum(col("a")), r#"sum(a)"#), ( count_udaf() - .call(vec![Expr::Wildcard { - qualifier: None, - options: WildcardOptions::default(), - }]) + .call(vec![wildcard()]) .distinct() .build() .unwrap(), @@ -1875,10 +1888,7 @@ mod tests { ), ( count_udaf() - .call(vec![Expr::Wildcard { - qualifier: None, - options: WildcardOptions::default(), - }]) + .call(vec![wildcard()]) .filter(lit(true)) .build() .unwrap(), diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index e9f9f486ea9a..f2d46a9f4cce 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -44,7 +44,7 @@ use datafusion_expr::{ expr::Alias, BinaryExpr, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan, LogicalPlanBuilder, Operator, Projection, SortExpr, TableScan, Unnest, }; -use sqlparser::ast::{self, Ident, SetExpr}; +use sqlparser::ast::{self, Ident, SetExpr, TableAliasColumnDef}; use std::sync::Arc; /// Convert a DataFusion [`LogicalPlan`] to [`ast::Statement`] @@ -1069,6 +1069,13 @@ impl Unparser<'_> { } fn new_table_alias(&self, alias: String, columns: Vec) -> ast::TableAlias { + let columns = columns + .into_iter() + .map(|ident| TableAliasColumnDef { + name: ident, + data_type: None, + }) + .collect(); ast::TableAlias { name: self.new_ident_quoted_if_needs(alias), columns, diff --git a/datafusion/sql/src/unparser/utils.rs b/datafusion/sql/src/unparser/utils.rs index 354a68f60964..3a7fa5ddcabb 100644 --- a/datafusion/sql/src/unparser/utils.rs +++ b/datafusion/sql/src/unparser/utils.rs @@ -17,6 +17,10 @@ use std::{cmp::Ordering, sync::Arc, vec}; +use super::{ + dialect::CharacterLengthStyle, dialect::DateFieldExtractStyle, + rewrite::TableAliasRewriter, Unparser, +}; use datafusion_common::{ internal_err, tree_node::{Transformed, 
TransformedResult, TreeNode}, @@ -29,11 +33,7 @@ use datafusion_expr::{ use indexmap::IndexSet; use sqlparser::ast; - -use super::{ - dialect::CharacterLengthStyle, dialect::DateFieldExtractStyle, - rewrite::TableAliasRewriter, Unparser, -}; +use sqlparser::tokenizer::Span; /// Recursively searches children of [LogicalPlan] to find an Aggregate node if exists /// prior to encountering a Join, TableScan, or a nested subquery (derived table factor). @@ -426,6 +426,7 @@ pub(crate) fn date_part_to_sql( name: ast::ObjectName(vec![ast::Ident { value: "strftime".to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, @@ -444,6 +445,7 @@ pub(crate) fn date_part_to_sql( over: None, within_group: vec![], parameters: ast::FunctionArguments::None, + uses_odbc_syntax: false, }))); } } diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 1f6b5f9852ec..476a933c72b7 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -483,10 +483,10 @@ set datafusion.catalog.information_schema = true; statement ok CREATE TABLE t AS SELECT 1::int as i; -statement error Error during planning: SHOW COLUMNS with WHERE or LIKE is not supported +statement error DataFusion error: This feature is not implemented: SHOW COLUMNS with WHERE or LIKE is not supported SHOW COLUMNS FROM t LIKE 'f'; -statement error Error during planning: SHOW COLUMNS with WHERE or LIKE is not supported +statement error DataFusion error: This feature is not implemented: SHOW COLUMNS with WHERE or LIKE is not supported SHOW COLUMNS FROM t WHERE column_name = 'bar'; query TTTTTT