From 6c8abb456cd8fc4f6df8433b176ec6331ab4d954 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sat, 18 May 2024 10:56:00 +0800 Subject: [PATCH 1/9] remove expr Signed-off-by: jayzhan211 --- .../core/src/datasource/listing/helpers.rs | 1 - datafusion/core/src/physical_planner.rs | 26 +--- datafusion/expr/src/expr.rs | 127 ------------------ datafusion/expr/src/expr_schema.rs | 96 ++++--------- datafusion/expr/src/tree_node.rs | 21 +-- datafusion/expr/src/utils.rs | 1 - .../optimizer/src/analyzer/type_coercion.rs | 1 - .../optimizer/src/optimize_projections/mod.rs | 23 ---- datafusion/optimizer/src/push_down_filter.rs | 1 - .../simplify_expressions/expr_simplifier.rs | 3 +- .../src/simplify_expressions/guarantees.rs | 5 +- datafusion/physical-expr/src/planner.rs | 25 +--- datafusion/proto/proto/datafusion.proto | 2 +- datafusion/proto/src/generated/pbjson.rs | 14 -- datafusion/proto/src/generated/prost.rs | 28 ++-- .../proto/src/logical_plan/from_proto.rs | 59 +------- datafusion/proto/src/logical_plan/to_proto.rs | 44 +----- datafusion/sql/src/unparser/expr.rs | 3 - .../sqllogictest/test_files/projection.slt | 17 +++ 19 files changed, 70 insertions(+), 427 deletions(-) diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index 0cffa0513171..5b8709009665 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -84,7 +84,6 @@ pub fn expr_applicable_for_cols(col_names: &[String], expr: &Expr) -> bool { | Expr::Exists { .. } | Expr::InSubquery(_) | Expr::ScalarSubquery(_) - | Expr::GetIndexedField { .. } | Expr::GroupingSet(_) | Expr::Case { .. } => Ok(TreeNodeRecursion::Continue), diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 406196a59146..597a03a52f21 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -83,8 +83,7 @@ use datafusion_common::{ use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ self, AggregateFunction, AggregateFunctionDefinition, Alias, Between, BinaryExpr, - Cast, GetFieldAccess, GetIndexedField, GroupingSet, InList, Like, TryCast, - WindowFunction, + Cast, GroupingSet, InList, Like, TryCast, WindowFunction, }; use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::expr_vec_fmt; @@ -216,29 +215,6 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { let expr = create_physical_name(expr, false)?; Ok(format!("{expr} IS NOT UNKNOWN")) } - Expr::GetIndexedField(GetIndexedField { expr: _, field }) => { - match field { - GetFieldAccess::NamedStructField { name: _ } => { - unreachable!( - "NamedStructField should have been rewritten in OperatorToFunction" - ) - } - GetFieldAccess::ListIndex { key: _ } => { - unreachable!( - "ListIndex should have been rewritten in OperatorToFunction" - ) - } - GetFieldAccess::ListRange { - start: _, - stop: _, - stride: _, - } => { - unreachable!( - "ListRange should have been rewritten in OperatorToFunction" - ) - } - }; - } Expr::ScalarFunction(fun) => fun.func.display_name(&fun.args), Expr::WindowFunction(WindowFunction { fun, diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index a0bd0086aac7..5e43c160ba0a 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -143,9 +143,6 @@ pub enum Expr { IsNotUnknown(Box), /// arithmetic negation of an expression, the operand must be of a signed numeric data type Negative(Box), - /// Returns the field of a [`arrow::array::ListArray`] or - /// [`arrow::array::StructArray`] by index or range - GetIndexedField(GetIndexedField), /// Whether an expression is between a given range. Between(Between), /// The CASE expression is similar to a series of nested if/else and there are two forms that @@ -888,7 +885,6 @@ impl Expr { Expr::Column(..) => "Column", Expr::OuterReferenceColumn(_, _) => "Outer", Expr::Exists { .. } => "Exists", - Expr::GetIndexedField { .. } => "GetIndexedField", Expr::GroupingSet(..) => "GroupingSet", Expr::InList { .. } => "InList", Expr::InSubquery(..) => "InSubquery", @@ -1180,91 +1176,6 @@ impl Expr { )) } - /// Return access to the named field. Example `expr["name"]` - /// - /// ## Access field "my_field" from column "c1" - /// - /// For example if column "c1" holds documents like this - /// - /// ```json - /// { - /// "my_field": 123.34, - /// "other_field": "Boston", - /// } - /// ``` - /// - /// You can access column "my_field" with - /// - /// ``` - /// # use datafusion_expr::{col}; - /// let expr = col("c1") - /// .field("my_field"); - /// assert_eq!(expr.display_name().unwrap(), "c1[my_field]"); - /// ``` - pub fn field(self, name: impl Into) -> Self { - Expr::GetIndexedField(GetIndexedField { - expr: Box::new(self), - field: GetFieldAccess::NamedStructField { - name: ScalarValue::from(name.into()), - }, - }) - } - - /// Return access to the element field. Example `expr["name"]` - /// - /// ## Example Access element 2 from column "c1" - /// - /// For example if column "c1" holds documents like this - /// - /// ```json - /// [10, 20, 30, 40] - /// ``` - /// - /// You can access the value "30" with - /// - /// ``` - /// # use datafusion_expr::{lit, col, Expr}; - /// let expr = col("c1") - /// .index(lit(3)); - /// assert_eq!(expr.display_name().unwrap(), "c1[Int32(3)]"); - /// ``` - pub fn index(self, key: Expr) -> Self { - Expr::GetIndexedField(GetIndexedField { - expr: Box::new(self), - field: GetFieldAccess::ListIndex { key: Box::new(key) }, - }) - } - - /// Return elements between `1` based `start` and `stop`, for - /// example `expr[1:3]` - /// - /// ## Example: Access element 2, 3, 4 from column "c1" - /// - /// For example if column "c1" holds documents like this - /// - /// ```json - /// [10, 20, 30, 40] - /// ``` - /// - /// You can access the value `[20, 30, 40]` with - /// - /// ``` - /// # use datafusion_expr::{lit, col}; - /// let expr = col("c1") - /// .range(lit(2), lit(4)); - /// assert_eq!(expr.display_name().unwrap(), "c1[Int32(2):Int32(4):Int64(1)]"); - /// ``` - pub fn range(self, start: Expr, stop: Expr) -> Self { - Expr::GetIndexedField(GetIndexedField { - expr: Box::new(self), - field: GetFieldAccess::ListRange { - start: Box::new(start), - stop: Box::new(stop), - stride: Box::new(Expr::Literal(ScalarValue::Int64(Some(1)))), - }, - }) - } - #[deprecated(since = "39.0.0", note = "use try_as_col instead")] pub fn try_into_col(&self) -> Result { match self { @@ -1362,7 +1273,6 @@ impl Expr { | Expr::Cast(..) | Expr::Column(..) | Expr::Exists(..) - | Expr::GetIndexedField(..) | Expr::GroupingSet(..) | Expr::InList(..) | Expr::InSubquery(..) @@ -1611,19 +1521,6 @@ impl fmt::Display for Expr { Some(qualifier) => write!(f, "{qualifier}.*"), None => write!(f, "*"), }, - Expr::GetIndexedField(GetIndexedField { field, expr }) => match field { - GetFieldAccess::NamedStructField { name } => { - write!(f, "({expr})[{name}]") - } - GetFieldAccess::ListIndex { key } => write!(f, "({expr})[{key}]"), - GetFieldAccess::ListRange { - start, - stop, - stride, - } => { - write!(f, "({expr})[{start}:{stop}:{stride}]") - } - }, Expr::GroupingSet(grouping_sets) => match grouping_sets { GroupingSet::Rollup(exprs) => { // ROLLUP (c0, c1, c2) @@ -1828,30 +1725,6 @@ fn write_name(w: &mut W, e: &Expr) -> Result<()> { Expr::ScalarSubquery(subquery) => { w.write_str(subquery.subquery.schema().field(0).name().as_str())?; } - Expr::GetIndexedField(GetIndexedField { expr, field }) => { - write_name(w, expr)?; - match field { - GetFieldAccess::NamedStructField { name } => write!(w, "[{name}]")?, - GetFieldAccess::ListIndex { key } => { - w.write_str("[")?; - write_name(w, key)?; - w.write_str("]")?; - } - GetFieldAccess::ListRange { - start, - stop, - stride, - } => { - w.write_str("[")?; - write_name(w, start)?; - w.write_str(":")?; - write_name(w, stop)?; - w.write_str(":")?; - write_name(w, stride)?; - w.write_str("]")?; - } - } - } Expr::Unnest(Unnest { expr }) => { w.write_str("unnest(")?; write_name(w, expr)?; diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 2c08dbe0429a..270b94417231 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -17,11 +17,9 @@ use super::{Between, Expr, Like}; use crate::expr::{ - AggregateFunction, AggregateFunctionDefinition, Alias, BinaryExpr, Cast, - GetFieldAccess, GetIndexedField, InList, InSubquery, Placeholder, ScalarFunction, - Sort, TryCast, Unnest, WindowFunction, + AggregateFunction, AggregateFunctionDefinition, Alias, BinaryExpr, Cast, InList, + InSubquery, Placeholder, ScalarFunction, Sort, TryCast, Unnest, WindowFunction, }; -use crate::field_util::GetFieldAccessSchema; use crate::type_coercion::binary::get_result_type; use crate::type_coercion::functions::data_types_with_scalar_udf; use crate::{utils, LogicalPlan, Projection, Subquery}; @@ -214,9 +212,6 @@ impl ExprSchemable for Expr { // grouping sets do not really have a type and do not appear in projections Ok(DataType::Null) } - Expr::GetIndexedField(GetIndexedField { expr, field }) => { - field_for_index(expr, field, schema).map(|x| x.data_type().clone()) - } } } @@ -320,16 +315,6 @@ impl ExprSchemable for Expr { Expr::Wildcard { .. } => internal_err!( "Wildcard expressions are not valid in a logical query plan" ), - Expr::GetIndexedField(GetIndexedField { expr, field }) => { - // If schema is nested, check if parent is nullable - // if it is, return early - if let Expr::Column(col) = expr.as_ref() { - if input_schema.nullable(col)? { - return Ok(true); - } - } - field_for_index(expr, field, input_schema).map(|x| x.is_nullable()) - } Expr::GroupingSet(_) => { // grouping sets do not really have the concept of nullable and do not appear // in projections @@ -473,33 +458,6 @@ impl ExprSchemable for Expr { } } -/// return the schema [`Field`] for the type referenced by `get_indexed_field` -fn field_for_index( - expr: &Expr, - field: &GetFieldAccess, - schema: &dyn ExprSchema, -) -> Result { - let expr_dt = expr.get_type(schema)?; - match field { - GetFieldAccess::NamedStructField { name } => { - GetFieldAccessSchema::NamedStructField { name: name.clone() } - } - GetFieldAccess::ListIndex { key } => GetFieldAccessSchema::ListIndex { - key_dt: key.get_type(schema)?, - }, - GetFieldAccess::ListRange { - start, - stop, - stride, - } => GetFieldAccessSchema::ListRange { - start_dt: start.get_type(schema)?, - stop_dt: stop.get_type(schema)?, - stride_dt: stride.get_type(schema)?, - }, - } - .get_accessed_field(&expr_dt) -} - /// cast subquery in InSubquery/ScalarSubquery to a given type. pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> Result { if subquery.subquery.schema().field(0).data_type() == cast_to_type { @@ -536,7 +494,7 @@ pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> Result vec![expr.as_ref()], - Expr::GetIndexedField(GetIndexedField { expr, field }) => { - let expr = expr.as_ref(); - match field { - GetFieldAccess::ListIndex {key} => vec![key.as_ref(), expr], - GetFieldAccess::ListRange {start, stop, stride} => { - vec![start.as_ref(), stop.as_ref(),stride.as_ref(), expr] - } - GetFieldAccess::NamedStructField { .. } => vec![expr], - } - } Expr::GroupingSet(GroupingSet::Rollup(exprs)) | Expr::GroupingSet(GroupingSet::Cube(exprs)) => exprs.iter().collect(), Expr::ScalarFunction (ScalarFunction{ args, .. } ) => { @@ -374,11 +364,6 @@ impl TreeNode for Expr { .update_data(|(new_expr, new_list)| { Expr::InList(InList::new(new_expr, new_list, negated)) }), - Expr::GetIndexedField(GetIndexedField { expr, field }) => { - transform_box(expr, &mut f)?.update_data(|be| { - Expr::GetIndexedField(GetIndexedField::new(be, field)) - }) - } }) } } diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 581e299cf993..0d25a3443f47 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -303,7 +303,6 @@ pub fn expr_to_columns(expr: &Expr, accum: &mut HashSet) -> Result<()> { | Expr::InSubquery(_) | Expr::ScalarSubquery(_) | Expr::Wildcard { .. } - | Expr::GetIndexedField { .. } | Expr::Placeholder(_) | Expr::OuterReferenceColumn { .. } => {} } diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 0f1f3ba7e729..3d08bd6c7e42 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -450,7 +450,6 @@ impl<'a> TreeNodeRewriter for TypeCoercionRewriter<'a> { | Expr::IsNotNull(_) | Expr::IsNull(_) | Expr::Negative(_) - | Expr::GetIndexedField(_) | Expr::Cast(_) | Expr::TryCast(_) | Expr::Sort(_) diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 5a9705381d7f..a13ae4bd4192 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -1037,29 +1037,6 @@ mod tests { assert_optimized_plan_equal(plan, expected) } - #[test] - fn test_struct_field_push_down() -> Result<()> { - let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int64, false), - Field::new_struct( - "s", - vec![ - Field::new("x", DataType::Int64, false), - Field::new("y", DataType::Int64, false), - ], - false, - ), - ])); - - let table_scan = table_scan(TableReference::none(), &schema, None)?.build()?; - let plan = LogicalPlanBuilder::from(table_scan) - .project(vec![col("s").field("x")])? - .build()?; - let expected = "Projection: (?table?.s)[x]\ - \n TableScan: ?table? projection=[s]"; - assert_optimized_plan_equal(plan, expected) - } - #[test] fn test_neg_push_down() -> Result<()> { let table_scan = test_table_scan()?; diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index b684b5490342..c88913c02b98 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -237,7 +237,6 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> Result { | Expr::IsNotFalse(_) | Expr::IsNotUnknown(_) | Expr::Negative(_) - | Expr::GetIndexedField(_) | Expr::Between(_) | Expr::Case(_) | Expr::Cast(_) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 455d659fb25e..25504e5c78e7 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -616,8 +616,7 @@ impl<'a> ConstEvaluator<'a> { | Expr::Case(_) | Expr::Cast { .. } | Expr::TryCast { .. } - | Expr::InList { .. } - | Expr::GetIndexedField { .. } => true, + | Expr::InList { .. } => true, } } diff --git a/datafusion/optimizer/src/simplify_expressions/guarantees.rs b/datafusion/optimizer/src/simplify_expressions/guarantees.rs index 9d8e3fecebc1..aa1861e60a63 100644 --- a/datafusion/optimizer/src/simplify_expressions/guarantees.rs +++ b/datafusion/optimizer/src/simplify_expressions/guarantees.rs @@ -271,9 +271,9 @@ mod tests { values: Interval::make(Some(1_i32), Some(3_i32)).unwrap(), }, ), - // s.y ∈ [1, 3] (not null) + // s ∈ [1, 3] (not null) ( - col("s").field("y"), + col("s"), NullableInterval::NotNull { values: Interval::make(Some(1_i32), Some(3_i32)).unwrap(), }, @@ -285,7 +285,6 @@ mod tests { // (original_expr, expected_simplification) let simplified_cases = &[ (col("x").lt(lit(0)), false), - (col("s").field("y").lt(lit(0)), false), (col("x").lt_eq(lit(3)), true), (col("x").gt(lit(3)), false), (col("x").gt(lit(0)), true), diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index ab57a8e80056..9e8561eb68c5 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -20,16 +20,13 @@ use std::sync::Arc; use arrow::datatypes::Schema; use datafusion_common::{ - exec_err, internal_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue, + exec_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue, }; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::expr::{Alias, Cast, InList, ScalarFunction}; use datafusion_expr::var_provider::is_system_variables; use datafusion_expr::var_provider::VarType; -use datafusion_expr::{ - binary_expr, Between, BinaryExpr, Expr, GetFieldAccess, GetIndexedField, Like, - Operator, TryCast, -}; +use datafusion_expr::{binary_expr, Between, BinaryExpr, Expr, Like, Operator, TryCast}; use crate::scalar_function; use crate::{ @@ -287,24 +284,6 @@ pub fn create_physical_expr( input_dfschema, execution_props, )?), - Expr::GetIndexedField(GetIndexedField { expr: _, field }) => match field { - GetFieldAccess::NamedStructField { name: _ } => { - internal_err!( - "NamedStructField should be rewritten in OperatorToFunction" - ) - } - GetFieldAccess::ListIndex { key: _ } => { - internal_err!("ListIndex should be rewritten in OperatorToFunction") - } - GetFieldAccess::ListRange { - start: _, - stop: _, - stride: _, - } => { - internal_err!("ListRange should be rewritten in OperatorToFunction") - } - }, - Expr::ScalarFunction(ScalarFunction { func, args }) => { let physical_args = create_physical_exprs(args, input_dfschema, execution_props)?; diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 8d69b0bad5ed..1c11e8e50df7 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -397,7 +397,7 @@ message LogicalExprNode { // Scalar UDF expressions ScalarUDFExprNode scalar_udf_expr = 20; - GetIndexedField get_indexed_field = 21; + // GetIndexedField get_indexed_field = 21; GroupingSetNode grouping_set = 22; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 8df0aeb851df..77ba0808fb77 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -13763,9 +13763,6 @@ impl serde::Serialize for LogicalExprNode { logical_expr_node::ExprType::ScalarUdfExpr(v) => { struct_ser.serialize_field("scalarUdfExpr", v)?; } - logical_expr_node::ExprType::GetIndexedField(v) => { - struct_ser.serialize_field("getIndexedField", v)?; - } logical_expr_node::ExprType::GroupingSet(v) => { struct_ser.serialize_field("groupingSet", v)?; } @@ -13850,8 +13847,6 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { "aggregateUdfExpr", "scalar_udf_expr", "scalarUdfExpr", - "get_indexed_field", - "getIndexedField", "grouping_set", "groupingSet", "cube", @@ -13897,7 +13892,6 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { WindowExpr, AggregateUdfExpr, ScalarUdfExpr, - GetIndexedField, GroupingSet, Cube, Rollup, @@ -13952,7 +13946,6 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { "windowExpr" | "window_expr" => Ok(GeneratedField::WindowExpr), "aggregateUdfExpr" | "aggregate_udf_expr" => Ok(GeneratedField::AggregateUdfExpr), "scalarUdfExpr" | "scalar_udf_expr" => Ok(GeneratedField::ScalarUdfExpr), - "getIndexedField" | "get_indexed_field" => Ok(GeneratedField::GetIndexedField), "groupingSet" | "grouping_set" => Ok(GeneratedField::GroupingSet), "cube" => Ok(GeneratedField::Cube), "rollup" => Ok(GeneratedField::Rollup), @@ -14120,13 +14113,6 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { return Err(serde::de::Error::duplicate_field("scalarUdfExpr")); } expr_type__ = map_.next_value::<::std::option::Option<_>>()?.map(logical_expr_node::ExprType::ScalarUdfExpr) -; - } - GeneratedField::GetIndexedField => { - if expr_type__.is_some() { - return Err(serde::de::Error::duplicate_field("getIndexedField")); - } - expr_type__ = map_.next_value::<::std::option::Option<_>>()?.map(logical_expr_node::ExprType::GetIndexedField) ; } GeneratedField::GroupingSet => { diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index b6b7687e6c00..a175987f1994 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -566,7 +566,7 @@ pub struct SubqueryAliasNode { pub struct LogicalExprNode { #[prost( oneof = "logical_expr_node::ExprType", - tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35" + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35" )] pub expr_type: ::core::option::Option, } @@ -622,8 +622,6 @@ pub mod logical_expr_node { /// Scalar UDF expressions #[prost(message, tag = "20")] ScalarUdfExpr(super::ScalarUdfExprNode), - #[prost(message, tag = "21")] - GetIndexedField(::prost::alloc::boxed::Box), #[prost(message, tag = "22")] GroupingSet(super::GroupingSetNode), #[prost(message, tag = "23")] @@ -701,24 +699,24 @@ pub struct NamedStructField { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ListIndex { - #[prost(message, optional, boxed, tag = "1")] - pub key: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag = "1")] + pub key: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ListRange { - #[prost(message, optional, boxed, tag = "1")] - pub start: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag = "2")] - pub stop: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag = "3")] - pub stride: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag = "1")] + pub start: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub stop: ::core::option::Option, + #[prost(message, optional, tag = "3")] + pub stride: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct GetIndexedField { - #[prost(message, optional, boxed, tag = "1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag = "1")] + pub expr: ::core::option::Option, #[prost(oneof = "get_indexed_field::Field", tags = "2, 3, 4")] pub field: ::core::option::Option, } @@ -730,9 +728,9 @@ pub mod get_indexed_field { #[prost(message, tag = "2")] NamedStructField(super::NamedStructField), #[prost(message, tag = "3")] - ListIndex(::prost::alloc::boxed::Box), + ListIndex(super::ListIndex), #[prost(message, tag = "4")] - ListRange(::prost::alloc::boxed::Box), + ListRange(super::ListRange), } } #[allow(clippy::derive_partial_eq_without_eq)] diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 5df8eb59e173..b6f72f6773a2 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -42,7 +42,7 @@ use datafusion_expr::{ expr::{self, InList, Sort, WindowFunction}, logical_plan::{PlanType, StringifiedPlan}, AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, Case, Cast, Expr, - GetFieldAccess, GetIndexedField, GroupingSet, + GroupingSet, GroupingSet::GroupingSets, JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, @@ -924,63 +924,6 @@ pub fn parse_expr( }) .expect("Binary expression could not be reduced to a single expression.")) } - ExprType::GetIndexedField(get_indexed_field) => { - let expr = parse_required_expr( - get_indexed_field.expr.as_deref(), - registry, - "expr", - codec, - )?; - let field = match &get_indexed_field.field { - Some(protobuf::get_indexed_field::Field::NamedStructField( - named_struct_field, - )) => GetFieldAccess::NamedStructField { - name: named_struct_field - .name - .as_ref() - .ok_or_else(|| Error::required("value"))? - .try_into()?, - }, - Some(protobuf::get_indexed_field::Field::ListIndex(list_index)) => { - GetFieldAccess::ListIndex { - key: Box::new(parse_required_expr( - list_index.key.as_deref(), - registry, - "key", - codec, - )?), - } - } - Some(protobuf::get_indexed_field::Field::ListRange(list_range)) => { - GetFieldAccess::ListRange { - start: Box::new(parse_required_expr( - list_range.start.as_deref(), - registry, - "start", - codec, - )?), - stop: Box::new(parse_required_expr( - list_range.stop.as_deref(), - registry, - "stop", - codec, - )?), - stride: Box::new(parse_required_expr( - list_range.stride.as_deref(), - registry, - "stride", - codec, - )?), - } - } - None => return Err(proto_error("Field must not be None")), - }; - - Ok(Expr::GetIndexedField(GetIndexedField::new( - Box::new(expr), - field, - ))) - } ExprType::Column(column) => Ok(Expr::Column(column.into())), ExprType::Literal(literal) => { let scalar_value: ScalarValue = literal.try_into()?; diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 52482c890ac9..91f7411e911a 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -35,9 +35,8 @@ use datafusion_common::{ Column, Constraint, Constraints, DFSchema, DFSchemaRef, ScalarValue, TableReference, }; use datafusion_expr::expr::{ - self, AggregateFunctionDefinition, Alias, Between, BinaryExpr, Cast, GetFieldAccess, - GetIndexedField, GroupingSet, InList, Like, Placeholder, ScalarFunction, Sort, - Unnest, + self, AggregateFunctionDefinition, Alias, Between, BinaryExpr, Cast, GroupingSet, + InList, Like, Placeholder, ScalarFunction, Sort, Unnest, }; use datafusion_expr::{ logical_plan::PlanType, logical_plan::StringifiedPlan, AggregateFunction, @@ -957,45 +956,6 @@ pub fn serialize_expr( // see discussion in https://github.com/apache/datafusion/issues/2565 return Err(Error::General("Proto serialization error: Expr::ScalarSubquery(_) | Expr::InSubquery(_) | Expr::Exists { .. } | Exp:OuterReferenceColumn not supported".to_string())); } - Expr::GetIndexedField(GetIndexedField { expr, field }) => { - let field = match field { - GetFieldAccess::NamedStructField { name } => { - protobuf::get_indexed_field::Field::NamedStructField( - protobuf::NamedStructField { - name: Some(name.try_into()?), - }, - ) - } - GetFieldAccess::ListIndex { key } => { - protobuf::get_indexed_field::Field::ListIndex(Box::new( - protobuf::ListIndex { - key: Some(Box::new(serialize_expr(key.as_ref(), codec)?)), - }, - )) - } - GetFieldAccess::ListRange { - start, - stop, - stride, - } => protobuf::get_indexed_field::Field::ListRange(Box::new( - protobuf::ListRange { - start: Some(Box::new(serialize_expr(start.as_ref(), codec)?)), - stop: Some(Box::new(serialize_expr(stop.as_ref(), codec)?)), - stride: Some(Box::new(serialize_expr(stride.as_ref(), codec)?)), - }, - )), - }; - - protobuf::LogicalExprNode { - expr_type: Some(ExprType::GetIndexedField(Box::new( - protobuf::GetIndexedField { - expr: Some(Box::new(serialize_expr(expr.as_ref(), codec)?)), - field: Some(field), - }, - ))), - } - } - Expr::GroupingSet(GroupingSet::Cube(exprs)) => protobuf::LogicalExprNode { expr_type: Some(ExprType::Cube(CubeNode { expr: serialize_exprs(exprs, codec)?, diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index a50bb7a69823..a818fefa364a 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -411,9 +411,6 @@ impl Unparser<'_> { ) }) } - Expr::GetIndexedField(_) => { - not_impl_err!("Unsupported Expr conversion: {expr:?}") - } Expr::TryCast(TryCast { expr, data_type }) => { let inner_expr = self.expr_to_sql(expr)?; Ok(ast::Expr::TryCast { diff --git a/datafusion/sqllogictest/test_files/projection.slt b/datafusion/sqllogictest/test_files/projection.slt index 843ab71091f5..3c8855e34712 100644 --- a/datafusion/sqllogictest/test_files/projection.slt +++ b/datafusion/sqllogictest/test_files/projection.slt @@ -233,3 +233,20 @@ DROP TABLE test; statement ok DROP TABLE test_simple; + +## projection push down with Struct +statement ok +create table t as values (struct(1)); + +query TT +explain select column1.c0 from t; +---- +logical_plan +01)Projection: get_field(t.column1, Utf8("c0")) +02)--TableScan: t projection=[column1] +physical_plan +01)ProjectionExec: expr=[get_field(column1@0, c0) as t.column1[c0]] +02)--MemoryExec: partitions=1, partition_sizes=[1] + +statement ok +drop table t; From 3aff84bbe3aef6773a6622faf285e641362e1668 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sat, 18 May 2024 13:51:42 +0800 Subject: [PATCH 2/9] add expr extension Signed-off-by: jayzhan211 --- datafusion/core/tests/expr_api/mod.rs | 10 ++-- datafusion/functions-array/src/expr_ext.rs | 61 ++++++++++++++++++++++ datafusion/functions-array/src/lib.rs | 1 + datafusion/functions/src/core/expr_ext.rs | 46 ++++++++++++++++ datafusion/functions/src/core/mod.rs | 1 + 5 files changed, 115 insertions(+), 4 deletions(-) create mode 100644 datafusion/functions-array/src/expr_ext.rs create mode 100644 datafusion/functions/src/core/expr_ext.rs diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs index d7e839824b3b..00c589d15340 100644 --- a/datafusion/core/tests/expr_api/mod.rs +++ b/datafusion/core/tests/expr_api/mod.rs @@ -21,6 +21,8 @@ use arrow_array::{ArrayRef, RecordBatch, StringArray, StructArray}; use arrow_schema::{DataType, Field}; use datafusion::prelude::*; use datafusion_common::DFSchema; +use datafusion_functions::core::expr_ext::FieldAccessor; +use datafusion_functions_array::expr_ext::{IndexAccessor, SliceAccessor}; /// Tests of using and evaluating `Expr`s outside the context of a LogicalPlan use std::sync::{Arc, OnceLock}; @@ -61,7 +63,7 @@ fn test_eq_with_coercion() { #[test] fn test_get_field() { evaluate_expr_test( - get_field(col("props"), lit("a")), + col("props").field(lit("a")), vec![ "+------------+", "| expr |", @@ -77,7 +79,7 @@ fn test_get_field() { #[test] fn test_nested_get_field() { evaluate_expr_test( - get_field(col("props"), lit("a")) + col("props").field(lit("a")) .eq(lit("2021-02-02")) .or(col("id").eq(lit(1))), vec![ @@ -95,7 +97,7 @@ fn test_nested_get_field() { #[test] fn test_list() { evaluate_expr_test( - array_element(col("list"), lit(1i64)), + col("list").index(lit(1i64)), vec![ "+------+", "| expr |", "+------+", "| one |", "| two |", "| five |", "+------+", @@ -106,7 +108,7 @@ fn test_list() { #[test] fn test_list_range() { evaluate_expr_test( - array_slice(col("list"), lit(1i64), lit(2i64), None), + col("list").range(lit(1i64), lit(2i64)), vec![ "+--------------+", "| expr |", diff --git a/datafusion/functions-array/src/expr_ext.rs b/datafusion/functions-array/src/expr_ext.rs new file mode 100644 index 000000000000..a4432a939c7d --- /dev/null +++ b/datafusion/functions-array/src/expr_ext.rs @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::Expr; + +use crate::extract::{array_element, array_slice}; + +pub trait IndexAccessor { + fn index(self, key: Expr) -> Expr; +} + +impl IndexAccessor for Expr { + fn index(self, key: Expr) -> Expr { + array_element(self, key) + } +} + +pub trait SliceAccessor { + fn range(self, start: Expr, stop: Expr) -> Expr; +} + +impl SliceAccessor for Expr { + fn range(self, start: Expr, stop: Expr) -> Expr { + array_slice(self, start, stop, None) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use datafusion_expr::{col, lit}; + + #[test] + fn test_index() { + let expr1 = col("a").index(lit(1)); + let expr2 = array_element(col("a"), lit(1)); + assert_eq!(expr1, expr2); + } + + #[test] + fn test_range() { + let expr1 = col("a").range(lit(1), lit(2)); + let expr2 = array_slice(col("a"), lit(1), lit(2), None); + assert_eq!(expr1, expr2); + } +} diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-array/src/lib.rs index 5914736773b7..93be8bd79063 100644 --- a/datafusion/functions-array/src/lib.rs +++ b/datafusion/functions-array/src/lib.rs @@ -34,6 +34,7 @@ pub mod concat; pub mod dimension; pub mod empty; pub mod except; +pub mod expr_ext; pub mod extract; pub mod flatten; pub mod length; diff --git a/datafusion/functions/src/core/expr_ext.rs b/datafusion/functions/src/core/expr_ext.rs new file mode 100644 index 000000000000..c45d21d34a9f --- /dev/null +++ b/datafusion/functions/src/core/expr_ext.rs @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Extension methods for Expr. + +use datafusion_expr::Expr; + +use super::expr_fn::get_field; + +pub trait FieldAccessor { + fn field(self, field: Expr) -> Expr; +} + +impl FieldAccessor for Expr { + fn field(self, field: Expr) -> Expr { + get_field(self, field) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use datafusion_expr::col; + + #[test] + fn test_field() { + let expr1 = col("a").field(col("b")); + let expr2 = get_field(col("a"), col("b")); + assert_eq!(expr1, expr2); + } +} diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index d60e6017ddcb..6859379fbbf9 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -23,6 +23,7 @@ use std::sync::Arc; pub mod arrow_cast; pub mod arrowtypeof; pub mod coalesce; +pub mod expr_ext; pub mod getfield; pub mod named_struct; pub mod nullif; From 5bf1d828ac5498c99995089e34e3ba53be684347 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sat, 18 May 2024 13:54:43 +0800 Subject: [PATCH 3/9] doc Signed-off-by: jayzhan211 --- datafusion/functions-array/src/expr_ext.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion/functions-array/src/expr_ext.rs b/datafusion/functions-array/src/expr_ext.rs index a4432a939c7d..2ed66fbd5802 100644 --- a/datafusion/functions-array/src/expr_ext.rs +++ b/datafusion/functions-array/src/expr_ext.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Extension methods for Expr. + use datafusion_expr::Expr; use crate::extract::{array_element, array_slice}; From db6b48a56d3154688690015aa4ae4ecba65e804b Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sat, 18 May 2024 14:10:39 +0800 Subject: [PATCH 4/9] move test that has struct Signed-off-by: jayzhan211 --- datafusion/core/tests/expr_api/mod.rs | 5 +- .../core/tests/optimizer_integration.rs | 128 +++++++++++++++++- datafusion/expr/src/expr_schema.rs | 27 +--- datafusion/functions/src/core/expr_ext.rs | 12 +- .../src/simplify_expressions/guarantees.rs | 2 +- .../optimizer/src/simplify_expressions/mod.rs | 3 + datafusion/sqllogictest/test_files/test1.slt | 15 ++ 7 files changed, 155 insertions(+), 37 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/test1.slt diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs index 00c589d15340..b74e572e9622 100644 --- a/datafusion/core/tests/expr_api/mod.rs +++ b/datafusion/core/tests/expr_api/mod.rs @@ -63,7 +63,7 @@ fn test_eq_with_coercion() { #[test] fn test_get_field() { evaluate_expr_test( - col("props").field(lit("a")), + col("props").field("a"), vec![ "+------------+", "| expr |", @@ -79,7 +79,8 @@ fn test_get_field() { #[test] fn test_nested_get_field() { evaluate_expr_test( - col("props").field(lit("a")) + col("props") + .field("a") .eq(lit("2021-02-02")) .or(col("id").eq(lit(1))), vec![ diff --git a/datafusion/core/tests/optimizer_integration.rs b/datafusion/core/tests/optimizer_integration.rs index 8acf8618c738..e75ed3a8d6d7 100644 --- a/datafusion/core/tests/optimizer_integration.rs +++ b/datafusion/core/tests/optimizer_integration.rs @@ -23,11 +23,18 @@ use std::collections::HashMap; use std::sync::Arc; use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; +use arrow_schema::{Fields, SchemaBuilder}; use datafusion_common::config::ConfigOptions; -use datafusion_common::{plan_err, Result}; -use datafusion_expr::{AggregateUDF, LogicalPlan, ScalarUDF, TableSource, WindowUDF}; +use datafusion_common::tree_node::{TransformedResult, TreeNode}; +use datafusion_common::{plan_err, DFSchema, Result, ScalarValue}; +use datafusion_expr::interval_arithmetic::{Interval, NullableInterval}; +use datafusion_expr::{ + col, lit, AggregateUDF, BinaryExpr, Expr, ExprSchemable, LogicalPlan, Operator, ScalarUDF, TableSource, WindowUDF +}; +use datafusion_functions::core::expr_ext::FieldAccessor; use datafusion_optimizer::analyzer::Analyzer; use datafusion_optimizer::optimizer::Optimizer; +use datafusion_optimizer::simplify_expressions::GuaranteeRewriter; use datafusion_optimizer::{OptimizerConfig, OptimizerContext}; use datafusion_sql::planner::{ContextProvider, SqlToRel}; use datafusion_sql::sqlparser::ast::Statement; @@ -233,3 +240,120 @@ impl TableSource for MyTableSource { self.schema.clone() } } + +#[test] +fn test_nested_schema_nullability() { + let mut builder = SchemaBuilder::new(); + builder.push(Field::new("foo", DataType::Int32, true)); + builder.push(Field::new( + "parent", + DataType::Struct(Fields::from(vec![Field::new( + "child", + DataType::Int64, + false, + )])), + true, + )); + let schema = builder.finish(); + + let dfschema = DFSchema::from_field_specific_qualified_schema( + vec![Some("table_name".into()), None], + &Arc::new(schema), + ) + .unwrap(); + + let expr = col("parent").field("child"); + assert!(expr.nullable(&dfschema).unwrap()); +} + +#[test] +fn test_inequalities_non_null_bounded() { + let guarantees = vec![ + // x ∈ [1, 3] (not null) + ( + col("x"), + NullableInterval::NotNull { + values: Interval::make(Some(1_i32), Some(3_i32)).unwrap(), + }, + ), + // s.y ∈ [1, 3] (not null) + ( + col("s").field("y"), + NullableInterval::NotNull { + values: Interval::make(Some(1_i32), Some(3_i32)).unwrap(), + }, + ), + ]; + + let mut rewriter = GuaranteeRewriter::new(guarantees.iter()); + + // (original_expr, expected_simplification) + let simplified_cases = &[ + (col("x").lt(lit(0)), false), + (col("s").field("y").lt(lit(0)), false), + (col("x").lt_eq(lit(3)), true), + (col("x").gt(lit(3)), false), + (col("x").gt(lit(0)), true), + (col("x").eq(lit(0)), false), + (col("x").not_eq(lit(0)), true), + (col("x").between(lit(0), lit(5)), true), + (col("x").between(lit(5), lit(10)), false), + (col("x").not_between(lit(0), lit(5)), false), + (col("x").not_between(lit(5), lit(10)), true), + ( + Expr::BinaryExpr(BinaryExpr { + left: Box::new(col("x")), + op: Operator::IsDistinctFrom, + right: Box::new(lit(ScalarValue::Null)), + }), + true, + ), + ( + Expr::BinaryExpr(BinaryExpr { + left: Box::new(col("x")), + op: Operator::IsDistinctFrom, + right: Box::new(lit(5)), + }), + true, + ), + ]; + + validate_simplified_cases(&mut rewriter, simplified_cases); + + let unchanged_cases = &[ + col("x").gt(lit(2)), + col("x").lt_eq(lit(2)), + col("x").eq(lit(2)), + col("x").not_eq(lit(2)), + col("x").between(lit(3), lit(5)), + col("x").not_between(lit(3), lit(10)), + ]; + + validate_unchanged_cases(&mut rewriter, unchanged_cases); +} + +fn validate_simplified_cases(rewriter: &mut GuaranteeRewriter, cases: &[(Expr, T)]) +where + ScalarValue: From, + T: Clone, +{ + for (expr, expected_value) in cases { + let output = expr.clone().rewrite(rewriter).data().unwrap(); + let expected = lit(ScalarValue::from(expected_value.clone())); + assert_eq!( + output, expected, + "{} simplified to {}, but expected {}", + expr, output, expected + ); + } +} +fn validate_unchanged_cases(rewriter: &mut GuaranteeRewriter, cases: &[Expr]) { + for expr in cases { + let output = expr.clone().rewrite(rewriter).data().unwrap(); + assert_eq!( + &output, expr, + "{} was simplified to {}, but expected it to be unchanged", + expr, output + ); + } +} \ No newline at end of file diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 270b94417231..8b7f30d245b4 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -494,7 +494,7 @@ pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> Result Expr; + fn field(self, name: impl Into) -> Expr; } impl FieldAccessor for Expr { - fn field(self, field: Expr) -> Expr { - get_field(self, field) + fn field(self, name: impl Into) -> Expr { + get_field(self, lit(name.into())) } } @@ -39,8 +39,8 @@ mod tests { #[test] fn test_field() { - let expr1 = col("a").field(col("b")); - let expr2 = get_field(col("a"), col("b")); + let expr1 = col("a").field("b"); + let expr2 = get_field(col("a"), lit("b")); assert_eq!(expr1, expr2); } } diff --git a/datafusion/optimizer/src/simplify_expressions/guarantees.rs b/datafusion/optimizer/src/simplify_expressions/guarantees.rs index aa1861e60a63..8f7f8004d182 100644 --- a/datafusion/optimizer/src/simplify_expressions/guarantees.rs +++ b/datafusion/optimizer/src/simplify_expressions/guarantees.rs @@ -39,7 +39,7 @@ use datafusion_expr::{expr::InList, lit, Between, BinaryExpr, Expr}; /// See a full example in [`ExprSimplifier::with_guarantees()`]. /// /// [`ExprSimplifier::with_guarantees()`]: crate::simplify_expressions::expr_simplifier::ExprSimplifier::with_guarantees -pub(crate) struct GuaranteeRewriter<'a> { +pub struct GuaranteeRewriter<'a> { guarantees: HashMap<&'a Expr, &'a NullableInterval>, } diff --git a/datafusion/optimizer/src/simplify_expressions/mod.rs b/datafusion/optimizer/src/simplify_expressions/mod.rs index d0399fef07e6..46c066c11c0f 100644 --- a/datafusion/optimizer/src/simplify_expressions/mod.rs +++ b/datafusion/optimizer/src/simplify_expressions/mod.rs @@ -30,3 +30,6 @@ pub use datafusion_expr::simplify::{SimplifyContext, SimplifyInfo}; pub use expr_simplifier::*; pub use simplify_exprs::*; + +// Export for test in datafusion/core/tests/optimizer_integration.rs +pub use guarantees::GuaranteeRewriter; diff --git a/datafusion/sqllogictest/test_files/test1.slt b/datafusion/sqllogictest/test_files/test1.slt new file mode 100644 index 000000000000..5d163107955b --- /dev/null +++ b/datafusion/sqllogictest/test_files/test1.slt @@ -0,0 +1,15 @@ +statement ok +create table t as values (struct(1)); + +query TT +explain select column1.c0 from t; +---- +logical_plan +01)Projection: get_field(t.column1, Utf8("c0")) +02)--TableScan: t projection=[column1] +physical_plan +01)ProjectionExec: expr=[get_field(column1@0, c0) as t.column1[c0]] +02)--MemoryExec: partitions=1, partition_sizes=[1] + +statement ok +drop table t; From 1a754b4b4e93ce89f514f3b78697fe7a41a5f890 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sat, 18 May 2024 14:17:48 +0800 Subject: [PATCH 5/9] fmt Signed-off-by: jayzhan211 --- datafusion/core/tests/optimizer_integration.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datafusion/core/tests/optimizer_integration.rs b/datafusion/core/tests/optimizer_integration.rs index e75ed3a8d6d7..f84e8e24af78 100644 --- a/datafusion/core/tests/optimizer_integration.rs +++ b/datafusion/core/tests/optimizer_integration.rs @@ -29,7 +29,8 @@ use datafusion_common::tree_node::{TransformedResult, TreeNode}; use datafusion_common::{plan_err, DFSchema, Result, ScalarValue}; use datafusion_expr::interval_arithmetic::{Interval, NullableInterval}; use datafusion_expr::{ - col, lit, AggregateUDF, BinaryExpr, Expr, ExprSchemable, LogicalPlan, Operator, ScalarUDF, TableSource, WindowUDF + col, lit, AggregateUDF, BinaryExpr, Expr, ExprSchemable, LogicalPlan, Operator, + ScalarUDF, TableSource, WindowUDF, }; use datafusion_functions::core::expr_ext::FieldAccessor; use datafusion_optimizer::analyzer::Analyzer; @@ -356,4 +357,4 @@ fn validate_unchanged_cases(rewriter: &mut GuaranteeRewriter, cases: &[Expr]) { expr, output ); } -} \ No newline at end of file +} From dc3bfd74c6126eede7f324a234e9775c619b8e12 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sat, 18 May 2024 14:37:13 +0800 Subject: [PATCH 6/9] add foc and fix displayed name Signed-off-by: jayzhan211 --- datafusion/functions-array/src/expr_ext.rs | 39 ++++++++++++++++++++ datafusion/functions-array/src/extract.rs | 11 ++++++ datafusion/functions/src/core/expr_ext.rs | 22 +++++++++++ datafusion/sqllogictest/test_files/test1.slt | 15 -------- 4 files changed, 72 insertions(+), 15 deletions(-) delete mode 100644 datafusion/sqllogictest/test_files/test1.slt diff --git a/datafusion/functions-array/src/expr_ext.rs b/datafusion/functions-array/src/expr_ext.rs index 2ed66fbd5802..0d27dfcb5fb1 100644 --- a/datafusion/functions-array/src/expr_ext.rs +++ b/datafusion/functions-array/src/expr_ext.rs @@ -26,6 +26,25 @@ pub trait IndexAccessor { } impl IndexAccessor for Expr { + /// Return access to the element field. Example `expr["name"]` + /// + /// ## Example Access element 2 from column "c1" + /// + /// For example if column "c1" holds documents like this + /// + /// ```json + /// [10, 20, 30, 40] + /// ``` + /// + /// You can access the value "30" with + /// + /// ``` + /// # use datafusion_expr::{lit, col, Expr}; + /// # use datafusion_functions_array::expr_ext::IndexAccessor; + /// let expr = col("c1") + /// .index(lit(3)); + /// assert_eq!(expr.display_name().unwrap(), "c1[Int32(3)]"); + /// ``` fn index(self, key: Expr) -> Expr { array_element(self, key) } @@ -36,6 +55,26 @@ pub trait SliceAccessor { } impl SliceAccessor for Expr { + /// Return elements between `1` based `start` and `stop`, for + /// example `expr[1:3]` + /// + /// ## Example: Access element 2, 3, 4 from column "c1" + /// + /// For example if column "c1" holds documents like this + /// + /// ```json + /// [10, 20, 30, 40] + /// ``` + /// + /// You can access the value `[20, 30, 40]` with + /// + /// ``` + /// # use datafusion_expr::{lit, col}; + /// # use datafusion_functions_array::expr_ext::SliceAccessor; + /// let expr = col("c1") + /// .range(lit(2), lit(4)); + /// assert_eq!(expr.display_name().unwrap(), "c1[Int32(2):Int32(4)]"); + /// ``` fn range(self, start: Expr, stop: Expr) -> Expr { array_slice(self, start, stop, None) } diff --git a/datafusion/functions-array/src/extract.rs b/datafusion/functions-array/src/extract.rs index 152e5f3c4b13..a12cdc20dfa3 100644 --- a/datafusion/functions-array/src/extract.rs +++ b/datafusion/functions-array/src/extract.rs @@ -97,6 +97,11 @@ impl ScalarUDFImpl for ArrayElement { "array_element" } + fn display_name(&self, args: &[Expr]) -> Result { + let args_name: Vec = args.iter().map(|e| e.to_string()).collect(); + Ok(format!("{}[{}]", args_name[0], args_name[1])) + } + fn signature(&self) -> &Signature { &self.signature } @@ -245,6 +250,12 @@ impl ScalarUDFImpl for ArraySlice { fn as_any(&self) -> &dyn Any { self } + + fn display_name(&self, args: &[Expr]) -> Result { + let args_name: Vec = args.iter().map(|e| e.to_string()).collect(); + Ok(format!("{}[{}]", args_name[0], args_name[1..].join(":"))) + } + fn name(&self) -> &str { "array_slice" } diff --git a/datafusion/functions/src/core/expr_ext.rs b/datafusion/functions/src/core/expr_ext.rs index f754e731020b..d6fb7426fa32 100644 --- a/datafusion/functions/src/core/expr_ext.rs +++ b/datafusion/functions/src/core/expr_ext.rs @@ -26,6 +26,28 @@ pub trait FieldAccessor { } impl FieldAccessor for Expr { + /// Return access to the named field. Example `expr["name"]` + /// + /// ## Access field "my_field" from column "c1" + /// + /// For example if column "c1" holds documents like this + /// + /// ```json + /// { + /// "my_field": 123.34, + /// "other_field": "Boston", + /// } + /// ``` + /// + /// You can access column "my_field" with + /// + /// ``` + /// # use datafusion_expr::{col}; + /// # use datafusion_functions::core::expr_ext::FieldAccessor; + /// let expr = col("c1") + /// .field("my_field"); + /// assert_eq!(expr.display_name().unwrap(), "c1[my_field]"); + /// ``` fn field(self, name: impl Into) -> Expr { get_field(self, lit(name.into())) } diff --git a/datafusion/sqllogictest/test_files/test1.slt b/datafusion/sqllogictest/test_files/test1.slt deleted file mode 100644 index 5d163107955b..000000000000 --- a/datafusion/sqllogictest/test_files/test1.slt +++ /dev/null @@ -1,15 +0,0 @@ -statement ok -create table t as values (struct(1)); - -query TT -explain select column1.c0 from t; ----- -logical_plan -01)Projection: get_field(t.column1, Utf8("c0")) -02)--TableScan: t projection=[column1] -physical_plan -01)ProjectionExec: expr=[get_field(column1@0, c0) as t.column1[c0]] -02)--MemoryExec: partitions=1, partition_sizes=[1] - -statement ok -drop table t; From 136f3dff16d46fd9469934f1cd0b03a7ed00e9d2 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sat, 18 May 2024 14:41:14 +0800 Subject: [PATCH 7/9] rm test Signed-off-by: jayzhan211 --- .../src/simplify_expressions/guarantees.rs | 65 ------------------- 1 file changed, 65 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/guarantees.rs b/datafusion/optimizer/src/simplify_expressions/guarantees.rs index 8f7f8004d182..2ccf93e2d5b3 100644 --- a/datafusion/optimizer/src/simplify_expressions/guarantees.rs +++ b/datafusion/optimizer/src/simplify_expressions/guarantees.rs @@ -261,71 +261,6 @@ mod tests { } } - #[test] - fn test_inequalities_non_null_bounded() { - let guarantees = vec![ - // x ∈ [1, 3] (not null) - ( - col("x"), - NullableInterval::NotNull { - values: Interval::make(Some(1_i32), Some(3_i32)).unwrap(), - }, - ), - // s ∈ [1, 3] (not null) - ( - col("s"), - NullableInterval::NotNull { - values: Interval::make(Some(1_i32), Some(3_i32)).unwrap(), - }, - ), - ]; - - let mut rewriter = GuaranteeRewriter::new(guarantees.iter()); - - // (original_expr, expected_simplification) - let simplified_cases = &[ - (col("x").lt(lit(0)), false), - (col("x").lt_eq(lit(3)), true), - (col("x").gt(lit(3)), false), - (col("x").gt(lit(0)), true), - (col("x").eq(lit(0)), false), - (col("x").not_eq(lit(0)), true), - (col("x").between(lit(0), lit(5)), true), - (col("x").between(lit(5), lit(10)), false), - (col("x").not_between(lit(0), lit(5)), false), - (col("x").not_between(lit(5), lit(10)), true), - ( - Expr::BinaryExpr(BinaryExpr { - left: Box::new(col("x")), - op: Operator::IsDistinctFrom, - right: Box::new(lit(ScalarValue::Null)), - }), - true, - ), - ( - Expr::BinaryExpr(BinaryExpr { - left: Box::new(col("x")), - op: Operator::IsDistinctFrom, - right: Box::new(lit(5)), - }), - true, - ), - ]; - - validate_simplified_cases(&mut rewriter, simplified_cases); - - let unchanged_cases = &[ - col("x").gt(lit(2)), - col("x").lt_eq(lit(2)), - col("x").eq(lit(2)), - col("x").not_eq(lit(2)), - col("x").between(lit(3), lit(5)), - col("x").not_between(lit(3), lit(10)), - ]; - - validate_unchanged_cases(&mut rewriter, unchanged_cases); - } - #[test] fn test_inequalities_non_null_unbounded() { let guarantees = vec![ From 26cb3d087d5009d0da27274b5ee756ebbe478a1e Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sun, 19 May 2024 14:10:16 +0800 Subject: [PATCH 8/9] rebase Signed-off-by: jayzhan211 --- datafusion/functions/src/core/expr_ext.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion/functions/src/core/expr_ext.rs b/datafusion/functions/src/core/expr_ext.rs index d6fb7426fa32..65547b381fc6 100644 --- a/datafusion/functions/src/core/expr_ext.rs +++ b/datafusion/functions/src/core/expr_ext.rs @@ -17,12 +17,12 @@ //! Extension methods for Expr. -use datafusion_expr::{lit, Expr}; +use datafusion_expr::{Expr, Literal}; use super::expr_fn::get_field; pub trait FieldAccessor { - fn field(self, name: impl Into) -> Expr; + fn field(self, name: impl Literal) -> Expr; } impl FieldAccessor for Expr { @@ -48,8 +48,8 @@ impl FieldAccessor for Expr { /// .field("my_field"); /// assert_eq!(expr.display_name().unwrap(), "c1[my_field]"); /// ``` - fn field(self, name: impl Into) -> Expr { - get_field(self, lit(name.into())) + fn field(self, name: impl Literal) -> Expr { + get_field(self, name) } } @@ -62,7 +62,7 @@ mod tests { #[test] fn test_field() { let expr1 = col("a").field("b"); - let expr2 = get_field(col("a"), lit("b")); + let expr2 = get_field(col("a"), "b"); assert_eq!(expr1, expr2); } } From f70e1e6bb8a4a6890ecabedd01de2abc33ab9a4d Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Tue, 21 May 2024 07:21:06 +0800 Subject: [PATCH 9/9] move doc Signed-off-by: jayzhan211 --- datafusion/functions-array/src/expr_ext.rs | 78 +++++++++++----------- datafusion/functions/src/core/expr_ext.rs | 44 ++++++------ 2 files changed, 61 insertions(+), 61 deletions(-) diff --git a/datafusion/functions-array/src/expr_ext.rs b/datafusion/functions-array/src/expr_ext.rs index 0d27dfcb5fb1..5505ef746881 100644 --- a/datafusion/functions-array/src/expr_ext.rs +++ b/datafusion/functions-array/src/expr_ext.rs @@ -21,60 +21,60 @@ use datafusion_expr::Expr; use crate::extract::{array_element, array_slice}; +/// Return access to the element field. Example `expr["name"]` +/// +/// ## Example Access element 2 from column "c1" +/// +/// For example if column "c1" holds documents like this +/// +/// ```json +/// [10, 20, 30, 40] +/// ``` +/// +/// You can access the value "30" with +/// +/// ``` +/// # use datafusion_expr::{lit, col, Expr}; +/// # use datafusion_functions_array::expr_ext::IndexAccessor; +/// let expr = col("c1") +/// .index(lit(3)); +/// assert_eq!(expr.display_name().unwrap(), "c1[Int32(3)]"); +/// ``` pub trait IndexAccessor { fn index(self, key: Expr) -> Expr; } impl IndexAccessor for Expr { - /// Return access to the element field. Example `expr["name"]` - /// - /// ## Example Access element 2 from column "c1" - /// - /// For example if column "c1" holds documents like this - /// - /// ```json - /// [10, 20, 30, 40] - /// ``` - /// - /// You can access the value "30" with - /// - /// ``` - /// # use datafusion_expr::{lit, col, Expr}; - /// # use datafusion_functions_array::expr_ext::IndexAccessor; - /// let expr = col("c1") - /// .index(lit(3)); - /// assert_eq!(expr.display_name().unwrap(), "c1[Int32(3)]"); - /// ``` fn index(self, key: Expr) -> Expr { array_element(self, key) } } +/// Return elements between `1` based `start` and `stop`, for +/// example `expr[1:3]` +/// +/// ## Example: Access element 2, 3, 4 from column "c1" +/// +/// For example if column "c1" holds documents like this +/// +/// ```json +/// [10, 20, 30, 40] +/// ``` +/// +/// You can access the value `[20, 30, 40]` with +/// +/// ``` +/// # use datafusion_expr::{lit, col}; +/// # use datafusion_functions_array::expr_ext::SliceAccessor; +/// let expr = col("c1") +/// .range(lit(2), lit(4)); +/// assert_eq!(expr.display_name().unwrap(), "c1[Int32(2):Int32(4)]"); +/// ``` pub trait SliceAccessor { fn range(self, start: Expr, stop: Expr) -> Expr; } impl SliceAccessor for Expr { - /// Return elements between `1` based `start` and `stop`, for - /// example `expr[1:3]` - /// - /// ## Example: Access element 2, 3, 4 from column "c1" - /// - /// For example if column "c1" holds documents like this - /// - /// ```json - /// [10, 20, 30, 40] - /// ``` - /// - /// You can access the value `[20, 30, 40]` with - /// - /// ``` - /// # use datafusion_expr::{lit, col}; - /// # use datafusion_functions_array::expr_ext::SliceAccessor; - /// let expr = col("c1") - /// .range(lit(2), lit(4)); - /// assert_eq!(expr.display_name().unwrap(), "c1[Int32(2):Int32(4)]"); - /// ``` fn range(self, start: Expr, stop: Expr) -> Expr { array_slice(self, start, stop, None) } diff --git a/datafusion/functions/src/core/expr_ext.rs b/datafusion/functions/src/core/expr_ext.rs index 65547b381fc6..d80df0f334ab 100644 --- a/datafusion/functions/src/core/expr_ext.rs +++ b/datafusion/functions/src/core/expr_ext.rs @@ -21,33 +21,33 @@ use datafusion_expr::{Expr, Literal}; use super::expr_fn::get_field; +/// Return access to the named field. Example `expr["name"]` +/// +/// ## Access field "my_field" from column "c1" +/// +/// For example if column "c1" holds documents like this +/// +/// ```json +/// { +/// "my_field": 123.34, +/// "other_field": "Boston", +/// } +/// ``` +/// +/// You can access column "my_field" with +/// +/// ``` +/// # use datafusion_expr::{col}; +/// # use datafusion_functions::core::expr_ext::FieldAccessor; +/// let expr = col("c1") +/// .field("my_field"); +/// assert_eq!(expr.display_name().unwrap(), "c1[my_field]"); +/// ``` pub trait FieldAccessor { fn field(self, name: impl Literal) -> Expr; } impl FieldAccessor for Expr { - /// Return access to the named field. Example `expr["name"]` - /// - /// ## Access field "my_field" from column "c1" - /// - /// For example if column "c1" holds documents like this - /// - /// ```json - /// { - /// "my_field": 123.34, - /// "other_field": "Boston", - /// } - /// ``` - /// - /// You can access column "my_field" with - /// - /// ``` - /// # use datafusion_expr::{col}; - /// # use datafusion_functions::core::expr_ext::FieldAccessor; - /// let expr = col("c1") - /// .field("my_field"); - /// assert_eq!(expr.display_name().unwrap(), "c1[my_field]"); - /// ``` fn field(self, name: impl Literal) -> Expr { get_field(self, name) }