From 96a3e185a42782b27f5b30a23610606f1a950b4b Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Fri, 9 Aug 2024 00:32:35 +0100 Subject: [PATCH] support tuples as types --- datafusion/expr/src/type_coercion/binary.rs | 28 +++++++++++- datafusion/sql/src/expr/mod.rs | 20 ++++++++- datafusion/sqllogictest/test_files/struct.slt | 44 +++++++++++++++++-- 3 files changed, 87 insertions(+), 5 deletions(-) diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 6de0118f6bae..f7af05034d43 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -25,7 +25,7 @@ use crate::Operator; use arrow::array::{new_empty_array, Array}; use arrow::compute::can_cast_types; use arrow::datatypes::{ - DataType, Field, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, + DataType, Field, FieldRef, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, }; use datafusion_common::{exec_datafusion_err, plan_datafusion_err, plan_err, Result}; @@ -498,6 +498,7 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option Option { + use arrow::datatypes::DataType::*; + match (lhs_type, rhs_type) { + (Struct(lhs_fields), Struct(rhs_fields)) => { + if lhs_fields.len() != rhs_fields.len() { + return None; + } + + let types = std::iter::zip(lhs_fields.iter(), rhs_fields.iter()) + .map(|(lhs, rhs)| comparison_coercion(lhs.data_type(), rhs.data_type())) + .collect::>>()?; + + let fields = types + .into_iter() + .enumerate() + .map(|(i, datatype)| { + Arc::new(Field::new(format!("c{i}"), datatype, true)) + }) + .collect::>(); + Some(Struct(fields.into())) + } + _ => None, + } +} + /// Returns the output type of applying mathematics operations such as /// `+` to arguments of `lhs_type` and `rhs_type`. fn mathematics_numerical_coercion( diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index edb0002842a8..f2b4e0b4e43d 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -661,6 +661,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}") } + SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values), _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), } } @@ -670,7 +671,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { &self, schema: &DFSchema, planner_context: &mut PlannerContext, - values: Vec, + values: Vec, fields: Vec, ) -> Result { if !fields.is_empty() { @@ -695,6 +696,23 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { not_impl_err!("Struct not supported by ExprPlanner: {create_struct_args:?}") } + fn parse_tuple( + &self, + schema: &DFSchema, + planner_context: &mut PlannerContext, + values: Vec, + ) -> Result { + match values.first() { + Some(SQLExpr::Identifier(_)) | Some(SQLExpr::Value(_)) => { + self.parse_struct(schema, planner_context, values, vec![]) + } + None => not_impl_err!("Empty tuple not supported yet"), + _ => { + not_impl_err!("Only identifiers and literals are supported in tuples") + } + } + } + fn sql_position_to_expr( &self, substr_expr: SQLExpr, diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index caa612f556fe..5c66bca1e0c2 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -218,9 +218,6 @@ select named_struct('field_a', 1, 'field_b', 2); ---- {field_a: 1, field_b: 2} -statement ok -drop table values; - query T select arrow_typeof(named_struct('first', 1, 'second', 2, 'third', 3)); ---- @@ -236,3 +233,44 @@ query ? select {'animal': {'cat': 1, 'dog': 2, 'bird': {'parrot': 3, 'canary': 1}}, 'genre': {'fiction': ['mystery', 'sci-fi', 'fantasy'], 'non-fiction': {'biography': 5, 'history': 7, 'science': {'physics': 2, 'biology': 3}}}, 'vehicle': {'car': {'sedan': 4, 'suv': 2}, 'bicycle': 3, 'boat': ['sailboat', 'motorboat']}, 'weather': {'sunny': True, 'temperature': 25.5, 'wind': {'speed': 10, 'direction': 'NW'}}}; ---- {animal: {cat: 1, dog: 2, bird: {parrot: 3, canary: 1}}, genre: {fiction: [mystery, sci-fi, fantasy], non-fiction: {biography: 5, history: 7, science: {physics: 2, biology: 3}}}, vehicle: {car: {sedan: 4, suv: 2}, bicycle: 3, boat: [sailboat, motorboat]}, weather: {sunny: true, temperature: 25.5, wind: {speed: 10, direction: NW}}} + +# test tuple as struct +query B +select ('x', 'y') = ('x', 'y'); +---- +true + +query B +select ('x', 'y') = ('y', 'x'); +---- +false + +query error DataFusion error: Error during planning: Cannot infer common argument type for comparison operation Struct.* +select ('x', 'y') = ('x', 'y', 'z'); + +query B +select ('x', 'y') IN (('x', 'y')); +---- +true + +query B +select ('x', 'y') IN (('x', 'y'), ('y', 'x')); +---- +true + +query I +select a from values where (a, c) = (1, 'a'); +---- +1 + +query I +select a from values where (a, c) IN ((1, 'a'), (2, 'b')); +---- +1 +2 + +statement ok +drop table values; + +statement ok +drop table struct_values;