From 4d6e013c4b9ad6f9cf8e7b44c42cfd34f8cb528f Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Fri, 7 Jul 2023 10:51:30 +0800 Subject: [PATCH 1/2] add diff dims support for array concat Signed-off-by: jayzhan211 --- .../tests/sqllogictests/test_files/array.slt | 25 ++++++ .../physical-expr/src/array_expressions.rs | 89 +++++++++++++------ 2 files changed, 89 insertions(+), 25 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt index 7eebb23d9cc9..43ce9b89da65 100644 --- a/datafusion/core/tests/sqllogictests/test_files/array.slt +++ b/datafusion/core/tests/sqllogictests/test_files/array.slt @@ -365,6 +365,31 @@ select array_concat(make_array(), make_array(2, 3)); ---- [2, 3] +# array_concat with different dimensions +# 2D + 1D +query ? +select array_concat(make_array([1,2], [3,4]), make_array(5, 6)) +---- +[[1, 2], [3, 4], [5, 6]] + +# 1D + 2D +query ? +select array_concat(make_array(5, 6), make_array([1,2], [3,4])) +---- +[[5, 6], [1, 2], [3, 4]] + +# 2D + 1D + 1D +query ? +select array_concat(make_array([1,2], [3,4]), make_array(5, 6), make_array(7,8)) +---- +[[1, 2], [3, 4], [5, 6], [7, 8]] + +# 1D + 2D + 3D +query ? +select array_concat(make_array(10, 20), make_array([30, 40]), make_array([[50, 60]])) +---- +[[[10, 20]], [[30, 40]], [[50, 60]]] + ## array_position # array_position scalar function #1 diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index cd174918db37..ef8ba512c36f 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -373,20 +373,84 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result { Ok(res) } +fn compute_array_ndims(arg: u8, arr: ArrayRef) -> Result { + match arr.data_type() { + DataType::List(..) => { + let list_array = downcast_arg!(arr, ListArray); + compute_array_ndims(arg + 1, list_array.value(0)) + } + DataType::Null + | DataType::Utf8 + | DataType::LargeUtf8 + | DataType::Boolean + | DataType::Float32 + | DataType::Float64 + | DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 => Ok(arg), + data_type => Err(DataFusionError::NotImplemented(format!( + "Array is not implemented for type '{data_type:?}'." + ))), + } +} + +fn align_array_dimensions(args: Vec) -> Result> { + // Compute the number of dimensions for each array + let args_ndim: Result> = args + .iter() + .map(|arr| compute_array_ndims(0, arr.clone())) + .collect(); + let args_ndim = args_ndim?; + + // Find the maximum number of dimensions + let max_ndim = *args_ndim.iter().max().unwrap(); + + // Align the dimensions of the arrays + let aligned_args: Result> = args + .into_iter() + .map(|array| { + let ndim = compute_array_ndims(0, array.clone())?; + if ndim < max_ndim { + let mut aligned_array = array.clone(); + for _ in 0..(max_ndim - ndim) { + let data_type = aligned_array.as_ref().data_type().clone(); + aligned_array = array_array(&[aligned_array], data_type)?; + } + Ok(aligned_array) + } else { + Ok(array.clone()) + } + }) + .collect(); + + aligned_args +} + /// Array_concat/Array_cat SQL function pub fn array_concat(args: &[ArrayRef]) -> Result { match args[0].data_type() { DataType::List(field) => match field.data_type() { DataType::Null => array_concat(&args[1..]), _ => { + let args = align_array_dimensions(args.to_vec())?; + let list_arrays = downcast_vec!(args, ListArray) .collect::>>()?; + let len: usize = list_arrays.iter().map(|a| a.values().len()).sum(); + let capacity = Capacities::Array(list_arrays.iter().map(|a| a.len()).sum()); let array_data: Vec<_> = list_arrays.iter().map(|a| a.to_data()).collect::>(); + let array_data = array_data.iter().collect(); + let mut mutable = MutableArrayData::with_capacities(array_data, false, capacity); @@ -1217,31 +1281,6 @@ pub fn array_dims(args: &[ArrayRef]) -> Result { /// Array_ndims SQL function pub fn array_ndims(args: &[ArrayRef]) -> Result { - fn compute_array_ndims(arg: u8, arr: ArrayRef) -> Result { - match arr.data_type() { - DataType::List(..) => { - let list_array = downcast_arg!(arr, ListArray); - compute_array_ndims(arg + 1, list_array.value(0)) - } - DataType::Null - | DataType::Utf8 - | DataType::LargeUtf8 - | DataType::Boolean - | DataType::Float32 - | DataType::Float64 - | DataType::Int8 - | DataType::Int16 - | DataType::Int32 - | DataType::Int64 - | DataType::UInt8 - | DataType::UInt16 - | DataType::UInt32 - | DataType::UInt64 => Ok(arg), - data_type => Err(DataFusionError::NotImplemented(format!( - "Array is not implemented for type '{data_type:?}'." - ))), - } - } let arg: u8 = 0; Ok(Arc::new(UInt8Array::from(vec![compute_array_ndims( arg, From 88bca149a66495c19add01139d48d28f82824891 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sat, 8 Jul 2023 08:22:57 +0800 Subject: [PATCH 2/2] address comment Signed-off-by: jayzhan211 --- .../core/tests/sqllogictests/test_files/array.slt | 9 ++++----- datafusion/physical-expr/src/array_expressions.rs | 13 ++++++------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt index 43ce9b89da65..2d7a6099893c 100644 --- a/datafusion/core/tests/sqllogictests/test_files/array.slt +++ b/datafusion/core/tests/sqllogictests/test_files/array.slt @@ -365,26 +365,25 @@ select array_concat(make_array(), make_array(2, 3)); ---- [2, 3] -# array_concat with different dimensions -# 2D + 1D +# array_concat with different dimensions #1 (2D + 1D) query ? select array_concat(make_array([1,2], [3,4]), make_array(5, 6)) ---- [[1, 2], [3, 4], [5, 6]] -# 1D + 2D +# array_concat with different dimensions #2 (1D + 2D) query ? select array_concat(make_array(5, 6), make_array([1,2], [3,4])) ---- [[5, 6], [1, 2], [3, 4]] -# 2D + 1D + 1D +# array_concat with different dimensions #3 (2D + 1D + 1D) query ? select array_concat(make_array([1,2], [3,4]), make_array(5, 6), make_array(7,8)) ---- [[1, 2], [3, 4], [5, 6], [7, 8]] -# 1D + 2D + 3D +# array_concat with different dimensions #4 (1D + 2D + 3D) query ? select array_concat(make_array(10, 20), make_array([30, 40]), make_array([[50, 60]])) ---- diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index ef8ba512c36f..cbf5896b85f4 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -400,15 +400,14 @@ fn compute_array_ndims(arg: u8, arr: ArrayRef) -> Result { } fn align_array_dimensions(args: Vec) -> Result> { - // Compute the number of dimensions for each array - let args_ndim: Result> = args + // Find the maximum number of dimensions + let max_ndim: u8 = *args .iter() .map(|arr| compute_array_ndims(0, arr.clone())) - .collect(); - let args_ndim = args_ndim?; - - // Find the maximum number of dimensions - let max_ndim = *args_ndim.iter().max().unwrap(); + .collect::>>()? + .iter() + .max() + .unwrap(); // Align the dimensions of the arrays let aligned_args: Result> = args