diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt index b46875725fc8..cf20c14cac76 100644 --- a/datafusion/core/tests/sqllogictests/test_files/array.slt +++ b/datafusion/core/tests/sqllogictests/test_files/array.slt @@ -29,17 +29,18 @@ CREATE TABLE values( b INT, c INT, d FLOAT, - e VARCHAR + e VARCHAR, + f VARCHAR ) AS VALUES - (1, 1, 2, 1.1, 'Lorem'), - (2, 3, 4, 2.2, 'ipsum'), - (3, 5, 6, 3.3, 'dolor'), - (4, 7, 8, 4.4, 'sit'), - (NULL, 9, 10, 5.5, 'amet'), - (5, NULL, 12, 6.6, ','), - (6, 11, NULL, 7.7, 'consectetur'), - (7, 13, 14, NULL, 'adipiscing'), - (8, 15, 16, 8.8, NULL) + (1, 1, 2, 1.1, 'Lorem', 'A'), + (2, 3, 4, 2.2, 'ipsum', ''), + (3, 5, 6, 3.3, 'dolor', 'BB'), + (4, 7, 8, 4.4, 'sit', NULL), + (NULL, 9, 10, 5.5, 'amet', 'CCC'), + (5, NULL, 12, 6.6, ',', 'DD'), + (6, 11, NULL, 7.7, 'consectetur', 'E'), + (7, 13, 14, NULL, 'adipiscing', 'F'), + (8, 15, 16, 8.8, NULL, '') ; statement ok @@ -189,21 +190,35 @@ select make_array(NULL), make_array(NULL, NULL, NULL), make_array(make_array(NUL ---- [] [] [[], []] -# make_array with columns #1 -query ???? -select make_array(a), make_array(b, c), make_array(d), make_array(e) from values; ----- -[1] [1, 2] [1.1] [Lorem] -[2] [3, 4] [2.2] [ipsum] -[3] [5, 6] [3.3] [dolor] -[4] [7, 8] [4.4] [sit] -[0] [9, 10] [5.5] [amet] -[5] [0, 12] [6.6] [,] -[6] [11, 0] [7.7] [consectetur] -[7] [13, 14] [0.0] [adipiscing] -[8] [15, 16] [8.8] [] - -# make_array with columns #2 +# make_array with 1 columns +query ??? +select make_array(a), make_array(d), make_array(e) from values; +---- +[1] [1.1] [Lorem] +[2] [2.2] [ipsum] +[3] [3.3] [dolor] +[4] [4.4] [sit] +[] [5.5] [amet] +[5] [6.6] [,] +[6] [7.7] [consectetur] +[7] [] [adipiscing] +[8] [8.8] [] + +# make_array with 2 columns #1 +query ?? +select make_array(b, c), make_array(e, f) from values; +---- +[1, 2] [Lorem, A] +[3, 4] [ipsum, ] +[5, 6] [dolor, BB] +[7, 8] [sit, ] +[9, 10] [amet, CCC] +[, 12] [,, DD] +[11, ] [consectetur, E] +[13, 14] [adipiscing, F] +[15, 16] [, ] + +# make_array with 4 columns query ? select make_array(a, b, c, d) from values; ---- @@ -211,12 +226,31 @@ select make_array(a, b, c, d) from values; [2.0, 3.0, 4.0, 2.2] [3.0, 5.0, 6.0, 3.3] [4.0, 7.0, 8.0, 4.4] -[0.0, 9.0, 10.0, 5.5] -[5.0, 0.0, 12.0, 6.6] -[6.0, 11.0, 0.0, 7.7] -[7.0, 13.0, 14.0, 0.0] +[, 9.0, 10.0, 5.5] +[5.0, , 12.0, 6.6] +[6.0, 11.0, , 7.7] +[7.0, 13.0, 14.0, ] [8.0, 15.0, 16.0, 8.8] +# make_array null handling +query ?B?BB +select + make_array(a), make_array(a)[1] IS NULL, + make_array(e, f), make_array(e, f)[1] IS NULL, make_array(e, f)[2] IS NULL +from values; +---- +[1] false [Lorem, A] false false +[2] false [ipsum, ] false false +[3] false [dolor, BB] false false +[4] false [sit, ] false true +[] true [amet, CCC] false false +[5] false [,, DD] false false +[6] false [consectetur, E] false false +[7] false [adipiscing, F] false false +[8] false [, ] true false + + + ## array_append # array_append scalar function #2 diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 6af87b506ea8..a0fed8f90889 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -40,6 +40,11 @@ macro_rules! downcast_arg { }}; } +/// Downcasts multiple arguments into a single concrete type +/// $ARGS: &[ArrayRef] +/// $ARRAY_TYPE: type to downcast to +/// +/// $returns a Vec<$ARRAY_TYPE> macro_rules! downcast_vec { ($ARGS:expr, $ARRAY_TYPE:ident) => {{ $ARGS @@ -66,18 +71,38 @@ macro_rules! new_builder { }}; } +/// Combines multiple arrays into a single ListArray +/// +/// $ARGS: slice of arrays, each with $ARRAY_TYPE +/// $ARRAY_TYPE: the type of the list elements +/// $BUILDER_TYPE: the type of ArrayBuilder for the list elements +/// +/// Returns: a ListArray where the elements each have the same type as +/// $ARRAY_TYPE and each element have a length of $ARGS.len() macro_rules! array { ($ARGS:expr, $ARRAY_TYPE:ident, $BUILDER_TYPE:ident) => {{ let builder = new_builder!($BUILDER_TYPE, $ARGS[0].len()); let mut builder = ListBuilder::<$BUILDER_TYPE>::with_capacity(builder, $ARGS.len()); + let num_rows = $ARGS[0].len(); + assert!( + $ARGS.iter().all(|a| a.len() == num_rows), + "all arguments must have the same number of rows" + ); + // for each entry in the array - for index in 0..$ARGS[0].len() { + for index in 0..num_rows { + // for each column for arg in $ARGS { match arg.as_any().downcast_ref::<$ARRAY_TYPE>() { + // Copy the source array value into the target ListArray Some(arr) => { - builder.values().append_value(arr.value(index)); + if arr.is_valid(index) { + builder.values().append_value(arr.value(index)); + } else { + builder.values().append_null(); + } } None => match arg.as_any().downcast_ref::() { Some(arr) => { @@ -179,6 +204,46 @@ fn compute_array_dims(arr: Option) -> Result>>> } } +/// Convert one or more [`ArrayRef`] of the same type into a +/// `ListArray` +/// +/// # Example (non nested) +/// +/// Calling `array(col1, col2)` where col1 and col2 are non nested +/// would return a single new `ListArray`, where each row was a list +/// of 2 elements: +/// +/// ```text +/// ┌─────────┐ ┌─────────┐ ┌──────────────┐ +/// │ ┌─────┐ │ │ ┌─────┐ │ │ ┌──────────┐ │ +/// │ │ A │ │ │ │ X │ │ │ │ [A, X] │ │ +/// │ ├─────┤ │ │ ├─────┤ │ │ ├──────────┤ │ +/// │ │NULL │ │ │ │ Y │ │──────────▶│ │[NULL, Y] │ │ +/// │ ├─────┤ │ │ ├─────┤ │ │ ├──────────┤ │ +/// │ │ C │ │ │ │ Z │ │ │ │ [C, Z] │ │ +/// │ └─────┘ │ │ └─────┘ │ │ └──────────┘ │ +/// └─────────┘ └─────────┘ └──────────────┘ +/// col1 col2 output +/// ``` +/// +/// # Example (nested) +/// +/// Calling `array(col1, col2)` where col1 and col2 are lists +/// would return a single new `ListArray`, where each row was a list +/// of the corresponding elements of col1 and col2 flattened. +/// +/// ``` text +/// ┌──────────────┐ ┌──────────────┐ ┌────────────────────────┐ +/// │ ┌──────────┐ │ │ ┌──────────┐ │ │ ┌────────────────────┐ │ +/// │ │ [A, X] │ │ │ │ [] │ │ │ │ [A, X] │ │ +/// │ ├──────────┤ │ │ ├──────────┤ │ │ ├────────────────────┤ │ +/// │ │[NULL, Y] │ │ │ │[Q, R, S] │ │───────▶│ │ [NULL, Y, Q, R, S] │ │ +/// │ ├──────────┤ │ │ ├──────────┤ │ │ ├────────────────────┤ │ +/// │ │ [C, Z] │ │ │ │ NULL │ │ │ │ [C, Z, NULL] │ │ +/// │ └──────────┘ │ │ └──────────┘ │ │ └────────────────────┘ │ +/// └──────────────┘ └──────────────┘ └────────────────────────┘ +/// col1 col2 output +/// ``` fn array_array(args: &[ArrayRef], data_type: DataType) -> Result { // do not accept 0 arguments. if args.is_empty() { @@ -200,6 +265,7 @@ fn array_array(args: &[ArrayRef], data_type: DataType) -> Result { let mut mutable = MutableArrayData::with_capacities(array_data, false, capacity); + // Copy over all the child data for (i, a) in arrays.iter().enumerate() { mutable.extend(i, 0, a.len()) } @@ -239,8 +305,11 @@ fn array_array(args: &[ArrayRef], data_type: DataType) -> Result { Ok(res) } -/// put values in an array. -pub fn array(values: &[ColumnarValue]) -> Result { +/// Convert one or more [`ColumnarValue`] of the same type into a +/// `ListArray` +/// +/// See [`array_array`] for more details. +fn array(values: &[ColumnarValue]) -> Result { let arrays: Vec = values .iter() .map(|x| match x {