Skip to content

Commit

Permalink
Fix make_array null handling, update tests (#6900)
Browse files Browse the repository at this point in the history
* Fix `make_array` null handling, update tests

* Apply suggestions from code review
  • Loading branch information
alamb authored Jul 11, 2023
1 parent 4e2a72f commit 311e8c7
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 33 deletions.
92 changes: 63 additions & 29 deletions datafusion/core/tests/sqllogictests/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,18 @@ CREATE TABLE values(
b INT,
c INT,
d FLOAT,
e VARCHAR
e VARCHAR,
f VARCHAR
) AS VALUES
(1, 1, 2, 1.1, 'Lorem'),
(2, 3, 4, 2.2, 'ipsum'),
(3, 5, 6, 3.3, 'dolor'),
(4, 7, 8, 4.4, 'sit'),
(NULL, 9, 10, 5.5, 'amet'),
(5, NULL, 12, 6.6, ','),
(6, 11, NULL, 7.7, 'consectetur'),
(7, 13, 14, NULL, 'adipiscing'),
(8, 15, 16, 8.8, NULL)
(1, 1, 2, 1.1, 'Lorem', 'A'),
(2, 3, 4, 2.2, 'ipsum', ''),
(3, 5, 6, 3.3, 'dolor', 'BB'),
(4, 7, 8, 4.4, 'sit', NULL),
(NULL, 9, 10, 5.5, 'amet', 'CCC'),
(5, NULL, 12, 6.6, ',', 'DD'),
(6, 11, NULL, 7.7, 'consectetur', 'E'),
(7, 13, 14, NULL, 'adipiscing', 'F'),
(8, 15, 16, 8.8, NULL, '')
;

statement ok
Expand Down Expand Up @@ -189,34 +190,67 @@ select make_array(NULL), make_array(NULL, NULL, NULL), make_array(make_array(NUL
----
[] [] [[], []]

# make_array with columns #1
query ????
select make_array(a), make_array(b, c), make_array(d), make_array(e) from values;
----
[1] [1, 2] [1.1] [Lorem]
[2] [3, 4] [2.2] [ipsum]
[3] [5, 6] [3.3] [dolor]
[4] [7, 8] [4.4] [sit]
[0] [9, 10] [5.5] [amet]
[5] [0, 12] [6.6] [,]
[6] [11, 0] [7.7] [consectetur]
[7] [13, 14] [0.0] [adipiscing]
[8] [15, 16] [8.8] []

# make_array with columns #2
# make_array with 1 columns
query ???
select make_array(a), make_array(d), make_array(e) from values;
----
[1] [1.1] [Lorem]
[2] [2.2] [ipsum]
[3] [3.3] [dolor]
[4] [4.4] [sit]
[] [5.5] [amet]
[5] [6.6] [,]
[6] [7.7] [consectetur]
[7] [] [adipiscing]
[8] [8.8] []

# make_array with 2 columns #1
query ??
select make_array(b, c), make_array(e, f) from values;
----
[1, 2] [Lorem, A]
[3, 4] [ipsum, ]
[5, 6] [dolor, BB]
[7, 8] [sit, ]
[9, 10] [amet, CCC]
[, 12] [,, DD]
[11, ] [consectetur, E]
[13, 14] [adipiscing, F]
[15, 16] [, ]

# make_array with 4 columns
query ?
select make_array(a, b, c, d) from values;
----
[1.0, 1.0, 2.0, 1.1]
[2.0, 3.0, 4.0, 2.2]
[3.0, 5.0, 6.0, 3.3]
[4.0, 7.0, 8.0, 4.4]
[0.0, 9.0, 10.0, 5.5]
[5.0, 0.0, 12.0, 6.6]
[6.0, 11.0, 0.0, 7.7]
[7.0, 13.0, 14.0, 0.0]
[, 9.0, 10.0, 5.5]
[5.0, , 12.0, 6.6]
[6.0, 11.0, , 7.7]
[7.0, 13.0, 14.0, ]
[8.0, 15.0, 16.0, 8.8]

# make_array null handling
query ?B?BB
select
make_array(a), make_array(a)[1] IS NULL,
make_array(e, f), make_array(e, f)[1] IS NULL, make_array(e, f)[2] IS NULL
from values;
----
[1] false [Lorem, A] false false
[2] false [ipsum, ] false false
[3] false [dolor, BB] false false
[4] false [sit, ] false true
[] true [amet, CCC] false false
[5] false [,, DD] false false
[6] false [consectetur, E] false false
[7] false [adipiscing, F] false false
[8] false [, ] true false



## array_append

# array_append scalar function #2
Expand Down
77 changes: 73 additions & 4 deletions datafusion/physical-expr/src/array_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ macro_rules! downcast_arg {
}};
}

/// Downcasts multiple arguments into a single concrete type
/// $ARGS: &[ArrayRef]
/// $ARRAY_TYPE: type to downcast to
///
/// $returns a Vec<$ARRAY_TYPE>
macro_rules! downcast_vec {
($ARGS:expr, $ARRAY_TYPE:ident) => {{
$ARGS
Expand All @@ -66,18 +71,38 @@ macro_rules! new_builder {
}};
}

/// Combines multiple arrays into a single ListArray
///
/// $ARGS: slice of arrays, each with $ARRAY_TYPE
/// $ARRAY_TYPE: the type of the list elements
/// $BUILDER_TYPE: the type of ArrayBuilder for the list elements
///
/// Returns: a ListArray where the elements each have the same type as
/// $ARRAY_TYPE and each element have a length of $ARGS.len()
macro_rules! array {
($ARGS:expr, $ARRAY_TYPE:ident, $BUILDER_TYPE:ident) => {{
let builder = new_builder!($BUILDER_TYPE, $ARGS[0].len());
let mut builder =
ListBuilder::<$BUILDER_TYPE>::with_capacity(builder, $ARGS.len());

let num_rows = $ARGS[0].len();
assert!(
$ARGS.iter().all(|a| a.len() == num_rows),
"all arguments must have the same number of rows"
);

// for each entry in the array
for index in 0..$ARGS[0].len() {
for index in 0..num_rows {
// for each column
for arg in $ARGS {
match arg.as_any().downcast_ref::<$ARRAY_TYPE>() {
// Copy the source array value into the target ListArray
Some(arr) => {
builder.values().append_value(arr.value(index));
if arr.is_valid(index) {
builder.values().append_value(arr.value(index));
} else {
builder.values().append_null();
}
}
None => match arg.as_any().downcast_ref::<NullArray>() {
Some(arr) => {
Expand Down Expand Up @@ -179,6 +204,46 @@ fn compute_array_dims(arr: Option<ArrayRef>) -> Result<Option<Vec<Option<u64>>>>
}
}

/// Convert one or more [`ArrayRef`] of the same type into a
/// `ListArray`
///
/// # Example (non nested)
///
/// Calling `array(col1, col2)` where col1 and col2 are non nested
/// would return a single new `ListArray`, where each row was a list
/// of 2 elements:
///
/// ```text
/// ┌─────────┐ ┌─────────┐ ┌──────────────┐
/// │ ┌─────┐ │ │ ┌─────┐ │ │ ┌──────────┐ │
/// │ │ A │ │ │ │ X │ │ │ │ [A, X] │ │
/// │ ├─────┤ │ │ ├─────┤ │ │ ├──────────┤ │
/// │ │NULL │ │ │ │ Y │ │──────────▶│ │[NULL, Y] │ │
/// │ ├─────┤ │ │ ├─────┤ │ │ ├──────────┤ │
/// │ │ C │ │ │ │ Z │ │ │ │ [C, Z] │ │
/// │ └─────┘ │ │ └─────┘ │ │ └──────────┘ │
/// └─────────┘ └─────────┘ └──────────────┘
/// col1 col2 output
/// ```
///
/// # Example (nested)
///
/// Calling `array(col1, col2)` where col1 and col2 are lists
/// would return a single new `ListArray`, where each row was a list
/// of the corresponding elements of col1 and col2 flattened.
///
/// ``` text
/// ┌──────────────┐ ┌──────────────┐ ┌────────────────────────┐
/// │ ┌──────────┐ │ │ ┌──────────┐ │ │ ┌────────────────────┐ │
/// │ │ [A, X] │ │ │ │ [] │ │ │ │ [A, X] │ │
/// │ ├──────────┤ │ │ ├──────────┤ │ │ ├────────────────────┤ │
/// │ │[NULL, Y] │ │ │ │[Q, R, S] │ │───────▶│ │ [NULL, Y, Q, R, S] │ │
/// │ ├──────────┤ │ │ ├──────────┤ │ │ ├────────────────────┤ │
/// │ │ [C, Z] │ │ │ │ NULL │ │ │ │ [C, Z, NULL] │ │
/// │ └──────────┘ │ │ └──────────┘ │ │ └────────────────────┘ │
/// └──────────────┘ └──────────────┘ └────────────────────────┘
/// col1 col2 output
/// ```
fn array_array(args: &[ArrayRef], data_type: DataType) -> Result<ArrayRef> {
// do not accept 0 arguments.
if args.is_empty() {
Expand All @@ -200,6 +265,7 @@ fn array_array(args: &[ArrayRef], data_type: DataType) -> Result<ArrayRef> {
let mut mutable =
MutableArrayData::with_capacities(array_data, false, capacity);

// Copy over all the child data
for (i, a) in arrays.iter().enumerate() {
mutable.extend(i, 0, a.len())
}
Expand Down Expand Up @@ -239,8 +305,11 @@ fn array_array(args: &[ArrayRef], data_type: DataType) -> Result<ArrayRef> {
Ok(res)
}

/// put values in an array.
pub fn array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
/// Convert one or more [`ColumnarValue`] of the same type into a
/// `ListArray`
///
/// See [`array_array`] for more details.
fn array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
let arrays: Vec<ArrayRef> = values
.iter()
.map(|x| match x {
Expand Down

0 comments on commit 311e8c7

Please sign in to comment.