Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix make_array null handling, update tests #6900

Merged
merged 2 commits into from
Jul 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 63 additions & 29 deletions datafusion/core/tests/sqllogictests/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,18 @@ CREATE TABLE values(
b INT,
c INT,
d FLOAT,
e VARCHAR
e VARCHAR,
f VARCHAR
) AS VALUES
(1, 1, 2, 1.1, 'Lorem'),
(2, 3, 4, 2.2, 'ipsum'),
(3, 5, 6, 3.3, 'dolor'),
(4, 7, 8, 4.4, 'sit'),
(NULL, 9, 10, 5.5, 'amet'),
(5, NULL, 12, 6.6, ','),
(6, 11, NULL, 7.7, 'consectetur'),
(7, 13, 14, NULL, 'adipiscing'),
(8, 15, 16, 8.8, NULL)
(1, 1, 2, 1.1, 'Lorem', 'A'),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

adds a new column, e and whitespace OCD

(2, 3, 4, 2.2, 'ipsum', ''),
(3, 5, 6, 3.3, 'dolor', 'BB'),
(4, 7, 8, 4.4, 'sit', NULL),
(NULL, 9, 10, 5.5, 'amet', 'CCC'),
(5, NULL, 12, 6.6, ',', 'DD'),
(6, 11, NULL, 7.7, 'consectetur', 'E'),
(7, 13, 14, NULL, 'adipiscing', 'F'),
(8, 15, 16, 8.8, NULL, '')
;

statement ok
Expand Down Expand Up @@ -189,34 +190,67 @@ select make_array(NULL), make_array(NULL, NULL, NULL), make_array(make_array(NUL
----
[] [] [[], []]

# make_array with columns #1
query ????
select make_array(a), make_array(b, c), make_array(d), make_array(e) from values;
----
[1] [1, 2] [1.1] [Lorem]
[2] [3, 4] [2.2] [ipsum]
[3] [5, 6] [3.3] [dolor]
[4] [7, 8] [4.4] [sit]
[0] [9, 10] [5.5] [amet]
[5] [0, 12] [6.6] [,]
[6] [11, 0] [7.7] [consectetur]
[7] [13, 14] [0.0] [adipiscing]
[8] [15, 16] [8.8] []

# make_array with columns #2
# make_array with 1 columns
query ???
select make_array(a), make_array(d), make_array(e) from values;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I broke out the tests for make_array with a single column from the tests of make_array with 2 column

----
[1] [1.1] [Lorem]
[2] [2.2] [ipsum]
[3] [3.3] [dolor]
[4] [4.4] [sit]
[] [5.5] [amet]
[5] [6.6] [,]
[6] [7.7] [consectetur]
[7] [] [adipiscing]
[8] [8.8] []

# make_array with 2 columns #1
query ??
select make_array(b, c), make_array(e, f) from values;
----
[1, 2] [Lorem, A]
[3, 4] [ipsum, ]
[5, 6] [dolor, BB]
[7, 8] [sit, ]
[9, 10] [amet, CCC]
[, 12] [,, DD]
[11, ] [consectetur, E]
[13, 14] [adipiscing, F]
[15, 16] [, ]

# make_array with 4 columns
query ?
select make_array(a, b, c, d) from values;
----
[1.0, 1.0, 2.0, 1.1]
[2.0, 3.0, 4.0, 2.2]
[3.0, 5.0, 6.0, 3.3]
[4.0, 7.0, 8.0, 4.4]
[0.0, 9.0, 10.0, 5.5]
[5.0, 0.0, 12.0, 6.6]
[6.0, 11.0, 0.0, 7.7]
[7.0, 13.0, 14.0, 0.0]
[, 9.0, 10.0, 5.5]
[5.0, , 12.0, 6.6]
[6.0, 11.0, , 7.7]
[7.0, 13.0, 14.0, ]
[8.0, 15.0, 16.0, 8.8]

# make_array null handling
query ?B?BB
select
make_array(a), make_array(a)[1] IS NULL,
make_array(e, f), make_array(e, f)[1] IS NULL, make_array(e, f)[2] IS NULL
from values;
----
[1] false [Lorem, A] false false
[2] false [ipsum, ] false false
[3] false [dolor, BB] false false
[4] false [sit, ] false true
[] true [amet, CCC] false false
[5] false [,, DD] false false
[6] false [consectetur, E] false false
[7] false [adipiscing, F] false false
[8] false [, ] true false



## array_append

# array_append scalar function #2
Expand Down
77 changes: 73 additions & 4 deletions datafusion/physical-expr/src/array_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ macro_rules! downcast_arg {
}};
}

/// Downcasts multiple arguments into a single concrete type
/// $ARGS: &[ArrayRef]
/// $ARRAY_TYPE: type to downcast to
///
/// $returns a Vec<$ARRAY_TYPE>
macro_rules! downcast_vec {
($ARGS:expr, $ARRAY_TYPE:ident) => {{
$ARGS
Expand All @@ -66,18 +71,38 @@ macro_rules! new_builder {
}};
}

/// Combines multiple arrays into a single ListArray
///
/// $ARGS: slice of arrays, each with $ARRAY_TYPE
/// $ARRAY_TYPE: the type of the list elements
/// $BUILDER_TYPE: the type of ArrayBuilder for the list elements
///
/// Returns: a ListArray where the elements each have the same type as
/// $ARRAY_TYPE and each element have a length of $ARGS.len()
macro_rules! array {
($ARGS:expr, $ARRAY_TYPE:ident, $BUILDER_TYPE:ident) => {{
let builder = new_builder!($BUILDER_TYPE, $ARGS[0].len());
let mut builder =
ListBuilder::<$BUILDER_TYPE>::with_capacity(builder, $ARGS.len());

let num_rows = $ARGS[0].len();
assert!(
$ARGS.iter().all(|a| a.len() == num_rows),
"all arguments must have the same number of rows"
);

// for each entry in the array
for index in 0..$ARGS[0].len() {
for index in 0..num_rows {
// for each column
for arg in $ARGS {
match arg.as_any().downcast_ref::<$ARRAY_TYPE>() {
// Copy the source array value into the target ListArray
Some(arr) => {
builder.values().append_value(arr.value(index));
if arr.is_valid(index) {
builder.values().append_value(arr.value(index));
} else {
builder.values().append_null();
}
Comment on lines +101 to +105
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These lines are the only actual code change; The rest of this PR is doc comments and tests

}
None => match arg.as_any().downcast_ref::<NullArray>() {
Some(arr) => {
Expand Down Expand Up @@ -179,6 +204,46 @@ fn compute_array_dims(arr: Option<ArrayRef>) -> Result<Option<Vec<Option<u64>>>>
}
}

/// Convert one or more [`ArrayRef`] of the same type into a
/// `ListArray`
///
/// # Example (non nested)
///
/// Calling `array(col1, col2)` where col1 and col2 are non nested
/// would return a single new `ListArray`, where each row was a list
/// of 2 elements:
///
/// ```text
/// ┌─────────┐ ┌─────────┐ ┌──────────────┐
/// │ ┌─────┐ │ │ ┌─────┐ │ │ ┌──────────┐ │
/// │ │ A │ │ │ │ X │ │ │ │ [A, X] │ │
/// │ ├─────┤ │ │ ├─────┤ │ │ ├──────────┤ │
/// │ │NULL │ │ │ │ Y │ │──────────▶│ │[NULL, Y] │ │
/// │ ├─────┤ │ │ ├─────┤ │ │ ├──────────┤ │
/// │ │ C │ │ │ │ Z │ │ │ │ [C, Z] │ │
/// │ └─────┘ │ │ └─────┘ │ │ └──────────┘ │
/// └─────────┘ └─────────┘ └──────────────┘
/// col1 col2 output
/// ```
///
/// # Example (nested)
///
/// Calling `array(col1, col2)` where col1 and col2 are lists
/// would return a single new `ListArray`, where each row was a list
/// of the corresponding elements of col1 and col2 flattened.
///
/// ``` text
/// ┌──────────────┐ ┌──────────────┐ ┌────────────────────────┐
/// │ ┌──────────┐ │ │ ┌──────────┐ │ │ ┌────────────────────┐ │
/// │ │ [A, X] │ │ │ │ [] │ │ │ │ [A, X] │ │
/// │ ├──────────┤ │ │ ├──────────┤ │ │ ├────────────────────┤ │
/// │ │[NULL, Y] │ │ │ │[Q, R, S] │ │───────▶│ │ [NULL, Y, Q, R, S] │ │
/// │ ├──────────┤ │ │ ├──────────┤ │ │ ├────────────────────┤ │
/// │ │ [C, Z] │ │ │ │ NULL │ │ │ │ [C, Z, NULL] │ │
/// │ └──────────┘ │ │ └──────────┘ │ │ └────────────────────┘ │
/// └──────────────┘ └──────────────┘ └────────────────────────┘
/// col1 col2 output
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are good comments 👍
I think we can make a custom template for all array functions.

/// ```
fn array_array(args: &[ArrayRef], data_type: DataType) -> Result<ArrayRef> {
// do not accept 0 arguments.
if args.is_empty() {
Expand All @@ -200,6 +265,7 @@ fn array_array(args: &[ArrayRef], data_type: DataType) -> Result<ArrayRef> {
let mut mutable =
MutableArrayData::with_capacities(array_data, false, capacity);

// Copy over all the child data
for (i, a) in arrays.iter().enumerate() {
mutable.extend(i, 0, a.len())
}
Expand Down Expand Up @@ -239,8 +305,11 @@ fn array_array(args: &[ArrayRef], data_type: DataType) -> Result<ArrayRef> {
Ok(res)
}

/// put values in an array.
pub fn array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
/// Convert one or more [`ColumnarValue`] of the same type into a
/// `ListArray`
///
/// See [`array_array`] for more details.
fn array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
let arrays: Vec<ArrayRef> = values
.iter()
.map(|x| match x {
Expand Down