diff --git a/datafusion/common/src/cast.rs b/datafusion/common/src/cast.rs index b9cc429d7023..04ae32ec35aa 100644 --- a/datafusion/common/src/cast.rs +++ b/datafusion/common/src/cast.rs @@ -30,7 +30,7 @@ use arrow::{ IntervalYearMonthArray, LargeListArray, ListArray, MapArray, NullArray, OffsetSizeTrait, PrimitiveArray, StringArray, StructArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, - TimestampSecondArray, UInt32Array, UInt64Array, UnionArray, + TimestampSecondArray, UInt32Array, UInt64Array, UInt8Array, UnionArray, }, datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType}, }; @@ -45,6 +45,11 @@ pub fn as_struct_array(array: &dyn Array) -> Result<&StructArray> { Ok(downcast_value!(array, StructArray)) } +// Downcast ArrayRef to UInt8Array +pub fn as_uint8_array(array: &dyn Array) -> Result<&UInt8Array> { + Ok(downcast_value!(array, UInt8Array)) +} + // Downcast ArrayRef to Int32Array pub fn as_int32_array(array: &dyn Array) -> Result<&Int32Array> { Ok(downcast_value!(array, Int32Array)) diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt new file mode 100644 index 000000000000..df9edce0b1df --- /dev/null +++ b/datafusion/core/tests/sqllogictests/test_files/array.slt @@ -0,0 +1,206 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +############# +## Array expressions Tests +############# + +# array scalar function #1 +query ??? rowsort +select make_array(1, 2, 3), make_array(1.0, 2.0, 3.0), make_array('h', 'e', 'l', 'l', 'o'); +---- +[1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o] + +# array scalar function #2 +query ??? rowsort +select make_array(1, 2, 3), make_array(make_array(1, 2), make_array(3, 4)), make_array([[[[1], [2]]]]); +---- +[1, 2, 3] [[1, 2], [3, 4]] [[[[[1], [2]]]]] + +# array scalar function #3 +query ?? rowsort +select make_array([1, 2, 3], [4, 5, 6], [7, 8, 9]), make_array([[1, 2], [3, 4]], [[5, 6], [7, 8]]); +---- +[[1, 2, 3], [4, 5, 6], [7, 8, 9]] [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] + +# array scalar function #4 +query ?? rowsort +select make_array([1.0, 2.0], [3.0, 4.0]), make_array('h', 'e', 'l', 'l', 'o'); +---- +[[1.0, 2.0], [3.0, 4.0]] [h, e, l, l, o] + +# array scalar function #5 +query ? rowsort +select make_array(make_array(make_array(make_array(1, 2, 3), make_array(4, 5, 6)), make_array(make_array(7, 8, 9), make_array(10, 11, 12)))) +---- +[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]] + +# array_append scalar function +query ??? rowsort +select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_prepend scalar function +query ??? rowsort +select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, make_array(2.0, 3.0, 4.0)), array_prepend('h', make_array('e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_fill scalar function #1 +query ??? rowsort +select array_fill(11, make_array(1, 2, 3)), array_fill(3, make_array(2, 3)), array_fill(2, make_array(2)); +---- +[[[11, 11, 11], [11, 11, 11]]] [[3, 3, 3], [3, 3, 3]] [2, 2] + +# array_fill scalar function #2 +query ?? rowsort +select array_fill(1, make_array(1, 1, 1)), array_fill(2, make_array(2, 2, 2, 2, 2)); +---- +[[[1]]] [[[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]], [[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]]] + +# array_concat scalar function #1 +query ?? rowsort +select array_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_concat(make_array([1], [2]), make_array([3], [4])); +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] + +# array_concat scalar function #2 +query ? rowsort +select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array(5, 6), make_array(7, 8))); +---- +[[1, 2], [3, 4], [5, 6], [7, 8]] + +# array_concat scalar function #3 +query ? rowsort +select array_concat(make_array([1], [2], [3]), make_array([4], [5], [6]), make_array([7], [8], [9])); +---- +[[1], [2], [3], [4], [5], [6], [7], [8], [9]] + +# array_concat scalar function #4 +query ? rowsort +select array_concat(make_array([[1]]), make_array([[2]])); +---- +[[[1]], [[2]]] + +# array_position scalar function #1 +query III +select array_position(['h', 'e', 'l', 'l', 'o'], 'l'), array_position([1, 2, 3, 4, 5], 5), array_position([1, 1, 1], 1); +---- +3 5 1 + +# array_position scalar function #2 +query III +select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4), array_position([1, 2, 5, 4, 5], 5, 4), array_position([1, 1, 1], 1, 2); +---- +4 5 2 + +# array_positions scalar function +query III +select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1); +---- +[3, 4] [5] [1, 2, 3] + +# array_replace scalar function +query ??? +select array_replace(make_array(1, 2, 3, 4), 2, 3), array_replace(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), array_replace(make_array(1, 2, 3), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] + +# array_to_string scalar function +query ??? +select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 3, 4, 5], '-'), array_to_string([1.0, 2.0, 3.0], '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# array_to_string scalar function #2 +query ??? +select array_to_string([1, 1, 1], '1'), array_to_string([[1, 2], [3, 4], [5, 6]], '+'), array_to_string(array_fill(3, [3, 2, 2]), '/\'); +---- +11111 1+2+3+4+5+6 3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3 + +# cardinality scalar function +query III +select cardinality(make_array(1, 2, 3, 4, 5)), cardinality([1, 3, 5]), cardinality(make_array('h', 'e', 'l', 'l', 'o')); +---- +5 3 5 + +# cardinality scalar function #2 +query II +select cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_fill(3, array[3, 2, 3])); +---- +6 18 + +# trim_array scalar function +query ??? +select trim_array(make_array(1, 2, 3, 4, 5), 2), trim_array(['h', 'e', 'l', 'l', 'o'], 3), trim_array([1.0, 2.0, 3.0], 2); +---- +[1, 2, 3] [h, e] [1.0] + +# trim_array scalar function #2 +query ?? +select trim_array([[1, 2], [3, 4], [5, 6]], 2), trim_array(array_fill(4, [3, 4, 2]), 2); +---- +[[1, 2]] [[[4, 4], [4, 4], [4, 4], [4, 4]]] + +# array_length scalar function +query III rowsort +select array_length(make_array(1, 2, 3, 4, 5)), array_length(make_array(1, 2, 3)), array_length(make_array([1, 2], [3, 4], [5, 6])); +---- +5 3 3 + +# array_length scalar function #2 +query III rowsort +select array_length(make_array(1, 2, 3, 4, 5), 1), array_length(make_array(1, 2, 3), 1), array_length(make_array([1, 2], [3, 4], [5, 6]), 1); +---- +5 3 3 + +# array_length scalar function #3 +query III rowsort +select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1, 2, 3), 2), array_length(make_array([1, 2], [3, 4], [5, 6]), 2); +---- +NULL NULL 2 + +# array_length scalar function #4 +query IIII rowsort +select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3, [3, 2, 5]), 2), array_length(array_fill(3, [3, 2, 5]), 3), array_length(array_fill(3, [3, 2, 5]), 4); +---- +3 2 5 NULL + +# array_dims scalar function +query III rowsort +select array_dims(make_array(1, 2, 3)), array_dims(make_array([1, 2], [3, 4])), array_dims(make_array([[[[1], [2]]]])); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + +# array_dims scalar function #2 +query II rowsort +select array_dims(array_fill(2, [1, 2, 3])), array_dims(array_fill(3, [2, 5, 4])); +---- +[1, 2, 3] [2, 5, 4] + +# array_ndims scalar function +query III rowsort +select array_ndims(make_array(1, 2, 3)), array_ndims(make_array([1, 2], [3, 4])), array_ndims(make_array([[[[1], [2]]]])); +---- +1 2 5 + +# array_ndims scalar function #2 +query II rowsort +select array_ndims(array_fill(1, [1, 2, 3])), array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]); +---- +3 21 diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 3911939b4ca6..2272997fae06 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -102,9 +102,39 @@ pub enum BuiltinScalarFunction { /// trunc Trunc, - // string functions + // array functions + /// array_append + ArrayAppend, + /// array_concat + ArrayConcat, + /// array_dims + ArrayDims, + /// array_fill + ArrayFill, + /// array_length + ArrayLength, + /// array_ndims + ArrayNdims, + /// array_position + ArrayPosition, + /// array_positions + ArrayPositions, + /// array_prepend + ArrayPrepend, + /// array_remove + ArrayRemove, + /// array_replace + ArrayReplace, + /// array_to_string + ArrayToString, + /// cardinality + Cardinality, /// construct an array from columns MakeArray, + /// trim_array + TrimArray, + + // string functions /// ascii Ascii, /// bit_length @@ -280,7 +310,21 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Tan => Volatility::Immutable, BuiltinScalarFunction::Tanh => Volatility::Immutable, BuiltinScalarFunction::Trunc => Volatility::Immutable, + BuiltinScalarFunction::ArrayAppend => Volatility::Immutable, + BuiltinScalarFunction::ArrayConcat => Volatility::Immutable, + BuiltinScalarFunction::ArrayDims => Volatility::Immutable, + BuiltinScalarFunction::ArrayFill => Volatility::Immutable, + BuiltinScalarFunction::ArrayLength => Volatility::Immutable, + BuiltinScalarFunction::ArrayNdims => Volatility::Immutable, + BuiltinScalarFunction::ArrayPosition => Volatility::Immutable, + BuiltinScalarFunction::ArrayPositions => Volatility::Immutable, + BuiltinScalarFunction::ArrayPrepend => Volatility::Immutable, + BuiltinScalarFunction::ArrayRemove => Volatility::Immutable, + BuiltinScalarFunction::ArrayReplace => Volatility::Immutable, + BuiltinScalarFunction::ArrayToString => Volatility::Immutable, + BuiltinScalarFunction::Cardinality => Volatility::Immutable, BuiltinScalarFunction::MakeArray => Volatility::Immutable, + BuiltinScalarFunction::TrimArray => Volatility::Immutable, BuiltinScalarFunction::Ascii => Volatility::Immutable, BuiltinScalarFunction::BitLength => Volatility::Immutable, BuiltinScalarFunction::Btrim => Volatility::Immutable, @@ -444,7 +488,21 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] { BuiltinScalarFunction::ArrowTypeof => &["arrow_typeof"], // array functions + BuiltinScalarFunction::ArrayAppend => &["array_append"], + BuiltinScalarFunction::ArrayConcat => &["array_concat"], + BuiltinScalarFunction::ArrayDims => &["array_dims"], + BuiltinScalarFunction::ArrayFill => &["array_fill"], + BuiltinScalarFunction::ArrayLength => &["array_length"], + BuiltinScalarFunction::ArrayNdims => &["array_ndims"], + BuiltinScalarFunction::ArrayPosition => &["array_position"], + BuiltinScalarFunction::ArrayPositions => &["array_positions"], + BuiltinScalarFunction::ArrayPrepend => &["array_prepend"], + BuiltinScalarFunction::ArrayRemove => &["array_remove"], + BuiltinScalarFunction::ArrayReplace => &["array_replace"], + BuiltinScalarFunction::ArrayToString => &["array_to_string"], + BuiltinScalarFunction::Cardinality => &["cardinality"], BuiltinScalarFunction::MakeArray => &["make_array"], + BuiltinScalarFunction::TrimArray => &["trim_array"], } } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 6b0a09baf945..819f8e4aa7fc 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -520,6 +520,92 @@ scalar_expr!( scalar_expr!(Uuid, uuid, , "Returns uuid v4 as a string value"); scalar_expr!(Log, log, base x, "logarithm of a `x` for a particular `base`"); +// array functions +scalar_expr!( + ArrayAppend, + array_append, + array element, + "appends an element to the end of an array." +); +nary_scalar_expr!(ArrayConcat, array_concat, "concatenates arrays."); +scalar_expr!( + ArrayDims, + array_dims, + array, + "returns an array of the array's dimensions." +); +scalar_expr!( + ArrayFill, + array_fill, + element array, + "returns an array filled with copies of the given value." +); +scalar_expr!( + ArrayLength, + array_length, + array dimension, + "returns the length of the array dimension." +); +scalar_expr!( + ArrayNdims, + array_ndims, + array, + "returns the number of dimensions of the array." +); +scalar_expr!( + ArrayPosition, + array_position, + array element index, + "searches for an element in the array, returns first occurrence." +); +scalar_expr!( + ArrayPositions, + array_positions, + array element, + "searches for an element in the array, returns all occurrences." +); +scalar_expr!( + ArrayPrepend, + array_prepend, + array element, + "prepends an element to the beginning of an array." +); +scalar_expr!( + ArrayRemove, + array_remove, + array element, + "removes all elements equal to the given value from the array." +); +scalar_expr!( + ArrayReplace, + array_replace, + array from to, + "replaces a specified element with another specified element." +); +scalar_expr!( + ArrayToString, + array_to_string, + array delimeter, + "converts each element to its text representation." +); +scalar_expr!( + Cardinality, + cardinality, + array, + "returns the total number of elements in the array." +); +nary_scalar_expr!( + MakeArray, + array, + "returns an Arrow array using the specified input expressions." +); +scalar_expr!( + TrimArray, + trim_array, + array n, + "removes the last n elements from the array." +); + // string functions scalar_expr!(Ascii, ascii, chr, "ASCII code value of the character"); scalar_expr!( @@ -610,11 +696,6 @@ nary_scalar_expr!( btrim, "removes all characters, spaces by default, from both sides of a string" ); -nary_scalar_expr!( - MakeArray, - array, - "returns an array of fixed size with each argument on it." -); nary_scalar_expr!(Coalesce, coalesce, "returns `coalesce(args...)`, which evaluates to the value of the first [Expr] which is not NULL"); //there is a func concat_ws before, so use concat_ws_expr as name.c nary_scalar_expr!( @@ -884,6 +965,21 @@ mod test { test_scalar_expr!(DateBin, date_bin, stride, source, origin); test_scalar_expr!(FromUnixtime, from_unixtime, unixtime); + test_scalar_expr!(ArrayAppend, array_append, array, element); + test_unary_scalar_expr!(ArrayDims, array_dims); + test_scalar_expr!(ArrayFill, array_fill, element, array); + test_scalar_expr!(ArrayLength, array_length, array, dimension); + test_unary_scalar_expr!(ArrayNdims, array_ndims); + test_scalar_expr!(ArrayPosition, array_position, array, element, index); + test_scalar_expr!(ArrayPositions, array_positions, array, element); + test_scalar_expr!(ArrayPrepend, array_prepend, array, element); + test_scalar_expr!(ArrayRemove, array_remove, array, element); + test_scalar_expr!(ArrayReplace, array_replace, array, from, to); + test_scalar_expr!(ArrayToString, array_to_string, array, delimiter); + test_unary_scalar_expr!(Cardinality, cardinality); + test_nary_scalar_expr!(MakeArray, array, input); + test_scalar_expr!(TrimArray, trim_array, array, n); + test_unary_scalar_expr!(ArrowTypeof, arrow_typeof); } diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index 5ba685224857..ed5df5766720 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -22,8 +22,8 @@ use crate::nullif::SUPPORTED_NULLIF_TYPES; use crate::type_coercion::functions::data_types; use crate::ColumnarValue; use crate::{ - array_expressions, conditional_expressions, struct_expressions, Accumulator, - BuiltinScalarFunction, Signature, TypeSignature, + conditional_expressions, struct_expressions, Accumulator, BuiltinScalarFunction, + Signature, TypeSignature, }; use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit}; use datafusion_common::{DataFusionError, Result}; @@ -115,10 +115,92 @@ pub fn return_type( // the return type of the built in function. // Some built-in functions' return type depends on the incoming type. match fun { - BuiltinScalarFunction::MakeArray => Ok(DataType::FixedSizeList( - Arc::new(Field::new("item", input_expr_types[0].clone(), true)), - input_expr_types.len() as i32, - )), + BuiltinScalarFunction::ArrayAppend => match &input_expr_types[0] { + DataType::List(field) => Ok(DataType::List(Arc::new(Field::new( + "item", + field.data_type().clone(), + true, + )))), + _ => Err(DataFusionError::Internal(format!( + "The {fun} function can only accept list as the first argument" + ))), + }, + BuiltinScalarFunction::ArrayConcat => match &input_expr_types[0] { + DataType::List(field) => Ok(DataType::List(Arc::new(Field::new( + "item", + field.data_type().clone(), + true, + )))), + _ => Err(DataFusionError::Internal(format!( + "The {fun} function can only accept fixed size list as the args." + ))), + }, + BuiltinScalarFunction::ArrayDims => Ok(DataType::UInt8), + BuiltinScalarFunction::ArrayFill => Ok(DataType::List(Arc::new(Field::new( + "item", + input_expr_types[0].clone(), + true, + )))), + BuiltinScalarFunction::ArrayLength => Ok(DataType::UInt8), + BuiltinScalarFunction::ArrayNdims => Ok(DataType::UInt8), + BuiltinScalarFunction::ArrayPosition => Ok(DataType::UInt8), + BuiltinScalarFunction::ArrayPositions => Ok(DataType::UInt8), + BuiltinScalarFunction::ArrayPrepend => match &input_expr_types[1] { + DataType::List(field) => Ok(DataType::List(Arc::new(Field::new( + "item", + field.data_type().clone(), + true, + )))), + _ => Err(DataFusionError::Internal(format!( + "The {fun} function can only accept list as the first argument" + ))), + }, + BuiltinScalarFunction::ArrayRemove => match &input_expr_types[0] { + DataType::List(field) => Ok(DataType::List(Arc::new(Field::new( + "item", + field.data_type().clone(), + true, + )))), + _ => Err(DataFusionError::Internal(format!( + "The {fun} function can only accept list as the first argument" + ))), + }, + BuiltinScalarFunction::ArrayReplace => match &input_expr_types[0] { + DataType::List(field) => Ok(DataType::List(Arc::new(Field::new( + "item", + field.data_type().clone(), + true, + )))), + _ => Err(DataFusionError::Internal(format!( + "The {fun} function can only accept list as the first argument" + ))), + }, + BuiltinScalarFunction::ArrayToString => match &input_expr_types[0] { + DataType::List(field) => Ok(DataType::List(Arc::new(Field::new( + "item", + field.data_type().clone(), + true, + )))), + _ => Err(DataFusionError::Internal(format!( + "The {fun} function can only accept list as the first argument" + ))), + }, + BuiltinScalarFunction::Cardinality => Ok(DataType::UInt64), + BuiltinScalarFunction::MakeArray => Ok(DataType::List(Arc::new(Field::new( + "item", + input_expr_types[0].clone(), + true, + )))), + BuiltinScalarFunction::TrimArray => match &input_expr_types[0] { + DataType::List(field) => Ok(DataType::List(Arc::new(Field::new( + "item", + field.data_type().clone(), + true, + )))), + _ => Err(DataFusionError::Internal(format!( + "The {fun} function can only accept list as the first argument" + ))), + }, BuiltinScalarFunction::Ascii => Ok(DataType::Int32), BuiltinScalarFunction::BitLength => { utf8_to_int_type(&input_expr_types[0], "bit_length") @@ -322,10 +404,21 @@ pub fn signature(fun: &BuiltinScalarFunction) -> Signature { // for now, the list is small, as we do not have many built-in functions. match fun { - BuiltinScalarFunction::MakeArray => Signature::variadic( - array_expressions::SUPPORTED_ARRAY_TYPES.to_vec(), - fun.volatility(), - ), + BuiltinScalarFunction::ArrayAppend => Signature::any(2, fun.volatility()), + BuiltinScalarFunction::ArrayConcat => Signature::variadic_any(fun.volatility()), + BuiltinScalarFunction::ArrayDims => Signature::any(1, fun.volatility()), + BuiltinScalarFunction::ArrayFill => Signature::any(2, fun.volatility()), + BuiltinScalarFunction::ArrayLength => Signature::variadic_any(fun.volatility()), + BuiltinScalarFunction::ArrayNdims => Signature::any(1, fun.volatility()), + BuiltinScalarFunction::ArrayPosition => Signature::variadic_any(fun.volatility()), + BuiltinScalarFunction::ArrayPositions => Signature::any(2, fun.volatility()), + BuiltinScalarFunction::ArrayPrepend => Signature::any(2, fun.volatility()), + BuiltinScalarFunction::ArrayRemove => Signature::any(2, fun.volatility()), + BuiltinScalarFunction::ArrayReplace => Signature::variadic_any(fun.volatility()), + BuiltinScalarFunction::ArrayToString => Signature::variadic_any(fun.volatility()), + BuiltinScalarFunction::Cardinality => Signature::any(1, fun.volatility()), + BuiltinScalarFunction::MakeArray => Signature::variadic_any(fun.volatility()), + BuiltinScalarFunction::TrimArray => Signature::any(2, fun.volatility()), BuiltinScalarFunction::Struct => Signature::variadic( struct_expressions::SUPPORTED_STRUCT_TYPES.to_vec(), fun.volatility(), diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 7c41299e9e6f..631ca376fc05 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -18,7 +18,11 @@ //! Array expressions use arrow::array::*; -use arrow::datatypes::DataType; +use arrow::buffer::Buffer; +use arrow::compute; +use arrow::datatypes::{DataType, Field}; +use core::any::type_name; +use datafusion_common::ScalarValue; use datafusion_common::{DataFusionError, Result}; use datafusion_expr::ColumnarValue; use std::sync::Arc; @@ -57,7 +61,7 @@ macro_rules! array { let builder = new_builder!($BUILDER_TYPE, args[0].len()); let mut builder = - FixedSizeListBuilder::<$BUILDER_TYPE>::new(builder, args.len() as i32); + ListBuilder::<$BUILDER_TYPE>::with_capacity(builder, args.len()); // for each entry in the array for index in 0..args[0].len() { for arg in &args { @@ -77,11 +81,40 @@ fn array_array(args: &[ArrayRef]) -> Result { // do not accept 0 arguments. if args.is_empty() { return Err(DataFusionError::Internal( - "array requires at least one argument".to_string(), + "Array requires at least one argument".to_string(), )); } - let res = match args[0].data_type() { + let data_type = args[0].data_type(); + let res = match data_type { + DataType::List(..) => { + let arrays = + downcast_vec!(args, ListArray).collect::>>()?; + let len: i32 = arrays.len() as i32; + let capacity = + Capacities::Array(arrays.iter().map(|a| a.get_array_memory_size()).sum()); + let array_data: Vec<_> = + arrays.iter().map(|a| a.to_data()).collect::>(); + let array_data = array_data.iter().collect(); + let mut mutable = + MutableArrayData::with_capacities(array_data, false, capacity); + + for (i, a) in arrays.iter().enumerate() { + mutable.extend(i, 0, a.len()) + } + + let list_data_type = + DataType::List(Arc::new(Field::new("item", data_type.clone(), true))); + + let list_data = ArrayData::builder(list_data_type) + .len(1) + .add_buffer(Buffer::from_slice_ref([0, len])) + .add_child_data(mutable.freeze()) + .build() + .unwrap(); + + Arc::new(ListArray::from(list_data)) + } DataType::Utf8 => array!(args, StringArray, StringBuilder), DataType::LargeUtf8 => array!(args, LargeStringArray, LargeStringBuilder), DataType::Boolean => array!(args, BooleanArray, BooleanBuilder), @@ -101,6 +134,7 @@ fn array_array(args: &[ArrayRef]) -> Result { ))) } }; + Ok(res) } @@ -115,3 +149,1558 @@ pub fn array(values: &[ColumnarValue]) -> Result { .collect(); Ok(ColumnarValue::Array(array_array(arrays.as_slice())?)) } + +macro_rules! downcast_arg { + ($ARG:expr, $ARRAY_TYPE:ident) => {{ + $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| { + DataFusionError::Internal(format!( + "could not cast to {}", + type_name::<$ARRAY_TYPE>() + )) + })? + }}; +} + +macro_rules! append { + ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{ + let child_array = + downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE); + let element = downcast_arg!($ELEMENT, $ARRAY_TYPE); + let concat = compute::concat(&[child_array, element])?; + let mut scalars = vec![]; + for i in 0..concat.len() { + scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array( + &concat, i, + )?)); + } + scalars + }}; +} + +/// Array_append SQL function +pub fn array_append(args: &[ColumnarValue]) -> Result { + if args.len() != 2 { + return Err(DataFusionError::Internal(format!( + "Array_append function requires two arguments, got {}", + args.len() + ))); + } + + let arr = match &args[0] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + ColumnarValue::Array(arr) => arr.clone(), + }; + + let element = match &args[1] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + _ => { + return Err(DataFusionError::Internal( + "Array_append function requires scalar element".to_string(), + )) + } + }; + + let data_type = arr.data_type(); + let arrays = match data_type { + DataType::List(field) => { + match (field.data_type(), element.data_type()) { + (DataType::Utf8, DataType::Utf8) => append!(arr, element, StringArray), + (DataType::LargeUtf8, DataType::LargeUtf8) => append!(arr, element, LargeStringArray), + (DataType::Boolean, DataType::Boolean) => append!(arr, element, BooleanArray), + (DataType::Float32, DataType::Float32) => append!(arr, element, Float32Array), + (DataType::Float64, DataType::Float64) => append!(arr, element, Float64Array), + (DataType::Int8, DataType::Int8) => append!(arr, element, Int8Array), + (DataType::Int16, DataType::Int16) => append!(arr, element, Int16Array), + (DataType::Int32, DataType::Int32) => append!(arr, element, Int32Array), + (DataType::Int64, DataType::Int64) => append!(arr, element, Int64Array), + (DataType::UInt8, DataType::UInt8) => append!(arr, element, UInt8Array), + (DataType::UInt16, DataType::UInt16) => append!(arr, element, UInt16Array), + (DataType::UInt32, DataType::UInt32) => append!(arr, element, UInt32Array), + (DataType::UInt64, DataType::UInt64) => append!(arr, element, UInt64Array), + (array_data_type, element_data_type) => { + return Err(DataFusionError::NotImplemented(format!( + "Array_append is not implemented for types '{array_data_type:?}' and '{element_data_type:?}'." + ))) + } + } + } + data_type => { + return Err(DataFusionError::Internal(format!( + "Array is not type '{data_type:?}'." + ))) + } + }; + + array(arrays.as_slice()) +} + +macro_rules! prepend { + ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{ + let child_array = + downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE); + let element = downcast_arg!($ELEMENT, $ARRAY_TYPE); + let concat = compute::concat(&[element, child_array])?; + let mut scalars = vec![]; + for i in 0..concat.len() { + scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array( + &concat, i, + )?)); + } + scalars + }}; +} + +/// Array_prepend SQL function +pub fn array_prepend(args: &[ColumnarValue]) -> Result { + if args.len() != 2 { + return Err(DataFusionError::Internal(format!( + "Array_prepend function requires two arguments, got {}", + args.len() + ))); + } + + let element = match &args[0] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + _ => { + return Err(DataFusionError::Internal( + "Array_prepend function requires scalar element".to_string(), + )) + } + }; + + let arr = match &args[1] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + ColumnarValue::Array(arr) => arr.clone(), + }; + + let data_type = arr.data_type(); + let arrays = match data_type { + DataType::List(field) => { + match (field.data_type(), element.data_type()) { + (DataType::Utf8, DataType::Utf8) => prepend!(arr, element, StringArray), + (DataType::LargeUtf8, DataType::LargeUtf8) => prepend!(arr, element, LargeStringArray), + (DataType::Boolean, DataType::Boolean) => prepend!(arr, element, BooleanArray), + (DataType::Float32, DataType::Float32) => prepend!(arr, element, Float32Array), + (DataType::Float64, DataType::Float64) => prepend!(arr, element, Float64Array), + (DataType::Int8, DataType::Int8) => prepend!(arr, element, Int8Array), + (DataType::Int16, DataType::Int16) => prepend!(arr, element, Int16Array), + (DataType::Int32, DataType::Int32) => prepend!(arr, element, Int32Array), + (DataType::Int64, DataType::Int64) => prepend!(arr, element, Int64Array), + (DataType::UInt8, DataType::UInt8) => prepend!(arr, element, UInt8Array), + (DataType::UInt16, DataType::UInt16) => prepend!(arr, element, UInt16Array), + (DataType::UInt32, DataType::UInt32) => prepend!(arr, element, UInt32Array), + (DataType::UInt64, DataType::UInt64) => prepend!(arr, element, UInt64Array), + (array_data_type, element_data_type) => { + return Err(DataFusionError::NotImplemented(format!( + "Array_prepend is not implemented for types '{array_data_type:?}' and '{element_data_type:?}'." + ))) + } + } + } + data_type => { + return Err(DataFusionError::Internal(format!( + "Array is not type '{data_type:?}'." + ))) + } + }; + + array(arrays.as_slice()) +} + +/// Array_concat/Array_cat SQL function +pub fn array_concat(args: &[ColumnarValue]) -> Result { + let arrays: Vec = args + .iter() + .map(|x| match x { + ColumnarValue::Array(array) => array.clone(), + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + }) + .collect(); + let data_type = arrays[0].data_type(); + match data_type { + DataType::List(..) => { + let list_arrays = + downcast_vec!(arrays, ListArray).collect::>>()?; + let len: usize = list_arrays.iter().map(|a| a.values().len()).sum(); + let capacity = Capacities::Array(list_arrays.iter().map(|a| a.len()).sum()); + let array_data: Vec<_> = + list_arrays.iter().map(|a| a.to_data()).collect::>(); + let array_data = array_data.iter().collect(); + let mut mutable = + MutableArrayData::with_capacities(array_data, false, capacity); + + for (i, a) in list_arrays.iter().enumerate() { + mutable.extend(i, 0, a.len()) + } + + let builder = mutable.into_builder(); + let list = builder + .len(1) + .buffers(vec![Buffer::from_slice_ref([0, len as i32])]) + .build() + .unwrap(); + + return Ok(ColumnarValue::Array(Arc::new(make_array(list)))); + } + _ => Err(DataFusionError::NotImplemented(format!( + "Array is not type '{data_type:?}'." + ))), + } +} + +macro_rules! fill { + ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{ + let arr = downcast_arg!($ARRAY, $ARRAY_TYPE); + + let mut acc = ColumnarValue::Scalar($ELEMENT); + for value in arr.iter().rev() { + match value { + Some(value) => { + let mut repeated = vec![]; + for _ in 0..value { + repeated.push(acc.clone()); + } + acc = array(repeated.as_slice()).unwrap(); + } + _ => { + return Err(DataFusionError::Internal(format!( + "Array_fill function requires non nullable array" + ))); + } + } + } + + acc + }}; +} + +/// Array_fill SQL function +pub fn array_fill(args: &[ColumnarValue]) -> Result { + if args.len() != 2 { + return Err(DataFusionError::Internal(format!( + "Array_fill function requires two arguments, got {}", + args.len() + ))); + } + + let element = match &args[0] { + ColumnarValue::Scalar(scalar) => scalar.clone(), + _ => { + return Err(DataFusionError::Internal( + "Array_fill function requires scalar element".to_string(), + )) + } + }; + + let arr = match &args[1] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + ColumnarValue::Array(arr) => arr.clone(), + }; + + let res = match arr.data_type() { + DataType::List(..) => { + let arr = downcast_arg!(arr, ListArray); + let array_values = arr.values(); + match arr.value_type() { + DataType::Int8 => fill!(array_values, element, Int8Array), + DataType::Int16 => fill!(array_values, element, Int16Array), + DataType::Int32 => fill!(array_values, element, Int32Array), + DataType::Int64 => fill!(array_values, element, Int64Array), + DataType::UInt8 => fill!(array_values, element, UInt8Array), + DataType::UInt16 => fill!(array_values, element, UInt16Array), + DataType::UInt32 => fill!(array_values, element, UInt32Array), + DataType::UInt64 => fill!(array_values, element, UInt64Array), + data_type => { + return Err(DataFusionError::Internal(format!( + "Array_fill is not implemented for type '{data_type:?}'." + ))); + } + } + } + data_type => { + return Err(DataFusionError::Internal(format!( + "Array is not type '{data_type:?}'." + ))); + } + }; + + Ok(res) +} + +macro_rules! position { + ($ARRAY:expr, $ELEMENT:expr, $INDEX:expr, $ARRAY_TYPE:ident) => {{ + let child_array = + downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE); + let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0); + + match child_array + .iter() + .skip($INDEX) + .position(|x| x == Some(element)) + { + Some(value) => Ok(ColumnarValue::Scalar(ScalarValue::UInt8(Some( + (value + $INDEX + 1) as u8, + )))), + None => Ok(ColumnarValue::Scalar(ScalarValue::Null)), + } + }}; +} + +/// Array_position SQL function +pub fn array_position(args: &[ColumnarValue]) -> Result { + let arr = match &args[0] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + ColumnarValue::Array(arr) => arr.clone(), + }; + + let element = match &args[1] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + _ => { + return Err(DataFusionError::Internal( + "Array_position function requires scalar element".to_string(), + )) + } + }; + + let mut index: usize = 0; + if args.len() == 3 { + let scalar = + match &args[2] { + ColumnarValue::Scalar(scalar) => scalar.clone(), + _ => return Err(DataFusionError::Internal( + "Array_position function requires positive integer scalar element" + .to_string(), + )), + }; + + index = + match scalar { + ScalarValue::Int8(Some(value)) => value as usize, + ScalarValue::Int16(Some(value)) => value as usize, + ScalarValue::Int32(Some(value)) => value as usize, + ScalarValue::Int64(Some(value)) => value as usize, + ScalarValue::UInt8(Some(value)) => value as usize, + ScalarValue::UInt16(Some(value)) => value as usize, + ScalarValue::UInt32(Some(value)) => value as usize, + ScalarValue::UInt64(Some(value)) => value as usize, + _ => return Err(DataFusionError::Internal( + "Array_position function requires positive integer scalar element" + .to_string(), + )), + }; + + if index == 0 { + index = 0; + } else { + index -= 1; + } + } + + match arr.data_type() { + DataType::List(field) => match field.data_type() { + DataType::Utf8 => position!(arr, element, index, StringArray), + DataType::LargeUtf8 => position!(arr, element, index, LargeStringArray), + DataType::Boolean => position!(arr, element, index, BooleanArray), + DataType::Float32 => position!(arr, element, index, Float32Array), + DataType::Float64 => position!(arr, element, index, Float64Array), + DataType::Int8 => position!(arr, element, index, Int8Array), + DataType::Int16 => position!(arr, element, index, Int16Array), + DataType::Int32 => position!(arr, element, index, Int32Array), + DataType::Int64 => position!(arr, element, index, Int64Array), + DataType::UInt8 => position!(arr, element, index, UInt8Array), + DataType::UInt16 => position!(arr, element, index, UInt16Array), + DataType::UInt32 => position!(arr, element, index, UInt32Array), + DataType::UInt64 => position!(arr, element, index, UInt64Array), + data_type => Err(DataFusionError::NotImplemented(format!( + "Array_position is not implemented for types '{data_type:?}'." + ))), + }, + data_type => Err(DataFusionError::NotImplemented(format!( + "Array is not type '{data_type:?}'." + ))), + } +} + +macro_rules! positions { + ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{ + let child_array = + downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE); + let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0); + + let mut res = vec![]; + for (i, x) in child_array.iter().enumerate() { + if x == Some(element) { + res.push(ScalarValue::UInt8(Some((i + 1) as u8))); + } + } + + let field = Arc::new(Field::new("item", DataType::UInt8, true)); + Ok(ColumnarValue::Scalar(ScalarValue::List(Some(res), field))) + }}; +} + +/// Array_positions SQL function +pub fn array_positions(args: &[ColumnarValue]) -> Result { + let arr = match &args[0] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + ColumnarValue::Array(arr) => arr.clone(), + }; + + let element = match &args[1] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + _ => { + return Err(DataFusionError::Internal( + "Array_positions function requires scalar element".to_string(), + )) + } + }; + + match arr.data_type() { + DataType::List(field) => match field.data_type() { + DataType::Utf8 => positions!(arr, element, StringArray), + DataType::LargeUtf8 => positions!(arr, element, LargeStringArray), + DataType::Boolean => positions!(arr, element, BooleanArray), + DataType::Float32 => positions!(arr, element, Float32Array), + DataType::Float64 => positions!(arr, element, Float64Array), + DataType::Int8 => positions!(arr, element, Int8Array), + DataType::Int16 => positions!(arr, element, Int16Array), + DataType::Int32 => positions!(arr, element, Int32Array), + DataType::Int64 => positions!(arr, element, Int64Array), + DataType::UInt8 => positions!(arr, element, UInt8Array), + DataType::UInt16 => positions!(arr, element, UInt16Array), + DataType::UInt32 => positions!(arr, element, UInt32Array), + DataType::UInt64 => positions!(arr, element, UInt64Array), + data_type => Err(DataFusionError::NotImplemented(format!( + "Array_positions is not implemented for types '{data_type:?}'." + ))), + }, + data_type => Err(DataFusionError::NotImplemented(format!( + "Array is not type '{data_type:?}'." + ))), + } +} + +macro_rules! remove { + ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident, $BUILDER_TYPE:ident) => {{ + let child_array = + downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE); + let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0); + let mut builder = new_builder!($BUILDER_TYPE, child_array.len()); + + for x in child_array { + match x { + Some(x) => { + if x != element { + builder.append_value(x); + } + } + None => builder.append_null(), + } + } + let arr = builder.finish(); + + let mut scalars = vec![]; + for i in 0..arr.len() { + scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&arr, i)?)); + } + scalars + }}; +} + +/// Array_remove SQL function +pub fn array_remove(args: &[ColumnarValue]) -> Result { + let arr = match &args[0] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + ColumnarValue::Array(arr) => arr.clone(), + }; + + let element = match &args[1] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + _ => { + return Err(DataFusionError::Internal( + "Array_remove function requires scalar element".to_string(), + )) + } + }; + + let data_type = arr.data_type(); + let res = match data_type { + DataType::List(field) => { + match (field.data_type(), element.data_type()) { + (DataType::Utf8, DataType::Utf8) => remove!(arr, element, StringArray, StringBuilder), + (DataType::LargeUtf8, DataType::LargeUtf8) => remove!(arr, element, LargeStringArray, LargeStringBuilder), + (DataType::Boolean, DataType::Boolean) => remove!(arr, element, BooleanArray, BooleanBuilder), + (DataType::Float32, DataType::Float32) => remove!(arr, element, Float32Array, Float32Builder), + (DataType::Float64, DataType::Float64) => remove!(arr, element, Float64Array, Float64Builder), + (DataType::Int8, DataType::Int8) => remove!(arr, element, Int8Array, Int8Builder), + (DataType::Int16, DataType::Int16) => remove!(arr, element, Int16Array, Int16Builder), + (DataType::Int32, DataType::Int32) => remove!(arr, element, Int32Array, Int32Builder), + (DataType::Int64, DataType::Int64) => remove!(arr, element, Int64Array, Int64Builder), + (DataType::UInt8, DataType::UInt8) => remove!(arr, element, UInt8Array, UInt8Builder), + (DataType::UInt16, DataType::UInt16) => remove!(arr, element, UInt16Array, UInt16Builder), + (DataType::UInt32, DataType::UInt32) => remove!(arr, element, UInt32Array, UInt32Builder), + (DataType::UInt64, DataType::UInt64) => remove!(arr, element, UInt64Array, UInt64Builder), + (array_data_type, element_data_type) => { + return Err(DataFusionError::NotImplemented(format!( + "Array_remove is not implemented for types '{array_data_type:?}' and '{element_data_type:?}'." + ))) + } + } + } + data_type => { + return Err(DataFusionError::Internal(format!( + "Array is not type '{data_type:?}'." + ))) + } + }; + + array(res.as_slice()) +} + +macro_rules! replace { + ($ARRAY:expr, $FROM:expr, $TO:expr, $ARRAY_TYPE:ident, $BUILDER_TYPE:ident) => {{ + let child_array = + downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE); + let from = downcast_arg!($FROM, $ARRAY_TYPE).value(0); + let to = downcast_arg!($TO, $ARRAY_TYPE).value(0); + let mut builder = new_builder!($BUILDER_TYPE, child_array.len()); + + for x in child_array { + match x { + Some(x) => { + if x == from { + builder.append_value(to); + } else { + builder.append_value(x); + } + } + None => builder.append_null(), + } + } + let arr = builder.finish(); + + let mut scalars = vec![]; + for i in 0..arr.len() { + scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&arr, i)?)); + } + scalars + }}; +} + +/// Array_replace SQL function +pub fn array_replace(args: &[ColumnarValue]) -> Result { + let arr = match &args[0] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + ColumnarValue::Array(arr) => arr.clone(), + }; + + let from = match &args[1] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + _ => { + return Err(DataFusionError::Internal( + "array_replace function requires scalar element".to_string(), + )) + } + }; + + let to = match &args[2] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + _ => { + return Err(DataFusionError::Internal( + "array_replace function requires scalar element".to_string(), + )) + } + }; + + if from.data_type() != to.data_type() { + return Err(DataFusionError::Internal( + "array_replace function requires scalar element".to_string(), + )); + } + + let data_type = arr.data_type(); + let res = match data_type { + DataType::List(field) => { + match (field.data_type(), from.data_type()) { + (DataType::Utf8, DataType::Utf8) => replace!(arr, from, to, StringArray, StringBuilder), + (DataType::LargeUtf8, DataType::LargeUtf8) => replace!(arr, from, to, LargeStringArray, LargeStringBuilder), + (DataType::Boolean, DataType::Boolean) => replace!(arr, from, to, BooleanArray, BooleanBuilder), + (DataType::Float32, DataType::Float32) => replace!(arr, from, to, Float32Array, Float32Builder), + (DataType::Float64, DataType::Float64) => replace!(arr, from, to, Float64Array, Float64Builder), + (DataType::Int8, DataType::Int8) => replace!(arr, from, to, Int8Array, Int8Builder), + (DataType::Int16, DataType::Int16) => replace!(arr, from, to, Int16Array, Int16Builder), + (DataType::Int32, DataType::Int32) => replace!(arr, from, to, Int32Array, Int32Builder), + (DataType::Int64, DataType::Int64) => replace!(arr, from, to, Int64Array, Int64Builder), + (DataType::UInt8, DataType::UInt8) => replace!(arr, from, to, UInt8Array, UInt8Builder), + (DataType::UInt16, DataType::UInt16) => replace!(arr, from, to, UInt16Array, UInt16Builder), + (DataType::UInt32, DataType::UInt32) => replace!(arr, from, to, UInt32Array, UInt32Builder), + (DataType::UInt64, DataType::UInt64) => replace!(arr, from, to, UInt64Array, UInt64Builder), + (array_data_type, element_data_type) => { + return Err(DataFusionError::NotImplemented(format!( + "Array_replace is not implemented for types '{array_data_type:?}' and '{element_data_type:?}'." + ))) + } + } + } + data_type => { + return Err(DataFusionError::Internal(format!( + "Array is not type '{data_type:?}'." + ))) + } + }; + + array(res.as_slice()) +} + +macro_rules! to_string { + ($ARG:expr, $ARRAY:expr, $DELIMETER:expr, $ARRAY_TYPE:ident) => {{ + let arr = downcast_arg!($ARRAY, $ARRAY_TYPE); + for x in arr { + match x { + Some(x) => { + $ARG.push_str(&x.to_string()); + $ARG.push_str($DELIMETER); + } + None => {} + } + } + + Ok($ARG) + }}; +} + +/// Array_to_string SQL function +pub fn array_to_string(args: &[ColumnarValue]) -> Result { + let arr = match &args[0] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + ColumnarValue::Array(arr) => arr.clone(), + }; + + let scalar = match &args[1] { + ColumnarValue::Scalar(scalar) => scalar.clone(), + _ => { + return Err(DataFusionError::Internal( + "Array_to_string function requires scalar element".to_string(), + )) + } + }; + + let delimeter = match scalar { + ScalarValue::Utf8(Some(value)) => String::from(&value), + _ => { + return Err(DataFusionError::Internal( + "Array_to_string function requires positive integer scalar element" + .to_string(), + )) + } + }; + + fn compute_array_to_string( + arg: &mut String, + arr: ArrayRef, + delimeter: String, + ) -> Result<&mut String> { + match arr.data_type() { + DataType::List(..) => { + let list_array = downcast_arg!(arr, ListArray); + + for i in 0..list_array.len() { + compute_array_to_string(arg, list_array.value(i), delimeter.clone())?; + } + + Ok(arg) + } + DataType::Utf8 => to_string!(arg, arr, &delimeter, StringArray), + DataType::LargeUtf8 => to_string!(arg, arr, &delimeter, LargeStringArray), + DataType::Boolean => to_string!(arg, arr, &delimeter, BooleanArray), + DataType::Float32 => to_string!(arg, arr, &delimeter, Float32Array), + DataType::Float64 => to_string!(arg, arr, &delimeter, Float64Array), + DataType::Int8 => to_string!(arg, arr, &delimeter, Int8Array), + DataType::Int16 => to_string!(arg, arr, &delimeter, Int16Array), + DataType::Int32 => to_string!(arg, arr, &delimeter, Int32Array), + DataType::Int64 => to_string!(arg, arr, &delimeter, Int64Array), + DataType::UInt8 => to_string!(arg, arr, &delimeter, UInt8Array), + DataType::UInt16 => to_string!(arg, arr, &delimeter, UInt16Array), + DataType::UInt32 => to_string!(arg, arr, &delimeter, UInt32Array), + DataType::UInt64 => to_string!(arg, arr, &delimeter, UInt64Array), + data_type => Err(DataFusionError::NotImplemented(format!( + "Array is not implemented for type '{data_type:?}'." + ))), + } + } + + let mut arg = String::from(""); + let mut res = compute_array_to_string(&mut arg, arr, delimeter.clone())?.clone(); + res.truncate(res.len() - delimeter.len()); + Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(res)))) +} + +/// Trim_array SQL function +pub fn trim_array(args: &[ColumnarValue]) -> Result { + let arr = match &args[0] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + ColumnarValue::Array(arr) => arr.clone(), + }; + + let scalar = match &args[1] { + ColumnarValue::Scalar(scalar) => scalar.clone(), + _ => { + return Err(DataFusionError::Internal( + "Trim_array function requires positive integer scalar element" + .to_string(), + )) + } + }; + + let n = match scalar { + ScalarValue::Int8(Some(value)) => value as usize, + ScalarValue::Int16(Some(value)) => value as usize, + ScalarValue::Int32(Some(value)) => value as usize, + ScalarValue::Int64(Some(value)) => value as usize, + ScalarValue::UInt8(Some(value)) => value as usize, + ScalarValue::UInt16(Some(value)) => value as usize, + ScalarValue::UInt32(Some(value)) => value as usize, + ScalarValue::UInt64(Some(value)) => value as usize, + _ => { + return Err(DataFusionError::Internal( + "Trim_array function requires positive integer scalar element" + .to_string(), + )) + } + }; + + let list_array = downcast_arg!(arr, ListArray); + let values = list_array.value(0); + let res = values.slice(0, values.len() - n); + + let mut scalars = vec![]; + for i in 0..res.len() { + scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&res, i)?)); + } + array(scalars.as_slice()) +} + +/// Cardinality SQL function +pub fn cardinality(args: &[ColumnarValue]) -> Result { + let arr = match &args[0] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + ColumnarValue::Array(arr) => arr.clone(), + }; + + fn compute_cardinality(arg: &mut u64, arr: ArrayRef) -> Result<&mut u64> { + match arr.data_type() { + DataType::List(..) => { + let list_array = downcast_arg!(arr, ListArray); + for i in 0..list_array.len() { + compute_cardinality(arg, list_array.value(i))?; + } + + Ok(arg) + } + DataType::Null + | DataType::Utf8 + | DataType::LargeUtf8 + | DataType::Boolean + | DataType::Float32 + | DataType::Float64 + | DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 => { + *arg += arr.len() as u64; + Ok(arg) + } + data_type => Err(DataFusionError::NotImplemented(format!( + "Array is not implemented for type '{data_type:?}'." + ))), + } + } + let mut arg: u64 = 0; + Ok(ColumnarValue::Array(Arc::new(UInt64Array::from(vec![ + *compute_cardinality(&mut arg, arr)?, + ])))) +} + +/// Array_length SQL function +pub fn array_length(args: &[ColumnarValue]) -> Result { + let arr = match &args[0] { + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + ColumnarValue::Array(arr) => arr.clone(), + }; + let mut element: u8 = 1; + if args.len() == 2 { + let scalar = match &args[1] { + ColumnarValue::Scalar(scalar) => scalar.clone(), + _ => { + return Err(DataFusionError::Internal( + "Array_length function requires positive integer scalar element" + .to_string(), + )) + } + }; + + element = match scalar { + ScalarValue::Int8(Some(value)) => value as u8, + ScalarValue::Int16(Some(value)) => value as u8, + ScalarValue::Int32(Some(value)) => value as u8, + ScalarValue::Int64(Some(value)) => value as u8, + ScalarValue::UInt8(Some(value)) => value, + ScalarValue::UInt16(Some(value)) => value as u8, + ScalarValue::UInt32(Some(value)) => value as u8, + ScalarValue::UInt64(Some(value)) => value as u8, + _ => { + return Err(DataFusionError::Internal( + "Array_length function requires positive integer scalar element" + .to_string(), + )) + } + }; + + if element == 0 { + return Err(DataFusionError::Internal( + "Array_length function requires positive integer scalar element" + .to_string(), + )); + } + } + + fn compute_array_length(arg: u8, array: ArrayRef, element: u8) -> Result> { + match array.data_type() { + DataType::List(..) => { + let list_array = downcast_arg!(array, ListArray); + if arg == element + 1 { + Ok(Some(list_array.len() as u8)) + } else { + compute_array_length(arg + 1, list_array.value(0), element) + } + } + DataType::Null + | DataType::Utf8 + | DataType::LargeUtf8 + | DataType::Boolean + | DataType::Float32 + | DataType::Float64 + | DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 => { + if arg == element + 1 { + Ok(Some(array.len() as u8)) + } else { + Ok(None) + } + } + data_type => Err(DataFusionError::NotImplemented(format!( + "Array is not implemented for type '{data_type:?}'." + ))), + } + } + let arg: u8 = 1; + Ok(ColumnarValue::Array(Arc::new(UInt8Array::from(vec![ + compute_array_length(arg, arr, element)?, + ])))) +} + +/// Array_dims SQL function +pub fn array_dims(args: &[ColumnarValue]) -> Result { + let arr = match &args[0] { + ColumnarValue::Array(arr) => arr.clone(), + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + }; + + fn compute_array_dims( + arg: &mut Vec, + arr: ArrayRef, + ) -> Result<&mut Vec> { + match arr.data_type() { + DataType::List(..) => { + let list_array = downcast_arg!(arr, ListArray).value(0); + arg.push(ScalarValue::UInt8(Some(list_array.len() as u8))); + return compute_array_dims(arg, list_array); + } + DataType::Null + | DataType::Utf8 + | DataType::LargeUtf8 + | DataType::Boolean + | DataType::Float32 + | DataType::Float64 + | DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 => Ok(arg), + data_type => Err(DataFusionError::NotImplemented(format!( + "Array is not implemented for type '{data_type:?}'." + ))), + } + } + + let list_field = Arc::new(Field::new("item", DataType::UInt8, true)); + let mut arg: Vec = vec![]; + Ok(ColumnarValue::Scalar(ScalarValue::List( + Some(compute_array_dims(&mut arg, arr)?.clone()), + list_field, + ))) +} + +/// Array_ndims SQL function +pub fn array_ndims(args: &[ColumnarValue]) -> Result { + let arr = match &args[0] { + ColumnarValue::Array(arr) => arr.clone(), + ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), + }; + + fn compute_array_ndims(arg: u8, arr: ArrayRef) -> Result { + match arr.data_type() { + DataType::List(..) => { + let list_array = downcast_arg!(arr, ListArray); + compute_array_ndims(arg + 1, list_array.value(0)) + } + DataType::Null + | DataType::Utf8 + | DataType::LargeUtf8 + | DataType::Boolean + | DataType::Float32 + | DataType::Float64 + | DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 => Ok(arg), + data_type => Err(DataFusionError::NotImplemented(format!( + "Array is not implemented for type '{data_type:?}'." + ))), + } + } + let arg: u8 = 0; + Ok(ColumnarValue::Array(Arc::new(UInt8Array::from(vec![ + compute_array_ndims(arg, arr)?, + ])))) +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow::array::UInt8Array; + use datafusion_common::cast::{ + as_generic_string_array, as_list_array, as_uint64_array, as_uint8_array, + }; + use datafusion_common::scalar::ScalarValue; + + #[test] + fn test_array() { + // make_array(1, 2, 3) = [1, 2, 3] + let args = [ + ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), + ]; + let array = array(&args) + .expect("failed to initialize function array") + .into_array(1); + let result = as_list_array(&array).expect("failed to initialize function array"); + assert_eq!(result.len(), 1); + assert_eq!( + &[1, 2, 3], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ) + } + + #[test] + fn test_nested_array() { + // make_array([1, 3, 5], [2, 4, 6]) = [[1, 3, 5], [2, 4, 6]] + let args = [ + ColumnarValue::Array(Arc::new(Int64Array::from(vec![1, 2]))), + ColumnarValue::Array(Arc::new(Int64Array::from(vec![3, 4]))), + ColumnarValue::Array(Arc::new(Int64Array::from(vec![5, 6]))), + ]; + let array = array(&args) + .expect("failed to initialize function array") + .into_array(1); + let result = as_list_array(&array).expect("failed to initialize function array"); + assert_eq!(result.len(), 2); + assert_eq!( + &[1, 3, 5], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + assert_eq!( + &[2, 4, 6], + result + .value(1) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + } + + #[test] + fn test_array_append() { + // array_append([1, 2, 3], 4) = [1, 2, 3, 4] + let args = [ + ColumnarValue::Scalar(ScalarValue::List( + Some(vec![ + ScalarValue::Int64(Some(1)), + ScalarValue::Int64(Some(2)), + ScalarValue::Int64(Some(3)), + ]), + Arc::new(Field::new("item", DataType::Int64, false)), + )), + ColumnarValue::Scalar(ScalarValue::Int64(Some(4))), + ]; + + let array = array_append(&args) + .expect("failed to initialize function array_append") + .into_array(1); + let result = + as_list_array(&array).expect("failed to initialize function array_append"); + + assert_eq!( + &[1, 2, 3, 4], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + } + + #[test] + fn test_array_prepend() { + // array_prepend(1, [2, 3, 4]) = [1, 2, 3, 4] + let args = [ + ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), + ColumnarValue::Scalar(ScalarValue::List( + Some(vec![ + ScalarValue::Int64(Some(2)), + ScalarValue::Int64(Some(3)), + ScalarValue::Int64(Some(4)), + ]), + Arc::new(Field::new("item", DataType::Int64, false)), + )), + ]; + + let array = array_prepend(&args) + .expect("failed to initialize function array_append") + .into_array(1); + let result = + as_list_array(&array).expect("failed to initialize function array_append"); + + assert_eq!( + &[1, 2, 3, 4], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + } + + #[test] + fn test_array_concat() { + // array_concat([1, 2, 3], [4, 5, 6], [7, 8, 9]) = [1, 2, 3, 4, 5, 6, 7, 8, 9] + let args = [ + ColumnarValue::Scalar(ScalarValue::List( + Some(vec![ + ScalarValue::Int64(Some(1)), + ScalarValue::Int64(Some(2)), + ScalarValue::Int64(Some(3)), + ]), + Arc::new(Field::new("item", DataType::Int64, false)), + )), + ColumnarValue::Scalar(ScalarValue::List( + Some(vec![ + ScalarValue::Int64(Some(4)), + ScalarValue::Int64(Some(5)), + ScalarValue::Int64(Some(6)), + ]), + Arc::new(Field::new("item", DataType::Int64, false)), + )), + ColumnarValue::Scalar(ScalarValue::List( + Some(vec![ + ScalarValue::Int64(Some(7)), + ScalarValue::Int64(Some(8)), + ScalarValue::Int64(Some(9)), + ]), + Arc::new(Field::new("item", DataType::Int64, false)), + )), + ]; + + let array = array_concat(&args) + .expect("failed to initialize function array_concat") + .into_array(1); + let result = + as_list_array(&array).expect("failed to initialize function array_concat"); + + assert_eq!( + &[1, 2, 3, 4, 5, 6, 7, 8, 9], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + } + + #[test] + fn test_array_fill() { + // array_fill(4, [5]) = [4, 4, 4, 4, 4] + let args = [ + ColumnarValue::Scalar(ScalarValue::Int64(Some(4))), + ColumnarValue::Scalar(ScalarValue::List( + Some(vec![ScalarValue::Int64(Some(5))]), + Arc::new(Field::new("item", DataType::Int64, false)), + )), + ]; + + let array = array_fill(&args) + .expect("failed to initialize function array_fill") + .into_array(1); + let result = + as_list_array(&array).expect("failed to initialize function array_fill"); + + assert_eq!(result.len(), 1); + assert_eq!( + &[4, 4, 4, 4, 4], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + } + + #[test] + fn test_array_position() { + // array_position([1, 2, 3, 4], 3) = 3 + let list_array = return_array(); + let array = array_position(&[ + list_array, + ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), + ]) + .expect("failed to initialize function array_position") + .into_array(1); + let result = + as_uint8_array(&array).expect("failed to initialize function array_position"); + + assert_eq!(result, &UInt8Array::from(vec![3])); + } + + #[test] + fn test_array_positions() { + // array_positions([1, 2, 3, 4], 3) = [3] + let list_array = return_array(); + let array = array_positions(&[ + list_array, + ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), + ]) + .expect("failed to initialize function array_position") + .into_array(1); + let result = + as_list_array(&array).expect("failed to initialize function array_position"); + + assert_eq!(result.len(), 1); + assert_eq!( + &[3], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + } + + #[test] + fn test_array_remove() { + // array_remove([1, 2, 3, 4], 3) = [1, 2, 4] + let list_array = return_array(); + let arr = array_remove(&[ + list_array, + ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), + ]) + .expect("failed to initialize function array_remove") + .into_array(1); + let result = + as_list_array(&arr).expect("failed to initialize function array_remove"); + + assert_eq!(result.len(), 1); + assert_eq!( + &[1, 2, 4], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + } + + #[test] + fn test_array_replace() { + // array_replace([1, 2, 3, 4], 3, 4) = [1, 2, 4, 4] + let list_array = return_array(); + let array = array_replace(&[ + list_array, + ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(4))), + ]) + .expect("failed to initialize function array_replace") + .into_array(1); + let result = + as_list_array(&array).expect("failed to initialize function array_replace"); + + assert_eq!(result.len(), 1); + assert_eq!( + &[1, 2, 4, 4], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + } + + #[test] + fn test_array_to_string() { + // array_to_string([1, 2, 3, 4], ',') = 1,2,3,4 + let list_array = return_array(); + let array = array_to_string(&[ + list_array, + ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from(",")))), + ]) + .expect("failed to initialize function array_to_string") + .into_array(1); + let result = as_generic_string_array::(&array) + .expect("failed to initialize function array_to_string"); + + assert_eq!(result.len(), 1); + assert_eq!("1,2,3,4", result.value(0)); + } + + #[test] + fn test_nested_array_to_string() { + // array_to_string([[1, 2, 3, 4], [5, 6, 7, 8]], '-') = 1-2-3-4-5-6-7-8 + let list_array = return_nested_array(); + let array = array_to_string(&[ + list_array, + ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("-")))), + ]) + .expect("failed to initialize function array_to_string") + .into_array(1); + let result = as_generic_string_array::(&array) + .expect("failed to initialize function array_to_string"); + + assert_eq!(result.len(), 1); + assert_eq!("1-2-3-4-5-6-7-8", result.value(0)); + } + + #[test] + fn test_trim_array() { + // trim_array([1, 2, 3, 4], 1) = [1, 2, 3] + let list_array = return_array(); + let arr = trim_array(&[ + list_array, + ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), + ]) + .expect("failed to initialize function trim_array") + .into_array(1); + let result = + as_list_array(&arr).expect("failed to initialize function trim_array"); + + assert_eq!(result.len(), 1); + assert_eq!( + &[1, 2, 3], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + + // trim_array([1, 2, 3, 4], 3) = [1] + let list_array = return_array(); + let arr = trim_array(&[ + list_array, + ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), + ]) + .expect("failed to initialize function trim_array") + .into_array(1); + let result = + as_list_array(&arr).expect("failed to initialize function trim_array"); + + assert_eq!(result.len(), 1); + assert_eq!( + &[1], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + } + + #[test] + fn test_nested_trim_array() { + // trim_array([[1, 2, 3, 4], [5, 6, 7, 8]], 1) = [[1, 2, 3, 4]] + let list_array = return_nested_array(); + let arr = trim_array(&[ + list_array, + ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), + ]) + .expect("failed to initialize function trim_array") + .into_array(1); + let binding = as_list_array(&arr) + .expect("failed to initialize function trim_array") + .value(0); + let result = + as_list_array(&binding).expect("failed to initialize function trim_array"); + + assert_eq!(result.len(), 1); + assert_eq!( + &[1, 2, 3, 4], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + } + + #[test] + fn test_cardinality() { + // cardinality([1, 2, 3, 4]) = 4 + let list_array = return_array(); + let arr = cardinality(&[list_array]) + .expect("failed to initialize function cardinality") + .into_array(1); + let result = + as_uint64_array(&arr).expect("failed to initialize function cardinality"); + + assert_eq!(result, &UInt64Array::from(vec![4])); + } + + #[test] + fn test_nested_cardinality() { + // cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]) = 8 + let list_array = return_nested_array(); + let arr = cardinality(&[list_array]) + .expect("failed to initialize function cardinality") + .into_array(1); + let result = + as_uint64_array(&arr).expect("failed to initialize function cardinality"); + + assert_eq!(result, &UInt64Array::from(vec![8])); + } + + #[test] + fn test_array_length() { + // array_length([1, 2, 3, 4]) = 4 + let list_array = return_array(); + let array = array_length(&[list_array.clone()]) + .expect("failed to initialize function array_ndims") + .into_array(1); + let result = + as_uint8_array(&array).expect("failed to initialize function array_ndims"); + + assert_eq!(result, &UInt8Array::from(vec![4])); + + // array_length([1, 2, 3, 4], 1) = 2 + let array = array_length(&[ + list_array, + ColumnarValue::Scalar(ScalarValue::UInt8(Some(1_u8))), + ]) + .expect("failed to initialize function array_ndims") + .into_array(1); + let result = + as_uint8_array(&array).expect("failed to initialize function array_ndims"); + + assert_eq!(result, &UInt8Array::from(vec![4])); + } + + #[test] + fn test_nested_array_length() { + let list_array = return_nested_array(); + + // array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 1) = 2 + let array = array_length(&[ + list_array.clone(), + ColumnarValue::Scalar(ScalarValue::UInt8(Some(1_u8))), + ]) + .expect("failed to initialize function array_length") + .into_array(1); + let result = + as_uint8_array(&array).expect("failed to initialize function array_length"); + + assert_eq!(result, &UInt8Array::from(vec![2])); + + // array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 2) = 4 + let array = array_length(&[ + list_array.clone(), + ColumnarValue::Scalar(ScalarValue::UInt8(Some(2_u8))), + ]) + .expect("failed to initialize function array_length") + .into_array(1); + let result = + as_uint8_array(&array).expect("failed to initialize function array_length"); + + assert_eq!(result, &UInt8Array::from(vec![4])); + + // array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 3) = NULL + let array = array_length(&[ + list_array, + ColumnarValue::Scalar(ScalarValue::UInt8(Some(3_u8))), + ]) + .expect("failed to initialize function array_length") + .into_array(1); + let result = + as_uint8_array(&array).expect("failed to initialize function array_length"); + + assert_eq!(result, &UInt8Array::from(vec![None])); + } + + #[test] + fn test_array_dims() { + // array_dims([1, 2, 3, 4]) = [4] + let list_array = return_array(); + + let array = array_dims(&[list_array]) + .expect("failed to initialize function array_dims") + .into_array(1); + let result = + as_list_array(&array).expect("failed to initialize function array_dims"); + + assert_eq!( + &[4], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + } + + #[test] + fn test_nested_array_dims() { + // array_dims([[1, 2, 3, 4], [5, 6, 7, 8]]) = [2, 4] + let list_array = return_nested_array(); + + let array = array_dims(&[list_array]) + .expect("failed to initialize function array_dims") + .into_array(1); + let result = + as_list_array(&array).expect("failed to initialize function array_dims"); + + assert_eq!( + &[2, 4], + result + .value(0) + .as_any() + .downcast_ref::() + .unwrap() + .values() + ); + } + + #[test] + fn test_array_ndims() { + // array_ndims([1, 2]) = 1 + let list_array = return_array(); + + let array = array_ndims(&[list_array]) + .expect("failed to initialize function array_ndims") + .into_array(1); + let result = + as_uint8_array(&array).expect("failed to initialize function array_ndims"); + + assert_eq!(result, &UInt8Array::from(vec![1])); + } + + #[test] + fn test_nested_array_ndims() { + // array_ndims([[1, 2], [3, 4]]) = 2 + let list_array = return_nested_array(); + + let array = array_ndims(&[list_array]) + .expect("failed to initialize function array_ndims") + .into_array(1); + let result = + as_uint8_array(&array).expect("failed to initialize function array_ndims"); + + assert_eq!(result, &UInt8Array::from(vec![2])); + } + + fn return_array() -> ColumnarValue { + let args = [ + ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(4))), + ]; + let result = array(&args) + .expect("failed to initialize function array") + .into_array(1); + ColumnarValue::Array(result.clone()) + } + + fn return_nested_array() -> ColumnarValue { + let args = [ + ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(4))), + ]; + let arr1 = array(&args) + .expect("failed to initialize function array") + .into_array(1); + + let args = [ + ColumnarValue::Scalar(ScalarValue::Int64(Some(5))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(6))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(7))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(8))), + ]; + let arr2 = array(&args) + .expect("failed to initialize function array") + .into_array(1); + + let args = [ColumnarValue::Array(arr1), ColumnarValue::Array(arr2)]; + let result = array(&args) + .expect("failed to initialize function array") + .into_array(1); + ColumnarValue::Array(result.clone()) + } +} diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 7020dda8b122..15728854ff3d 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -384,8 +384,30 @@ pub fn create_physical_fun( Arc::new(|args| make_scalar_function(math_expressions::log)(args)) } - // string functions + // array functions + BuiltinScalarFunction::ArrayAppend => Arc::new(array_expressions::array_append), + BuiltinScalarFunction::ArrayConcat => Arc::new(array_expressions::array_concat), + BuiltinScalarFunction::ArrayDims => Arc::new(array_expressions::array_dims), + BuiltinScalarFunction::ArrayFill => Arc::new(array_expressions::array_fill), + BuiltinScalarFunction::ArrayLength => Arc::new(array_expressions::array_length), + BuiltinScalarFunction::ArrayNdims => Arc::new(array_expressions::array_ndims), + BuiltinScalarFunction::ArrayPosition => { + Arc::new(array_expressions::array_position) + } + BuiltinScalarFunction::ArrayPositions => { + Arc::new(array_expressions::array_positions) + } + BuiltinScalarFunction::ArrayPrepend => Arc::new(array_expressions::array_prepend), + BuiltinScalarFunction::ArrayRemove => Arc::new(array_expressions::array_remove), + BuiltinScalarFunction::ArrayReplace => Arc::new(array_expressions::array_replace), + BuiltinScalarFunction::ArrayToString => { + Arc::new(array_expressions::array_to_string) + } + BuiltinScalarFunction::Cardinality => Arc::new(array_expressions::cardinality), BuiltinScalarFunction::MakeArray => Arc::new(array_expressions::array), + BuiltinScalarFunction::TrimArray => Arc::new(array_expressions::trim_array), + + // string functions BuiltinScalarFunction::Struct => Arc::new(struct_expressions::struct_expr), BuiltinScalarFunction::Ascii => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { @@ -793,12 +815,12 @@ mod tests { use arrow::{ array::{ Array, ArrayRef, BinaryArray, BooleanArray, Float32Array, Float64Array, - Int32Array, StringArray, UInt32Array, UInt64Array, + Int32Array, StringArray, UInt64Array, }, datatypes::Field, record_batch::RecordBatch, }; - use datafusion_common::cast::{as_fixed_size_list_array, as_uint64_array}; + use datafusion_common::cast::as_uint64_array; use datafusion_common::{Result, ScalarValue}; /// $FUNC function to test @@ -2785,73 +2807,6 @@ mod tests { Ok(()) } - fn generic_test_array( - value1: ArrayRef, - value2: ArrayRef, - expected_type: DataType, - expected: &str, - ) -> Result<()> { - // any type works here: we evaluate against a literal of `value` - let schema = Schema::new(vec![ - Field::new("a", value1.data_type().clone(), false), - Field::new("b", value2.data_type().clone(), false), - ]); - let columns: Vec = vec![value1, value2]; - let execution_props = ExecutionProps::new(); - - let expr = create_physical_expr_with_type_coercion( - &BuiltinScalarFunction::MakeArray, - &[col("a", &schema)?, col("b", &schema)?], - &schema, - &execution_props, - )?; - - // type is correct - assert_eq!( - expr.data_type(&schema)?, - // type equals to a common coercion - DataType::FixedSizeList(Arc::new(Field::new("item", expected_type, true)), 2) - ); - - // evaluate works - let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?; - let result = expr.evaluate(&batch)?.into_array(batch.num_rows()); - - // downcast works - let result = as_fixed_size_list_array(&result)?; - - // value is correct - assert_eq!(format!("{:?}", result.value(0)), expected); - - Ok(()) - } - - #[test] - fn test_array() -> Result<()> { - generic_test_array( - Arc::new(StringArray::from_slice(["aa"])), - Arc::new(StringArray::from_slice(["bb"])), - DataType::Utf8, - "StringArray\n[\n \"aa\",\n \"bb\",\n]", - )?; - - // different types, to validate that casting happens - generic_test_array( - Arc::new(UInt32Array::from_slice([1u32])), - Arc::new(UInt64Array::from_slice([1u64])), - DataType::UInt64, - "PrimitiveArray\n[\n 1,\n 1,\n]", - )?; - - // different types (another order), to validate that casting happens - generic_test_array( - Arc::new(UInt64Array::from_slice([1u64])), - Arc::new(UInt32Array::from_slice([1u32])), - DataType::UInt64, - "PrimitiveArray\n[\n 1,\n 1,\n]", - ) - } - #[test] #[cfg(feature = "regex_expressions")] fn test_regexp_match() -> Result<()> { diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index c23d585e61d7..9b05dea71294 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -549,6 +549,20 @@ enum ScalarFunction { Factorial = 83; Lcm = 84; Gcd = 85; + ArrayAppend = 86; + ArrayConcat = 87; + ArrayDims = 88; + ArrayFill = 89; + ArrayLength = 90; + ArrayNdims = 91; + ArrayPosition = 92; + ArrayPositions = 93; + ArrayPrepend = 94; + ArrayRemove = 95; + ArrayReplace = 96; + ArrayToString = 97; + Cardinality = 98; + TrimArray = 99; } message ScalarFunctionNode { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 369cc0b24e71..890fe7221a8e 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -17851,6 +17851,20 @@ impl serde::Serialize for ScalarFunction { Self::Factorial => "Factorial", Self::Lcm => "Lcm", Self::Gcd => "Gcd", + Self::ArrayAppend => "ArrayAppend", + Self::ArrayConcat => "ArrayConcat", + Self::ArrayDims => "ArrayDims", + Self::ArrayFill => "ArrayFill", + Self::ArrayLength => "ArrayLength", + Self::ArrayNdims => "ArrayNdims", + Self::ArrayPosition => "ArrayPosition", + Self::ArrayPositions => "ArrayPositions", + Self::ArrayPrepend => "ArrayPrepend", + Self::ArrayRemove => "ArrayRemove", + Self::ArrayReplace => "ArrayReplace", + Self::ArrayToString => "ArrayToString", + Self::Cardinality => "Cardinality", + Self::TrimArray => "TrimArray", }; serializer.serialize_str(variant) } @@ -17948,6 +17962,20 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Factorial", "Lcm", "Gcd", + "ArrayAppend", + "ArrayConcat", + "ArrayDims", + "ArrayFill", + "ArrayLength", + "ArrayNdims", + "ArrayPosition", + "ArrayPositions", + "ArrayPrepend", + "ArrayRemove", + "ArrayReplace", + "ArrayToString", + "Cardinality", + "TrimArray", ]; struct GeneratedVisitor; @@ -18076,6 +18104,20 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Factorial" => Ok(ScalarFunction::Factorial), "Lcm" => Ok(ScalarFunction::Lcm), "Gcd" => Ok(ScalarFunction::Gcd), + "ArrayAppend" => Ok(ScalarFunction::ArrayAppend), + "ArrayConcat" => Ok(ScalarFunction::ArrayConcat), + "ArrayDims" => Ok(ScalarFunction::ArrayDims), + "ArrayFill" => Ok(ScalarFunction::ArrayFill), + "ArrayLength" => Ok(ScalarFunction::ArrayLength), + "ArrayNdims" => Ok(ScalarFunction::ArrayNdims), + "ArrayPosition" => Ok(ScalarFunction::ArrayPosition), + "ArrayPositions" => Ok(ScalarFunction::ArrayPositions), + "ArrayPrepend" => Ok(ScalarFunction::ArrayPrepend), + "ArrayRemove" => Ok(ScalarFunction::ArrayRemove), + "ArrayReplace" => Ok(ScalarFunction::ArrayReplace), + "ArrayToString" => Ok(ScalarFunction::ArrayToString), + "Cardinality" => Ok(ScalarFunction::Cardinality), + "TrimArray" => Ok(ScalarFunction::TrimArray), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 4cf50d70bf0e..b1ae0058dcb2 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2208,6 +2208,20 @@ pub enum ScalarFunction { Factorial = 83, Lcm = 84, Gcd = 85, + ArrayAppend = 86, + ArrayConcat = 87, + ArrayDims = 88, + ArrayFill = 89, + ArrayLength = 90, + ArrayNdims = 91, + ArrayPosition = 92, + ArrayPositions = 93, + ArrayPrepend = 94, + ArrayRemove = 95, + ArrayReplace = 96, + ArrayToString = 97, + Cardinality = 98, + TrimArray = 99, } impl ScalarFunction { /// String value of the enum field names used in the ProtoBuf definition. @@ -2302,6 +2316,20 @@ impl ScalarFunction { ScalarFunction::Factorial => "Factorial", ScalarFunction::Lcm => "Lcm", ScalarFunction::Gcd => "Gcd", + ScalarFunction::ArrayAppend => "ArrayAppend", + ScalarFunction::ArrayConcat => "ArrayConcat", + ScalarFunction::ArrayDims => "ArrayDims", + ScalarFunction::ArrayFill => "ArrayFill", + ScalarFunction::ArrayLength => "ArrayLength", + ScalarFunction::ArrayNdims => "ArrayNdims", + ScalarFunction::ArrayPosition => "ArrayPosition", + ScalarFunction::ArrayPositions => "ArrayPositions", + ScalarFunction::ArrayPrepend => "ArrayPrepend", + ScalarFunction::ArrayRemove => "ArrayRemove", + ScalarFunction::ArrayReplace => "ArrayReplace", + ScalarFunction::ArrayToString => "ArrayToString", + ScalarFunction::Cardinality => "Cardinality", + ScalarFunction::TrimArray => "TrimArray", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2393,6 +2421,20 @@ impl ScalarFunction { "Factorial" => Some(Self::Factorial), "Lcm" => Some(Self::Lcm), "Gcd" => Some(Self::Gcd), + "ArrayAppend" => Some(Self::ArrayAppend), + "ArrayConcat" => Some(Self::ArrayConcat), + "ArrayDims" => Some(Self::ArrayDims), + "ArrayFill" => Some(Self::ArrayFill), + "ArrayLength" => Some(Self::ArrayLength), + "ArrayNdims" => Some(Self::ArrayNdims), + "ArrayPosition" => Some(Self::ArrayPosition), + "ArrayPositions" => Some(Self::ArrayPositions), + "ArrayPrepend" => Some(Self::ArrayPrepend), + "ArrayRemove" => Some(Self::ArrayRemove), + "ArrayReplace" => Some(Self::ArrayReplace), + "ArrayToString" => Some(Self::ArrayToString), + "Cardinality" => Some(Self::Cardinality), + "TrimArray" => Some(Self::TrimArray), _ => None, } } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 1150220bef4a..ab2985f448a8 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -36,9 +36,12 @@ use datafusion_common::{ }; use datafusion_expr::expr::Placeholder; use datafusion_expr::{ - abs, acos, acosh, array, ascii, asin, asinh, atan, atan2, atanh, bit_length, btrim, - cbrt, ceil, character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, cosh, - date_bin, date_part, date_trunc, degrees, digest, exp, + abs, acos, acosh, array, array_append, array_concat, array_dims, array_fill, + array_length, array_ndims, array_position, array_positions, array_prepend, + array_remove, array_replace, array_to_string, ascii, asin, asinh, atan, atan2, atanh, + bit_length, btrim, cardinality, cbrt, ceil, character_length, chr, coalesce, + concat_expr, concat_ws_expr, cos, cosh, date_bin, date_part, date_trunc, degrees, + digest, exp, expr::{self, InList, Sort, WindowFunction}, factorial, floor, from_unixtime, gcd, lcm, left, ln, log, log10, log2, logical_plan::{PlanType, StringifiedPlan}, @@ -46,7 +49,8 @@ use datafusion_expr::{ regexp_match, regexp_replace, repeat, replace, reverse, right, round, rpad, rtrim, sha224, sha256, sha384, sha512, signum, sin, sinh, split_part, sqrt, starts_with, strpos, substr, substring, tan, tanh, to_hex, to_timestamp_micros, - to_timestamp_millis, to_timestamp_seconds, translate, trim, trunc, upper, uuid, + to_timestamp_millis, to_timestamp_seconds, translate, trim, trim_array, trunc, upper, + uuid, window_frame::regularize, AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case, Cast, Expr, GetIndexedField, GroupingSet, @@ -444,7 +448,21 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Ltrim => Self::Ltrim, ScalarFunction::Rtrim => Self::Rtrim, ScalarFunction::ToTimestamp => Self::ToTimestamp, + ScalarFunction::ArrayAppend => Self::ArrayAppend, + ScalarFunction::ArrayConcat => Self::ArrayConcat, + ScalarFunction::ArrayDims => Self::ArrayDims, + ScalarFunction::ArrayFill => Self::ArrayFill, + ScalarFunction::ArrayLength => Self::ArrayLength, + ScalarFunction::ArrayNdims => Self::ArrayNdims, + ScalarFunction::ArrayPosition => Self::ArrayPosition, + ScalarFunction::ArrayPositions => Self::ArrayPositions, + ScalarFunction::ArrayPrepend => Self::ArrayPrepend, + ScalarFunction::ArrayRemove => Self::ArrayRemove, + ScalarFunction::ArrayReplace => Self::ArrayReplace, + ScalarFunction::ArrayToString => Self::ArrayToString, + ScalarFunction::Cardinality => Self::Cardinality, ScalarFunction::Array => Self::MakeArray, + ScalarFunction::TrimArray => Self::TrimArray, ScalarFunction::NullIf => Self::NullIf, ScalarFunction::DatePart => Self::DatePart, ScalarFunction::DateTrunc => Self::DateTrunc, @@ -1160,6 +1178,63 @@ pub fn parse_expr( .map(|expr| parse_expr(expr, registry)) .collect::, _>>()?, )), + ScalarFunction::ArrayAppend => Ok(array_append( + parse_expr(&args[0], registry)?, + parse_expr(&args[1], registry)?, + )), + ScalarFunction::ArrayPrepend => Ok(array_prepend( + parse_expr(&args[0], registry)?, + parse_expr(&args[1], registry)?, + )), + ScalarFunction::ArrayConcat => Ok(array_concat( + args.to_owned() + .iter() + .map(|expr| parse_expr(expr, registry)) + .collect::, _>>()?, + )), + ScalarFunction::ArrayFill => Ok(array_fill( + parse_expr(&args[0], registry)?, + parse_expr(&args[1], registry)?, + )), + ScalarFunction::ArrayPosition => Ok(array_position( + parse_expr(&args[0], registry)?, + parse_expr(&args[1], registry)?, + parse_expr(&args[2], registry)?, + )), + ScalarFunction::ArrayPositions => Ok(array_positions( + parse_expr(&args[0], registry)?, + parse_expr(&args[1], registry)?, + )), + ScalarFunction::ArrayRemove => Ok(array_remove( + parse_expr(&args[0], registry)?, + parse_expr(&args[1], registry)?, + )), + ScalarFunction::ArrayReplace => Ok(array_replace( + parse_expr(&args[0], registry)?, + parse_expr(&args[1], registry)?, + parse_expr(&args[2], registry)?, + )), + ScalarFunction::ArrayToString => Ok(array_to_string( + parse_expr(&args[0], registry)?, + parse_expr(&args[1], registry)?, + )), + ScalarFunction::Cardinality => { + Ok(cardinality(parse_expr(&args[0], registry)?)) + } + ScalarFunction::TrimArray => Ok(trim_array( + parse_expr(&args[0], registry)?, + parse_expr(&args[1], registry)?, + )), + ScalarFunction::ArrayLength => Ok(array_length( + parse_expr(&args[0], registry)?, + parse_expr(&args[1], registry)?, + )), + ScalarFunction::ArrayDims => { + Ok(array_dims(parse_expr(&args[0], registry)?)) + } + ScalarFunction::ArrayNdims => { + Ok(array_ndims(parse_expr(&args[0], registry)?)) + } ScalarFunction::Sqrt => Ok(sqrt(parse_expr(&args[0], registry)?)), ScalarFunction::Cbrt => Ok(cbrt(parse_expr(&args[0], registry)?)), ScalarFunction::Sin => Ok(sin(parse_expr(&args[0], registry)?)), diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 191c49194407..dbf9432bdc2d 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1336,7 +1336,21 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Ltrim => Self::Ltrim, BuiltinScalarFunction::Rtrim => Self::Rtrim, BuiltinScalarFunction::ToTimestamp => Self::ToTimestamp, + BuiltinScalarFunction::ArrayAppend => Self::ArrayAppend, + BuiltinScalarFunction::ArrayConcat => Self::ArrayConcat, + BuiltinScalarFunction::ArrayDims => Self::ArrayDims, + BuiltinScalarFunction::ArrayFill => Self::ArrayFill, + BuiltinScalarFunction::ArrayLength => Self::ArrayLength, + BuiltinScalarFunction::ArrayNdims => Self::ArrayNdims, + BuiltinScalarFunction::ArrayPosition => Self::ArrayPosition, + BuiltinScalarFunction::ArrayPositions => Self::ArrayPositions, + BuiltinScalarFunction::ArrayPrepend => Self::ArrayPrepend, + BuiltinScalarFunction::ArrayRemove => Self::ArrayRemove, + BuiltinScalarFunction::ArrayReplace => Self::ArrayReplace, + BuiltinScalarFunction::ArrayToString => Self::ArrayToString, + BuiltinScalarFunction::Cardinality => Self::Cardinality, BuiltinScalarFunction::MakeArray => Self::Array, + BuiltinScalarFunction::TrimArray => Self::TrimArray, BuiltinScalarFunction::NullIf => Self::NullIf, BuiltinScalarFunction::DatePart => Self::DatePart, BuiltinScalarFunction::DateTrunc => Self::DateTrunc, diff --git a/docs/source/user-guide/expressions.md b/docs/source/user-guide/expressions.md index f2305cb90824..07f5923a6a34 100644 --- a/docs/source/user-guide/expressions.md +++ b/docs/source/user-guide/expressions.md @@ -177,6 +177,26 @@ Unlike to some databases the math functions in Datafusion works the same way as | trim(string) | Removes all characters, space by default from the string (`string`) | | upper | Converts all characters in the string into upper case. Example: `upper('hello') -> HELLO` | +## Array Expressions + +| Function | Notes | +| ------------------------------------ | --------------------------------------------------------------- | +| array_append(array, element) | Appends an element to the end of an array. | +| array_concat(array[, ..., array_n]) | Concatenates arrays. | +| array_dims(array) | Returns an array of the array's dimensions. | +| array_fill(element, array) | Returns an array filled with copies of the given value. | +| array_length(array, dimension) | Returns the length of the array dimension. | +| array_ndims(array) | Returns the number of dimensions of the array. | +| array_position(array, element) | Searches for an element in the array, returns first occurrence. | +| array_positions(array, element) | Searches for an element in the array, returns all occurrences. | +| array_prepend(array, element) | Prepends an element to the beginning of an array. | +| array_remove(array, element) | Removes all elements equal to the given value from the array. | +| array_replace(array, from, to) | Replaces a specified element with another specified element. | +| array_to_string(array, delimeter) | Converts each element to its text representation. | +| cardinality(array) | Returns the total number of elements in the array. | +| make_array(value1, [value2 [, ...]]) | Returns an Arrow array using the specified input expressions. | +| trim_array(array, n) | Removes the last n elements from the array. | + ## Regular Expressions | Function | Notes | diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index cbf9e2fa1864..4b2a556806b7 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1376,6 +1376,234 @@ from_unixtime(expression) - **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. +## Array Functions + +- [array_append](#array_append) +- [array_concat](#array_concat) +- [array_dims](#array_dims) +- [array_fill](#array_fill) +- [array_length](#array_length) +- [array_ndims](#array_ndims) +- [array_position](#array_position) +- [array_positions](#array_positions) +- [array_prepend](#array_prepend) +- [array_remove](#array_remove) +- [array_replace](#array_replace) +- [array_to_string](#array_to_string) +- [cardinality](#cardinality) +- [make_array](#make_array) +- [trim_array](#trim_array) + +### `array_append` + +Appends an element to the end of an array. + +``` +array_append(array, element) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. +- **element**: Element to append to the array. + +### `array_concat` + +Concatenates arrays. + +``` +array_concat(array[, ..., array_n]) +``` + +#### Arguments + +- **array**: Array expression to concatenate. + Can be a constant, column, or function, and any combination of array operators. +- **array_n**: Subsequent array column or literal array to concatenate. + +### `array_dims` + +Returns an array of the array's dimensions. + +``` +array_dims(array) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. + +### `array_fill` + +Returns an array filled with copies of the given value. + +``` +array_fill(element, array) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. +- **element**: Element to copy to the array. + +### `array_length` + +Returns the length of the array dimension. + +``` +array_length(array, dimension) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. +- **dimension**: Array dimension. + +### `array_ndims` + +Returns the number of dimensions of the array. + +``` +array_ndims(array, element) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. + +### `array_position` + +Returns a string with an input string repeated a specified number. + +``` +array_position(array, element) +array_position(array, element, index) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. +- **element**: Element to search for position in the array. +- **index**: Index at which to start searching. + +### `array_positions` + +Searches for an element in the array, returns all occurrences. + +``` +array_positions(array, element) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. +- **element**: Element to search for positions in the array. + +### `array_prepend` + +Prepends an element to the beginning of an array. + +``` +array_prepend(element, array) +``` + +#### Arguments + +- **element**: Element to prepend to the array. +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. + +### `array_remove` + +Removes all elements equal to the given value from the array. + +``` +array_remove(array, element) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. +- **element**: Element to be removed from the array. + +### `array_replace` + +Replaces a specified element with another specified element. + +``` +array_replace(array, from, to) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. +- **from**: Initial element. +- **to**: Final element. + +### `array_to_string` + +Converts each element to its text representation. + +``` +array_to_string(array, delimeter) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. +- **delimeter**: Array element separator. + +### `cardinality` + +Returns the total number of elements in the array. + +``` +cardinality(array) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. + +### `make_array` + +Returns an Arrow array using the specified input expressions. + +``` +make_array(expression1[, ..., expression_n]) +``` + +#### Arguments + +- **expression_n**: Expression to include in the output array. + Can be a constant, column, or function, and any combination of arithmetic or + string operators. + +### `trim_array` + +Removes the last n elements from the array. + +``` +trim_array(array, n) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. +- **n**: Element to trim the array. + ## Hashing Functions - [digest](#digest) @@ -1476,25 +1704,10 @@ sha512(expression) ## Other Functions -- [make_array](#make_array) - [arrow_cast](#arrow_cast) - [arrow_typeof](#arrow_typeof) - [struct](#struct) -### `make_array` - -Returns an Arrow array using the specified input expressions. - -``` -make_array(expression1[, ..., expression_n]) -``` - -#### Arguments - -- **expression_n**: Expression to include in the output array. - Can be a constant, column, or function, and any combination of arithmetic or - string operators. - ### `arrow_cast` Casts a value to a specific Arrow data type: