diff --git a/datafusion-examples/examples/simple_udf.rs b/datafusion-examples/examples/simple_udf.rs index 0ffec44a3720..1c58af2ebb10 100644 --- a/datafusion-examples/examples/simple_udf.rs +++ b/datafusion-examples/examples/simple_udf.rs @@ -22,6 +22,7 @@ use datafusion::arrow::{ util::pretty, }; +use datafusion::execution::context::ExecutionProps; use datafusion::prelude::*; use datafusion::{error::Result, physical_plan::functions::make_scalar_function}; use std::sync::Arc; @@ -60,7 +61,7 @@ async fn main() -> Result<()> { let mut ctx = create_context()?; // First, declare the actual implementation of the calculation - let pow = |args: &[ArrayRef]| { + let pow = |args: &[ArrayRef], _: &ExecutionProps| { // in DataFusion, all `args` and output are dynamically-typed arrays, which means that we need to: // 1. cast the values to the type we want // 2. perform the computation for every element in the array (using a loop or SIMD) and construct the result diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 7b51d1c9af93..801406e0311e 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -476,7 +476,8 @@ impl ExecutionContext { &self, logical_plan: &LogicalPlan, ) -> Result> { - let state = self.state.lock().unwrap(); + let mut state = self.state.lock().unwrap(); + state.execution_props.start_execution(); state .config .query_planner @@ -746,13 +747,13 @@ impl ExecutionConfig { } } -/// Holds per-execution properties and data (such as starting timestamps, etc). -/// An instance of this struct is created each time a [`LogicalPlan`] is prepared for -/// execution (optimized). If the same plan is optimized multiple times, a new -/// `ExecutionProps` is created each time. +/// Holds per-execution properties and data (such as starting timestamps, etc). +/// An instance of this struct is created each time a [`LogicalPlan`] is prepared for +/// execution (optimized). If the same plan is optimized multiple times, a new +/// `ExecutionProps` is created each time. #[derive(Clone)] pub struct ExecutionProps { - pub(crate) query_execution_start_time: Option>, + pub(crate) query_execution_start_time: DateTime, } /// Execution context for registering data sources and executing queries @@ -776,15 +777,13 @@ impl ExecutionProps { /// Creates a new execution props pub fn new() -> Self { ExecutionProps { - query_execution_start_time: None, + query_execution_start_time: chrono::Utc::now(), } } /// Marks the execution of query started pub fn start_execution(&mut self) -> &Self { - if self.query_execution_start_time.is_none() { - self.query_execution_start_time = Some(chrono::Utc::now()); - } + self.query_execution_start_time = chrono::Utc::now(); &*self } } @@ -2096,7 +2095,7 @@ mod tests { ctx.register_table("t", test::table_with_sequence(1, 1).unwrap()) .unwrap(); - let myfunc = |args: &[ArrayRef]| Ok(Arc::clone(&args[0])); + let myfunc = |args: &[ArrayRef], _: &ExecutionProps| Ok(Arc::clone(&args[0])); let myfunc = make_scalar_function(myfunc); ctx.register_udf(create_udf( @@ -2376,7 +2375,7 @@ mod tests { let provider = MemTable::try_new(Arc::new(schema), vec![vec![batch]])?; ctx.register_table("t", Arc::new(provider))?; - let myfunc = |args: &[ArrayRef]| { + let myfunc = |args: &[ArrayRef], _: &ExecutionProps| { let l = &args[0] .as_any() .downcast_ref::() diff --git a/datafusion/src/execution/dataframe_impl.rs b/datafusion/src/execution/dataframe_impl.rs index 2a0c39aa48eb..eceafb141157 100644 --- a/datafusion/src/execution/dataframe_impl.rs +++ b/datafusion/src/execution/dataframe_impl.rs @@ -304,7 +304,7 @@ mod tests { // declare the udf let my_fn: ScalarFunctionImplementation = - Arc::new(|_: &[ColumnarValue]| unimplemented!("my_fn is not implemented")); + Arc::new(|_: &[ColumnarValue], _| unimplemented!("my_fn is not implemented")); // create and register the udf ctx.register_udf(create_udf( diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index 7407fbc3b3da..95b071fa9ff9 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -29,7 +29,6 @@ use crate::optimizer::optimizer::OptimizerRule; use crate::optimizer::utils; use crate::physical_plan::functions::BuiltinScalarFunction; use crate::scalar::ScalarValue; -use chrono::{DateTime, Utc}; /// Optimizer that simplifies comparison expressions involving boolean literals. /// @@ -215,7 +214,6 @@ impl<'a> ExprRewriter for ConstantRewriter<'a> { } => Expr::Literal(ScalarValue::TimestampNanosecond(Some( self.execution_props .query_execution_start_time - .unwrap() .timestamp_nanos(), ))), expr => { @@ -235,6 +233,7 @@ mod tests { }; use arrow::datatypes::*; + use chrono::{DateTime, Utc}; fn test_table_scan() -> Result { let schema = Schema::new(vec![ @@ -623,7 +622,7 @@ mod tests { ) -> String { let rule = ConstantFolding::new(); let execution_props = ExecutionProps { - query_execution_start_time: Some(date_time.clone()), + query_execution_start_time: date_time.clone(), }; let optimized_plan = rule diff --git a/datafusion/src/physical_plan/array_expressions.rs b/datafusion/src/physical_plan/array_expressions.rs index a7e03b70e5d2..c4e7f7b05a4f 100644 --- a/datafusion/src/physical_plan/array_expressions.rs +++ b/datafusion/src/physical_plan/array_expressions.rs @@ -23,6 +23,7 @@ use arrow::datatypes::DataType; use std::sync::Arc; use super::ColumnarValue; +use crate::execution::context::ExecutionProps; macro_rules! downcast_vec { ($ARGS:expr, $ARRAY_TYPE:ident) => {{ @@ -90,7 +91,7 @@ fn array_array(args: &[&dyn Array]) -> Result { } /// put values in an array. -pub fn array(values: &[ColumnarValue]) -> Result { +pub fn array(values: &[ColumnarValue], _: &ExecutionProps) -> Result { let arrays: Vec<&dyn Array> = values .iter() .map(|value| { diff --git a/datafusion/src/physical_plan/crypto_expressions.rs b/datafusion/src/physical_plan/crypto_expressions.rs index 8ad876b24d0c..8e1cef1d16d9 100644 --- a/datafusion/src/physical_plan/crypto_expressions.rs +++ b/datafusion/src/physical_plan/crypto_expressions.rs @@ -34,6 +34,7 @@ use arrow::{ }; use super::{string_expressions::unary_string_function, ColumnarValue}; +use crate::execution::context::ExecutionProps; /// Computes the md5 of a string. fn md5_process(input: &str) -> String { @@ -144,7 +145,7 @@ fn md5_array( } /// crypto function that accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`] -pub fn md5(args: &[ColumnarValue]) -> Result { +pub fn md5(args: &[ColumnarValue], _: &ExecutionProps) -> Result { match &args[0] { ColumnarValue::Array(a) => match a.data_type() { DataType::Utf8 => Ok(ColumnarValue::Array(Arc::new(md5_array::(&[ @@ -178,21 +179,21 @@ pub fn md5(args: &[ColumnarValue]) -> Result { } /// crypto function that accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`] -pub fn sha224(args: &[ColumnarValue]) -> Result { +pub fn sha224(args: &[ColumnarValue], _: &ExecutionProps) -> Result { handle(args, sha_process::, "ssh224") } /// crypto function that accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`] -pub fn sha256(args: &[ColumnarValue]) -> Result { +pub fn sha256(args: &[ColumnarValue], _: &ExecutionProps) -> Result { handle(args, sha_process::, "sha256") } /// crypto function that accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`] -pub fn sha384(args: &[ColumnarValue]) -> Result { +pub fn sha384(args: &[ColumnarValue], _: &ExecutionProps) -> Result { handle(args, sha_process::, "sha384") } /// crypto function that accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`] -pub fn sha512(args: &[ColumnarValue]) -> Result { +pub fn sha512(args: &[ColumnarValue], _: &ExecutionProps) -> Result { handle(args, sha_process::, "sha512") } diff --git a/datafusion/src/physical_plan/datetime_expressions.rs b/datafusion/src/physical_plan/datetime_expressions.rs index 5eb63ee5fb34..2c30faa7044d 100644 --- a/datafusion/src/physical_plan/datetime_expressions.rs +++ b/datafusion/src/physical_plan/datetime_expressions.rs @@ -19,6 +19,7 @@ use std::sync::Arc; use super::ColumnarValue; +use crate::execution::context::ExecutionProps; use crate::{ error::{DataFusionError, Result}, scalar::{ScalarType, ScalarValue}, @@ -260,7 +261,7 @@ where } /// to_timestamp SQL function -pub fn to_timestamp(args: &[ColumnarValue]) -> Result { +pub fn to_timestamp(args: &[ColumnarValue], _: &ExecutionProps) -> Result { handle::( args, string_to_timestamp_nanos, @@ -269,9 +270,12 @@ pub fn to_timestamp(args: &[ColumnarValue]) -> Result { } /// now SQL function -pub fn now(_: &[ColumnarValue]) -> Result { +pub fn now( + _: &[ColumnarValue], + execution_props: &ExecutionProps, +) -> Result { Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond( - Some(chrono::Utc::now().timestamp_nanos()), + Some(execution_props.query_execution_start_time.timestamp_nanos()), ))) } @@ -315,7 +319,7 @@ fn date_trunc_single(granularity: &str, value: i64) -> Result { } /// date_trunc SQL function -pub fn date_trunc(args: &[ColumnarValue]) -> Result { +pub fn date_trunc(args: &[ColumnarValue], _: &ExecutionProps) -> Result { let (granularity, array) = (&args[0], &args[1]); let granularity = @@ -404,7 +408,7 @@ macro_rules! extract_date_part { } /// DATE_PART SQL function -pub fn date_part(args: &[ColumnarValue]) -> Result { +pub fn date_part(args: &[ColumnarValue], _: &ExecutionProps) -> Result { if args.len() != 2 { return Err(DataFusionError::Execution( "Expected two arguments in DATE_PART".to_string(), @@ -470,7 +474,7 @@ mod tests { let string_array = ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef); - let parsed_timestamps = to_timestamp(&[string_array]) + let parsed_timestamps = to_timestamp(&[string_array], &ExecutionProps::new()) .expect("that to_timestamp parsed values without error"); if let ColumnarValue::Array(parsed_array) = parsed_timestamps { assert_eq!(parsed_array.len(), 2); @@ -550,7 +554,7 @@ mod tests { let expected_err = "Internal error: Unsupported data type Int64 for function to_timestamp"; - match to_timestamp(&[int64array]) { + match to_timestamp(&[int64array], &ExecutionProps::new()) { Ok(_) => panic!("Expected error but got success"), Err(e) => { assert!( diff --git a/datafusion/src/physical_plan/expressions/nullif.rs b/datafusion/src/physical_plan/expressions/nullif.rs index 7cc58ed2318f..483aa056542b 100644 --- a/datafusion/src/physical_plan/expressions/nullif.rs +++ b/datafusion/src/physical_plan/expressions/nullif.rs @@ -19,6 +19,7 @@ use std::sync::Arc; use super::ColumnarValue; use crate::error::{DataFusionError, Result}; +use crate::execution::context::ExecutionProps; use crate::scalar::ScalarValue; use arrow::array::Array; use arrow::array::{ @@ -71,7 +72,7 @@ macro_rules! primitive_bool_array_op { /// Args: 0 - left expr is any array /// 1 - if the left is equal to this expr2, then the result is NULL, otherwise left value is passed. /// -pub fn nullif_func(args: &[ColumnarValue]) -> Result { +pub fn nullif_func(args: &[ColumnarValue], _: &ExecutionProps) -> Result { if args.len() != 2 { return Err(DataFusionError::Internal(format!( "{:?} args were supplied but NULLIF takes exactly two args", @@ -142,7 +143,7 @@ mod tests { let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32))); - let result = nullif_func(&[a, lit_array])?; + let result = nullif_func(&[a, lit_array], &ExecutionProps::new())?; let result = result.into_array(0); let expected = Arc::new(Int32Array::from(vec![ @@ -168,7 +169,7 @@ mod tests { let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(1i32))); - let result = nullif_func(&[a, lit_array])?; + let result = nullif_func(&[a, lit_array], &ExecutionProps::new())?; let result = result.into_array(0); let expected = Arc::new(Int32Array::from(vec![ diff --git a/datafusion/src/physical_plan/functions.rs b/datafusion/src/physical_plan/functions.rs index a7c3132b50c9..5f5ac532c517 100644 --- a/datafusion/src/physical_plan/functions.rs +++ b/datafusion/src/physical_plan/functions.rs @@ -33,6 +33,7 @@ use super::{ type_coercion::{coerce, data_types}, ColumnarValue, PhysicalExpr, }; +use crate::execution::context::ExecutionProps; use crate::physical_plan::array_expressions; use crate::physical_plan::datetime_expressions; use crate::physical_plan::expressions::{nullif_func, SUPPORTED_NULLIF_TYPES}; @@ -76,7 +77,7 @@ pub enum Signature { /// Scalar function pub type ScalarFunctionImplementation = - Arc Result + Send + Sync>; + Arc Result + Send + Sync>; /// A function's return type pub type ReturnTypeFunction = @@ -281,7 +282,7 @@ impl FromStr for BuiltinScalarFunction { return Err(DataFusionError::Plan(format!( "There is no built-in function named {}", name - ))) + ))); } }) } @@ -705,411 +706,469 @@ pub fn create_physical_expr( fun: &BuiltinScalarFunction, args: &[Arc], input_schema: &Schema, + execution_props: &ExecutionProps, ) -> Result> { - let fun_expr: ScalarFunctionImplementation = Arc::new(match fun { - // math functions - BuiltinScalarFunction::Abs => math_expressions::abs, - BuiltinScalarFunction::Acos => math_expressions::acos, - BuiltinScalarFunction::Asin => math_expressions::asin, - BuiltinScalarFunction::Atan => math_expressions::atan, - BuiltinScalarFunction::Ceil => math_expressions::ceil, - BuiltinScalarFunction::Cos => math_expressions::cos, - BuiltinScalarFunction::Exp => math_expressions::exp, - BuiltinScalarFunction::Floor => math_expressions::floor, - BuiltinScalarFunction::Log => math_expressions::ln, - BuiltinScalarFunction::Log10 => math_expressions::log10, - BuiltinScalarFunction::Log2 => math_expressions::log2, - BuiltinScalarFunction::Round => math_expressions::round, - BuiltinScalarFunction::Signum => math_expressions::signum, - BuiltinScalarFunction::Sin => math_expressions::sin, - BuiltinScalarFunction::Sqrt => math_expressions::sqrt, - BuiltinScalarFunction::Tan => math_expressions::tan, - BuiltinScalarFunction::Trunc => math_expressions::trunc, - - // string functions - BuiltinScalarFunction::Array => array_expressions::array, - BuiltinScalarFunction::Ascii => |args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(string_expressions::ascii::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function(string_expressions::ascii::)(args) + let fun_expr: ScalarFunctionImplementation = + Arc::new(match fun { + // math functions + BuiltinScalarFunction::Abs => math_expressions::abs, + BuiltinScalarFunction::Acos => math_expressions::acos, + BuiltinScalarFunction::Asin => math_expressions::asin, + BuiltinScalarFunction::Atan => math_expressions::atan, + BuiltinScalarFunction::Ceil => math_expressions::ceil, + BuiltinScalarFunction::Cos => math_expressions::cos, + BuiltinScalarFunction::Exp => math_expressions::exp, + BuiltinScalarFunction::Floor => math_expressions::floor, + BuiltinScalarFunction::Log => math_expressions::ln, + BuiltinScalarFunction::Log10 => math_expressions::log10, + BuiltinScalarFunction::Log2 => math_expressions::log2, + BuiltinScalarFunction::Round => math_expressions::round, + BuiltinScalarFunction::Signum => math_expressions::signum, + BuiltinScalarFunction::Sin => math_expressions::sin, + BuiltinScalarFunction::Sqrt => math_expressions::sqrt, + BuiltinScalarFunction::Tan => math_expressions::tan, + BuiltinScalarFunction::Trunc => math_expressions::trunc, + + // string functions + BuiltinScalarFunction::Array => array_expressions::array, + BuiltinScalarFunction::Ascii => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => make_scalar_function( + string_expressions::ascii::, + )(args, execution_props), + DataType::LargeUtf8 => make_scalar_function( + string_expressions::ascii::, + )(args, execution_props), + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function ascii", + other, + ))), + } } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function ascii", - other, - ))), - }, - BuiltinScalarFunction::BitLength => |args| match &args[0] { - ColumnarValue::Array(v) => Ok(ColumnarValue::Array(bit_length(v.as_ref())?)), - ColumnarValue::Scalar(v) => match v { - ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar(ScalarValue::Int32( - v.as_ref().map(|x| (x.len() * 8) as i32), - ))), - ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar( - ScalarValue::Int64(v.as_ref().map(|x| (x.len() * 8) as i64)), - )), - _ => unreachable!(), + BuiltinScalarFunction::BitLength => |args, _| match &args[0] { + ColumnarValue::Array(v) => { + Ok(ColumnarValue::Array(bit_length(v.as_ref())?)) + } + ColumnarValue::Scalar(v) => match v { + ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar( + ScalarValue::Int32(v.as_ref().map(|x| (x.len() * 8) as i32)), + )), + ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar( + ScalarValue::Int64(v.as_ref().map(|x| (x.len() * 8) as i64)), + )), + _ => unreachable!(), + }, }, - }, - BuiltinScalarFunction::Btrim => |args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(string_expressions::btrim::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function(string_expressions::btrim::)(args) - } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function btrim", - other, - ))), - }, - BuiltinScalarFunction::CharacterLength => |args| match args[0].data_type() { - DataType::Utf8 => { - let func = invoke_if_unicode_expressions_feature_flag!( - character_length, - Int32Type, - "character_length" - ); - make_scalar_function(func)(args) - } - DataType::LargeUtf8 => { - let func = invoke_if_unicode_expressions_feature_flag!( - character_length, - Int64Type, - "character_length" - ); - make_scalar_function(func)(args) - } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function character_length", - other, - ))), - }, - BuiltinScalarFunction::Chr => { - |args| make_scalar_function(string_expressions::chr)(args) - } - BuiltinScalarFunction::Concat => string_expressions::concat, - BuiltinScalarFunction::ConcatWithSeparator => { - |args| make_scalar_function(string_expressions::concat_ws)(args) - } - BuiltinScalarFunction::DatePart => datetime_expressions::date_part, - BuiltinScalarFunction::DateTrunc => datetime_expressions::date_trunc, - BuiltinScalarFunction::Now => datetime_expressions::now, - BuiltinScalarFunction::InitCap => |args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(string_expressions::initcap::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function(string_expressions::initcap::)(args) - } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function initcap", - other, - ))), - }, - BuiltinScalarFunction::Left => |args| match args[0].data_type() { - DataType::Utf8 => { - let func = invoke_if_unicode_expressions_feature_flag!(left, i32, "left"); - make_scalar_function(func)(args) - } - DataType::LargeUtf8 => { - let func = invoke_if_unicode_expressions_feature_flag!(left, i64, "left"); - make_scalar_function(func)(args) - } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function left", - other, - ))), - }, - BuiltinScalarFunction::Lower => string_expressions::lower, - BuiltinScalarFunction::Lpad => |args| match args[0].data_type() { - DataType::Utf8 => { - let func = invoke_if_unicode_expressions_feature_flag!(lpad, i32, "lpad"); - make_scalar_function(func)(args) - } - DataType::LargeUtf8 => { - let func = invoke_if_unicode_expressions_feature_flag!(lpad, i64, "lpad"); - make_scalar_function(func)(args) + BuiltinScalarFunction::Btrim => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => make_scalar_function( + string_expressions::btrim::, + )(args, execution_props), + DataType::LargeUtf8 => make_scalar_function( + string_expressions::btrim::, + )(args, execution_props), + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function btrim", + other, + ))), + } } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function lpad", - other, - ))), - }, - BuiltinScalarFunction::Ltrim => |args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(string_expressions::ltrim::)(args) + BuiltinScalarFunction::CharacterLength => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => { + let func = invoke_if_unicode_expressions_feature_flag!( + character_length, + Int32Type, + "character_length" + ); + make_scalar_function(func)(args, execution_props) + } + DataType::LargeUtf8 => { + let func = invoke_if_unicode_expressions_feature_flag!( + character_length, + Int64Type, + "character_length" + ); + make_scalar_function(func)(args, execution_props) + } + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function character_length", + other, + ))), + } } - DataType::LargeUtf8 => { - make_scalar_function(string_expressions::ltrim::)(args) + BuiltinScalarFunction::Chr => |args, execution_props| { + make_scalar_function(string_expressions::chr)(args, execution_props) + }, + BuiltinScalarFunction::Concat => string_expressions::concat, + BuiltinScalarFunction::ConcatWithSeparator => |args, execution_props| { + make_scalar_function(string_expressions::concat_ws)(args, execution_props) + }, + BuiltinScalarFunction::DatePart => datetime_expressions::date_part, + BuiltinScalarFunction::DateTrunc => datetime_expressions::date_trunc, + BuiltinScalarFunction::Now => datetime_expressions::now, + BuiltinScalarFunction::InitCap => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => make_scalar_function( + string_expressions::initcap::, + )(args, execution_props), + DataType::LargeUtf8 => make_scalar_function( + string_expressions::initcap::, + )(args, execution_props), + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function initcap", + other, + ))), + } } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function ltrim", - other, - ))), - }, - BuiltinScalarFunction::MD5 => { - invoke_if_crypto_expressions_feature_flag!(md5, "md5") - } - BuiltinScalarFunction::NullIf => nullif_func, - BuiltinScalarFunction::OctetLength => |args| match &args[0] { - ColumnarValue::Array(v) => Ok(ColumnarValue::Array(length(v.as_ref())?)), - ColumnarValue::Scalar(v) => match v { - ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar(ScalarValue::Int32( - v.as_ref().map(|x| x.len() as i32), + BuiltinScalarFunction::Left => |args, execution_props| match args[0] + .data_type() + { + DataType::Utf8 => { + let func = + invoke_if_unicode_expressions_feature_flag!(left, i32, "left"); + make_scalar_function(func)(args, execution_props) + } + DataType::LargeUtf8 => { + let func = + invoke_if_unicode_expressions_feature_flag!(left, i64, "left"); + make_scalar_function(func)(args, execution_props) + } + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function left", + other, ))), - ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar( - ScalarValue::Int64(v.as_ref().map(|x| x.len() as i64)), - )), - _ => unreachable!(), }, - }, - BuiltinScalarFunction::RegexpMatch => |args| match args[0].data_type() { - DataType::Utf8 => { - let func = invoke_if_regex_expressions_feature_flag!( - regexp_match, - i32, - "regexp_match" - ); - make_scalar_function(func)(args) - } - DataType::LargeUtf8 => { - let func = invoke_if_regex_expressions_feature_flag!( - regexp_match, - i64, - "regexp_match" - ); - make_scalar_function(func)(args) - } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function regexp_match", - other - ))), - }, - BuiltinScalarFunction::RegexpReplace => |args| match args[0].data_type() { - DataType::Utf8 => { - let func = invoke_if_regex_expressions_feature_flag!( - regexp_replace, - i32, - "regexp_replace" - ); - make_scalar_function(func)(args) - } - DataType::LargeUtf8 => { - let func = invoke_if_regex_expressions_feature_flag!( - regexp_replace, - i64, - "regexp_replace" - ); - make_scalar_function(func)(args) - } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function regexp_replace", - other, - ))), - }, - BuiltinScalarFunction::Repeat => |args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(string_expressions::repeat::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function(string_expressions::repeat::)(args) - } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function repeat", - other, - ))), - }, - BuiltinScalarFunction::Replace => |args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(string_expressions::replace::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function(string_expressions::replace::)(args) - } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function replace", - other, - ))), - }, - BuiltinScalarFunction::Reverse => |args| match args[0].data_type() { - DataType::Utf8 => { - let func = - invoke_if_unicode_expressions_feature_flag!(reverse, i32, "reverse"); - make_scalar_function(func)(args) - } - DataType::LargeUtf8 => { - let func = - invoke_if_unicode_expressions_feature_flag!(reverse, i64, "reverse"); - make_scalar_function(func)(args) - } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function reverse", - other, - ))), - }, - BuiltinScalarFunction::Right => |args| match args[0].data_type() { - DataType::Utf8 => { - let func = - invoke_if_unicode_expressions_feature_flag!(right, i32, "right"); - make_scalar_function(func)(args) - } - DataType::LargeUtf8 => { - let func = - invoke_if_unicode_expressions_feature_flag!(right, i64, "right"); - make_scalar_function(func)(args) - } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function right", - other, - ))), - }, - BuiltinScalarFunction::Rpad => |args| match args[0].data_type() { - DataType::Utf8 => { - let func = invoke_if_unicode_expressions_feature_flag!(rpad, i32, "rpad"); - make_scalar_function(func)(args) - } - DataType::LargeUtf8 => { - let func = invoke_if_unicode_expressions_feature_flag!(rpad, i64, "rpad"); - make_scalar_function(func)(args) - } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function rpad", - other, - ))), - }, - BuiltinScalarFunction::Rtrim => |args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(string_expressions::rtrim::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function(string_expressions::rtrim::)(args) - } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function rtrim", - other, - ))), - }, - BuiltinScalarFunction::SHA224 => { - invoke_if_crypto_expressions_feature_flag!(sha224, "sha224") - } - BuiltinScalarFunction::SHA256 => { - invoke_if_crypto_expressions_feature_flag!(sha256, "sha256") - } - BuiltinScalarFunction::SHA384 => { - invoke_if_crypto_expressions_feature_flag!(sha384, "sha384") - } - BuiltinScalarFunction::SHA512 => { - invoke_if_crypto_expressions_feature_flag!(sha512, "sha512") - } - BuiltinScalarFunction::SplitPart => |args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(string_expressions::split_part::)(args) + BuiltinScalarFunction::Lower => string_expressions::lower, + BuiltinScalarFunction::Lpad => |args, execution_props| match args[0] + .data_type() + { + DataType::Utf8 => { + let func = + invoke_if_unicode_expressions_feature_flag!(lpad, i32, "lpad"); + make_scalar_function(func)(args, execution_props) + } + DataType::LargeUtf8 => { + let func = + invoke_if_unicode_expressions_feature_flag!(lpad, i64, "lpad"); + make_scalar_function(func)(args, execution_props) + } + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function lpad", + other, + ))), + }, + BuiltinScalarFunction::Ltrim => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => make_scalar_function( + string_expressions::ltrim::, + )(args, execution_props), + DataType::LargeUtf8 => make_scalar_function( + string_expressions::ltrim::, + )(args, execution_props), + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function ltrim", + other, + ))), + } } - DataType::LargeUtf8 => { - make_scalar_function(string_expressions::split_part::)(args) + BuiltinScalarFunction::MD5 => { + invoke_if_crypto_expressions_feature_flag!(md5, "md5") + } + BuiltinScalarFunction::NullIf => nullif_func, + BuiltinScalarFunction::OctetLength => |args, _| match &args[0] { + ColumnarValue::Array(v) => Ok(ColumnarValue::Array(length(v.as_ref())?)), + ColumnarValue::Scalar(v) => match v { + ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar( + ScalarValue::Int32(v.as_ref().map(|x| x.len() as i32)), + )), + ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar( + ScalarValue::Int64(v.as_ref().map(|x| x.len() as i64)), + )), + _ => unreachable!(), + }, + }, + BuiltinScalarFunction::RegexpMatch => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => { + let func = invoke_if_regex_expressions_feature_flag!( + regexp_match, + i32, + "regexp_match" + ); + make_scalar_function(func)(args, execution_props) + } + DataType::LargeUtf8 => { + let func = invoke_if_regex_expressions_feature_flag!( + regexp_match, + i64, + "regexp_match" + ); + make_scalar_function(func)(args, execution_props) + } + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function regexp_match", + other + ))), + } } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function split_part", - other, - ))), - }, - BuiltinScalarFunction::StartsWith => |args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(string_expressions::starts_with::)(args) + BuiltinScalarFunction::RegexpReplace => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => { + let func = invoke_if_regex_expressions_feature_flag!( + regexp_replace, + i32, + "regexp_replace" + ); + make_scalar_function(func)(args, execution_props) + } + DataType::LargeUtf8 => { + let func = invoke_if_regex_expressions_feature_flag!( + regexp_replace, + i64, + "regexp_replace" + ); + make_scalar_function(func)(args, execution_props) + } + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function regexp_replace", + other, + ))), + } } - DataType::LargeUtf8 => { - make_scalar_function(string_expressions::starts_with::)(args) + BuiltinScalarFunction::Repeat => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => make_scalar_function( + string_expressions::repeat::, + )(args, execution_props), + DataType::LargeUtf8 => make_scalar_function( + string_expressions::repeat::, + )(args, execution_props), + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function repeat", + other, + ))), + } } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function starts_with", - other, - ))), - }, - BuiltinScalarFunction::Strpos => |args| match args[0].data_type() { - DataType::Utf8 => { - let func = invoke_if_unicode_expressions_feature_flag!( - strpos, Int32Type, "strpos" - ); - make_scalar_function(func)(args) + BuiltinScalarFunction::Replace => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => make_scalar_function( + string_expressions::replace::, + )(args, execution_props), + DataType::LargeUtf8 => make_scalar_function( + string_expressions::replace::, + )(args, execution_props), + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function replace", + other, + ))), + } } - DataType::LargeUtf8 => { - let func = invoke_if_unicode_expressions_feature_flag!( - strpos, Int64Type, "strpos" - ); - make_scalar_function(func)(args) + BuiltinScalarFunction::Reverse => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => { + let func = invoke_if_unicode_expressions_feature_flag!( + reverse, i32, "reverse" + ); + make_scalar_function(func)(args, execution_props) + } + DataType::LargeUtf8 => { + let func = invoke_if_unicode_expressions_feature_flag!( + reverse, i64, "reverse" + ); + make_scalar_function(func)(args, execution_props) + } + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function reverse", + other, + ))), + } } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function strpos", - other, - ))), - }, - BuiltinScalarFunction::Substr => |args| match args[0].data_type() { - DataType::Utf8 => { - let func = - invoke_if_unicode_expressions_feature_flag!(substr, i32, "substr"); - make_scalar_function(func)(args) + BuiltinScalarFunction::Right => |args, execution_props| match args[0] + .data_type() + { + DataType::Utf8 => { + let func = + invoke_if_unicode_expressions_feature_flag!(right, i32, "right"); + make_scalar_function(func)(args, execution_props) + } + DataType::LargeUtf8 => { + let func = + invoke_if_unicode_expressions_feature_flag!(right, i64, "right"); + make_scalar_function(func)(args, execution_props) + } + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function right", + other, + ))), + }, + BuiltinScalarFunction::Rpad => |args, execution_props| match args[0] + .data_type() + { + DataType::Utf8 => { + let func = + invoke_if_unicode_expressions_feature_flag!(rpad, i32, "rpad"); + make_scalar_function(func)(args, execution_props) + } + DataType::LargeUtf8 => { + let func = + invoke_if_unicode_expressions_feature_flag!(rpad, i64, "rpad"); + make_scalar_function(func)(args, execution_props) + } + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function rpad", + other, + ))), + }, + BuiltinScalarFunction::Rtrim => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => make_scalar_function( + string_expressions::rtrim::, + )(args, execution_props), + DataType::LargeUtf8 => make_scalar_function( + string_expressions::rtrim::, + )(args, execution_props), + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function rtrim", + other, + ))), + } } - DataType::LargeUtf8 => { - let func = - invoke_if_unicode_expressions_feature_flag!(substr, i64, "substr"); - make_scalar_function(func)(args) + BuiltinScalarFunction::SHA224 => { + invoke_if_crypto_expressions_feature_flag!(sha224, "sha224") + } + BuiltinScalarFunction::SHA256 => { + invoke_if_crypto_expressions_feature_flag!(sha256, "sha256") + } + BuiltinScalarFunction::SHA384 => { + invoke_if_crypto_expressions_feature_flag!(sha384, "sha384") + } + BuiltinScalarFunction::SHA512 => { + invoke_if_crypto_expressions_feature_flag!(sha512, "sha512") + } + BuiltinScalarFunction::SplitPart => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => make_scalar_function( + string_expressions::split_part::, + )(args, execution_props), + DataType::LargeUtf8 => make_scalar_function( + string_expressions::split_part::, + )(args, execution_props), + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function split_part", + other, + ))), + } } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function substr", - other, - ))), - }, - BuiltinScalarFunction::ToHex => |args| match args[0].data_type() { - DataType::Int32 => { - make_scalar_function(string_expressions::to_hex::)(args) + BuiltinScalarFunction::StartsWith => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => make_scalar_function( + string_expressions::starts_with::, + )(args, execution_props), + DataType::LargeUtf8 => make_scalar_function( + string_expressions::starts_with::, + )(args, execution_props), + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function starts_with", + other, + ))), + } } - DataType::Int64 => { - make_scalar_function(string_expressions::to_hex::)(args) + BuiltinScalarFunction::Strpos => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => { + let func = invoke_if_unicode_expressions_feature_flag!( + strpos, Int32Type, "strpos" + ); + make_scalar_function(func)(args, execution_props) + } + DataType::LargeUtf8 => { + let func = invoke_if_unicode_expressions_feature_flag!( + strpos, Int64Type, "strpos" + ); + make_scalar_function(func)(args, execution_props) + } + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function strpos", + other, + ))), + } } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function to_hex", - other, - ))), - }, - BuiltinScalarFunction::ToTimestamp => datetime_expressions::to_timestamp, - BuiltinScalarFunction::Translate => |args| match args[0].data_type() { - DataType::Utf8 => { - let func = invoke_if_unicode_expressions_feature_flag!( - translate, - i32, - "translate" - ); - make_scalar_function(func)(args) + BuiltinScalarFunction::Substr => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => { + let func = invoke_if_unicode_expressions_feature_flag!( + substr, i32, "substr" + ); + make_scalar_function(func)(args, execution_props) + } + DataType::LargeUtf8 => { + let func = invoke_if_unicode_expressions_feature_flag!( + substr, i64, "substr" + ); + make_scalar_function(func)(args, execution_props) + } + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function substr", + other, + ))), + } } - DataType::LargeUtf8 => { - let func = invoke_if_unicode_expressions_feature_flag!( - translate, - i64, - "translate" - ); - make_scalar_function(func)(args) + BuiltinScalarFunction::ToHex => { + |args, execution_props| match args[0].data_type() { + DataType::Int32 => make_scalar_function( + string_expressions::to_hex::, + )(args, execution_props), + DataType::Int64 => make_scalar_function( + string_expressions::to_hex::, + )(args, execution_props), + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function to_hex", + other, + ))), + } } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function translate", - other, - ))), - }, - BuiltinScalarFunction::Trim => |args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(string_expressions::btrim::)(args) + BuiltinScalarFunction::ToTimestamp => datetime_expressions::to_timestamp, + BuiltinScalarFunction::Translate => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => { + let func = invoke_if_unicode_expressions_feature_flag!( + translate, + i32, + "translate" + ); + make_scalar_function(func)(args, execution_props) + } + DataType::LargeUtf8 => { + let func = invoke_if_unicode_expressions_feature_flag!( + translate, + i64, + "translate" + ); + make_scalar_function(func)(args, execution_props) + } + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function translate", + other, + ))), + } } - DataType::LargeUtf8 => { - make_scalar_function(string_expressions::btrim::)(args) + BuiltinScalarFunction::Trim => { + |args, execution_props| match args[0].data_type() { + DataType::Utf8 => make_scalar_function( + string_expressions::btrim::, + )(args, execution_props), + DataType::LargeUtf8 => make_scalar_function( + string_expressions::btrim::, + )(args, execution_props), + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {:?} for function trim", + other, + ))), + } } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for function trim", - other, - ))), - }, - BuiltinScalarFunction::Upper => string_expressions::upper, - }); + BuiltinScalarFunction::Upper => string_expressions::upper, + }); // coerce let args = coerce(args, input_schema, &signature(fun))?; @@ -1123,6 +1182,7 @@ pub fn create_physical_expr( fun_expr, args, &return_type(&fun, &arg_types)?, + execution_props, ))) } @@ -1284,6 +1344,7 @@ pub struct ScalarFunctionExpr { name: String, args: Vec>, return_type: DataType, + execution_props: ExecutionProps, } impl Debug for ScalarFunctionExpr { @@ -1304,12 +1365,14 @@ impl ScalarFunctionExpr { fun: ScalarFunctionImplementation, args: Vec>, return_type: &DataType, + execution_props: &ExecutionProps, ) -> Self { Self { fun, name: name.to_owned(), args, return_type: return_type.clone(), + execution_props: execution_props.clone(), } } @@ -1373,7 +1436,8 @@ impl PhysicalExpr for ScalarFunctionExpr { // evaluate the function let fun = self.fun.as_ref(); - (fun)(&inputs) + let execution_props = &self.execution_props; + (fun)(&inputs, execution_props) } } @@ -1381,38 +1445,40 @@ impl PhysicalExpr for ScalarFunctionExpr { /// and vice-versa after evaluation. pub fn make_scalar_function(inner: F) -> ScalarFunctionImplementation where - F: Fn(&[ArrayRef]) -> Result + Sync + Send + 'static, + F: Fn(&[ArrayRef], &ExecutionProps) -> Result + Sync + Send + 'static, { - Arc::new(move |args: &[ColumnarValue]| { - // first, identify if any of the arguments is an Array. If yes, store its `len`, - // as any scalar will need to be converted to an array of len `len`. - let len = args - .iter() - .fold(Option::::None, |acc, arg| match arg { - ColumnarValue::Scalar(_) => acc, - ColumnarValue::Array(a) => Some(a.len()), - }); - - // to array - let args = if let Some(len) = len { - args.iter() - .map(|arg| arg.clone().into_array(len)) - .collect::>() - } else { - args.iter() - .map(|arg| arg.clone().into_array(1)) - .collect::>() - }; + Arc::new( + move |args: &[ColumnarValue], execution_props: &ExecutionProps| { + // first, identify if any of the arguments is an Array. If yes, store its `len`, + // as any scalar will need to be converted to an array of len `len`. + let len = args + .iter() + .fold(Option::::None, |acc, arg| match arg { + ColumnarValue::Scalar(_) => acc, + ColumnarValue::Array(a) => Some(a.len()), + }); + + // to array + let args = if let Some(len) = len { + args.iter() + .map(|arg| arg.clone().into_array(len)) + .collect::>() + } else { + args.iter() + .map(|arg| arg.clone().into_array(1)) + .collect::>() + }; - let result = (inner)(&args); + let result = (inner)(&args, execution_props); - // maybe back to scalar - if len.is_some() { - result.map(ColumnarValue::Array) - } else { - ScalarValue::try_from_array(&result?, 0).map(ColumnarValue::Scalar) - } - }) + // maybe back to scalar + if len.is_some() { + result.map(ColumnarValue::Array) + } else { + ScalarValue::try_from_array(&result?, 0).map(ColumnarValue::Scalar) + } + }, + ) } #[cfg(test)] @@ -1439,7 +1505,7 @@ mod tests { /// $DATA_TYPE is the function to test result type /// $ARRAY_TYPE is the column type after function applied macro_rules! test_function { - ($FUNC:ident, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $DATA_TYPE: ident, $ARRAY_TYPE:ident) => { + ($FUNC:ident, $ARGS:expr, $EXECUTION_PROPS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $DATA_TYPE: ident, $ARRAY_TYPE:ident) => { // used to provide type annotation let expected: Result> = $EXPECTED; @@ -1448,7 +1514,7 @@ mod tests { let columns: Vec = vec![Arc::new(Int32Array::from(vec![1]))]; let expr = - create_physical_expr(&BuiltinScalarFunction::$FUNC, $ARGS, &schema)?; + create_physical_expr(&BuiltinScalarFunction::$FUNC, $ARGS, &schema, $EXECUTION_PROPS)?; // type is correct assert_eq!(expr.data_type(&schema)?, DataType::$DATA_TYPE); @@ -1482,9 +1548,11 @@ mod tests { #[test] fn test_functions() -> Result<()> { + let execution_props = &ExecutionProps::new(); test_function!( Ascii, &[lit(ScalarValue::Utf8(Some("x".to_string())))], + execution_props, Ok(Some(120)), i32, Int32, @@ -1493,6 +1561,7 @@ mod tests { test_function!( Ascii, &[lit(ScalarValue::Utf8(Some("ésoj".to_string())))], + execution_props, Ok(Some(233)), i32, Int32, @@ -1501,6 +1570,7 @@ mod tests { test_function!( Ascii, &[lit(ScalarValue::Utf8(Some("💯".to_string())))], + execution_props, Ok(Some(128175)), i32, Int32, @@ -1509,6 +1579,7 @@ mod tests { test_function!( Ascii, &[lit(ScalarValue::Utf8(Some("💯a".to_string())))], + execution_props, Ok(Some(128175)), i32, Int32, @@ -1517,6 +1588,7 @@ mod tests { test_function!( Ascii, &[lit(ScalarValue::Utf8(Some("".to_string())))], + execution_props, Ok(Some(0)), i32, Int32, @@ -1525,6 +1597,7 @@ mod tests { test_function!( Ascii, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(None), i32, Int32, @@ -1533,6 +1606,7 @@ mod tests { test_function!( BitLength, &[lit(ScalarValue::Utf8(Some("chars".to_string())))], + execution_props, Ok(Some(40)), i32, Int32, @@ -1541,6 +1615,7 @@ mod tests { test_function!( BitLength, &[lit(ScalarValue::Utf8(Some("josé".to_string())))], + execution_props, Ok(Some(40)), i32, Int32, @@ -1549,6 +1624,7 @@ mod tests { test_function!( BitLength, &[lit(ScalarValue::Utf8(Some("".to_string())))], + execution_props, Ok(Some(0)), i32, Int32, @@ -1557,6 +1633,7 @@ mod tests { test_function!( Btrim, &[lit(ScalarValue::Utf8(Some(" trim ".to_string())))], + execution_props, Ok(Some("trim")), &str, Utf8, @@ -1565,6 +1642,7 @@ mod tests { test_function!( Btrim, &[lit(ScalarValue::Utf8(Some(" trim".to_string())))], + execution_props, Ok(Some("trim")), &str, Utf8, @@ -1573,6 +1651,7 @@ mod tests { test_function!( Btrim, &[lit(ScalarValue::Utf8(Some("trim ".to_string())))], + execution_props, Ok(Some("trim")), &str, Utf8, @@ -1581,6 +1660,7 @@ mod tests { test_function!( Btrim, &[lit(ScalarValue::Utf8(Some("\n trim \n".to_string())))], + execution_props, Ok(Some("\n trim \n")), &str, Utf8, @@ -1592,6 +1672,7 @@ mod tests { lit(ScalarValue::Utf8(Some("xyxtrimyyx".to_string()))), lit(ScalarValue::Utf8(Some("xyz".to_string()))), ], + execution_props, Ok(Some("trim")), &str, Utf8, @@ -1603,6 +1684,7 @@ mod tests { lit(ScalarValue::Utf8(Some("\nxyxtrimyyx\n".to_string()))), lit(ScalarValue::Utf8(Some("xyz\n".to_string()))), ], + execution_props, Ok(Some("trim")), &str, Utf8, @@ -1614,6 +1696,7 @@ mod tests { lit(ScalarValue::Utf8(None)), lit(ScalarValue::Utf8(Some("xyz".to_string()))), ], + execution_props, Ok(None), &str, Utf8, @@ -1625,6 +1708,7 @@ mod tests { lit(ScalarValue::Utf8(Some("xyxtrimyyx".to_string()))), lit(ScalarValue::Utf8(None)), ], + execution_props, Ok(None), &str, Utf8, @@ -1634,6 +1718,7 @@ mod tests { test_function!( CharacterLength, &[lit(ScalarValue::Utf8(Some("chars".to_string())))], + execution_props, Ok(Some(5)), i32, Int32, @@ -1643,6 +1728,7 @@ mod tests { test_function!( CharacterLength, &[lit(ScalarValue::Utf8(Some("josé".to_string())))], + execution_props, Ok(Some(4)), i32, Int32, @@ -1652,6 +1738,7 @@ mod tests { test_function!( CharacterLength, &[lit(ScalarValue::Utf8(Some("".to_string())))], + execution_props, Ok(Some(0)), i32, Int32, @@ -1661,6 +1748,7 @@ mod tests { test_function!( CharacterLength, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(None), i32, Int32, @@ -1680,6 +1768,7 @@ mod tests { test_function!( Chr, &[lit(ScalarValue::Int64(Some(128175)))], + execution_props, Ok(Some("💯")), &str, Utf8, @@ -1688,6 +1777,7 @@ mod tests { test_function!( Chr, &[lit(ScalarValue::Int64(None))], + execution_props, Ok(None), &str, Utf8, @@ -1696,6 +1786,7 @@ mod tests { test_function!( Chr, &[lit(ScalarValue::Int64(Some(120)))], + execution_props, Ok(Some("x")), &str, Utf8, @@ -1704,6 +1795,7 @@ mod tests { test_function!( Chr, &[lit(ScalarValue::Int64(Some(128175)))], + execution_props, Ok(Some("💯")), &str, Utf8, @@ -1712,6 +1804,7 @@ mod tests { test_function!( Chr, &[lit(ScalarValue::Int64(None))], + execution_props, Ok(None), &str, Utf8, @@ -1720,6 +1813,7 @@ mod tests { test_function!( Chr, &[lit(ScalarValue::Int64(Some(0)))], + execution_props, Err(DataFusionError::Execution( "null character not permitted.".to_string(), )), @@ -1730,6 +1824,7 @@ mod tests { test_function!( Chr, &[lit(ScalarValue::Int64(Some(i64::MAX)))], + execution_props, Err(DataFusionError::Execution( "requested character too large for encoding.".to_string(), )), @@ -1744,6 +1839,7 @@ mod tests { lit(ScalarValue::Utf8(Some("bb".to_string()))), lit(ScalarValue::Utf8(Some("cc".to_string()))), ], + execution_props, Ok(Some("aabbcc")), &str, Utf8, @@ -1756,6 +1852,7 @@ mod tests { lit(ScalarValue::Utf8(None)), lit(ScalarValue::Utf8(Some("cc".to_string()))), ], + execution_props, Ok(Some("aacc")), &str, Utf8, @@ -1764,6 +1861,7 @@ mod tests { test_function!( Concat, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(Some("")), &str, Utf8, @@ -1777,6 +1875,7 @@ mod tests { lit(ScalarValue::Utf8(Some("bb".to_string()))), lit(ScalarValue::Utf8(Some("cc".to_string()))), ], + execution_props, Ok(Some("aa|bb|cc")), &str, Utf8, @@ -1788,6 +1887,7 @@ mod tests { lit(ScalarValue::Utf8(Some("|".to_string()))), lit(ScalarValue::Utf8(None)), ], + execution_props, Ok(Some("")), &str, Utf8, @@ -1801,6 +1901,7 @@ mod tests { lit(ScalarValue::Utf8(Some("bb".to_string()))), lit(ScalarValue::Utf8(Some("cc".to_string()))), ], + execution_props, Ok(None), &str, Utf8, @@ -1814,6 +1915,7 @@ mod tests { lit(ScalarValue::Utf8(None)), lit(ScalarValue::Utf8(Some("cc".to_string()))), ], + execution_props, Ok(Some("aa|cc")), &str, Utf8, @@ -1822,6 +1924,7 @@ mod tests { test_function!( Exp, &[lit(ScalarValue::Int32(Some(1)))], + execution_props, Ok(Some((1.0_f64).exp())), f64, Float64, @@ -1830,6 +1933,7 @@ mod tests { test_function!( Exp, &[lit(ScalarValue::UInt32(Some(1)))], + execution_props, Ok(Some((1.0_f64).exp())), f64, Float64, @@ -1838,6 +1942,7 @@ mod tests { test_function!( Exp, &[lit(ScalarValue::UInt64(Some(1)))], + execution_props, Ok(Some((1.0_f64).exp())), f64, Float64, @@ -1846,6 +1951,7 @@ mod tests { test_function!( Exp, &[lit(ScalarValue::Float64(Some(1.0)))], + execution_props, Ok(Some((1.0_f64).exp())), f64, Float64, @@ -1854,6 +1960,7 @@ mod tests { test_function!( Exp, &[lit(ScalarValue::Float32(Some(1.0)))], + execution_props, Ok(Some((1.0_f32).exp() as f64)), f64, Float64, @@ -1862,6 +1969,7 @@ mod tests { test_function!( InitCap, &[lit(ScalarValue::Utf8(Some("hi THOMAS".to_string())))], + execution_props, Ok(Some("Hi Thomas")), &str, Utf8, @@ -1870,6 +1978,7 @@ mod tests { test_function!( InitCap, &[lit(ScalarValue::Utf8(Some("".to_string())))], + execution_props, Ok(Some("")), &str, Utf8, @@ -1878,6 +1987,7 @@ mod tests { test_function!( InitCap, &[lit(ScalarValue::Utf8(Some("".to_string())))], + execution_props, Ok(Some("")), &str, Utf8, @@ -1886,6 +1996,7 @@ mod tests { test_function!( InitCap, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(None), &str, Utf8, @@ -1898,6 +2009,7 @@ mod tests { lit(ScalarValue::Utf8(Some("abcde".to_string()))), lit(ScalarValue::Int8(Some(2))), ], + execution_props, Ok(Some("ab")), &str, Utf8, @@ -1910,6 +2022,7 @@ mod tests { lit(ScalarValue::Utf8(Some("abcde".to_string()))), lit(ScalarValue::Int64(Some(200))), ], + execution_props, Ok(Some("abcde")), &str, Utf8, @@ -1922,6 +2035,7 @@ mod tests { lit(ScalarValue::Utf8(Some("abcde".to_string()))), lit(ScalarValue::Int64(Some(-2))), ], + execution_props, Ok(Some("abc")), &str, Utf8, @@ -1934,6 +2048,7 @@ mod tests { lit(ScalarValue::Utf8(Some("abcde".to_string()))), lit(ScalarValue::Int64(Some(-200))), ], + execution_props, Ok(Some("")), &str, Utf8, @@ -1946,6 +2061,7 @@ mod tests { lit(ScalarValue::Utf8(Some("abcde".to_string()))), lit(ScalarValue::Int64(Some(0))), ], + execution_props, Ok(Some("")), &str, Utf8, @@ -1958,6 +2074,7 @@ mod tests { lit(ScalarValue::Utf8(None)), lit(ScalarValue::Int64(Some(2))), ], + execution_props, Ok(None), &str, Utf8, @@ -1970,6 +2087,7 @@ mod tests { lit(ScalarValue::Utf8(Some("abcde".to_string()))), lit(ScalarValue::Int64(None)), ], + execution_props, Ok(None), &str, Utf8, @@ -1982,6 +2100,7 @@ mod tests { lit(ScalarValue::Utf8(Some("joséésoj".to_string()))), lit(ScalarValue::Int64(Some(5))), ], + execution_props, Ok(Some("joséé")), &str, Utf8, @@ -1994,6 +2113,7 @@ mod tests { lit(ScalarValue::Utf8(Some("joséésoj".to_string()))), lit(ScalarValue::Int64(Some(-3))), ], + execution_props, Ok(Some("joséé")), &str, Utf8, @@ -2020,6 +2140,7 @@ mod tests { lit(ScalarValue::Utf8(Some("josé".to_string()))), lit(ScalarValue::Int64(Some(5))), ], + execution_props, Ok(Some(" josé")), &str, Utf8, @@ -2032,6 +2153,7 @@ mod tests { lit(ScalarValue::Utf8(Some("hi".to_string()))), lit(ScalarValue::Int64(Some(5))), ], + execution_props, Ok(Some(" hi")), &str, Utf8, @@ -2044,6 +2166,7 @@ mod tests { lit(ScalarValue::Utf8(Some("hi".to_string()))), lit(ScalarValue::Int64(Some(0))), ], + execution_props, Ok(Some("")), &str, Utf8, @@ -2056,6 +2179,7 @@ mod tests { lit(ScalarValue::Utf8(Some("hi".to_string()))), lit(ScalarValue::Int64(None)), ], + execution_props, Ok(None), &str, Utf8, @@ -2068,6 +2192,7 @@ mod tests { lit(ScalarValue::Utf8(None)), lit(ScalarValue::Int64(Some(5))), ], + execution_props, Ok(None), &str, Utf8, @@ -2081,6 +2206,7 @@ mod tests { lit(ScalarValue::Int64(Some(5))), lit(ScalarValue::Utf8(Some("xy".to_string()))), ], + execution_props, Ok(Some("xyxhi")), &str, Utf8, @@ -2094,6 +2220,7 @@ mod tests { lit(ScalarValue::Int64(Some(21))), lit(ScalarValue::Utf8(Some("abcdef".to_string()))), ], + execution_props, Ok(Some("abcdefabcdefabcdefahi")), &str, Utf8, @@ -2107,6 +2234,7 @@ mod tests { lit(ScalarValue::Int64(Some(5))), lit(ScalarValue::Utf8(Some(" ".to_string()))), ], + execution_props, Ok(Some(" hi")), &str, Utf8, @@ -2120,6 +2248,7 @@ mod tests { lit(ScalarValue::Int64(Some(5))), lit(ScalarValue::Utf8(Some("".to_string()))), ], + execution_props, Ok(Some("hi")), &str, Utf8, @@ -2133,6 +2262,7 @@ mod tests { lit(ScalarValue::Int64(Some(5))), lit(ScalarValue::Utf8(Some("xy".to_string()))), ], + execution_props, Ok(None), &str, Utf8, @@ -2146,6 +2276,7 @@ mod tests { lit(ScalarValue::Int64(None)), lit(ScalarValue::Utf8(Some("xy".to_string()))), ], + execution_props, Ok(None), &str, Utf8, @@ -2159,6 +2290,7 @@ mod tests { lit(ScalarValue::Int64(Some(5))), lit(ScalarValue::Utf8(None)), ], + execution_props, Ok(None), &str, Utf8, @@ -2172,6 +2304,7 @@ mod tests { lit(ScalarValue::Int64(Some(10))), lit(ScalarValue::Utf8(Some("xy".to_string()))), ], + execution_props, Ok(Some("xyxyxyjosé")), &str, Utf8, @@ -2185,6 +2318,7 @@ mod tests { lit(ScalarValue::Int64(Some(10))), lit(ScalarValue::Utf8(Some("éñ".to_string()))), ], + execution_props, Ok(Some("éñéñéñjosé")), &str, Utf8, @@ -2207,6 +2341,7 @@ mod tests { test_function!( Ltrim, &[lit(ScalarValue::Utf8(Some(" trim".to_string())))], + execution_props, Ok(Some("trim")), &str, Utf8, @@ -2215,6 +2350,7 @@ mod tests { test_function!( Ltrim, &[lit(ScalarValue::Utf8(Some(" trim ".to_string())))], + execution_props, Ok(Some("trim ")), &str, Utf8, @@ -2223,6 +2359,7 @@ mod tests { test_function!( Ltrim, &[lit(ScalarValue::Utf8(Some("trim ".to_string())))], + execution_props, Ok(Some("trim ")), &str, Utf8, @@ -2231,6 +2368,7 @@ mod tests { test_function!( Ltrim, &[lit(ScalarValue::Utf8(Some("trim".to_string())))], + execution_props, Ok(Some("trim")), &str, Utf8, @@ -2239,6 +2377,7 @@ mod tests { test_function!( Ltrim, &[lit(ScalarValue::Utf8(Some("\n trim ".to_string())))], + execution_props, Ok(Some("\n trim ")), &str, Utf8, @@ -2247,6 +2386,7 @@ mod tests { test_function!( Ltrim, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(None), &str, Utf8, @@ -2256,6 +2396,7 @@ mod tests { test_function!( MD5, &[lit(ScalarValue::Utf8(Some("tom".to_string())))], + execution_props, Ok(Some("34b7da764b21d298ef307d04d8152dc5")), &str, Utf8, @@ -2265,6 +2406,7 @@ mod tests { test_function!( MD5, &[lit(ScalarValue::Utf8(Some("".to_string())))], + execution_props, Ok(Some("d41d8cd98f00b204e9800998ecf8427e")), &str, Utf8, @@ -2274,6 +2416,7 @@ mod tests { test_function!( MD5, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(None), &str, Utf8, @@ -2293,6 +2436,7 @@ mod tests { test_function!( OctetLength, &[lit(ScalarValue::Utf8(Some("chars".to_string())))], + execution_props, Ok(Some(5)), i32, Int32, @@ -2301,6 +2445,7 @@ mod tests { test_function!( OctetLength, &[lit(ScalarValue::Utf8(Some("josé".to_string())))], + execution_props, Ok(Some(5)), i32, Int32, @@ -2309,6 +2454,7 @@ mod tests { test_function!( OctetLength, &[lit(ScalarValue::Utf8(Some("".to_string())))], + execution_props, Ok(Some(0)), i32, Int32, @@ -2317,6 +2463,7 @@ mod tests { test_function!( OctetLength, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(None), i32, Int32, @@ -2330,6 +2477,7 @@ mod tests { lit(ScalarValue::Utf8(Some(".[mN]a.".to_string()))), lit(ScalarValue::Utf8(Some("M".to_string()))), ], + execution_props, Ok(Some("ThM")), &str, Utf8, @@ -2343,6 +2491,7 @@ mod tests { lit(ScalarValue::Utf8(Some("b..".to_string()))), lit(ScalarValue::Utf8(Some("X".to_string()))), ], + execution_props, Ok(Some("fooXbaz")), &str, Utf8, @@ -2357,6 +2506,7 @@ mod tests { lit(ScalarValue::Utf8(Some("X".to_string()))), lit(ScalarValue::Utf8(Some("g".to_string()))), ], + execution_props, Ok(Some("fooXX")), &str, Utf8, @@ -2371,6 +2521,7 @@ mod tests { lit(ScalarValue::Utf8(Some("X\\1Y".to_string()))), lit(ScalarValue::Utf8(Some("g".to_string()))), ], + execution_props, Ok(Some("fooXarYXazY")), &str, Utf8, @@ -2385,6 +2536,7 @@ mod tests { lit(ScalarValue::Utf8(Some("X\\1Y".to_string()))), lit(ScalarValue::Utf8(Some("g".to_string()))), ], + execution_props, Ok(None), &str, Utf8, @@ -2399,6 +2551,7 @@ mod tests { lit(ScalarValue::Utf8(Some("X\\1Y".to_string()))), lit(ScalarValue::Utf8(Some("g".to_string()))), ], + execution_props, Ok(None), &str, Utf8, @@ -2413,6 +2566,7 @@ mod tests { lit(ScalarValue::Utf8(None)), lit(ScalarValue::Utf8(Some("g".to_string()))), ], + execution_props, Ok(None), &str, Utf8, @@ -2427,6 +2581,7 @@ mod tests { lit(ScalarValue::Utf8(Some("X\\1Y".to_string()))), lit(ScalarValue::Utf8(None)), ], + execution_props, Ok(None), &str, Utf8, @@ -2441,6 +2596,7 @@ mod tests { lit(ScalarValue::Utf8(Some("X".to_string()))), lit(ScalarValue::Utf8(Some("gi".to_string()))), ], + execution_props, Ok(Some("XXX")), &str, Utf8, @@ -2455,6 +2611,7 @@ mod tests { lit(ScalarValue::Utf8(Some("X".to_string()))), lit(ScalarValue::Utf8(Some("i".to_string()))), ], + execution_props, Ok(Some("XabcABC")), &str, Utf8, @@ -2481,6 +2638,7 @@ mod tests { lit(ScalarValue::Utf8(Some("Pg".to_string()))), lit(ScalarValue::Int64(Some(4))), ], + execution_props, Ok(Some("PgPgPgPg")), &str, Utf8, @@ -2492,6 +2650,7 @@ mod tests { lit(ScalarValue::Utf8(None)), lit(ScalarValue::Int64(Some(4))), ], + execution_props, Ok(None), &str, Utf8, @@ -2503,6 +2662,7 @@ mod tests { lit(ScalarValue::Utf8(Some("Pg".to_string()))), lit(ScalarValue::Int64(None)), ], + execution_props, Ok(None), &str, Utf8, @@ -2512,6 +2672,7 @@ mod tests { test_function!( Reverse, &[lit(ScalarValue::Utf8(Some("abcde".to_string())))], + execution_props, Ok(Some("edcba")), &str, Utf8, @@ -2521,6 +2682,7 @@ mod tests { test_function!( Reverse, &[lit(ScalarValue::Utf8(Some("loẅks".to_string())))], + execution_props, Ok(Some("skẅol")), &str, Utf8, @@ -2530,6 +2692,7 @@ mod tests { test_function!( Reverse, &[lit(ScalarValue::Utf8(Some("loẅks".to_string())))], + execution_props, Ok(Some("skẅol")), &str, Utf8, @@ -2539,6 +2702,7 @@ mod tests { test_function!( Reverse, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(None), &str, Utf8, @@ -2562,6 +2726,7 @@ mod tests { lit(ScalarValue::Utf8(Some("abcde".to_string()))), lit(ScalarValue::Int8(Some(2))), ], + execution_props, Ok(Some("de")), &str, Utf8, @@ -2574,6 +2739,7 @@ mod tests { lit(ScalarValue::Utf8(Some("abcde".to_string()))), lit(ScalarValue::Int64(Some(200))), ], + execution_props, Ok(Some("abcde")), &str, Utf8, @@ -2586,6 +2752,7 @@ mod tests { lit(ScalarValue::Utf8(Some("abcde".to_string()))), lit(ScalarValue::Int64(Some(-2))), ], + execution_props, Ok(Some("cde")), &str, Utf8, @@ -2598,6 +2765,7 @@ mod tests { lit(ScalarValue::Utf8(Some("abcde".to_string()))), lit(ScalarValue::Int64(Some(-200))), ], + execution_props, Ok(Some("")), &str, Utf8, @@ -2610,6 +2778,7 @@ mod tests { lit(ScalarValue::Utf8(Some("abcde".to_string()))), lit(ScalarValue::Int64(Some(0))), ], + execution_props, Ok(Some("")), &str, Utf8, @@ -2622,6 +2791,7 @@ mod tests { lit(ScalarValue::Utf8(None)), lit(ScalarValue::Int64(Some(2))), ], + execution_props, Ok(None), &str, Utf8, @@ -2634,6 +2804,7 @@ mod tests { lit(ScalarValue::Utf8(Some("abcde".to_string()))), lit(ScalarValue::Int64(None)), ], + execution_props, Ok(None), &str, Utf8, @@ -2646,6 +2817,7 @@ mod tests { lit(ScalarValue::Utf8(Some("joséésoj".to_string()))), lit(ScalarValue::Int64(Some(5))), ], + execution_props, Ok(Some("éésoj")), &str, Utf8, @@ -2658,6 +2830,7 @@ mod tests { lit(ScalarValue::Utf8(Some("joséésoj".to_string()))), lit(ScalarValue::Int64(Some(-3))), ], + execution_props, Ok(Some("éésoj")), &str, Utf8, @@ -2684,6 +2857,7 @@ mod tests { lit(ScalarValue::Utf8(Some("josé".to_string()))), lit(ScalarValue::Int64(Some(5))), ], + execution_props, Ok(Some("josé ")), &str, Utf8, @@ -2696,6 +2870,7 @@ mod tests { lit(ScalarValue::Utf8(Some("hi".to_string()))), lit(ScalarValue::Int64(Some(5))), ], + execution_props, Ok(Some("hi ")), &str, Utf8, @@ -2708,6 +2883,7 @@ mod tests { lit(ScalarValue::Utf8(Some("hi".to_string()))), lit(ScalarValue::Int64(Some(0))), ], + execution_props, Ok(Some("")), &str, Utf8, @@ -2720,6 +2896,7 @@ mod tests { lit(ScalarValue::Utf8(Some("hi".to_string()))), lit(ScalarValue::Int64(None)), ], + execution_props, Ok(None), &str, Utf8, @@ -2732,6 +2909,7 @@ mod tests { lit(ScalarValue::Utf8(None)), lit(ScalarValue::Int64(Some(5))), ], + execution_props, Ok(None), &str, Utf8, @@ -2745,6 +2923,7 @@ mod tests { lit(ScalarValue::Int64(Some(5))), lit(ScalarValue::Utf8(Some("xy".to_string()))), ], + execution_props, Ok(Some("hixyx")), &str, Utf8, @@ -2758,6 +2937,7 @@ mod tests { lit(ScalarValue::Int64(Some(21))), lit(ScalarValue::Utf8(Some("abcdef".to_string()))), ], + execution_props, Ok(Some("hiabcdefabcdefabcdefa")), &str, Utf8, @@ -2771,6 +2951,7 @@ mod tests { lit(ScalarValue::Int64(Some(5))), lit(ScalarValue::Utf8(Some(" ".to_string()))), ], + execution_props, Ok(Some("hi ")), &str, Utf8, @@ -2784,6 +2965,7 @@ mod tests { lit(ScalarValue::Int64(Some(5))), lit(ScalarValue::Utf8(Some("".to_string()))), ], + execution_props, Ok(Some("hi")), &str, Utf8, @@ -2797,6 +2979,7 @@ mod tests { lit(ScalarValue::Int64(Some(5))), lit(ScalarValue::Utf8(Some("xy".to_string()))), ], + execution_props, Ok(None), &str, Utf8, @@ -2810,6 +2993,7 @@ mod tests { lit(ScalarValue::Int64(None)), lit(ScalarValue::Utf8(Some("xy".to_string()))), ], + execution_props, Ok(None), &str, Utf8, @@ -2823,6 +3007,7 @@ mod tests { lit(ScalarValue::Int64(Some(5))), lit(ScalarValue::Utf8(None)), ], + execution_props, Ok(None), &str, Utf8, @@ -2836,6 +3021,7 @@ mod tests { lit(ScalarValue::Int64(Some(10))), lit(ScalarValue::Utf8(Some("xy".to_string()))), ], + execution_props, Ok(Some("joséxyxyxy")), &str, Utf8, @@ -2849,6 +3035,7 @@ mod tests { lit(ScalarValue::Int64(Some(10))), lit(ScalarValue::Utf8(Some("éñ".to_string()))), ], + execution_props, Ok(Some("josééñéñéñ")), &str, Utf8, @@ -2871,6 +3058,7 @@ mod tests { test_function!( Rtrim, &[lit(ScalarValue::Utf8(Some("trim ".to_string())))], + execution_props, Ok(Some("trim")), &str, Utf8, @@ -2879,6 +3067,7 @@ mod tests { test_function!( Rtrim, &[lit(ScalarValue::Utf8(Some(" trim ".to_string())))], + execution_props, Ok(Some(" trim")), &str, Utf8, @@ -2887,6 +3076,7 @@ mod tests { test_function!( Rtrim, &[lit(ScalarValue::Utf8(Some(" trim \n".to_string())))], + execution_props, Ok(Some(" trim \n")), &str, Utf8, @@ -2895,6 +3085,7 @@ mod tests { test_function!( Rtrim, &[lit(ScalarValue::Utf8(Some(" trim".to_string())))], + execution_props, Ok(Some(" trim")), &str, Utf8, @@ -2903,6 +3094,7 @@ mod tests { test_function!( Rtrim, &[lit(ScalarValue::Utf8(Some("trim".to_string())))], + execution_props, Ok(Some("trim")), &str, Utf8, @@ -2911,6 +3103,7 @@ mod tests { test_function!( Rtrim, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(None), &str, Utf8, @@ -2920,6 +3113,7 @@ mod tests { test_function!( SHA224, &[lit(ScalarValue::Utf8(Some("tom".to_string())))], + execution_props, Ok(Some(&[ 11u8, 246u8, 203u8, 98u8, 100u8, 156u8, 66u8, 169u8, 174u8, 56u8, 118u8, 171u8, 111u8, 109u8, 146u8, 173u8, 54u8, 203u8, 84u8, 20u8, 228u8, 149u8, @@ -2933,6 +3127,7 @@ mod tests { test_function!( SHA224, &[lit(ScalarValue::Utf8(Some("".to_string())))], + execution_props, Ok(Some(&[ 209u8, 74u8, 2u8, 140u8, 42u8, 58u8, 43u8, 201u8, 71u8, 97u8, 2u8, 187u8, 40u8, 130u8, 52u8, 196u8, 21u8, 162u8, 176u8, 31u8, 130u8, 142u8, 166u8, @@ -2946,6 +3141,7 @@ mod tests { test_function!( SHA224, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(None), &[u8], Binary, @@ -2966,6 +3162,7 @@ mod tests { test_function!( SHA256, &[lit(ScalarValue::Utf8(Some("tom".to_string())))], + execution_props, Ok(Some(&[ 225u8, 96u8, 143u8, 117u8, 197u8, 215u8, 129u8, 63u8, 61u8, 64u8, 49u8, 203u8, 48u8, 191u8, 183u8, 134u8, 80u8, 125u8, 152u8, 19u8, 117u8, 56u8, @@ -2979,6 +3176,7 @@ mod tests { test_function!( SHA256, &[lit(ScalarValue::Utf8(Some("".to_string())))], + execution_props, Ok(Some(&[ 227u8, 176u8, 196u8, 66u8, 152u8, 252u8, 28u8, 20u8, 154u8, 251u8, 244u8, 200u8, 153u8, 111u8, 185u8, 36u8, 39u8, 174u8, 65u8, 228u8, 100u8, 155u8, @@ -2992,6 +3190,7 @@ mod tests { test_function!( SHA256, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(None), &[u8], Binary, @@ -3012,6 +3211,7 @@ mod tests { test_function!( SHA384, &[lit(ScalarValue::Utf8(Some("tom".to_string())))], + execution_props, Ok(Some(&[ 9u8, 111u8, 91u8, 104u8, 170u8, 119u8, 132u8, 142u8, 79u8, 223u8, 92u8, 28u8, 11u8, 53u8, 13u8, 226u8, 219u8, 250u8, 214u8, 15u8, 253u8, 124u8, @@ -3027,6 +3227,7 @@ mod tests { test_function!( SHA384, &[lit(ScalarValue::Utf8(Some("".to_string())))], + execution_props, Ok(Some(&[ 56u8, 176u8, 96u8, 167u8, 81u8, 172u8, 150u8, 56u8, 76u8, 217u8, 50u8, 126u8, 177u8, 177u8, 227u8, 106u8, 33u8, 253u8, 183u8, 17u8, 20u8, 190u8, @@ -3042,6 +3243,7 @@ mod tests { test_function!( SHA384, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(None), &[u8], Binary, @@ -3062,6 +3264,7 @@ mod tests { test_function!( SHA512, &[lit(ScalarValue::Utf8(Some("tom".to_string())))], + execution_props, Ok(Some(&[ 110u8, 27u8, 155u8, 63u8, 232u8, 64u8, 104u8, 14u8, 55u8, 5u8, 31u8, 122u8, 213u8, 233u8, 89u8, 214u8, 243u8, 154u8, 208u8, 248u8, 136u8, @@ -3078,6 +3281,7 @@ mod tests { test_function!( SHA512, &[lit(ScalarValue::Utf8(Some("".to_string())))], + execution_props, Ok(Some(&[ 207u8, 131u8, 225u8, 53u8, 126u8, 239u8, 184u8, 189u8, 241u8, 84u8, 40u8, 80u8, 214u8, 109u8, 128u8, 7u8, 214u8, 32u8, 228u8, 5u8, 11u8, 87u8, @@ -3094,6 +3298,7 @@ mod tests { test_function!( SHA512, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(None), &[u8], Binary, @@ -3117,6 +3322,7 @@ mod tests { lit(ScalarValue::Utf8(Some("~@~".to_string()))), lit(ScalarValue::Int64(Some(2))), ], + execution_props, Ok(Some("def")), &str, Utf8, @@ -3129,6 +3335,7 @@ mod tests { lit(ScalarValue::Utf8(Some("~@~".to_string()))), lit(ScalarValue::Int64(Some(20))), ], + execution_props, Ok(Some("")), &str, Utf8, @@ -3141,6 +3348,7 @@ mod tests { lit(ScalarValue::Utf8(Some("~@~".to_string()))), lit(ScalarValue::Int64(Some(-1))), ], + execution_props, Err(DataFusionError::Execution( "field position must be greater than zero".to_string(), )), @@ -3154,6 +3362,7 @@ mod tests { lit(ScalarValue::Utf8(Some("alphabet".to_string()))), lit(ScalarValue::Utf8(Some("alph".to_string()))), ], + execution_props, Ok(Some(true)), bool, Boolean, @@ -3165,6 +3374,7 @@ mod tests { lit(ScalarValue::Utf8(Some("alphabet".to_string()))), lit(ScalarValue::Utf8(Some("blph".to_string()))), ], + execution_props, Ok(Some(false)), bool, Boolean, @@ -3176,6 +3386,7 @@ mod tests { lit(ScalarValue::Utf8(None)), lit(ScalarValue::Utf8(Some("alph".to_string()))), ], + execution_props, Ok(None), bool, Boolean, @@ -3187,6 +3398,7 @@ mod tests { lit(ScalarValue::Utf8(Some("alphabet".to_string()))), lit(ScalarValue::Utf8(None)), ], + execution_props, Ok(None), bool, Boolean, @@ -3199,6 +3411,7 @@ mod tests { lit(ScalarValue::Utf8(Some("abc".to_string()))), lit(ScalarValue::Utf8(Some("c".to_string()))), ], + execution_props, Ok(Some(3)), i32, Int32, @@ -3211,6 +3424,7 @@ mod tests { lit(ScalarValue::Utf8(Some("josé".to_string()))), lit(ScalarValue::Utf8(Some("é".to_string()))), ], + execution_props, Ok(Some(4)), i32, Int32, @@ -3223,6 +3437,7 @@ mod tests { lit(ScalarValue::Utf8(Some("joséésoj".to_string()))), lit(ScalarValue::Utf8(Some("so".to_string()))), ], + execution_props, Ok(Some(6)), i32, Int32, @@ -3235,6 +3450,7 @@ mod tests { lit(ScalarValue::Utf8(Some("joséésoj".to_string()))), lit(ScalarValue::Utf8(Some("abc".to_string()))), ], + execution_props, Ok(Some(0)), i32, Int32, @@ -3247,6 +3463,7 @@ mod tests { lit(ScalarValue::Utf8(None)), lit(ScalarValue::Utf8(Some("abc".to_string()))), ], + execution_props, Ok(None), i32, Int32, @@ -3259,6 +3476,7 @@ mod tests { lit(ScalarValue::Utf8(Some("joséésoj".to_string()))), lit(ScalarValue::Utf8(None)), ], + execution_props, Ok(None), i32, Int32, @@ -3285,6 +3503,7 @@ mod tests { lit(ScalarValue::Utf8(Some("alphabet".to_string()))), lit(ScalarValue::Int64(Some(0))), ], + execution_props, Ok(Some("alphabet")), &str, Utf8, @@ -3297,6 +3516,7 @@ mod tests { lit(ScalarValue::Utf8(Some("joséésoj".to_string()))), lit(ScalarValue::Int64(Some(5))), ], + execution_props, Ok(Some("ésoj")), &str, Utf8, @@ -3309,6 +3529,7 @@ mod tests { lit(ScalarValue::Utf8(Some("alphabet".to_string()))), lit(ScalarValue::Int64(Some(1))), ], + execution_props, Ok(Some("alphabet")), &str, Utf8, @@ -3321,6 +3542,7 @@ mod tests { lit(ScalarValue::Utf8(Some("alphabet".to_string()))), lit(ScalarValue::Int64(Some(2))), ], + execution_props, Ok(Some("lphabet")), &str, Utf8, @@ -3333,6 +3555,7 @@ mod tests { lit(ScalarValue::Utf8(Some("alphabet".to_string()))), lit(ScalarValue::Int64(Some(3))), ], + execution_props, Ok(Some("phabet")), &str, Utf8, @@ -3345,6 +3568,7 @@ mod tests { lit(ScalarValue::Utf8(Some("alphabet".to_string()))), lit(ScalarValue::Int64(Some(-3))), ], + execution_props, Ok(Some("alphabet")), &str, Utf8, @@ -3357,6 +3581,7 @@ mod tests { lit(ScalarValue::Utf8(Some("alphabet".to_string()))), lit(ScalarValue::Int64(Some(30))), ], + execution_props, Ok(Some("")), &str, Utf8, @@ -3369,6 +3594,7 @@ mod tests { lit(ScalarValue::Utf8(Some("alphabet".to_string()))), lit(ScalarValue::Int64(None)), ], + execution_props, Ok(None), &str, Utf8, @@ -3382,6 +3608,7 @@ mod tests { lit(ScalarValue::Int64(Some(3))), lit(ScalarValue::Int64(Some(2))), ], + execution_props, Ok(Some("ph")), &str, Utf8, @@ -3395,6 +3622,7 @@ mod tests { lit(ScalarValue::Int64(Some(3))), lit(ScalarValue::Int64(Some(20))), ], + execution_props, Ok(Some("phabet")), &str, Utf8, @@ -3408,6 +3636,7 @@ mod tests { lit(ScalarValue::Int64(None)), lit(ScalarValue::Int64(Some(20))), ], + execution_props, Ok(None), &str, Utf8, @@ -3421,6 +3650,7 @@ mod tests { lit(ScalarValue::Int64(Some(3))), lit(ScalarValue::Int64(None)), ], + execution_props, Ok(None), &str, Utf8, @@ -3434,6 +3664,7 @@ mod tests { lit(ScalarValue::Int64(Some(1))), lit(ScalarValue::Int64(Some(-1))), ], + execution_props, Err(DataFusionError::Execution( "negative substring length not allowed".to_string(), )), @@ -3449,6 +3680,7 @@ mod tests { lit(ScalarValue::Int64(Some(5))), lit(ScalarValue::Int64(Some(2))), ], + execution_props, Ok(Some("és")), &str, Utf8, @@ -3476,6 +3708,7 @@ mod tests { lit(ScalarValue::Utf8(Some("143".to_string()))), lit(ScalarValue::Utf8(Some("ax".to_string()))), ], + execution_props, Ok(Some("a2x5")), &str, Utf8, @@ -3489,6 +3722,7 @@ mod tests { lit(ScalarValue::Utf8(Some("143".to_string()))), lit(ScalarValue::Utf8(Some("ax".to_string()))), ], + execution_props, Ok(None), &str, Utf8, @@ -3502,6 +3736,7 @@ mod tests { lit(ScalarValue::Utf8(None)), lit(ScalarValue::Utf8(Some("ax".to_string()))), ], + execution_props, Ok(None), &str, Utf8, @@ -3515,6 +3750,7 @@ mod tests { lit(ScalarValue::Utf8(Some("143".to_string()))), lit(ScalarValue::Utf8(None)), ], + execution_props, Ok(None), &str, Utf8, @@ -3528,6 +3764,7 @@ mod tests { lit(ScalarValue::Utf8(Some("éñí".to_string()))), lit(ScalarValue::Utf8(Some("óü".to_string()))), ], + execution_props, Ok(Some("ó2ü5")), &str, Utf8, @@ -3551,6 +3788,7 @@ mod tests { test_function!( Trim, &[lit(ScalarValue::Utf8(Some(" trim ".to_string())))], + execution_props, Ok(Some("trim")), &str, Utf8, @@ -3559,6 +3797,7 @@ mod tests { test_function!( Trim, &[lit(ScalarValue::Utf8(Some("trim ".to_string())))], + execution_props, Ok(Some("trim")), &str, Utf8, @@ -3567,6 +3806,7 @@ mod tests { test_function!( Trim, &[lit(ScalarValue::Utf8(Some(" trim".to_string())))], + execution_props, Ok(Some("trim")), &str, Utf8, @@ -3575,6 +3815,7 @@ mod tests { test_function!( Trim, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(None), &str, Utf8, @@ -3583,6 +3824,7 @@ mod tests { test_function!( Upper, &[lit(ScalarValue::Utf8(Some("upper".to_string())))], + execution_props, Ok(Some("UPPER")), &str, Utf8, @@ -3591,6 +3833,7 @@ mod tests { test_function!( Upper, &[lit(ScalarValue::Utf8(Some("UPPER".to_string())))], + execution_props, Ok(Some("UPPER")), &str, Utf8, @@ -3599,6 +3842,7 @@ mod tests { test_function!( Upper, &[lit(ScalarValue::Utf8(None))], + execution_props, Ok(None), &str, Utf8, @@ -3633,11 +3877,13 @@ mod tests { Field::new("b", value2.data_type().clone(), false), ]); let columns: Vec = vec![value1, value2]; + let execution_props = ExecutionProps::new(); let expr = create_physical_expr( &BuiltinScalarFunction::Array, &[col("a"), col("b")], &schema, + &execution_props, )?; // type is correct @@ -3702,6 +3948,7 @@ mod tests { &BuiltinScalarFunction::RegexpMatch, &[col("a"), pattern], &schema, + &ExecutionProps::new(), )?; // type is correct @@ -3739,6 +3986,7 @@ mod tests { &BuiltinScalarFunction::RegexpMatch, &[col_value, pattern], &schema, + &ExecutionProps::new(), )?; // type is correct diff --git a/datafusion/src/physical_plan/math_expressions.rs b/datafusion/src/physical_plan/math_expressions.rs index 382a15f8ccf6..308ea56748ac 100644 --- a/datafusion/src/physical_plan/math_expressions.rs +++ b/datafusion/src/physical_plan/math_expressions.rs @@ -23,6 +23,7 @@ use arrow::datatypes::{DataType, ToByteSlice}; use super::{ColumnarValue, ScalarValue}; use crate::error::{DataFusionError, Result}; +use crate::execution::context::ExecutionProps; macro_rules! compute_op { ($ARRAY:expr, $FUNC:ident, $TYPE:ident) => {{ @@ -93,7 +94,10 @@ macro_rules! unary_primitive_array_op { macro_rules! math_unary_function { ($NAME:expr, $FUNC:ident) => { /// mathematical function that accepts f32 or f64 and returns f64 - pub fn $FUNC(args: &[ColumnarValue]) -> Result { + pub fn $FUNC( + args: &[ColumnarValue], + _: &ExecutionProps, + ) -> Result { unary_primitive_array_op!(&args[0], $NAME, $FUNC) } }; diff --git a/datafusion/src/physical_plan/planner.rs b/datafusion/src/physical_plan/planner.rs index c1f608bbde93..307580a2b179 100644 --- a/datafusion/src/physical_plan/planner.rs +++ b/datafusion/src/physical_plan/planner.rs @@ -554,7 +554,12 @@ impl DefaultPhysicalPlanner { .iter() .map(|e| self.create_physical_expr(e, input_schema, ctx_state)) .collect::>>()?; - functions::create_physical_expr(fun, &physical_args, input_schema) + functions::create_physical_expr( + fun, + &physical_args, + input_schema, + &ctx_state.execution_props, + ) } Expr::ScalarUDF { fun, args } => { let mut physical_args = vec![]; diff --git a/datafusion/src/physical_plan/regex_expressions.rs b/datafusion/src/physical_plan/regex_expressions.rs index b526e7259ef6..5fd6a88fccb0 100644 --- a/datafusion/src/physical_plan/regex_expressions.rs +++ b/datafusion/src/physical_plan/regex_expressions.rs @@ -25,6 +25,7 @@ use std::any::type_name; use std::sync::Arc; use crate::error::{DataFusionError, Result}; +use crate::execution::context::ExecutionProps; use arrow::array::{ArrayRef, GenericStringArray, StringOffsetSizeTrait}; use arrow::compute; use hashbrown::HashMap; @@ -45,7 +46,10 @@ macro_rules! downcast_string_arg { } /// extract a specific group from a string column, using a regular expression -pub fn regexp_match(args: &[ArrayRef]) -> Result { +pub fn regexp_match( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { match args.len() { 2 => compute::regexp_match(downcast_string_arg!(args[0], "string", T), downcast_string_arg!(args[1], "pattern", T), None) .map_err(DataFusionError::ArrowError), @@ -72,7 +76,10 @@ fn regex_replace_posix_groups(replacement: &str) -> String { /// Replaces substring(s) matching a POSIX regular expression. /// /// example: `regexp_replace('Thomas', '.[mN]a.', 'M') = 'ThM'` -pub fn regexp_replace(args: &[ArrayRef]) -> Result { +pub fn regexp_replace( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { // creating Regex is expensive so create hashmap for memoization let mut patterns: HashMap = HashMap::new(); diff --git a/datafusion/src/physical_plan/string_expressions.rs b/datafusion/src/physical_plan/string_expressions.rs index 882fe30502fd..eeed816dd27d 100644 --- a/datafusion/src/physical_plan/string_expressions.rs +++ b/datafusion/src/physical_plan/string_expressions.rs @@ -37,6 +37,7 @@ use arrow::{ }; use super::ColumnarValue; +use crate::execution::context::ExecutionProps; macro_rules! downcast_string_arg { ($ARG:expr, $NAME:expr, $T:ident) => {{ @@ -174,7 +175,10 @@ where /// Returns the numeric code of the first character of the argument. /// ascii('x') = 120 -pub fn ascii(args: &[ArrayRef]) -> Result { +pub fn ascii( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let result = string_array @@ -192,7 +196,10 @@ pub fn ascii(args: &[ArrayRef]) -> Result { /// Removes the longest string containing only characters in characters (a space by default) from the start and end of string. /// btrim('xyxtrimyyx', 'xyz') = 'trim' -pub fn btrim(args: &[ArrayRef]) -> Result { +pub fn btrim( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { match args.len() { 1 => { let string_array = downcast_string_arg!(args[0], "string", T); @@ -240,7 +247,7 @@ pub fn btrim(args: &[ArrayRef]) -> Result { /// Returns the character with the given code. chr(0) is disallowed because text data types cannot store that character. /// chr(65) = 'A' -pub fn chr(args: &[ArrayRef]) -> Result { +pub fn chr(args: &[ArrayRef], _: &ExecutionProps) -> Result { let integer_array = downcast_arg!(args[0], "integer", Int64Array); // first map is the iterator, second is for the `Option<_>` @@ -271,7 +278,7 @@ pub fn chr(args: &[ArrayRef]) -> Result { /// Concatenates the text representations of all the arguments. NULL arguments are ignored. /// concat('abcde', 2, NULL, 22) = 'abcde222' -pub fn concat(args: &[ColumnarValue]) -> Result { +pub fn concat(args: &[ColumnarValue], _: &ExecutionProps) -> Result { // do not accept 0 arguments. if args.is_empty() { return Err(DataFusionError::Internal(format!( @@ -331,7 +338,7 @@ pub fn concat(args: &[ColumnarValue]) -> Result { /// Concatenates all but the first argument, with separators. The first argument is used as the separator string, and should not be NULL. Other NULL arguments are ignored. /// concat_ws(',', 'abcde', 2, NULL, 22) = 'abcde,2,22' -pub fn concat_ws(args: &[ArrayRef]) -> Result { +pub fn concat_ws(args: &[ArrayRef], _: &ExecutionProps) -> Result { // downcast all arguments to strings let args = downcast_vec!(args, StringArray).collect::>>()?; @@ -370,7 +377,10 @@ pub fn concat_ws(args: &[ArrayRef]) -> Result { /// Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. /// initcap('hi THOMAS') = 'Hi Thomas' -pub fn initcap(args: &[ArrayRef]) -> Result { +pub fn initcap( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); // first map is the iterator, second is for the `Option<_>` @@ -400,13 +410,16 @@ pub fn initcap(args: &[ArrayRef]) -> Result /// Converts the string to all lower case. /// lower('TOM') = 'tom' -pub fn lower(args: &[ColumnarValue]) -> Result { +pub fn lower(args: &[ColumnarValue], _: &ExecutionProps) -> Result { handle(args, |string| string.to_ascii_lowercase(), "lower") } /// Removes the longest string containing only characters in characters (a space by default) from the start of string. /// ltrim('zzzytest', 'xyz') = 'test' -pub fn ltrim(args: &[ArrayRef]) -> Result { +pub fn ltrim( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { match args.len() { 1 => { let string_array = downcast_string_arg!(args[0], "string", T); @@ -445,7 +458,10 @@ pub fn ltrim(args: &[ArrayRef]) -> Result { /// Repeats string the specified number of times. /// repeat('Pg', 4) = 'PgPgPgPg' -pub fn repeat(args: &[ArrayRef]) -> Result { +pub fn repeat( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let number_array = downcast_arg!(args[1], "number", Int64Array); @@ -463,7 +479,10 @@ pub fn repeat(args: &[ArrayRef]) -> Result { /// Replaces all occurrences in string of substring from with substring to. /// replace('abcdefabcdef', 'cd', 'XX') = 'abXXefabXXef' -pub fn replace(args: &[ArrayRef]) -> Result { +pub fn replace( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let from_array = downcast_string_arg!(args[1], "from", T); let to_array = downcast_string_arg!(args[2], "to", T); @@ -483,7 +502,10 @@ pub fn replace(args: &[ArrayRef]) -> Result /// Removes the longest string containing only characters in characters (a space by default) from the end of string. /// rtrim('testxxzx', 'xyz') = 'test' -pub fn rtrim(args: &[ArrayRef]) -> Result { +pub fn rtrim( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { match args.len() { 1 => { let string_array = downcast_string_arg!(args[0], "string", T); @@ -522,7 +544,10 @@ pub fn rtrim(args: &[ArrayRef]) -> Result { /// Splits string at occurrences of delimiter and returns the n'th field (counting from one). /// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def' -pub fn split_part(args: &[ArrayRef]) -> Result { +pub fn split_part( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let delimiter_array = downcast_string_arg!(args[1], "delimiter", T); let n_array = downcast_arg!(args[2], "n", Int64Array); @@ -554,7 +579,10 @@ pub fn split_part(args: &[ArrayRef]) -> Result(args: &[ArrayRef]) -> Result { +pub fn starts_with( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let prefix_array = downcast_string_arg!(args[1], "prefix", T); @@ -572,7 +600,10 @@ pub fn starts_with(args: &[ArrayRef]) -> Result(args: &[ArrayRef]) -> Result +pub fn to_hex( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result where T::Native: StringOffsetSizeTrait, { @@ -590,6 +621,6 @@ where /// Converts the string to all upper case. /// upper('tom') = 'TOM' -pub fn upper(args: &[ColumnarValue]) -> Result { +pub fn upper(args: &[ColumnarValue], _: &ExecutionProps) -> Result { handle(args, |string| string.to_ascii_uppercase(), "upper") } diff --git a/datafusion/src/physical_plan/udf.rs b/datafusion/src/physical_plan/udf.rs index 9189da47bd6f..b14b1f5ded84 100644 --- a/datafusion/src/physical_plan/udf.rs +++ b/datafusion/src/physical_plan/udf.rs @@ -31,6 +31,7 @@ use super::{ }, type_coercion::coerce, }; +use crate::execution::context::ExecutionProps; use std::sync::Arc; /// Logical representation of a UDF. @@ -108,5 +109,6 @@ pub fn create_physical_expr( fun.fun.clone(), args, (fun.return_type)(&arg_types)?.as_ref(), + &ExecutionProps::new(), ))) } diff --git a/datafusion/src/physical_plan/unicode_expressions.rs b/datafusion/src/physical_plan/unicode_expressions.rs index 787ea7ea2673..bf7bcdca42d8 100644 --- a/datafusion/src/physical_plan/unicode_expressions.rs +++ b/datafusion/src/physical_plan/unicode_expressions.rs @@ -26,6 +26,7 @@ use std::cmp::Ordering; use std::sync::Arc; use crate::error::{DataFusionError, Result}; +use crate::execution::context::ExecutionProps; use arrow::{ array::{ ArrayRef, GenericStringArray, Int64Array, PrimitiveArray, StringOffsetSizeTrait, @@ -63,7 +64,10 @@ macro_rules! downcast_arg { /// Returns number of characters in the string. /// character_length('josé') = 4 -pub fn character_length(args: &[ArrayRef]) -> Result +pub fn character_length( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result where T::Native: StringOffsetSizeTrait, { @@ -90,7 +94,10 @@ where /// Returns first n characters in the string, or when n is negative, returns all but last |n| characters. /// left('abcde', 2) = 'ab' -pub fn left(args: &[ArrayRef]) -> Result { +pub fn left( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let n_array = downcast_arg!(args[1], "n", Int64Array); @@ -124,7 +131,10 @@ pub fn left(args: &[ArrayRef]) -> Result { /// Extends the string to length 'length' by prepending the characters fill (a space by default). If the string is already longer than length then it is truncated (on the right). /// lpad('hi', 5, 'xy') = 'xyxhi' -pub fn lpad(args: &[ArrayRef]) -> Result { +pub fn lpad( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { match args.len() { 2 => { let string_array = downcast_string_arg!(args[0], "string", T); @@ -213,7 +223,10 @@ pub fn lpad(args: &[ArrayRef]) -> Result { /// Reverses the order of the characters in the string. /// reverse('abcde') = 'edcba' -pub fn reverse(args: &[ArrayRef]) -> Result { +pub fn reverse( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let result = string_array @@ -228,7 +241,10 @@ pub fn reverse(args: &[ArrayRef]) -> Result /// Returns last n characters in the string, or when n is negative, returns all but first |n| characters. /// right('abcde', 2) = 'de' -pub fn right(args: &[ArrayRef]) -> Result { +pub fn right( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let n_array = downcast_arg!(args[1], "n", Int64Array); @@ -276,7 +292,10 @@ pub fn right(args: &[ArrayRef]) -> Result { /// Extends the string to length 'length' by appending the characters fill (a space by default). If the string is already longer than length then it is truncated. /// rpad('hi', 5, 'xy') = 'hixyx' -pub fn rpad(args: &[ArrayRef]) -> Result { +pub fn rpad( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { match args.len() { 2 => { let string_array = downcast_string_arg!(args[0], "string", T); @@ -353,7 +372,10 @@ pub fn rpad(args: &[ArrayRef]) -> Result { /// Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.) /// strpos('high', 'ig') = 2 -pub fn strpos(args: &[ArrayRef]) -> Result +pub fn strpos( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result where T::Native: StringOffsetSizeTrait, { @@ -412,7 +434,10 @@ where /// Extracts the substring of string starting at the start'th character, and extending for count characters if that is specified. (Same as substring(string from start for count).) /// substr('alphabet', 3) = 'phabet' /// substr('alphabet', 3, 2) = 'ph' -pub fn substr(args: &[ArrayRef]) -> Result { +pub fn substr( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { match args.len() { 2 => { let string_array = downcast_string_arg!(args[0], "string", T); @@ -489,7 +514,10 @@ pub fn substr(args: &[ArrayRef]) -> Result { /// Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted. /// translate('12345', '143', 'ax') = 'a2x5' -pub fn translate(args: &[ArrayRef]) -> Result { +pub fn translate( + args: &[ArrayRef], + _: &ExecutionProps, +) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let from_array = downcast_string_arg!(args[1], "from", T); let to_array = downcast_string_arg!(args[2], "to", T); diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index a40d0becdcb4..d02d17550373 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -2714,7 +2714,7 @@ mod tests { fn get_function_meta(&self, name: &str) -> Option> { let f: ScalarFunctionImplementation = - Arc::new(|_| Err(DataFusionError::NotImplemented("".to_string()))); + Arc::new(|_, _| Err(DataFusionError::NotImplemented("".to_string()))); match name { "my_sqrt" => Some(Arc::new(create_udf( "my_sqrt", diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index eaa5915ae202..2c9d5020b460 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -31,7 +31,7 @@ use arrow::{ util::display::array_value_to_string, }; -use datafusion::execution::context::ExecutionContext; +use datafusion::execution::context::{ExecutionContext, ExecutionProps}; use datafusion::logical_plan::LogicalPlan; use datafusion::prelude::create_udf; use datafusion::{ @@ -589,7 +589,7 @@ fn create_ctx() -> Result { Ok(ctx) } -fn custom_sqrt(args: &[ColumnarValue]) -> Result { +fn custom_sqrt(args: &[ColumnarValue], _: &ExecutionProps) -> Result { let arg = &args[0]; if let ColumnarValue::Array(v) = arg { let input = v @@ -2739,7 +2739,7 @@ async fn test_cast_expressions() -> Result<()> { } #[tokio::test] -async fn test_timestamp_expressions() -> Result<()> { +async fn test_current_timestamp_expressions() -> Result<()> { let t1 = chrono::Utc::now().timestamp(); let mut ctx = ExecutionContext::new(); let actual = execute(&mut ctx, "SELECT NOW(), NOW() as t2").await; @@ -2756,6 +2756,35 @@ async fn test_timestamp_expressions() -> Result<()> { Ok(()) } +#[tokio::test] +async fn test_current_timestamp_expressions_non_optimized() -> Result<()> { + let t1 = chrono::Utc::now().timestamp(); + let ctx = ExecutionContext::new(); + let sql = "SELECT NOW(), NOW() as t2"; + + let msg = format!("Creating logical plan for '{}'", sql); + let plan = ctx.create_logical_plan(sql).expect(&msg); + + let msg = format!("Creating physical plan for '{}': {:?}", sql, plan); + let plan = ctx.create_physical_plan(&plan).expect(&msg); + + let msg = format!("Executing physical plan for '{}': {:?}", sql, plan); + let res = collect(plan).await.expect(&msg); + let actual = result_vec(&res); + + let res1 = actual[0][0].as_str(); + let res2 = actual[0][1].as_str(); + let t3 = chrono::Utc::now().timestamp(); + let t2_naive = + chrono::NaiveDateTime::parse_from_str(res1, "%Y-%m-%d %H:%M:%S%.6f").unwrap(); + + let t2 = t2_naive.timestamp(); + assert!(t1 <= t2 && t2 <= t3); + assert_eq!(res2, res1); + + Ok(()) +} + #[tokio::test] async fn test_cast_expressions_error() -> Result<()> { // sin(utf8) should error