From 1d8a41bc8e08b56e90d6f8e6ef20e39a126987e4 Mon Sep 17 00:00:00 2001 From: "Reilly.tang" Date: Thu, 21 Mar 2024 07:57:05 +0800 Subject: [PATCH] Move `starts_with`, `to_hex`,` trim`, `upper` to datafusion-functions (and add string_expressions) (#9541) * [task #9539] Move starts_with, to_hex, trim, upper to datafusion-functions Signed-off-by: tangruilin * Export expr_fn, restore tests * fix comments --------- Signed-off-by: tangruilin Co-authored-by: Andrew Lamb --- datafusion/expr/src/built_in_function.rs | 57 +--- datafusion/expr/src/expr_fn.rs | 18 -- datafusion/functions/Cargo.toml | 3 + datafusion/functions/src/lib.rs | 9 +- datafusion/functions/src/string/mod.rs | 292 ++++++++++++++++++ .../functions/src/string/starts_with.rs | 89 ++++++ datafusion/functions/src/string/to_hex.rs | 155 ++++++++++ datafusion/functions/src/string/trim.rs | 78 +++++ datafusion/functions/src/string/upper.rs | 66 ++++ datafusion/physical-expr/src/functions.rs | 118 ------- .../physical-expr/src/string_expressions.rs | 77 +---- datafusion/proto/proto/datafusion.proto | 8 +- datafusion/proto/src/generated/pbjson.rs | 12 - datafusion/proto/src/generated/prost.rs | 16 +- .../proto/src/logical_plan/from_proto.rs | 22 +- datafusion/proto/src/logical_plan/to_proto.rs | 4 - datafusion/sql/src/expr/mod.rs | 2 +- 17 files changed, 720 insertions(+), 306 deletions(-) create mode 100644 datafusion/functions/src/string/mod.rs create mode 100644 datafusion/functions/src/string/starts_with.rs create mode 100644 datafusion/functions/src/string/to_hex.rs create mode 100644 datafusion/functions/src/string/trim.rs create mode 100644 datafusion/functions/src/string/upper.rs diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 79cd6a24ce39..fffe2cf4c9c9 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -147,20 +147,12 @@ pub enum BuiltinScalarFunction { Rtrim, /// split_part SplitPart, - /// starts_with - StartsWith, /// strpos Strpos, /// substr Substr, - /// to_hex - ToHex, /// translate Translate, - /// trim - Trim, - /// upper - Upper, /// uuid Uuid, /// overlay @@ -276,13 +268,9 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Rpad => Volatility::Immutable, BuiltinScalarFunction::Rtrim => Volatility::Immutable, BuiltinScalarFunction::SplitPart => Volatility::Immutable, - BuiltinScalarFunction::StartsWith => Volatility::Immutable, BuiltinScalarFunction::Strpos => Volatility::Immutable, BuiltinScalarFunction::Substr => Volatility::Immutable, - BuiltinScalarFunction::ToHex => Volatility::Immutable, BuiltinScalarFunction::Translate => Volatility::Immutable, - BuiltinScalarFunction::Trim => Volatility::Immutable, - BuiltinScalarFunction::Upper => Volatility::Immutable, BuiltinScalarFunction::OverLay => Volatility::Immutable, BuiltinScalarFunction::Levenshtein => Volatility::Immutable, BuiltinScalarFunction::SubstrIndex => Volatility::Immutable, @@ -365,7 +353,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::SplitPart => { utf8_to_str_type(&input_expr_types[0], "split_part") } - BuiltinScalarFunction::StartsWith => Ok(Boolean), BuiltinScalarFunction::EndsWith => Ok(Boolean), BuiltinScalarFunction::Strpos => { utf8_to_int_type(&input_expr_types[0], "strpos/instr/position") @@ -373,12 +360,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Substr => { utf8_to_str_type(&input_expr_types[0], "substr") } - BuiltinScalarFunction::ToHex => Ok(match input_expr_types[0] { - Int8 | Int16 | Int32 | Int64 => Utf8, - _ => { - return plan_err!("The to_hex function can only accept integers."); - } - }), BuiltinScalarFunction::SubstrIndex => { utf8_to_str_type(&input_expr_types[0], "substr_index") } @@ -388,10 +369,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Translate => { utf8_to_str_type(&input_expr_types[0], "translate") } - BuiltinScalarFunction::Trim => utf8_to_str_type(&input_expr_types[0], "trim"), - BuiltinScalarFunction::Upper => { - utf8_to_str_type(&input_expr_types[0], "upper") - } BuiltinScalarFunction::Factorial | BuiltinScalarFunction::Gcd @@ -476,18 +453,16 @@ impl BuiltinScalarFunction { | BuiltinScalarFunction::InitCap | BuiltinScalarFunction::Lower | BuiltinScalarFunction::OctetLength - | BuiltinScalarFunction::Reverse - | BuiltinScalarFunction::Upper => { + | BuiltinScalarFunction::Reverse => { Signature::uniform(1, vec![Utf8, LargeUtf8], self.volatility()) } BuiltinScalarFunction::Btrim | BuiltinScalarFunction::Ltrim - | BuiltinScalarFunction::Rtrim - | BuiltinScalarFunction::Trim => Signature::one_of( + | BuiltinScalarFunction::Rtrim => Signature::one_of( vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])], self.volatility(), ), - BuiltinScalarFunction::Chr | BuiltinScalarFunction::ToHex => { + BuiltinScalarFunction::Chr => { Signature::uniform(1, vec![Int64], self.volatility()) } BuiltinScalarFunction::Lpad | BuiltinScalarFunction::Rpad => { @@ -519,17 +494,17 @@ impl BuiltinScalarFunction { self.volatility(), ), - BuiltinScalarFunction::EndsWith - | BuiltinScalarFunction::Strpos - | BuiltinScalarFunction::StartsWith => Signature::one_of( - vec![ - Exact(vec![Utf8, Utf8]), - Exact(vec![Utf8, LargeUtf8]), - Exact(vec![LargeUtf8, Utf8]), - Exact(vec![LargeUtf8, LargeUtf8]), - ], - self.volatility(), - ), + BuiltinScalarFunction::EndsWith | BuiltinScalarFunction::Strpos => { + Signature::one_of( + vec![ + Exact(vec![Utf8, Utf8]), + Exact(vec![Utf8, LargeUtf8]), + Exact(vec![LargeUtf8, Utf8]), + Exact(vec![LargeUtf8, LargeUtf8]), + ], + self.volatility(), + ) + } BuiltinScalarFunction::Substr => Signature::one_of( vec![ @@ -749,13 +724,9 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Rpad => &["rpad"], BuiltinScalarFunction::Rtrim => &["rtrim"], BuiltinScalarFunction::SplitPart => &["split_part"], - BuiltinScalarFunction::StartsWith => &["starts_with"], BuiltinScalarFunction::Strpos => &["strpos", "instr", "position"], BuiltinScalarFunction::Substr => &["substr"], - BuiltinScalarFunction::ToHex => &["to_hex"], BuiltinScalarFunction::Translate => &["translate"], - BuiltinScalarFunction::Trim => &["trim"], - BuiltinScalarFunction::Upper => &["upper"], BuiltinScalarFunction::Uuid => &["uuid"], BuiltinScalarFunction::Levenshtein => &["levenshtein"], BuiltinScalarFunction::SubstrIndex => &["substr_index", "substring_index"], diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index b76164a1c83c..8667f631c507 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -575,12 +575,6 @@ scalar_expr!(Log10, log10, num, "base 10 logarithm of number"); scalar_expr!(Ln, ln, num, "natural logarithm (base e) of number"); scalar_expr!(Power, power, base exponent, "`base` raised to the power of `exponent`"); scalar_expr!(Atan2, atan2, y x, "inverse tangent of a division given in the argument"); -scalar_expr!( - ToHex, - to_hex, - num, - "returns the hexdecimal representation of an integer" -); scalar_expr!(Uuid, uuid, , "returns uuid v4 as a string value"); scalar_expr!(Log, log, base x, "logarithm of a `x` for a particular `base`"); @@ -630,19 +624,11 @@ scalar_expr!( "removes all characters, spaces by default, from the end of a string" ); scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string based on a delimiter and picks out the desired field based on the index."); -scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string` starts with the `prefix`"); scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends with the `suffix`"); scalar_expr!(Strpos, strpos, string substring, "finds the position from where the `substring` matches the `string`"); scalar_expr!(Substr, substr, string position, "substring from the `position` to the end"); scalar_expr!(Substr, substring, string position length, "substring from the `position` with `length` characters"); scalar_expr!(Translate, translate, string from to, "replaces the characters in `from` with the counterpart in `to`"); -scalar_expr!( - Trim, - trim, - string, - "removes all characters, space by default from the string" -); -scalar_expr!(Upper, upper, string, "converts the string to upper case"); //use vec as parameter nary_scalar_expr!( Lpad, @@ -1117,15 +1103,11 @@ mod test { test_nary_scalar_expr!(Rpad, rpad, string, count, characters); test_scalar_expr!(Rtrim, rtrim, string); test_scalar_expr!(SplitPart, split_part, expr, delimiter, index); - test_scalar_expr!(StartsWith, starts_with, string, characters); test_scalar_expr!(EndsWith, ends_with, string, characters); test_scalar_expr!(Strpos, strpos, string, substring); test_scalar_expr!(Substr, substr, string, position); test_scalar_expr!(Substr, substring, string, position, count); - test_scalar_expr!(ToHex, to_hex, string); test_scalar_expr!(Translate, translate, string, from, to); - test_scalar_expr!(Trim, trim, string); - test_scalar_expr!(Upper, upper, string); test_nary_scalar_expr!(OverLay, overlay, string, characters, position, len); test_nary_scalar_expr!(OverLay, overlay, string, characters, position); test_scalar_expr!(Levenshtein, levenshtein, string1, string2); diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 5a6da5345d7c..b12c99e84a90 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -29,6 +29,8 @@ authors = { workspace = true } rust-version = { workspace = true } [features] +# enable string functions +string_expressions = [] # enable core functions core_expressions = [] # enable datetime functions @@ -41,6 +43,7 @@ default = [ "math_expressions", "regex_expressions", "crypto_expressions", + "string_expressions", ] # enable encode/decode functions encoding_expressions = ["base64", "hex"] diff --git a/datafusion/functions/src/lib.rs b/datafusion/functions/src/lib.rs index 3a2eab8e5f05..f469b343e144 100644 --- a/datafusion/functions/src/lib.rs +++ b/datafusion/functions/src/lib.rs @@ -84,6 +84,10 @@ use log::debug; #[macro_use] pub mod macros; +#[cfg(feature = "string_expressions")] +pub mod string; +make_stub_package!(string, "string_expressions"); + /// Core datafusion expressions /// Enabled via feature flag `core_expressions` #[cfg(feature = "core_expressions")] @@ -134,6 +138,8 @@ pub mod expr_fn { pub use super::math::expr_fn::*; #[cfg(feature = "regex_expressions")] pub use super::regex::expr_fn::*; + #[cfg(feature = "string_expressions")] + pub use super::string::expr_fn::*; } /// Registers all enabled packages with a [`FunctionRegistry`] @@ -144,7 +150,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> { .chain(encoding::functions()) .chain(math::functions()) .chain(regex::functions()) - .chain(crypto::functions()); + .chain(crypto::functions()) + .chain(string::functions()); all_functions.try_for_each(|udf| { let existing_udf = registry.register_udf(udf)?; diff --git a/datafusion/functions/src/string/mod.rs b/datafusion/functions/src/string/mod.rs new file mode 100644 index 000000000000..08fcbb363bbc --- /dev/null +++ b/datafusion/functions/src/string/mod.rs @@ -0,0 +1,292 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::{ + array::{Array, ArrayRef, GenericStringArray, OffsetSizeTrait}, + datatypes::DataType, +}; +use datafusion_common::{ + cast::as_generic_string_array, exec_err, plan_err, Result, ScalarValue, +}; +use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation}; +use datafusion_physical_expr::functions::Hint; +use std::{ + fmt::{Display, Formatter}, + sync::Arc, +}; + +/// Creates a function to identify the optimal return type of a string function given +/// the type of its first argument. +/// +/// If the input type is `LargeUtf8` or `LargeBinary` the return type is +/// `$largeUtf8Type`, +/// +/// If the input type is `Utf8` or `Binary` the return type is `$utf8Type`, +macro_rules! get_optimal_return_type { + ($FUNC:ident, $largeUtf8Type:expr, $utf8Type:expr) => { + fn $FUNC(arg_type: &DataType, name: &str) -> Result { + Ok(match arg_type { + // LargeBinary inputs are automatically coerced to Utf8 + DataType::LargeUtf8 | DataType::LargeBinary => $largeUtf8Type, + // Binary inputs are automatically coerced to Utf8 + DataType::Utf8 | DataType::Binary => $utf8Type, + DataType::Null => DataType::Null, + DataType::Dictionary(_, value_type) => match **value_type { + DataType::LargeUtf8 | DataType::LargeBinary => $largeUtf8Type, + DataType::Utf8 | DataType::Binary => $utf8Type, + DataType::Null => DataType::Null, + _ => { + return plan_err!( + "The {} function can only accept strings, but got {:?}.", + name.to_uppercase(), + **value_type + ); + } + }, + data_type => { + return plan_err!( + "The {} function can only accept strings, but got {:?}.", + name.to_uppercase(), + data_type + ); + } + }) + } + }; +} + +// `utf8_to_str_type`: returns either a Utf8 or LargeUtf8 based on the input type size. +get_optimal_return_type!(utf8_to_str_type, DataType::LargeUtf8, DataType::Utf8); + +/// applies a unary expression to `args[0]` that is expected to be downcastable to +/// a `GenericStringArray` and returns a `GenericStringArray` (which may have a different offset) +/// # Errors +/// This function errors when: +/// * the number of arguments is not 1 +/// * the first argument is not castable to a `GenericStringArray` +pub(crate) fn unary_string_function<'a, T, O, F, R>( + args: &[&'a dyn Array], + op: F, + name: &str, +) -> Result> +where + R: AsRef, + O: OffsetSizeTrait, + T: OffsetSizeTrait, + F: Fn(&'a str) -> R, +{ + if args.len() != 1 { + return exec_err!( + "{:?} args were supplied but {} takes exactly one argument", + args.len(), + name + ); + } + + let string_array = as_generic_string_array::(args[0])?; + + // first map is the iterator, second is for the `Option<_>` + Ok(string_array.iter().map(|string| string.map(&op)).collect()) +} + +fn handle<'a, F, R>(args: &'a [ColumnarValue], op: F, name: &str) -> Result +where + R: AsRef, + F: Fn(&'a str) -> R, +{ + match &args[0] { + ColumnarValue::Array(a) => match a.data_type() { + DataType::Utf8 => { + Ok(ColumnarValue::Array(Arc::new(unary_string_function::< + i32, + i32, + _, + _, + >( + &[a.as_ref()], op, name + )?))) + } + DataType::LargeUtf8 => { + Ok(ColumnarValue::Array(Arc::new(unary_string_function::< + i64, + i64, + _, + _, + >( + &[a.as_ref()], op, name + )?))) + } + other => exec_err!("Unsupported data type {other:?} for function {name}"), + }, + ColumnarValue::Scalar(scalar) => match scalar { + ScalarValue::Utf8(a) => { + let result = a.as_ref().map(|x| (op)(x).as_ref().to_string()); + Ok(ColumnarValue::Scalar(ScalarValue::Utf8(result))) + } + ScalarValue::LargeUtf8(a) => { + let result = a.as_ref().map(|x| (op)(x).as_ref().to_string()); + Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(result))) + } + other => exec_err!("Unsupported data type {other:?} for function {name}"), + }, + } +} + +// TODO: mode allow[(dead_code)] after move ltrim and rtrim +enum TrimType { + #[allow(dead_code)] + Left, + #[allow(dead_code)] + Right, + Both, +} + +impl Display for TrimType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + TrimType::Left => write!(f, "ltrim"), + TrimType::Right => write!(f, "rtrim"), + TrimType::Both => write!(f, "btrim"), + } + } +} + +fn general_trim( + args: &[ArrayRef], + trim_type: TrimType, +) -> Result { + let func = match trim_type { + TrimType::Left => |input, pattern: &str| { + let pattern = pattern.chars().collect::>(); + str::trim_start_matches::<&[char]>(input, pattern.as_ref()) + }, + TrimType::Right => |input, pattern: &str| { + let pattern = pattern.chars().collect::>(); + str::trim_end_matches::<&[char]>(input, pattern.as_ref()) + }, + TrimType::Both => |input, pattern: &str| { + let pattern = pattern.chars().collect::>(); + str::trim_end_matches::<&[char]>( + str::trim_start_matches::<&[char]>(input, pattern.as_ref()), + pattern.as_ref(), + ) + }, + }; + + let string_array = as_generic_string_array::(&args[0])?; + + match args.len() { + 1 => { + let result = string_array + .iter() + .map(|string| string.map(|string: &str| func(string, " "))) + .collect::>(); + + Ok(Arc::new(result) as ArrayRef) + } + 2 => { + let characters_array = as_generic_string_array::(&args[1])?; + + let result = string_array + .iter() + .zip(characters_array.iter()) + .map(|(string, characters)| match (string, characters) { + (Some(string), Some(characters)) => Some(func(string, characters)), + _ => None, + }) + .collect::>(); + + Ok(Arc::new(result) as ArrayRef) + } + other => { + exec_err!( + "{trim_type} was called with {other} arguments. It requires at least 1 and at most 2." + ) + } + } +} + +pub(super) fn make_scalar_function( + inner: F, + hints: Vec, +) -> ScalarFunctionImplementation +where + F: Fn(&[ArrayRef]) -> Result + Sync + Send + 'static, +{ + Arc::new(move |args: &[ColumnarValue]| { + // first, identify if any of the arguments is an Array. If yes, store its `len`, + // as any scalar will need to be converted to an array of len `len`. + let len = args + .iter() + .fold(Option::::None, |acc, arg| match arg { + ColumnarValue::Scalar(_) => acc, + ColumnarValue::Array(a) => Some(a.len()), + }); + + let is_scalar = len.is_none(); + + let inferred_length = len.unwrap_or(1); + let args = args + .iter() + .zip(hints.iter().chain(std::iter::repeat(&Hint::Pad))) + .map(|(arg, hint)| { + // Decide on the length to expand this scalar to depending + // on the given hints. + let expansion_len = match hint { + Hint::AcceptsSingular => 1, + Hint::Pad => inferred_length, + }; + arg.clone().into_array(expansion_len) + }) + .collect::>>()?; + + let result = (inner)(&args); + if is_scalar { + // If all inputs are scalar, keeps output as scalar + let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0)); + result.map(ColumnarValue::Scalar) + } else { + result.map(ColumnarValue::Array) + } + }) +} + +mod starts_with; +mod to_hex; +mod trim; +mod upper; +// create UDFs +make_udf_function!(starts_with::StartsWithFunc, STARTS_WITH, starts_with); +make_udf_function!(to_hex::ToHexFunc, TO_HEX, to_hex); +make_udf_function!(trim::TrimFunc, TRIM, trim); +make_udf_function!(upper::UpperFunc, UPPER, upper); + +export_functions!( + ( + starts_with, + arg1 arg2, + "Returns true if string starts with prefix."), + ( + to_hex, + arg1, + "Converts an integer to a hexadecimal string."), + (trim, + arg1, + "removes all characters, space by default from the string"), + (upper, + arg1, + "Converts a string to uppercase.")); diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs new file mode 100644 index 000000000000..1fce399d1e70 --- /dev/null +++ b/datafusion/functions/src/string/starts_with.rs @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayRef, OffsetSizeTrait}; +use arrow::datatypes::DataType; +use datafusion_common::{cast::as_generic_string_array, internal_err, Result}; +use datafusion_expr::ColumnarValue; +use datafusion_expr::TypeSignature::*; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; +use std::sync::Arc; + +use crate::string::make_scalar_function; + +/// Returns true if string starts with prefix. +/// starts_with('alphabet', 'alph') = 't' +pub fn starts_with(args: &[ArrayRef]) -> Result { + let left = as_generic_string_array::(&args[0])?; + let right = as_generic_string_array::(&args[1])?; + + let result = arrow::compute::kernels::comparison::starts_with(left, right)?; + + Ok(Arc::new(result) as ArrayRef) +} + +#[derive(Debug)] +pub(super) struct StartsWithFunc { + signature: Signature, +} +impl StartsWithFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::one_of( + vec![ + Exact(vec![Utf8, Utf8]), + Exact(vec![Utf8, LargeUtf8]), + Exact(vec![LargeUtf8, Utf8]), + Exact(vec![LargeUtf8, LargeUtf8]), + ], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for StartsWithFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "starts_with" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + use DataType::*; + + Ok(Boolean) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + match args[0].data_type() { + DataType::Utf8 => make_scalar_function(starts_with::, vec![])(args), + DataType::LargeUtf8 => { + return make_scalar_function(starts_with::, vec![])(args); + } + _ => internal_err!("Unsupported data type"), + } + } +} diff --git a/datafusion/functions/src/string/to_hex.rs b/datafusion/functions/src/string/to_hex.rs new file mode 100644 index 000000000000..4dfc84887da2 --- /dev/null +++ b/datafusion/functions/src/string/to_hex.rs @@ -0,0 +1,155 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::datatypes::{ + ArrowNativeType, ArrowPrimitiveType, DataType, Int32Type, Int64Type, +}; +use datafusion_common::cast::as_primitive_array; +use datafusion_common::Result; +use datafusion_common::{exec_err, plan_err}; +use datafusion_expr::ColumnarValue; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; +use std::sync::Arc; + +use super::make_scalar_function; + +/// Converts the number to its equivalent hexadecimal representation. +/// to_hex(2147483647) = '7fffffff' +pub fn to_hex(args: &[ArrayRef]) -> Result +where + T::Native: OffsetSizeTrait, +{ + let integer_array = as_primitive_array::(&args[0])?; + + let result = integer_array + .iter() + .map(|integer| { + if let Some(value) = integer { + if let Some(value_usize) = value.to_usize() { + Ok(Some(format!("{value_usize:x}"))) + } else if let Some(value_isize) = value.to_isize() { + Ok(Some(format!("{value_isize:x}"))) + } else { + exec_err!("Unsupported data type {integer:?} for function to_hex") + } + } else { + Ok(None) + } + }) + .collect::>>()?; + + Ok(Arc::new(result) as ArrayRef) +} + +#[derive(Debug)] +pub(super) struct ToHexFunc { + signature: Signature, +} +impl ToHexFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::uniform(1, vec![Int64], Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for ToHexFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "to_hex" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + use DataType::*; + + Ok(match arg_types[0] { + Int8 | Int16 | Int32 | Int64 => Utf8, + _ => { + return plan_err!("The to_hex function can only accept integers."); + } + }) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + match args[0].data_type() { + DataType::Int32 => make_scalar_function(to_hex::, vec![])(args), + DataType::Int64 => make_scalar_function(to_hex::, vec![])(args), + other => exec_err!("Unsupported data type {other:?} for function to_hex"), + } + } +} + +#[cfg(test)] +mod tests { + use arrow::{ + array::{Int32Array, StringArray}, + datatypes::Int32Type, + }; + + use datafusion_common::cast::as_string_array; + + use super::*; + + #[test] + // Test to_hex function for zero + fn to_hex_zero() -> Result<()> { + let array = vec![0].into_iter().collect::(); + let array_ref = Arc::new(array); + let hex_value_arc = to_hex::(&[array_ref])?; + let hex_value = as_string_array(&hex_value_arc)?; + let expected = StringArray::from(vec![Some("0")]); + assert_eq!(&expected, hex_value); + + Ok(()) + } + + #[test] + // Test to_hex function for positive number + fn to_hex_positive_number() -> Result<()> { + let array = vec![100].into_iter().collect::(); + let array_ref = Arc::new(array); + let hex_value_arc = to_hex::(&[array_ref])?; + let hex_value = as_string_array(&hex_value_arc)?; + let expected = StringArray::from(vec![Some("64")]); + assert_eq!(&expected, hex_value); + + Ok(()) + } + + #[test] + // Test to_hex function for negative number + fn to_hex_negative_number() -> Result<()> { + let array = vec![-1].into_iter().collect::(); + let array_ref = Arc::new(array); + let hex_value_arc = to_hex::(&[array_ref])?; + let hex_value = as_string_array(&hex_value_arc)?; + let expected = StringArray::from(vec![Some("ffffffffffffffff")]); + assert_eq!(&expected, hex_value); + + Ok(()) + } +} diff --git a/datafusion/functions/src/string/trim.rs b/datafusion/functions/src/string/trim.rs new file mode 100644 index 000000000000..e04a171722e3 --- /dev/null +++ b/datafusion/functions/src/string/trim.rs @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayRef, OffsetSizeTrait}; +use arrow::datatypes::DataType; +use datafusion_common::exec_err; +use datafusion_common::Result; +use datafusion_expr::ColumnarValue; +use datafusion_expr::TypeSignature::*; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; + +use crate::string::{make_scalar_function, utf8_to_str_type}; + +use super::{general_trim, TrimType}; + +/// Returns the longest string with leading and trailing characters removed. If the characters are not specified, whitespace is removed. +/// btrim('xyxtrimyyx', 'xyz') = 'trim' +pub fn btrim(args: &[ArrayRef]) -> Result { + general_trim::(args, TrimType::Both) +} + +#[derive(Debug)] +pub(super) struct TrimFunc { + signature: Signature, +} + +impl TrimFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::one_of( + vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for TrimFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "trim" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + utf8_to_str_type(&arg_types[0], "trim") + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + match args[0].data_type() { + DataType::Utf8 => make_scalar_function(btrim::, vec![])(args), + DataType::LargeUtf8 => make_scalar_function(btrim::, vec![])(args), + other => exec_err!("Unsupported data type {other:?} for function trim"), + } + } +} diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs new file mode 100644 index 000000000000..ed41487699aa --- /dev/null +++ b/datafusion/functions/src/string/upper.rs @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::datatypes::DataType; +use datafusion_common::Result; +use datafusion_expr::ColumnarValue; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; + +use crate::string::utf8_to_str_type; + +use super::handle; + +#[derive(Debug)] +pub(super) struct UpperFunc { + signature: Signature, +} + +impl UpperFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::uniform( + 1, + vec![Utf8, LargeUtf8], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for UpperFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "upper" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + utf8_to_str_type(&arg_types[0], "upper") + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + handle(args, |string| string.to_uppercase(), "upper") + } +} diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index e76e7f56dc95..f2c93c3ec1dd 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -447,17 +447,6 @@ pub fn create_physical_fun( exec_err!("Unsupported data type {other:?} for function split_part") } }), - BuiltinScalarFunction::StartsWith => Arc::new(|args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function_inner(string_expressions::starts_with::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function_inner(string_expressions::starts_with::)(args) - } - other => { - exec_err!("Unsupported data type {other:?} for function starts_with") - } - }), BuiltinScalarFunction::EndsWith => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { make_scalar_function_inner(string_expressions::ends_with::)(args) @@ -497,15 +486,6 @@ pub fn create_physical_fun( } other => exec_err!("Unsupported data type {other:?} for function substr"), }), - BuiltinScalarFunction::ToHex => Arc::new(|args| match args[0].data_type() { - DataType::Int32 => { - make_scalar_function_inner(string_expressions::to_hex::)(args) - } - DataType::Int64 => { - make_scalar_function_inner(string_expressions::to_hex::)(args) - } - other => exec_err!("Unsupported data type {other:?} for function to_hex"), - }), BuiltinScalarFunction::Translate => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { let func = invoke_if_unicode_expressions_feature_flag!( @@ -527,16 +507,6 @@ pub fn create_physical_fun( exec_err!("Unsupported data type {other:?} for function translate") } }), - BuiltinScalarFunction::Trim => Arc::new(|args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function_inner(string_expressions::btrim::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function_inner(string_expressions::btrim::)(args) - } - other => exec_err!("Unsupported data type {other:?} for function trim"), - }), - BuiltinScalarFunction::Upper => Arc::new(string_expressions::upper), BuiltinScalarFunction::Uuid => Arc::new(string_expressions::uuid), BuiltinScalarFunction::OverLay => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { @@ -1797,38 +1767,6 @@ mod tests { Utf8, StringArray ); - test_function!( - StartsWith, - &[lit("alphabet"), lit("alph"),], - Ok(Some(true)), - bool, - Boolean, - BooleanArray - ); - test_function!( - StartsWith, - &[lit("alphabet"), lit("blph"),], - Ok(Some(false)), - bool, - Boolean, - BooleanArray - ); - test_function!( - StartsWith, - &[lit(ScalarValue::Utf8(None)), lit("alph"),], - Ok(None), - bool, - Boolean, - BooleanArray - ); - test_function!( - StartsWith, - &[lit("alphabet"), lit(ScalarValue::Utf8(None)),], - Ok(None), - bool, - Boolean, - BooleanArray - ); test_function!( EndsWith, &[lit("alphabet"), lit("alph"),], @@ -2149,62 +2087,6 @@ mod tests { Utf8, StringArray ); - test_function!( - Trim, - &[lit(" trim ")], - Ok(Some("trim")), - &str, - Utf8, - StringArray - ); - test_function!( - Trim, - &[lit("trim ")], - Ok(Some("trim")), - &str, - Utf8, - StringArray - ); - test_function!( - Trim, - &[lit(" trim")], - Ok(Some("trim")), - &str, - Utf8, - StringArray - ); - test_function!( - Trim, - &[lit(ScalarValue::Utf8(None))], - Ok(None), - &str, - Utf8, - StringArray - ); - test_function!( - Upper, - &[lit("upper")], - Ok(Some("UPPER")), - &str, - Utf8, - StringArray - ); - test_function!( - Upper, - &[lit("UPPER")], - Ok(Some("UPPER")), - &str, - Utf8, - StringArray - ); - test_function!( - Upper, - &[lit(ScalarValue::Utf8(None))], - Ok(None), - &str, - Utf8, - StringArray - ); Ok(()) } diff --git a/datafusion/physical-expr/src/string_expressions.rs b/datafusion/physical-expr/src/string_expressions.rs index ace7ef2888a3..86c0092a220d 100644 --- a/datafusion/physical-expr/src/string_expressions.rs +++ b/datafusion/physical-expr/src/string_expressions.rs @@ -32,16 +32,14 @@ use arrow::{ Array, ArrayRef, GenericStringArray, Int32Array, Int64Array, OffsetSizeTrait, StringArray, }, - datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType}, + datatypes::DataType, }; use uuid::Uuid; use datafusion_common::utils::datafusion_strsim; use datafusion_common::Result; use datafusion_common::{ - cast::{ - as_generic_string_array, as_int64_array, as_primitive_array, as_string_array, - }, + cast::{as_generic_string_array, as_int64_array, as_string_array}, exec_err, ScalarValue, }; use datafusion_expr::ColumnarValue; @@ -526,34 +524,6 @@ pub fn ends_with(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } -/// Converts the number to its equivalent hexadecimal representation. -/// to_hex(2147483647) = '7fffffff' -pub fn to_hex(args: &[ArrayRef]) -> Result -where - T::Native: OffsetSizeTrait, -{ - let integer_array = as_primitive_array::(&args[0])?; - - let result = integer_array - .iter() - .map(|integer| { - if let Some(value) = integer { - if let Some(value_usize) = value.to_usize() { - Ok(Some(format!("{value_usize:x}"))) - } else if let Some(value_isize) = value.to_isize() { - Ok(Some(format!("{value_isize:x}"))) - } else { - exec_err!("Unsupported data type {integer:?} for function to_hex") - } - } else { - Ok(None) - } - }) - .collect::>>()?; - - Ok(Arc::new(result) as ArrayRef) -} - /// Converts the string to all upper case. /// upper('tom') = 'TOM' pub fn upper(args: &[ColumnarValue]) -> Result { @@ -709,54 +679,13 @@ pub fn levenshtein(args: &[ArrayRef]) -> Result { #[cfg(test)] mod tests { - use arrow::{array::Int32Array, datatypes::Int32Type}; + use arrow::array::Int32Array; use arrow_array::Int64Array; use datafusion_common::cast::as_int32_array; - use crate::string_expressions; - use super::*; - #[test] - // Test to_hex function for zero - fn to_hex_zero() -> Result<()> { - let array = vec![0].into_iter().collect::(); - let array_ref = Arc::new(array); - let hex_value_arc = string_expressions::to_hex::(&[array_ref])?; - let hex_value = as_string_array(&hex_value_arc)?; - let expected = StringArray::from(vec![Some("0")]); - assert_eq!(&expected, hex_value); - - Ok(()) - } - - #[test] - // Test to_hex function for positive number - fn to_hex_positive_number() -> Result<()> { - let array = vec![100].into_iter().collect::(); - let array_ref = Arc::new(array); - let hex_value_arc = string_expressions::to_hex::(&[array_ref])?; - let hex_value = as_string_array(&hex_value_arc)?; - let expected = StringArray::from(vec![Some("64")]); - assert_eq!(&expected, hex_value); - - Ok(()) - } - - #[test] - // Test to_hex function for negative number - fn to_hex_negative_number() -> Result<()> { - let array = vec![-1].into_iter().collect::(); - let array_ref = Arc::new(array); - let hex_value_arc = string_expressions::to_hex::(&[array_ref])?; - let hex_value = as_string_array(&hex_value_arc)?; - let expected = StringArray::from(vec![Some("ffffffffffffffff")]); - assert_eq!(&expected, hex_value); - - Ok(()) - } - #[test] fn to_overlay() -> Result<()> { let string = diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 10f79a2b8cc8..c009682d5a4d 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -592,18 +592,18 @@ enum ScalarFunction { // 48 was SHA384 // 49 was SHA512 SplitPart = 50; - StartsWith = 51; + // StartsWith = 51; Strpos = 52; Substr = 53; - ToHex = 54; + // ToHex = 54; // 55 was ToTimestamp // 56 was ToTimestampMillis // 57 was ToTimestampMicros // 58 was ToTimestampSeconds // 59 was Now Translate = 60; - Trim = 61; - Upper = 62; + // Trim = 61; + // Upper = 62; Coalesce = 63; Power = 64; // 65 was StructFun diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 7757a64ef359..58683dba6dff 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22949,13 +22949,9 @@ impl serde::Serialize for ScalarFunction { Self::Rpad => "Rpad", Self::Rtrim => "Rtrim", Self::SplitPart => "SplitPart", - Self::StartsWith => "StartsWith", Self::Strpos => "Strpos", Self::Substr => "Substr", - Self::ToHex => "ToHex", Self::Translate => "Translate", - Self::Trim => "Trim", - Self::Upper => "Upper", Self::Coalesce => "Coalesce", Self::Power => "Power", Self::Atan2 => "Atan2", @@ -23027,13 +23023,9 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Rpad", "Rtrim", "SplitPart", - "StartsWith", "Strpos", "Substr", - "ToHex", "Translate", - "Trim", - "Upper", "Coalesce", "Power", "Atan2", @@ -23134,13 +23126,9 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Rpad" => Ok(ScalarFunction::Rpad), "Rtrim" => Ok(ScalarFunction::Rtrim), "SplitPart" => Ok(ScalarFunction::SplitPart), - "StartsWith" => Ok(ScalarFunction::StartsWith), "Strpos" => Ok(ScalarFunction::Strpos), "Substr" => Ok(ScalarFunction::Substr), - "ToHex" => Ok(ScalarFunction::ToHex), "Translate" => Ok(ScalarFunction::Translate), - "Trim" => Ok(ScalarFunction::Trim), - "Upper" => Ok(ScalarFunction::Upper), "Coalesce" => Ok(ScalarFunction::Coalesce), "Power" => Ok(ScalarFunction::Power), "Atan2" => Ok(ScalarFunction::Atan2), diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index ab0ddb14ebfc..8eabb3b18603 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2891,18 +2891,18 @@ pub enum ScalarFunction { /// 48 was SHA384 /// 49 was SHA512 SplitPart = 50, - StartsWith = 51, + /// StartsWith = 51; Strpos = 52, Substr = 53, - ToHex = 54, + /// ToHex = 54; /// 55 was ToTimestamp /// 56 was ToTimestampMillis /// 57 was ToTimestampMicros /// 58 was ToTimestampSeconds /// 59 was Now Translate = 60, - Trim = 61, - Upper = 62, + /// Trim = 61; + /// Upper = 62; Coalesce = 63, Power = 64, /// 65 was StructFun @@ -3022,13 +3022,9 @@ impl ScalarFunction { ScalarFunction::Rpad => "Rpad", ScalarFunction::Rtrim => "Rtrim", ScalarFunction::SplitPart => "SplitPart", - ScalarFunction::StartsWith => "StartsWith", ScalarFunction::Strpos => "Strpos", ScalarFunction::Substr => "Substr", - ScalarFunction::ToHex => "ToHex", ScalarFunction::Translate => "Translate", - ScalarFunction::Trim => "Trim", - ScalarFunction::Upper => "Upper", ScalarFunction::Coalesce => "Coalesce", ScalarFunction::Power => "Power", ScalarFunction::Atan2 => "Atan2", @@ -3094,13 +3090,9 @@ impl ScalarFunction { "Rpad" => Some(Self::Rpad), "Rtrim" => Some(Self::Rtrim), "SplitPart" => Some(Self::SplitPart), - "StartsWith" => Some(Self::StartsWith), "Strpos" => Some(Self::Strpos), "Substr" => Some(Self::Substr), - "ToHex" => Some(Self::ToHex), "Translate" => Some(Self::Translate), - "Trim" => Some(Self::Trim), - "Upper" => Some(Self::Upper), "Coalesce" => Some(Self::Coalesce), "Power" => Some(Self::Power), "Atan2" => Some(Self::Atan2), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 8581156e2bb8..64ceb37d2961 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -57,10 +57,9 @@ use datafusion_expr::{ logical_plan::{PlanType, StringifiedPlan}, lower, lpad, ltrim, nanvl, octet_length, overlay, pi, power, radians, random, repeat, replace, reverse, right, round, rpad, rtrim, signum, sin, sinh, split_part, sqrt, - starts_with, strpos, substr, substr_index, substring, to_hex, translate, trim, trunc, - upper, uuid, AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, - BuiltinScalarFunction, Case, Cast, Expr, GetFieldAccess, GetIndexedField, - GroupingSet, + strpos, substr, substr_index, substring, translate, trunc, uuid, AggregateFunction, + Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case, Cast, Expr, + GetFieldAccess, GetIndexedField, GroupingSet, GroupingSet::GroupingSets, JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, @@ -462,8 +461,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::OctetLength => Self::OctetLength, ScalarFunction::Concat => Self::Concat, ScalarFunction::Lower => Self::Lower, - ScalarFunction::Upper => Self::Upper, - ScalarFunction::Trim => Self::Trim, ScalarFunction::Ltrim => Self::Ltrim, ScalarFunction::Rtrim => Self::Rtrim, ScalarFunction::Log2 => Self::Log2, @@ -485,10 +482,8 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Right => Self::Right, ScalarFunction::Rpad => Self::Rpad, ScalarFunction::SplitPart => Self::SplitPart, - ScalarFunction::StartsWith => Self::StartsWith, ScalarFunction::Strpos => Self::Strpos, ScalarFunction::Substr => Self::Substr, - ScalarFunction::ToHex => Self::ToHex, ScalarFunction::Uuid => Self::Uuid, ScalarFunction::Translate => Self::Translate, ScalarFunction::Coalesce => Self::Coalesce, @@ -1444,10 +1439,6 @@ pub fn parse_expr( ScalarFunction::Lower => { Ok(lower(parse_expr(&args[0], registry, codec)?)) } - ScalarFunction::Upper => { - Ok(upper(parse_expr(&args[0], registry, codec)?)) - } - ScalarFunction::Trim => Ok(trim(parse_expr(&args[0], registry, codec)?)), ScalarFunction::Ltrim => { Ok(ltrim(parse_expr(&args[0], registry, codec)?)) } @@ -1532,10 +1523,6 @@ pub fn parse_expr( parse_expr(&args[1], registry, codec)?, parse_expr(&args[2], registry, codec)?, )), - ScalarFunction::StartsWith => Ok(starts_with( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - )), ScalarFunction::EndsWith => Ok(ends_with( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, @@ -1563,9 +1550,6 @@ pub fn parse_expr( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, )), - ScalarFunction::ToHex => { - Ok(to_hex(parse_expr(&args[0], registry, codec)?)) - } ScalarFunction::Translate => Ok(translate( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 05a29ff6d42b..89bd93550a04 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1481,8 +1481,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::OctetLength => Self::OctetLength, BuiltinScalarFunction::Concat => Self::Concat, BuiltinScalarFunction::Lower => Self::Lower, - BuiltinScalarFunction::Upper => Self::Upper, - BuiltinScalarFunction::Trim => Self::Trim, BuiltinScalarFunction::Ltrim => Self::Ltrim, BuiltinScalarFunction::Rtrim => Self::Rtrim, BuiltinScalarFunction::Log2 => Self::Log2, @@ -1505,10 +1503,8 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Right => Self::Right, BuiltinScalarFunction::Rpad => Self::Rpad, BuiltinScalarFunction::SplitPart => Self::SplitPart, - BuiltinScalarFunction::StartsWith => Self::StartsWith, BuiltinScalarFunction::Strpos => Self::Strpos, BuiltinScalarFunction::Substr => Self::Substr, - BuiltinScalarFunction::ToHex => Self::ToHex, BuiltinScalarFunction::Translate => Self::Translate, BuiltinScalarFunction::Coalesce => Self::Coalesce, BuiltinScalarFunction::Pi => Self::Pi, diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 5e9c0623a265..c34b42193cec 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -747,7 +747,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Some(TrimWhereField::Leading) => BuiltinScalarFunction::Ltrim, Some(TrimWhereField::Trailing) => BuiltinScalarFunction::Rtrim, Some(TrimWhereField::Both) => BuiltinScalarFunction::Btrim, - None => BuiltinScalarFunction::Trim, + None => BuiltinScalarFunction::Btrim, }; let arg = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;