Skip to content

Commit

Permalink
Move starts_with, to_hex, trim, upper to datafusion-functions…
Browse files Browse the repository at this point in the history
… (and add string_expressions) (apache#9541)

* [task apache#9539] Move starts_with, to_hex, trim, upper to datafusion-functions

Signed-off-by: tangruilin <[email protected]>

* Export expr_fn, restore tests

* fix comments

---------

Signed-off-by: tangruilin <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
Tangruilin and alamb authored Mar 20, 2024
1 parent b72d25c commit 1d8a41b
Show file tree
Hide file tree
Showing 17 changed files with 720 additions and 306 deletions.
57 changes: 14 additions & 43 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,20 +147,12 @@ pub enum BuiltinScalarFunction {
Rtrim,
/// split_part
SplitPart,
/// starts_with
StartsWith,
/// strpos
Strpos,
/// substr
Substr,
/// to_hex
ToHex,
/// translate
Translate,
/// trim
Trim,
/// upper
Upper,
/// uuid
Uuid,
/// overlay
Expand Down Expand Up @@ -276,13 +268,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Rpad => Volatility::Immutable,
BuiltinScalarFunction::Rtrim => Volatility::Immutable,
BuiltinScalarFunction::SplitPart => Volatility::Immutable,
BuiltinScalarFunction::StartsWith => Volatility::Immutable,
BuiltinScalarFunction::Strpos => Volatility::Immutable,
BuiltinScalarFunction::Substr => Volatility::Immutable,
BuiltinScalarFunction::ToHex => Volatility::Immutable,
BuiltinScalarFunction::Translate => Volatility::Immutable,
BuiltinScalarFunction::Trim => Volatility::Immutable,
BuiltinScalarFunction::Upper => Volatility::Immutable,
BuiltinScalarFunction::OverLay => Volatility::Immutable,
BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
BuiltinScalarFunction::SubstrIndex => Volatility::Immutable,
Expand Down Expand Up @@ -365,20 +353,13 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::SplitPart => {
utf8_to_str_type(&input_expr_types[0], "split_part")
}
BuiltinScalarFunction::StartsWith => Ok(Boolean),
BuiltinScalarFunction::EndsWith => Ok(Boolean),
BuiltinScalarFunction::Strpos => {
utf8_to_int_type(&input_expr_types[0], "strpos/instr/position")
}
BuiltinScalarFunction::Substr => {
utf8_to_str_type(&input_expr_types[0], "substr")
}
BuiltinScalarFunction::ToHex => Ok(match input_expr_types[0] {
Int8 | Int16 | Int32 | Int64 => Utf8,
_ => {
return plan_err!("The to_hex function can only accept integers.");
}
}),
BuiltinScalarFunction::SubstrIndex => {
utf8_to_str_type(&input_expr_types[0], "substr_index")
}
Expand All @@ -388,10 +369,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Translate => {
utf8_to_str_type(&input_expr_types[0], "translate")
}
BuiltinScalarFunction::Trim => utf8_to_str_type(&input_expr_types[0], "trim"),
BuiltinScalarFunction::Upper => {
utf8_to_str_type(&input_expr_types[0], "upper")
}

BuiltinScalarFunction::Factorial
| BuiltinScalarFunction::Gcd
Expand Down Expand Up @@ -476,18 +453,16 @@ impl BuiltinScalarFunction {
| BuiltinScalarFunction::InitCap
| BuiltinScalarFunction::Lower
| BuiltinScalarFunction::OctetLength
| BuiltinScalarFunction::Reverse
| BuiltinScalarFunction::Upper => {
| BuiltinScalarFunction::Reverse => {
Signature::uniform(1, vec![Utf8, LargeUtf8], self.volatility())
}
BuiltinScalarFunction::Btrim
| BuiltinScalarFunction::Ltrim
| BuiltinScalarFunction::Rtrim
| BuiltinScalarFunction::Trim => Signature::one_of(
| BuiltinScalarFunction::Rtrim => Signature::one_of(
vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])],
self.volatility(),
),
BuiltinScalarFunction::Chr | BuiltinScalarFunction::ToHex => {
BuiltinScalarFunction::Chr => {
Signature::uniform(1, vec![Int64], self.volatility())
}
BuiltinScalarFunction::Lpad | BuiltinScalarFunction::Rpad => {
Expand Down Expand Up @@ -519,17 +494,17 @@ impl BuiltinScalarFunction {
self.volatility(),
),

BuiltinScalarFunction::EndsWith
| BuiltinScalarFunction::Strpos
| BuiltinScalarFunction::StartsWith => Signature::one_of(
vec![
Exact(vec![Utf8, Utf8]),
Exact(vec![Utf8, LargeUtf8]),
Exact(vec![LargeUtf8, Utf8]),
Exact(vec![LargeUtf8, LargeUtf8]),
],
self.volatility(),
),
BuiltinScalarFunction::EndsWith | BuiltinScalarFunction::Strpos => {
Signature::one_of(
vec![
Exact(vec![Utf8, Utf8]),
Exact(vec![Utf8, LargeUtf8]),
Exact(vec![LargeUtf8, Utf8]),
Exact(vec![LargeUtf8, LargeUtf8]),
],
self.volatility(),
)
}

BuiltinScalarFunction::Substr => Signature::one_of(
vec![
Expand Down Expand Up @@ -749,13 +724,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Rpad => &["rpad"],
BuiltinScalarFunction::Rtrim => &["rtrim"],
BuiltinScalarFunction::SplitPart => &["split_part"],
BuiltinScalarFunction::StartsWith => &["starts_with"],
BuiltinScalarFunction::Strpos => &["strpos", "instr", "position"],
BuiltinScalarFunction::Substr => &["substr"],
BuiltinScalarFunction::ToHex => &["to_hex"],
BuiltinScalarFunction::Translate => &["translate"],
BuiltinScalarFunction::Trim => &["trim"],
BuiltinScalarFunction::Upper => &["upper"],
BuiltinScalarFunction::Uuid => &["uuid"],
BuiltinScalarFunction::Levenshtein => &["levenshtein"],
BuiltinScalarFunction::SubstrIndex => &["substr_index", "substring_index"],
Expand Down
18 changes: 0 additions & 18 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -575,12 +575,6 @@ scalar_expr!(Log10, log10, num, "base 10 logarithm of number");
scalar_expr!(Ln, ln, num, "natural logarithm (base e) of number");
scalar_expr!(Power, power, base exponent, "`base` raised to the power of `exponent`");
scalar_expr!(Atan2, atan2, y x, "inverse tangent of a division given in the argument");
scalar_expr!(
ToHex,
to_hex,
num,
"returns the hexdecimal representation of an integer"
);
scalar_expr!(Uuid, uuid, , "returns uuid v4 as a string value");
scalar_expr!(Log, log, base x, "logarithm of a `x` for a particular `base`");

Expand Down Expand Up @@ -630,19 +624,11 @@ scalar_expr!(
"removes all characters, spaces by default, from the end of a string"
);
scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string based on a delimiter and picks out the desired field based on the index.");
scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string` starts with the `prefix`");
scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends with the `suffix`");
scalar_expr!(Strpos, strpos, string substring, "finds the position from where the `substring` matches the `string`");
scalar_expr!(Substr, substr, string position, "substring from the `position` to the end");
scalar_expr!(Substr, substring, string position length, "substring from the `position` with `length` characters");
scalar_expr!(Translate, translate, string from to, "replaces the characters in `from` with the counterpart in `to`");
scalar_expr!(
Trim,
trim,
string,
"removes all characters, space by default from the string"
);
scalar_expr!(Upper, upper, string, "converts the string to upper case");
//use vec as parameter
nary_scalar_expr!(
Lpad,
Expand Down Expand Up @@ -1117,15 +1103,11 @@ mod test {
test_nary_scalar_expr!(Rpad, rpad, string, count, characters);
test_scalar_expr!(Rtrim, rtrim, string);
test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
test_scalar_expr!(StartsWith, starts_with, string, characters);
test_scalar_expr!(EndsWith, ends_with, string, characters);
test_scalar_expr!(Strpos, strpos, string, substring);
test_scalar_expr!(Substr, substr, string, position);
test_scalar_expr!(Substr, substring, string, position, count);
test_scalar_expr!(ToHex, to_hex, string);
test_scalar_expr!(Translate, translate, string, from, to);
test_scalar_expr!(Trim, trim, string);
test_scalar_expr!(Upper, upper, string);
test_nary_scalar_expr!(OverLay, overlay, string, characters, position, len);
test_nary_scalar_expr!(OverLay, overlay, string, characters, position);
test_scalar_expr!(Levenshtein, levenshtein, string1, string2);
Expand Down
3 changes: 3 additions & 0 deletions datafusion/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ authors = { workspace = true }
rust-version = { workspace = true }

[features]
# enable string functions
string_expressions = []
# enable core functions
core_expressions = []
# enable datetime functions
Expand All @@ -41,6 +43,7 @@ default = [
"math_expressions",
"regex_expressions",
"crypto_expressions",
"string_expressions",
]
# enable encode/decode functions
encoding_expressions = ["base64", "hex"]
Expand Down
9 changes: 8 additions & 1 deletion datafusion/functions/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ use log::debug;
#[macro_use]
pub mod macros;

#[cfg(feature = "string_expressions")]
pub mod string;
make_stub_package!(string, "string_expressions");

/// Core datafusion expressions
/// Enabled via feature flag `core_expressions`
#[cfg(feature = "core_expressions")]
Expand Down Expand Up @@ -134,6 +138,8 @@ pub mod expr_fn {
pub use super::math::expr_fn::*;
#[cfg(feature = "regex_expressions")]
pub use super::regex::expr_fn::*;
#[cfg(feature = "string_expressions")]
pub use super::string::expr_fn::*;
}

/// Registers all enabled packages with a [`FunctionRegistry`]
Expand All @@ -144,7 +150,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
.chain(encoding::functions())
.chain(math::functions())
.chain(regex::functions())
.chain(crypto::functions());
.chain(crypto::functions())
.chain(string::functions());

all_functions.try_for_each(|udf| {
let existing_udf = registry.register_udf(udf)?;
Expand Down
Loading

0 comments on commit 1d8a41b

Please sign in to comment.