From da55f93b215d30b10064c140ac192acaf4c810e1 Mon Sep 17 00:00:00 2001 From: Remzi Yang <59198230+HaoYang670@users.noreply.github.com> Date: Mon, 28 Nov 2022 23:01:29 +0800 Subject: [PATCH] Clean the `expr_fn` - use `scalar_expr` to create unary scalar expr functions, remove macro `unary_scalar_expr` (#4357) * update scalar_expr Signed-off-by: remzi <13716567376yh@gmail.com> * delete unary macro Signed-off-by: remzi <13716567376yh@gmail.com> * update nary macro Signed-off-by: remzi <13716567376yh@gmail.com> * clean Signed-off-by: remzi <13716567376yh@gmail.com> * fmt Signed-off-by: remzi <13716567376yh@gmail.com> Signed-off-by: remzi <13716567376yh@gmail.com> --- datafusion/expr/src/expr_fn.rs | 297 ++++++++++++++++++--------------- 1 file changed, 164 insertions(+), 133 deletions(-) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 7dd28a4c4bb8..f63de79f1dbf 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -305,22 +305,9 @@ pub fn is_not_unknown(expr: Expr) -> Expr { Expr::IsNotUnknown(Box::new(expr)) } -/// Create an convenience function representing a unary scalar function -macro_rules! unary_scalar_expr { - ($ENUM:ident, $FUNC:ident, $DOC:expr) => { - #[doc = $DOC ] - pub fn $FUNC(e: Expr) -> Expr { - Expr::ScalarFunction { - fun: built_in_function::BuiltinScalarFunction::$ENUM, - args: vec![e], - } - } - }; -} - macro_rules! scalar_expr { - ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => { - #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ] + ($ENUM:ident, $FUNC:ident, $($arg:ident)*, $DOC:expr) => { + #[doc = $DOC ] pub fn $FUNC($($arg: Expr),*) -> Expr { Expr::ScalarFunction { fun: built_in_function::BuiltinScalarFunction::$ENUM, @@ -331,8 +318,8 @@ macro_rules! scalar_expr { } macro_rules! 
nary_scalar_expr { - ($ENUM:ident, $FUNC:ident) => { - #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ] + ($ENUM:ident, $FUNC:ident, $DOC:expr) => { + #[doc = $DOC ] pub fn $FUNC(args: Vec) -> Expr { Expr::ScalarFunction { fun: built_in_function::BuiltinScalarFunction::$ENUM, @@ -345,136 +332,181 @@ macro_rules! nary_scalar_expr { // generate methods for creating the supported unary/binary expressions // math functions -unary_scalar_expr!(Sqrt, sqrt, "square root of a number"); -unary_scalar_expr!(Sin, sin, "sine"); -unary_scalar_expr!(Cos, cos, "cosine"); -unary_scalar_expr!(Tan, tan, "tangent"); -unary_scalar_expr!(Asin, asin, "inverse sine"); -unary_scalar_expr!(Acos, acos, "inverse cosine"); -unary_scalar_expr!(Atan, atan, "inverse tangent"); -unary_scalar_expr!( +scalar_expr!(Sqrt, sqrt, num, "square root of a number"); +scalar_expr!(Sin, sin, num, "sine"); +scalar_expr!(Cos, cos, num, "cosine"); +scalar_expr!(Tan, tan, num, "tangent"); +scalar_expr!(Asin, asin, num, "inverse sine"); +scalar_expr!(Acos, acos, num, "inverse cosine"); +scalar_expr!(Atan, atan, num, "inverse tangent"); +scalar_expr!( Floor, floor, + num, "nearest integer less than or equal to argument" ); -unary_scalar_expr!( +scalar_expr!( Ceil, ceil, + num, "nearest integer greater than or equal to argument" ); -unary_scalar_expr!(Round, round, "round to nearest integer"); -unary_scalar_expr!(Trunc, trunc, "truncate toward zero"); -unary_scalar_expr!(Abs, abs, "absolute value"); -unary_scalar_expr!(Signum, signum, "sign of the argument (-1, 0, +1) "); -unary_scalar_expr!(Exp, exp, "exponential"); -unary_scalar_expr!(Log2, log2, "base 2 logarithm"); -unary_scalar_expr!(Log10, log10, "base 10 logarithm"); -unary_scalar_expr!(Ln, ln, "natural logarithm"); -scalar_expr!(NullIf, nullif, arg_1, arg_2); -scalar_expr!(Power, power, base, exponent); -scalar_expr!(Atan2, atan2, y, x); +scalar_expr!(Round, round, num, "round to nearest integer"); +scalar_expr!(Trunc, trunc, num, 
"truncate toward zero"); +scalar_expr!(Abs, abs, num, "absolute value"); +scalar_expr!(Signum, signum, num, "sign of the argument (-1, 0, +1) "); +scalar_expr!(Exp, exp, num, "exponential"); +scalar_expr!(Log2, log2, num, "base 2 logarithm"); +scalar_expr!(Log10, log10, num, "base 10 logarithm"); +scalar_expr!(Ln, ln, num, "natural logarithm"); +scalar_expr!(NullIf, nullif, arg_1 arg_2, "returns NULL if value1 equals value2; otherwise it returns value1. This can be used to perform the inverse operation of the COALESCE expression."); +scalar_expr!(Power, power, base exponent, "`base` raised to the power of `exponent`"); +scalar_expr!(Atan2, atan2, y x, "inverse tangent of a division given in the argument"); +scalar_expr!( + ToHex, + to_hex, + num, + "returns the hexadecimal representation of an integer" +); +scalar_expr!(Uuid, uuid, , "Returns uuid v4 as a string value"); // string functions -scalar_expr!(Ascii, ascii, string); -scalar_expr!(BitLength, bit_length, string); -scalar_expr!(CharacterLength, character_length, string); -scalar_expr!(CharacterLength, length, string); -scalar_expr!(Chr, chr, string); -scalar_expr!(Digest, digest, input, algorithm); -scalar_expr!(InitCap, initcap, string); -scalar_expr!(Left, left, string, count); -scalar_expr!(Lower, lower, string); -scalar_expr!(Ltrim, ltrim, string); -scalar_expr!(MD5, md5, string); -scalar_expr!(OctetLength, octet_length, string); -scalar_expr!(Replace, replace, string, from, to); -scalar_expr!(Repeat, repeat, string, count); -scalar_expr!(Reverse, reverse, string); -scalar_expr!(Right, right, string, count); -scalar_expr!(Rtrim, rtrim, string); -scalar_expr!(SHA224, sha224, string); -scalar_expr!(SHA256, sha256, string); -scalar_expr!(SHA384, sha384, string); -scalar_expr!(SHA512, sha512, string); -scalar_expr!(SplitPart, split_part, expr, delimiter, index); -scalar_expr!(StartsWith, starts_with, string, characters); -scalar_expr!(Strpos, strpos, string, substring); -scalar_expr!(Substr, substr, string, 
position); -scalar_expr!(Substr, substring, string, position, count); -scalar_expr!(ToHex, to_hex, string); -scalar_expr!(Translate, translate, string, from, to); -scalar_expr!(Trim, trim, string); -scalar_expr!(Upper, upper, string); +scalar_expr!(Ascii, ascii, chr, "ASCII code value of the character"); +scalar_expr!( + BitLength, + bit_length, + string, + "the number of bits in the `string`" +); +scalar_expr!( + CharacterLength, + character_length, + string, + "the number of characters in the `string`" +); +scalar_expr!( + Chr, + chr, + code_point, + "converts the Unicode code point to a UTF8 character" +); +scalar_expr!(Digest, digest, input algorithm, "compute the binary hash of `input`, using the `algorithm`"); +scalar_expr!(InitCap, initcap, string, "converts the first letter of each word in `string` in uppercase and the remaining characters in lowercase"); +scalar_expr!(Left, left, string n, "returns the first `n` characters in the `string`"); +scalar_expr!(Lower, lower, string, "convert the string to lower case"); +scalar_expr!( + Ltrim, + ltrim, + string, + "removes all characters, spaces by default, from the beginning of a string" +); +scalar_expr!(MD5, md5, string, "returns the MD5 hash of a string"); +scalar_expr!( + OctetLength, + octet_length, + string, + "returns the number of bytes of a string" +); +scalar_expr!(Replace, replace, string from to, "replaces all occurrences of `from` with `to` in the `string`"); +scalar_expr!(Repeat, repeat, string n, "repeats the `string` to `n` times"); +scalar_expr!(Reverse, reverse, string, "reverses the `string`"); +scalar_expr!(Right, right, string n, "returns the last `n` characters in the `string`"); +scalar_expr!( + Rtrim, + rtrim, + string, + "removes all characters, spaces by default, from the end of a string" +); +scalar_expr!(SHA224, sha224, string, "SHA-224 hash"); +scalar_expr!(SHA256, sha256, string, "SHA-256 hash"); +scalar_expr!(SHA384, sha384, string, "SHA-384 hash"); +scalar_expr!(SHA512, sha512, 
string, "SHA-512 hash"); +scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string based on a delimiter and picks out the desired field based on the index. "); +scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string` starts with the `prefix`"); +scalar_expr!(Strpos, strpos, string substring, "finds the position from where the `substring` matches the `string`"); +scalar_expr!(Substr, substr, string position, "substring from the `position` to the end"); +scalar_expr!(Substr, substring, string position length, "substring from the `position` with `length` characters"); +scalar_expr!(Translate, translate, string from to, "replaces the characters in `from` with the counterpart in `to`"); +scalar_expr!( + Trim, + trim, + string, + "removes all characters, space by default from the string" +); +scalar_expr!(Upper, upper, string, "converts the string to upper case"); //use vec as parameter -nary_scalar_expr!(Lpad, lpad); -nary_scalar_expr!(Rpad, rpad); -nary_scalar_expr!(RegexpReplace, regexp_replace); -nary_scalar_expr!(RegexpMatch, regexp_match); -nary_scalar_expr!(Btrim, btrim); +nary_scalar_expr!( + Lpad, + lpad, + "fill up a string to the length by prepending the characters" +); +nary_scalar_expr!( + Rpad, + rpad, + "fill up a string to the length by appending the characters" +); +nary_scalar_expr!( + RegexpReplace, + regexp_replace, + "replace strings that match a regular expression" +); +nary_scalar_expr!( + RegexpMatch, + regexp_match, + "matches a regular expression against a string and returns matched substrings." +); +nary_scalar_expr!( + Btrim, + btrim, + "removes all characters, spaces by default, from both sides of a string" +); +nary_scalar_expr!( + MakeArray, + array, + "returns an array of fixed size with each argument on it." 
+); +nary_scalar_expr!(Coalesce, coalesce, "returns `coalesce(args...)`, which evaluates to the value of the first [Expr] which is not NULL"); //there is a func concat_ws before, so use concat_ws_expr as name.c -nary_scalar_expr!(ConcatWithSeparator, concat_ws_expr); -nary_scalar_expr!(Concat, concat_expr); +nary_scalar_expr!( + ConcatWithSeparator, + concat_ws_expr, + "concatenates several strings, placing a separator between each one" +); +nary_scalar_expr!(Concat, concat_expr, "concatenates several strings"); // date functions -scalar_expr!(DatePart, date_part, part, date); -scalar_expr!(DateTrunc, date_trunc, part, date); -scalar_expr!(DateBin, date_bin, stride, source, origin); -scalar_expr!(ToTimestampMillis, to_timestamp_millis, date); -scalar_expr!(ToTimestampMicros, to_timestamp_micros, date); -scalar_expr!(ToTimestampSeconds, to_timestamp_seconds, date); -scalar_expr!(FromUnixtime, from_unixtime, unixtime); - -unary_scalar_expr!(ArrowTypeof, arrow_typeof, "data type"); - -/// Returns an array of fixed size with each argument on it. -pub fn array(args: Vec) -> Expr { - Expr::ScalarFunction { - fun: built_in_function::BuiltinScalarFunction::MakeArray, - args, - } -} - -/// Returns `coalesce(args...)`, which evaluates to the value of the first [Expr] -/// which is not NULL -pub fn coalesce(args: Vec) -> Expr { - Expr::ScalarFunction { - fun: BuiltinScalarFunction::Coalesce, - args, - } -} - -/// Returns current timestamp in nanoseconds, using the same value for all instances of now() in -/// same statement. 
-pub fn now() -> Expr { - Expr::ScalarFunction { - fun: BuiltinScalarFunction::Now, - args: vec![], - } -} - -/// Returns current UTC date as a [`DataType::Date32`] value -pub fn current_date() -> Expr { - Expr::ScalarFunction { - fun: BuiltinScalarFunction::CurrentDate, - args: vec![], - } -} - -/// Returns uuid v4 as a string value -pub fn uuid() -> Expr { - Expr::ScalarFunction { - fun: BuiltinScalarFunction::Uuid, - args: vec![], - } -} +scalar_expr!(DatePart, date_part, part date, "extracts a subfield from the date"); +scalar_expr!(DateTrunc, date_trunc, part date, "truncates the date to a specified level of precision"); +scalar_expr!(DateBin, date_bin, stride source origin, "coerces an arbitrary timestamp to the start of the nearest specified interval"); +scalar_expr!( + ToTimestampMillis, + to_timestamp_millis, + date, + "converts a string to a `Timestamp(Milliseconds, None)`" +); +scalar_expr!( + ToTimestampMicros, + to_timestamp_micros, + date, + "converts a string to a `Timestamp(Microseconds, None)`" +); +scalar_expr!( + ToTimestampSeconds, + to_timestamp_seconds, + date, + "converts a string to a `Timestamp(Seconds, None)`" +); +scalar_expr!( + FromUnixtime, + from_unixtime, + unixtime, + "returns the unix time in format" +); +scalar_expr!(CurrentDate, current_date, ,"returns current UTC date as a [`DataType::Date32`] value"); +scalar_expr!(Now, now, ,"returns current timestamp in nanoseconds, using the same value for all instances of now() in same statement"); +scalar_expr!(CurrentTime, current_time, , "returns current UTC time as a [`DataType::Time64`] value"); -/// Returns current UTC time as a [`DataType::Time64`] value -pub fn current_time() -> Expr { - Expr::ScalarFunction { - fun: BuiltinScalarFunction::CurrentTime, - args: vec![], - } -} +scalar_expr!(ArrowTypeof, arrow_typeof, val, "data type"); /// Create a CASE WHEN statement with literal WHEN expressions for comparison to the base expression. 
pub fn case(expr: Expr) -> CaseBuilder { @@ -635,7 +667,6 @@ mod test { test_nary_scalar_expr!(Btrim, btrim, string); test_nary_scalar_expr!(Btrim, btrim, string, characters); test_scalar_expr!(CharacterLength, character_length, string); - test_scalar_expr!(CharacterLength, length, string); test_scalar_expr!(Chr, chr, string); test_scalar_expr!(Digest, digest, string, algorithm); test_scalar_expr!(InitCap, initcap, string);