From f2de2c4cc2009d9b6965f7951fd543e1974fcd2c Mon Sep 17 00:00:00 2001 From: Jiashen Cao Date: Sun, 8 Dec 2024 02:48:57 -0500 Subject: [PATCH] Refactor regexplike signature (#13394) * update * update * update * clean up errors * fix flags types * fix failed example --- datafusion-examples/examples/regexp.rs | 2 +- datafusion/functions/src/regex/regexplike.rs | 50 +++++++++++-------- .../test_files/string/string_view.slt | 2 +- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/datafusion-examples/examples/regexp.rs b/datafusion-examples/examples/regexp.rs index 02e74bae22af..5419efd2faea 100644 --- a/datafusion-examples/examples/regexp.rs +++ b/datafusion-examples/examples/regexp.rs @@ -148,7 +148,7 @@ async fn main() -> Result<()> { // invalid flags will result in an error let result = ctx - .sql(r"select regexp_like('\b4(?!000)\d\d\d\b', 4010, 'g')") + .sql(r"select regexp_like('\b4(?!000)\d\d\d\b', '4010', 'g')") .await? .collect() .await; diff --git a/datafusion/functions/src/regex/regexplike.rs b/datafusion/functions/src/regex/regexplike.rs index 49e57776c7b8..1c826b12ef8f 100644 --- a/datafusion/functions/src/regex/regexplike.rs +++ b/datafusion/functions/src/regex/regexplike.rs @@ -81,26 +81,7 @@ impl RegexpLikeFunc { pub fn new() -> Self { Self { signature: Signature::one_of( - vec![ - TypeSignature::Exact(vec![Utf8View, Utf8]), - TypeSignature::Exact(vec![Utf8View, Utf8View]), - TypeSignature::Exact(vec![Utf8View, LargeUtf8]), - TypeSignature::Exact(vec![Utf8, Utf8]), - TypeSignature::Exact(vec![Utf8, Utf8View]), - TypeSignature::Exact(vec![Utf8, LargeUtf8]), - TypeSignature::Exact(vec![LargeUtf8, Utf8]), - TypeSignature::Exact(vec![LargeUtf8, Utf8View]), - TypeSignature::Exact(vec![LargeUtf8, LargeUtf8]), - TypeSignature::Exact(vec![Utf8View, Utf8, Utf8]), - TypeSignature::Exact(vec![Utf8View, Utf8View, Utf8]), - TypeSignature::Exact(vec![Utf8View, LargeUtf8, Utf8]), - TypeSignature::Exact(vec![Utf8, Utf8, Utf8]), - 
TypeSignature::Exact(vec![Utf8, Utf8View, Utf8]), - TypeSignature::Exact(vec![Utf8, LargeUtf8, Utf8]), - TypeSignature::Exact(vec![LargeUtf8, Utf8, Utf8]), - TypeSignature::Exact(vec![LargeUtf8, Utf8View, Utf8]), - TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Utf8]), - ], + vec![TypeSignature::String(2), TypeSignature::String(3)], Volatility::Immutable, ), } @@ -211,7 +192,34 @@ pub fn regexp_like(args: &[ArrayRef]) -> Result<ArrayRef> { match args.len() { 2 => handle_regexp_like(&args[0], &args[1], None), 3 => { - let flags = args[2].as_string::<i32>(); + let flags = match args[2].data_type() { + Utf8 => args[2].as_string::<i32>(), + LargeUtf8 => { + let large_string_array = args[2].as_string::<i64>(); + let string_vec: Vec<Option<&str>> = (0..large_string_array.len()).map(|i| { + if large_string_array.is_null(i) { + None + } else { + Some(large_string_array.value(i)) + } + }) + .collect(); + + &GenericStringArray::<i32>::from(string_vec) + }, + _ => { + let string_view_array = args[2].as_string_view(); + let string_vec: Vec<Option<String>> = (0..string_view_array.len()).map(|i| { + if string_view_array.is_null(i) { + None + } else { + Some(string_view_array.value(i).to_string()) + } + }) + .collect(); + &GenericStringArray::<i32>::from(string_vec) + }, + }; if flags.iter().any(|s| s == Some("g")) { return plan_err!("regexp_like() does not support the \"global\" option"); diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt index ebabaf7655ff..c37dd1ed3b4f 100644 --- a/datafusion/sqllogictest/test_files/string/string_view.slt +++ b/datafusion/sqllogictest/test_files/string/string_view.slt @@ -731,7 +731,7 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: regexp_like(test.column1_utf8view, Utf8("^https?://(?:www\.)?([^/]+)/.*$")) AS k +01)Projection: regexp_like(test.column1_utf8view, Utf8View("^https?://(?:www\.)?([^/]+)/.*$")) AS k 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for REGEXP_MATCH