diff --git a/arrow-string/src/like.rs b/arrow-string/src/like.rs index 007c9f60d8ce..477b1f473b76 100644 --- a/arrow-string/src/like.rs +++ b/arrow-string/src/like.rs @@ -588,41 +588,26 @@ fn ilike_scalar_op<'a, F: Fn(bool) -> bool, L: ArrayAccessor>( right: &str, op: F, ) -> Result { - if !right.contains(is_like_pattern) { - // fast path, can use equals - if right.is_ascii() { - // This avoids additional allocations - Ok(BooleanArray::from_unary(left, |item| { + // If not ASCII faster to use case insensitive regex than allocating using to_uppercase + if right.is_ascii() { + if !right.contains(is_like_pattern) { + return Ok(BooleanArray::from_unary(left, |item| { op(item.eq_ignore_ascii_case(right)) - })) - } else { - let right_uppercase = right.to_uppercase(); - Ok(BooleanArray::from_unary(left, |item| { - op(item.to_uppercase() == right_uppercase) - })) - } - } else if right.ends_with('%') - && !right.ends_with("\\%") - && !right[..right.len() - 1].contains(is_like_pattern) - { - // fast path, can use starts_with - if right.is_ascii() { + })); + } else if right.ends_with('%') + && !right.ends_with("\\%") + && !right[..right.len() - 1].contains(is_like_pattern) + { + // fast path, can use starts_with let start_str = &right[..right.len() - 1]; - Ok(BooleanArray::from_unary(left, |item| { + return Ok(BooleanArray::from_unary(left, |item| { let end = item.len().min(start_str.len()); let result = item.is_char_boundary(end) && start_str.eq_ignore_ascii_case(&item[..end]); op(result) - })) - } else { - let start_str = &right[..right.len() - 1].to_uppercase(); - Ok(BooleanArray::from_unary(left, |item| { - op(item.to_uppercase().starts_with(start_str)) - })) - } - } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) { - // fast path, can use ends_with - if right.is_ascii() { + })); + } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) { + // fast path, can use ends_with let ends_str = &right[1..]; Ok(BooleanArray::from_unary(left, |item| { let start = item.len().saturating_sub(ends_str.len()); @@ -630,33 +615,18 @@ fn ilike_scalar_op<'a, F: Fn(bool) -> bool, L: ArrayAccessor>( && ends_str.eq_ignore_ascii_case(&item[start..]); op(result) })) - } else { - let ends_str = &right[1..].to_uppercase(); - Ok(BooleanArray::from_unary(left, |item| { - op(item.to_uppercase().ends_with(ends_str)) - })) } - } else if right.starts_with('%') - && right.ends_with('%') - && !right.ends_with("\\%") - && !right[1..right.len() - 1].contains(is_like_pattern) - { - // fast path, can use contains - let contains = &right[1..right.len() - 1].to_uppercase(); - Ok(BooleanArray::from_unary(left, |item| { - op(item.to_uppercase().contains(contains)) - })) - } else { - let re_pattern = replace_like_wildcards(right)?; - let re = Regex::new(&format!("(?i)^{}$", re_pattern)).map_err(|e| { - ArrowError::ComputeError(format!( - "Unable to build regex from ILIKE pattern: {}", - e - )) - })?; - - Ok(BooleanArray::from_unary(left, |item| op(re.is_match(item)))) } + + let re_pattern = replace_like_wildcards(right)?; + let re = Regex::new(&format!("(?i)^{}$", re_pattern)).map_err(|e| { + ArrowError::ComputeError(format!( + "Unable to build regex from ILIKE pattern: {}", + e + )) + })?; + + Ok(BooleanArray::from_unary(left, |item| op(re.is_match(item)))) } #[inline]