Skip to content

Commit

Permalink
remove Operator::{Like,NotLike,ILike,NotILike} (#4792)
Browse files Browse the repository at this point in the history
* remove Operator::{Like,NotLike,ILike,NotILike}

* fix comments
  • Loading branch information
unconsolable authored Jan 5, 2023
1 parent 24023b5 commit 702e117
Show file tree
Hide file tree
Showing 18 changed files with 684 additions and 187 deletions.
8 changes: 0 additions & 8 deletions datafusion/core/src/physical_plan/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2019,14 +2019,6 @@ mod tests {
col("c1").eq(bool_expr.clone()),
// u32 AND bool
col("c2").and(bool_expr),
// utf8 LIKE u32
col("c1").like(col("c2")),
// utf8 NOT LIKE u32
col("c1").not_like(col("c2")),
// utf8 ILIKE u32
col("c1").ilike(col("c2")),
// utf8 NOT ILIKE u32
col("c1").not_ilike(col("c2")),
];
for case in cases {
let logical_plan = test_csv_scan().await?.project(vec![case.clone()]);
Expand Down
11 changes: 4 additions & 7 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,6 @@ impl BinaryExpr {
match self.op {
Operator::Or => 5,
Operator::And => 10,
Operator::Like | Operator::NotLike | Operator::ILike | Operator::NotILike => {
19
}
Operator::NotEq
| Operator::Eq
| Operator::Lt
Expand Down Expand Up @@ -685,22 +682,22 @@ impl Expr {

/// Return `self LIKE other`
pub fn like(self, other: Expr) -> Expr {
binary_expr(self, Operator::Like, other)
Expr::Like(Like::new(false, Box::new(self), Box::new(other), None))
}

/// Return `self NOT LIKE other`
pub fn not_like(self, other: Expr) -> Expr {
binary_expr(self, Operator::NotLike, other)
Expr::Like(Like::new(true, Box::new(self), Box::new(other), None))
}

/// Return `self ILIKE other`
pub fn ilike(self, other: Expr) -> Expr {
binary_expr(self, Operator::ILike, other)
Expr::ILike(Like::new(false, Box::new(self), Box::new(other), None))
}

/// Return `self NOT ILIKE other`
pub fn not_ilike(self, other: Expr) -> Expr {
binary_expr(self, Operator::NotILike, other)
Expr::ILike(Like::new(true, Box::new(self), Box::new(other), None))
}

/// Return `self AS name` alias expression
Expand Down
22 changes: 1 addition & 21 deletions datafusion/expr/src/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,6 @@ pub enum Operator {
And,
/// Logical OR, like `||`
Or,
/// Matches a wildcard pattern
Like,
/// Does not match a wildcard pattern
NotLike,
/// Matches a wildcard pattern, ignores case
ILike,
/// Does not match a wildcard pattern, ignores case
NotILike,
/// IS DISTINCT FROM
IsDistinctFrom,
/// IS NOT DISTINCT FROM
Expand Down Expand Up @@ -96,10 +88,6 @@ impl Operator {
Operator::LtEq => Some(Operator::Gt),
Operator::Gt => Some(Operator::LtEq),
Operator::GtEq => Some(Operator::Lt),
Operator::Like => Some(Operator::NotLike),
Operator::NotLike => Some(Operator::Like),
Operator::ILike => Some(Operator::NotILike),
Operator::NotILike => Some(Operator::ILike),
Operator::IsDistinctFrom => Some(Operator::IsNotDistinctFrom),
Operator::IsNotDistinctFrom => Some(Operator::IsDistinctFrom),
Operator::Plus
Expand Down Expand Up @@ -133,11 +121,7 @@ impl Operator {
Operator::LtEq => Some(Operator::GtEq),
Operator::Gt => Some(Operator::Lt),
Operator::GtEq => Some(Operator::LtEq),
Operator::Like
| Operator::NotLike
| Operator::ILike
| Operator::NotILike
| Operator::IsDistinctFrom
Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom
| Operator::Plus
| Operator::Minus
Expand Down Expand Up @@ -176,10 +160,6 @@ impl fmt::Display for Operator {
Operator::Modulo => "%",
Operator::And => "AND",
Operator::Or => "OR",
Operator::Like => "LIKE",
Operator::NotLike => "NOT LIKE",
Operator::ILike => "ILIKE",
Operator::NotILike => "NOT ILIKE",
Operator::RegexMatch => "~",
Operator::RegexIMatch => "~*",
Operator::RegexNotMatch => "!~",
Expand Down
38 changes: 5 additions & 33 deletions datafusion/expr/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,6 @@ pub fn binary_operator_data_type(
| Operator::NotEq
| Operator::And
| Operator::Or
| Operator::Like
| Operator::NotLike
| Operator::ILike
| Operator::NotILike
| Operator::Lt
| Operator::Gt
| Operator::GtEq
Expand Down Expand Up @@ -117,10 +113,6 @@ pub fn coerce_types(
| Operator::Gt
| Operator::GtEq
| Operator::LtEq => comparison_coercion(lhs_type, rhs_type),
// "like" operators operate on strings and always return a boolean
Operator::Like | Operator::NotLike | Operator::ILike | Operator::NotILike => {
like_coercion(lhs_type, rhs_type)
}
Operator::Plus | Operator::Minus
if is_date(lhs_type) || is_timestamp(lhs_type) =>
{
Expand Down Expand Up @@ -525,7 +517,7 @@ fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>

/// coercion rules for like operations.
/// This is a union of string coercion rules and dictionary coercion rules
fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
string_coercion(lhs_type, rhs_type)
.or_else(|| dictionary_coercion(lhs_type, rhs_type, false))
.or_else(|| null_coercion(lhs_type, rhs_type))
Expand Down Expand Up @@ -879,30 +871,10 @@ mod tests {

#[test]
fn test_type_coercion() -> Result<()> {
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Utf8,
Operator::Like,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Utf8,
Operator::NotLike,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Utf8,
Operator::ILike,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Utf8,
Operator::NotILike,
DataType::Utf8
);
// test like coercion rule
let result = like_coercion(&DataType::Utf8, &DataType::Utf8);
assert_eq!(result, Some(DataType::Utf8));

test_coercion_binary_rule!(
DataType::Utf8,
DataType::Date32,
Expand Down
16 changes: 15 additions & 1 deletion datafusion/optimizer/src/simplify_expressions/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use datafusion_common::{DataFusionError, Result, ScalarValue};
use datafusion_expr::{
expr::{Between, BinaryExpr},
expr_fn::{and, concat_ws, or},
lit, BuiltinScalarFunction, Expr, Operator,
lit, BuiltinScalarFunction, Expr, Like, Operator,
};

pub static POWS_OF_TEN: [i128; 38] = [
Expand Down Expand Up @@ -232,6 +232,20 @@ pub fn negate_clause(expr: Expr) -> Expr {
between.low,
between.high,
)),
// not (A like B) ===> A not like B
Expr::Like(like) => Expr::Like(Like::new(
!like.negated,
like.expr,
like.pattern,
like.escape_char,
)),
// not (A ilike B) ===> A not ilike B
Expr::ILike(like) => Expr::ILike(Like::new(
!like.negated,
like.expr,
like.pattern,
like.escape_char,
)),
// use not clause
_ => Expr::Not(Box::new(expr)),
}
Expand Down
22 changes: 16 additions & 6 deletions datafusion/optimizer/src/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ use datafusion_common::{
use datafusion_expr::expr::{self, Between, BinaryExpr, Case, Like, WindowFunction};
use datafusion_expr::expr_rewriter::{ExprRewriter, RewriteRecursion};
use datafusion_expr::logical_plan::Subquery;
use datafusion_expr::type_coercion::binary::{coerce_types, comparison_coercion};
use datafusion_expr::type_coercion::binary::{
coerce_types, comparison_coercion, like_coercion,
};
use datafusion_expr::type_coercion::functions::data_types;
use datafusion_expr::type_coercion::other::{
get_coerce_type_for_case_when, get_coerce_type_for_list,
Expand Down Expand Up @@ -175,8 +177,11 @@ impl ExprRewriter for TypeCoercionRewriter {
}) => {
let left_type = expr.get_type(&self.schema)?;
let right_type = pattern.get_type(&self.schema)?;
let coerced_type =
coerce_types(&left_type, &Operator::Like, &right_type)?;
let coerced_type = like_coercion(&left_type, &right_type).ok_or_else(|| {
DataFusionError::Plan(format!(
"There isn't a common type to coerce {left_type} and {right_type} in LIKE expression"
))
})?;
let expr = Box::new(expr.cast_to(&coerced_type, &self.schema)?);
let pattern = Box::new(pattern.cast_to(&coerced_type, &self.schema)?);
let expr = Expr::Like(Like::new(negated, expr, pattern, escape_char));
Expand All @@ -190,8 +195,11 @@ impl ExprRewriter for TypeCoercionRewriter {
}) => {
let left_type = expr.get_type(&self.schema)?;
let right_type = pattern.get_type(&self.schema)?;
let coerced_type =
coerce_types(&left_type, &Operator::ILike, &right_type)?;
let coerced_type = like_coercion(&left_type, &right_type).ok_or_else(|| {
DataFusionError::Plan(format!(
"There isn't a common type to coerce {left_type} and {right_type} in ILIKE expression"
))
})?;
let expr = Box::new(expr.cast_to(&coerced_type, &self.schema)?);
let pattern = Box::new(pattern.cast_to(&coerced_type, &self.schema)?);
let expr = Expr::ILike(Like::new(negated, expr, pattern, escape_char));
Expand Down Expand Up @@ -932,7 +940,9 @@ mod test {
let plan = LogicalPlan::Projection(Projection::try_new(vec![like_expr], empty)?);
let err = assert_optimized_plan_eq(&plan, expected);
assert!(err.is_err());
assert!(err.unwrap_err().to_string().contains("'Int64 LIKE Utf8' can't be evaluated because there isn't a common type to coerce the types to"));
assert!(err.unwrap_err().to_string().contains(
"There isn't a common type to coerce Int64 and Utf8 in LIKE expression"
));
Ok(())
}

Expand Down
86 changes: 2 additions & 84 deletions datafusion/physical-expr/src/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ use arrow::compute::kernels::arithmetic::{
multiply_scalar, subtract, subtract_scalar,
};
use arrow::compute::kernels::boolean::{and_kleene, not, or_kleene};
use arrow::compute::kernels::comparison::regexp_is_match_utf8;
use arrow::compute::kernels::comparison::regexp_is_match_utf8_scalar;
use arrow::compute::kernels::comparison::{
eq_dyn_binary_scalar, gt_dyn_binary_scalar, gt_eq_dyn_binary_scalar,
lt_dyn_binary_scalar, lt_eq_dyn_binary_scalar, neq_dyn_binary_scalar,
Expand All @@ -47,13 +49,6 @@ use arrow::compute::kernels::comparison::{
use arrow::compute::kernels::comparison::{
eq_scalar, gt_eq_scalar, gt_scalar, lt_eq_scalar, lt_scalar, neq_scalar,
};
use arrow::compute::kernels::comparison::{
ilike_utf8, like_utf8, nilike_utf8, nlike_utf8, regexp_is_match_utf8,
};
use arrow::compute::kernels::comparison::{
ilike_utf8_scalar, like_utf8_scalar, nilike_utf8_scalar, nlike_utf8_scalar,
regexp_is_match_utf8_scalar,
};

use adapter::{eq_dyn, gt_dyn, gt_eq_dyn, lt_dyn, lt_eq_dyn, neq_dyn};
use arrow::compute::kernels::concat_elements::concat_elements_utf8;
Expand Down Expand Up @@ -344,19 +339,6 @@ macro_rules! compute_op {
}};
}

macro_rules! binary_string_array_op_scalar {
($LEFT:expr, $RIGHT:expr, $OP:ident, $OP_TYPE:expr) => {{
let result: Result<Arc<dyn Array>> = match $LEFT.data_type() {
DataType::Utf8 => compute_utf8_op_scalar!($LEFT, $RIGHT, $OP, StringArray, $OP_TYPE),
other => Err(DataFusionError::Internal(format!(
"Data type {:?} not supported for scalar operation '{}' on string array",
other, stringify!($OP)
))),
};
Some(result)
}};
}

macro_rules! binary_string_array_op {
($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
match $LEFT.data_type() {
Expand Down Expand Up @@ -941,18 +923,6 @@ impl BinaryExpr {
Operator::NotEq => {
binary_array_op_dyn_scalar!(array, scalar.clone(), neq, bool_type)
}
Operator::Like => {
binary_string_array_op_scalar!(array, scalar.clone(), like, bool_type)
}
Operator::NotLike => {
binary_string_array_op_scalar!(array, scalar.clone(), nlike, bool_type)
}
Operator::ILike => {
binary_string_array_op_scalar!(array, scalar.clone(), ilike, bool_type)
}
Operator::NotILike => {
binary_string_array_op_scalar!(array, scalar.clone(), nilike, bool_type)
}
Operator::Plus => {
binary_primitive_array_op_scalar!(array, scalar.clone(), add)
}
Expand Down Expand Up @@ -1053,10 +1023,6 @@ impl BinaryExpr {
right_data_type: &DataType,
) -> Result<ArrayRef> {
match &self.op {
Operator::Like => binary_string_array_op!(left, right, like),
Operator::NotLike => binary_string_array_op!(left, right, nlike),
Operator::ILike => binary_string_array_op!(left, right, ilike),
Operator::NotILike => binary_string_array_op!(left, right, nilike),
Operator::Lt => lt_dyn(&left, &right),
Operator::LtEq => lt_eq_dyn(&left, &right),
Operator::Gt => gt_dyn(&left, &right),
Expand Down Expand Up @@ -1347,54 +1313,6 @@ mod tests {
DataType::Float32,
vec![2f32],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["hello world", "world"],
StringArray,
DataType::Utf8,
vec!["%hello%", "%hello%"],
Operator::Like,
BooleanArray,
DataType::Boolean,
vec![true, false],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["hello world", "world"],
StringArray,
DataType::Utf8,
vec!["%hello%", "%hello%"],
Operator::NotLike,
BooleanArray,
DataType::Boolean,
vec![false, true],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["hEllo world", "world"],
StringArray,
DataType::Utf8,
vec!["%helLo%", "%helLo%"],
Operator::ILike,
BooleanArray,
DataType::Boolean,
vec![true, false],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["hEllo world", "world"],
StringArray,
DataType::Utf8,
vec!["%helLo%", "%helLo%"],
Operator::NotILike,
BooleanArray,
DataType::Boolean,
vec![false, true],
);
test_coercion!(
StringArray,
DataType::Utf8,
Expand Down
Loading

0 comments on commit 702e117

Please sign in to comment.