Skip to content

Commit

Permalink
Allow simplification even when nullable (#12746)
Browse files Browse the repository at this point in the history
The nullable requirement seem to have been added in #1401 but as far as
I can tell they are not needed for these 2 cases.

I think this can be shown using this truth table: (generated using
datafusion-cli without this patch)
```
> CREATE TABLE t (v BOOLEAN) as values (true), (false), (NULL);
> select t.v, t2.v, t.v AND (t.v OR t2.v), t.v OR (t.v AND t2.v) from t cross join t as t2;
+-------+-------+---------------------+---------------------+
| v     | v     | t.v AND t.v OR t2.v | t.v OR t.v AND t2.v |
+-------+-------+---------------------+---------------------+
| true  | true  | true                | true                |
| true  | false | true                | true                |
| true  |       | true                | true                |
| false | true  | false               | false               |
| false | false | false               | false               |
| false |       | false               | false               |
|       | true  |                     |                     |
|       | false |                     |                     |
|       |       |                     |                     |
+-------+-------+---------------------+---------------------+
```

And it seems Spark applies both of these and DuckDB applies only the
first one.
  • Loading branch information
eejbyfeldt authored Oct 6, 2024
1 parent 18f9201 commit 9bf0630
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 35 deletions.
40 changes: 13 additions & 27 deletions datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -838,22 +838,18 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> {
op: Or,
right,
}) if expr_contains(&right, &left, Or) => Transformed::yes(*right),
// A OR (A AND B) --> A (if B not null)
// A OR (A AND B) --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: Or,
right,
}) if !info.nullable(&right)? && is_op_with(And, &right, &left) => {
Transformed::yes(*left)
}
// (A AND B) OR A --> A (if B not null)
}) if is_op_with(And, &right, &left) => Transformed::yes(*left),
// (A AND B) OR A --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: Or,
right,
}) if !info.nullable(&left)? && is_op_with(And, &left, &right) => {
Transformed::yes(*right)
}
}) if is_op_with(And, &left, &right) => Transformed::yes(*right),

//
// Rules for AND
Expand Down Expand Up @@ -911,22 +907,18 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> {
op: And,
right,
}) if expr_contains(&right, &left, And) => Transformed::yes(*right),
// A AND (A OR B) --> A (if B not null)
// A AND (A OR B) --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: And,
right,
}) if !info.nullable(&right)? && is_op_with(Or, &right, &left) => {
Transformed::yes(*left)
}
// (A OR B) AND A --> A (if B not null)
}) if is_op_with(Or, &right, &left) => Transformed::yes(*left),
// (A OR B) AND A --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: And,
right,
}) if !info.nullable(&left)? && is_op_with(Or, &left, &right) => {
Transformed::yes(*right)
}
}) if is_op_with(Or, &left, &right) => Transformed::yes(*right),

//
// Rules for Multiply
Expand Down Expand Up @@ -2609,15 +2601,11 @@ mod tests {
// (c2 > 5) OR ((c1 < 6) AND (c2 > 5))
let expr = or(l.clone(), r.clone());

// no rewrites if c1 can be null
let expected = expr.clone();
let expected = l.clone();
assert_eq!(simplify(expr), expected);

// ((c1 < 6) AND (c2 > 5)) OR (c2 > 5)
let expr = or(l, r);

// no rewrites if c1 can be null
let expected = expr.clone();
let expr = or(r, l);
assert_eq!(simplify(expr), expected);
}

Expand Down Expand Up @@ -2648,13 +2636,11 @@ mod tests {
// (c2 > 5) AND ((c1 < 6) OR (c2 > 5)) --> c2 > 5
let expr = and(l.clone(), r.clone());

// no rewrites if c1 can be null
let expected = expr.clone();
let expected = l.clone();
assert_eq!(simplify(expr), expected);

// ((c1 < 6) OR (c2 > 5)) AND (c2 > 5) --> c2 > 5
let expr = and(l, r);
let expected = expr.clone();
let expr = and(r, l);
assert_eq!(simplify(expr), expected);
}

Expand Down Expand Up @@ -3223,7 +3209,7 @@ mod tests {
)],
Some(Box::new(col("c2").eq(lit(true)))),
)))),
col("c2").or(col("c2").not().and(col("c2"))) // #1716
col("c2")
);

// CASE WHEN ISNULL(c2) THEN true ELSE c2
Expand Down
16 changes: 8 additions & 8 deletions datafusion/sqllogictest/test_files/cse.slt
Original file line number Diff line number Diff line change
Expand Up @@ -179,20 +179,20 @@ physical_plan
# Surely only once but also conditionally evaluated expressions
query TT
EXPLAIN SELECT
(a = 1 OR random() = 0) AND a = 1 AS c1,
(a = 2 AND random() = 0) OR a = 2 AS c2,
(a = 1 OR random() = 0) AND a = 2 AS c1,
(a = 2 AND random() = 0) OR a = 1 AS c2,
CASE WHEN a + 3 = 0 THEN a + 3 ELSE 0 END AS c3,
CASE WHEN a + 4 = 0 THEN 0 WHEN a + 4 THEN 0 ELSE 0 END AS c4,
CASE WHEN a + 5 = 0 THEN 0 WHEN random() = 0 THEN a + 5 ELSE 0 END AS c5,
CASE WHEN a + 6 = 0 THEN 0 ELSE a + 6 END AS c6
FROM t1
----
logical_plan
01)Projection: (__common_expr_1 OR random() = Float64(0)) AND __common_expr_1 AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_2 AS c2, CASE WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 ELSE Float64(0) END AS c3, CASE WHEN __common_expr_4 = Float64(0) THEN Int64(0) WHEN CAST(__common_expr_4 AS Boolean) THEN Int64(0) ELSE Int64(0) END AS c4, CASE WHEN __common_expr_5 = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN __common_expr_5 ELSE Float64(0) END AS c5, CASE WHEN __common_expr_6 = Float64(0) THEN Float64(0) ELSE __common_expr_6 END AS c6
01)Projection: (__common_expr_1 OR random() = Float64(0)) AND __common_expr_2 AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_1 AS c2, CASE WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 ELSE Float64(0) END AS c3, CASE WHEN __common_expr_4 = Float64(0) THEN Int64(0) WHEN CAST(__common_expr_4 AS Boolean) THEN Int64(0) ELSE Int64(0) END AS c4, CASE WHEN __common_expr_5 = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN __common_expr_5 ELSE Float64(0) END AS c5, CASE WHEN __common_expr_6 = Float64(0) THEN Float64(0) ELSE __common_expr_6 END AS c6
02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(4) AS __common_expr_4, t1.a + Float64(5) AS __common_expr_5, t1.a + Float64(6) AS __common_expr_6
03)----TableScan: t1 projection=[a]
physical_plan
01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND __common_expr_1@0 as c1, __common_expr_2@1 AND random() = 0 OR __common_expr_2@1 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 ELSE 0 END as c3, CASE WHEN __common_expr_4@3 = 0 THEN 0 WHEN CAST(__common_expr_4@3 AS Boolean) THEN 0 ELSE 0 END as c4, CASE WHEN __common_expr_5@4 = 0 THEN 0 WHEN random() = 0 THEN __common_expr_5@4 ELSE 0 END as c5, CASE WHEN __common_expr_6@5 = 0 THEN 0 ELSE __common_expr_6@5 END as c6]
01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND __common_expr_2@1 as c1, __common_expr_2@1 AND random() = 0 OR __common_expr_1@0 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 ELSE 0 END as c3, CASE WHEN __common_expr_4@3 = 0 THEN 0 WHEN CAST(__common_expr_4@3 AS Boolean) THEN 0 ELSE 0 END as c4, CASE WHEN __common_expr_5@4 = 0 THEN 0 WHEN random() = 0 THEN __common_expr_5@4 ELSE 0 END as c5, CASE WHEN __common_expr_6@5 = 0 THEN 0 ELSE __common_expr_6@5 END as c6]
02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 4 as __common_expr_4, a@0 + 5 as __common_expr_5, a@0 + 6 as __common_expr_6]
03)----MemoryExec: partitions=1, partition_sizes=[0]

Expand All @@ -217,17 +217,17 @@ physical_plan
# Only conditionally evaluated expressions
query TT
EXPLAIN SELECT
(random() = 0 OR a = 1) AND a = 1 AS c1,
(random() = 0 AND a = 2) OR a = 2 AS c2,
(random() = 0 OR a = 1) AND a = 2 AS c1,
(random() = 0 AND a = 2) OR a = 1 AS c2,
CASE WHEN random() = 0 THEN a + 3 ELSE a + 3 END AS c3,
CASE WHEN random() = 0 THEN 0 WHEN a + 4 = 0 THEN a + 4 ELSE 0 END AS c4,
CASE WHEN random() = 0 THEN 0 WHEN a + 5 = 0 THEN 0 ELSE a + 5 END AS c5,
CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a + 6 ELSE a + 6 END AS c6
FROM t1
----
logical_plan
01)Projection: (random() = Float64(0) OR t1.a = Float64(1)) AND t1.a = Float64(1) AS c1, random() = Float64(0) AND t1.a = Float64(2) OR t1.a = Float64(2) AS c2, CASE WHEN random() = Float64(0) THEN t1.a + Float64(3) ELSE t1.a + Float64(3) END AS c3, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(4) = Float64(0) THEN t1.a + Float64(4) ELSE Float64(0) END AS c4, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(5) = Float64(0) THEN Float64(0) ELSE t1.a + Float64(5) END AS c5, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN t1.a + Float64(6) ELSE t1.a + Float64(6) END AS c6
01)Projection: (random() = Float64(0) OR t1.a = Float64(1)) AND t1.a = Float64(2) AS c1, random() = Float64(0) AND t1.a = Float64(2) OR t1.a = Float64(1) AS c2, CASE WHEN random() = Float64(0) THEN t1.a + Float64(3) ELSE t1.a + Float64(3) END AS c3, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(4) = Float64(0) THEN t1.a + Float64(4) ELSE Float64(0) END AS c4, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(5) = Float64(0) THEN Float64(0) ELSE t1.a + Float64(5) END AS c5, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN t1.a + Float64(6) ELSE t1.a + Float64(6) END AS c6
02)--TableScan: t1 projection=[a]
physical_plan
01)ProjectionExec: expr=[(random() = 0 OR a@0 = 1) AND a@0 = 1 as c1, random() = 0 AND a@0 = 2 OR a@0 = 2 as c2, CASE WHEN random() = 0 THEN a@0 + 3 ELSE a@0 + 3 END as c3, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 4 = 0 THEN a@0 + 4 ELSE 0 END as c4, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 5 = 0 THEN 0 ELSE a@0 + 5 END as c5, CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a@0 + 6 ELSE a@0 + 6 END as c6]
01)ProjectionExec: expr=[(random() = 0 OR a@0 = 1) AND a@0 = 2 as c1, random() = 0 AND a@0 = 2 OR a@0 = 1 as c2, CASE WHEN random() = 0 THEN a@0 + 3 ELSE a@0 + 3 END as c3, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 4 = 0 THEN a@0 + 4 ELSE 0 END as c4, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 5 = 0 THEN 0 ELSE a@0 + 5 END as c5, CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a@0 + 6 ELSE a@0 + 6 END as c6]
02)--MemoryExec: partitions=1, partition_sizes=[0]

0 comments on commit 9bf0630

Please sign in to comment.