From 608ce37f76627b7692fe9d5ca73df8efe88d75b4 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 20 Oct 2023 09:23:57 -0400 Subject: [PATCH 1/6] Support Interval analysis for OR expressions --- .../physical-expr/src/expressions/binary.rs | 4 +- .../physical-expr/src/intervals/cp_solver.rs | 9 ++- .../src/intervals/interval_aritmetic.rs | 60 +++++++++++++++++++ 3 files changed, 70 insertions(+), 3 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 63fa98011fdd..6e67189bdabd 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -344,8 +344,8 @@ impl PhysicalExpr for BinaryExpr { let right_interval = children[1]; let (left, right) = if self.op.is_logic_operator() { - // TODO: Currently, this implementation only supports the AND operator - // and does not require any further propagation. In the future, + // TODO: Currently, this implementation only supports the AND/OR operator + // and does not require any further propagation (??). In the future, // upon adding support for additional logical operators, this // method will require modification to support propagating the // changes accordingly. diff --git a/datafusion/physical-expr/src/intervals/cp_solver.rs b/datafusion/physical-expr/src/intervals/cp_solver.rs index e7515341c52c..61fcb6085d3d 100644 --- a/datafusion/physical-expr/src/intervals/cp_solver.rs +++ b/datafusion/physical-expr/src/intervals/cp_solver.rs @@ -498,15 +498,22 @@ impl ExprIntervalGraph { pub fn evaluate_bounds(&mut self) -> Result<&Interval> { let mut dfs = DfsPostOrder::new(&self.graph, self.root); while let Some(node) = dfs.next(&self.graph) { + println!("Considering node: {}", self.graph[node].expr); let neighbors = self.graph.neighbors_directed(node, Outgoing); let mut children_intervals = neighbors - .map(|child| self.graph[child].interval()) + .map(|child| { + println!(" child: {}", self.graph[child].expr); + self.graph[child].interval() + }) .collect::>(); + println!(" children_intervals: {:?}", children_intervals); // If the current expression is a leaf, its interval should already // be set externally, just continue with the evaluation procedure: if !children_intervals.is_empty() { // Reverse to align with [PhysicalExpr]'s children: children_intervals.reverse(); + println!(" evaluating bounds with: {:?}", children_intervals); + self.graph[node].interval = self.graph[node].expr.evaluate_bounds(&children_intervals)?; } diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index 1ea9b2d9aee6..42a481151653 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -432,6 +432,33 @@ impl Interval { } } + /// Compute the logical disjunction of this (boolean) interval with the given boolean interval. + pub(crate) fn or>(&self, other: T) -> Result { + let rhs = other.borrow(); + match ( + &self.lower.value, + &self.upper.value, + &rhs.lower.value, + &rhs.upper.value, + ) { + ( + ScalarValue::Boolean(Some(self_lower)), + ScalarValue::Boolean(Some(self_upper)), + ScalarValue::Boolean(Some(other_lower)), + ScalarValue::Boolean(Some(other_upper)), + ) => { + let lower = *self_lower || *other_lower; + let upper = *self_upper || *other_upper; + + Ok(Interval { + lower: IntervalBound::new(ScalarValue::Boolean(Some(lower)), false), + upper: IntervalBound::new(ScalarValue::Boolean(Some(upper)), false), + }) + } + _ => internal_err!("Incompatible types for logical disjunction"), + } + } + /// Compute the logical negation of this (boolean) interval. pub(crate) fn not(&self) -> Result { if !matches!(self.get_datatype()?, DataType::Boolean) { @@ -738,6 +765,7 @@ pub fn apply_operator(op: &Operator, lhs: &Interval, rhs: &Interval) -> Result Ok(lhs.lt(rhs)), Operator::LtEq => Ok(lhs.lt_eq(rhs)), Operator::And => lhs.and(rhs), + Operator::Or => lhs.or(rhs), Operator::Plus => lhs.add(rhs), Operator::Minus => lhs.sub(rhs), _ => Ok(Interval::default()), @@ -1053,9 +1081,12 @@ mod tests { fn open_open(lower: Option, upper: Option) -> Interval where + T: std::fmt::Debug, ScalarValue: From>, { + println!("Creating open_open from {:?} to {:?}", lower, upper); Interval::make(lower, upper, (true, true)) + } fn open_closed(lower: Option, upper: Option) -> Interval @@ -1221,6 +1252,8 @@ mod tests { #[test] fn and_test() -> Result<()> { + // check that (lhs AND rhs) is equal to result + // (lhs_lower, lhs_upper, rhs_lower, rhs_upper, lower_result, upper_result) let cases = vec![ (false, true, false, false, false, false), (false, false, false, true, false, false), @@ -1240,6 +1273,33 @@ mod tests { Ok(()) } + #[test] + fn or_test() -> Result<()> { + // check that (lhs OR rhs) is equal to result + // (lhs_lower, lhs_upper, rhs_lower, rhs_upper, lower_result, upper_result) + let cases = vec![ + (false, true, false, false, true, false), + (false, false, false, true, false, false), + (false, true, false, true, false, true), + (false, true, true, true, false, true), + (false, false, false, false, false, false), + (true, true, true, true, true, true), + ]; + + for case in cases { + println!("case: {:?}", case); + let lhs = open_open(Some(case.0), Some(case.1)); + let rhs = open_open(Some(case.2), Some(case.3)); + let expected = open_open(Some(case.4), Some(case.5)); + println!("lhs: {:?}, rhs: {:?}, expected: {:?}", lhs, rhs, expected); + assert_eq!( + lhs.or(rhs)?, + expected, + ); + } + Ok(()) + } + #[test] fn add_test() -> Result<()> { let cases = vec![ From 4591d82ccba6f52db535a377486362a35bfd84fb Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 20 Oct 2023 09:37:41 -0400 Subject: [PATCH 2/6] cleanup --- datafusion/physical-expr/src/intervals/cp_solver.rs | 9 +-------- .../physical-expr/src/intervals/interval_aritmetic.rs | 10 +++------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/datafusion/physical-expr/src/intervals/cp_solver.rs b/datafusion/physical-expr/src/intervals/cp_solver.rs index 61fcb6085d3d..e7515341c52c 100644 --- a/datafusion/physical-expr/src/intervals/cp_solver.rs +++ b/datafusion/physical-expr/src/intervals/cp_solver.rs @@ -498,22 +498,15 @@ impl ExprIntervalGraph { pub fn evaluate_bounds(&mut self) -> Result<&Interval> { let mut dfs = DfsPostOrder::new(&self.graph, self.root); while let Some(node) = dfs.next(&self.graph) { - println!("Considering node: {}", self.graph[node].expr); let neighbors = self.graph.neighbors_directed(node, Outgoing); let mut children_intervals = neighbors - .map(|child| { - println!(" child: {}", self.graph[child].expr); - self.graph[child].interval() - }) + .map(|child| self.graph[child].interval()) .collect::>(); - println!(" children_intervals: {:?}", children_intervals); // If the current expression is a leaf, its interval should already // be set externally, just continue with the evaluation procedure: if !children_intervals.is_empty() { // Reverse to align with [PhysicalExpr]'s children: children_intervals.reverse(); - println!(" evaluating bounds with: {:?}", children_intervals); - self.graph[node].interval = self.graph[node].expr.evaluate_bounds(&children_intervals)?; } diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index 42a481151653..0396b29a919c 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -1081,12 +1081,9 @@ mod tests { fn open_open(lower: Option, upper: Option) -> Interval where - T: std::fmt::Debug, ScalarValue: From>, { - println!("Creating open_open from {:?} to {:?}", lower, upper); Interval::make(lower, upper, (true, true)) - } fn open_closed(lower: Option, upper: Option) -> Interval @@ -1286,16 +1283,15 @@ mod tests { (true, true, true, true, true, true), ]; + // Something is not right here -- lhs, rhs and expected are always the same Interval + // for case in cases { println!("case: {:?}", case); let lhs = open_open(Some(case.0), Some(case.1)); let rhs = open_open(Some(case.2), Some(case.3)); let expected = open_open(Some(case.4), Some(case.5)); println!("lhs: {:?}, rhs: {:?}, expected: {:?}", lhs, rhs, expected); - assert_eq!( - lhs.or(rhs)?, - expected, - ); + assert_eq!(lhs.or(rhs)?, expected,); } Ok(()) } From b7e66a3a22b119b1e9085a103d260f78d59082e9 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 20 Oct 2023 09:43:20 -0400 Subject: [PATCH 3/6] update comment --- datafusion/physical-expr/src/intervals/interval_aritmetic.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index 0396b29a919c..b0d14d63389c 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -1283,8 +1283,9 @@ mod tests { (true, true, true, true, true, true), ]; - // Something is not right here -- lhs, rhs and expected are always the same Interval - // + // Something is not right here -- lhs, rhs and expected are always the + // same Interval (so the test is useless as it always passes) + // Interval { lower: IntervalBound { value: Boolean(true), open: false }, upper: IntervalBound { value: Boolean(false), open: false } } for case in cases { println!("case: {:?}", case); let lhs = open_open(Some(case.0), Some(case.1)); From 765b6682751f85022764f0b5f78b4e08ee4bc4e6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 20 Oct 2023 11:52:54 -0400 Subject: [PATCH 4/6] Fix up --- .../src/intervals/interval_aritmetic.rs | 76 +++++++++++-------- 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index b0d14d63389c..56815a12e31b 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -435,27 +435,21 @@ impl Interval { /// Compute the logical disjunction of this (boolean) interval with the given boolean interval. pub(crate) fn or>(&self, other: T) -> Result { let rhs = other.borrow(); - match ( - &self.lower.value, - &self.upper.value, - &rhs.lower.value, - &rhs.upper.value, - ) { - ( - ScalarValue::Boolean(Some(self_lower)), - ScalarValue::Boolean(Some(self_upper)), - ScalarValue::Boolean(Some(other_lower)), - ScalarValue::Boolean(Some(other_upper)), - ) => { - let lower = *self_lower || *other_lower; - let upper = *self_upper || *other_upper; + if self.get_datatype()? != DataType::Boolean + && rhs.get_datatype()? != DataType::Boolean + { + return internal_err!( + "Incompatible types for logical disjunction: {self:?}, {rhs:?}" + ); + } - Ok(Interval { - lower: IntervalBound::new(ScalarValue::Boolean(Some(lower)), false), - upper: IntervalBound::new(ScalarValue::Boolean(Some(upper)), false), - }) - } - _ => internal_err!("Incompatible types for logical disjunction"), + // if either bound is always true, the result is always true. + if self.is_certainly_true() || rhs.is_certainly_true() { + Ok(Interval::CERTAINLY_TRUE) + } else if self.is_certainly_false() && rhs.is_certainly_false() { + Ok(Interval::CERTAINLY_FALSE) + } else { + Ok(Interval::UNCERTAIN) } } @@ -546,6 +540,20 @@ impl Interval { )) } + /// return true if this is a Boolean interval that is known to be `true` + pub fn is_certainly_true(&self) -> bool { + self == &Self::CERTAINLY_TRUE + } + + /// return true if this is a Boolean interval that may be either `true` or `false` + pub fn is_uncertain(&self) -> bool { + self == &Self::UNCERTAIN + } + + /// return true if this is a Boolean interval that is known to be `false` + pub fn is_certainly_false(&self) -> bool { + self == &Self::CERTAINLY_FALSE + } pub const CERTAINLY_FALSE: Interval = Interval { lower: IntervalBound::new_closed(ScalarValue::Boolean(Some(false))), upper: IntervalBound::new_closed(ScalarValue::Boolean(Some(false))), @@ -1273,24 +1281,26 @@ mod tests { #[test] fn or_test() -> Result<()> { // check that (lhs OR rhs) is equal to result - // (lhs_lower, lhs_upper, rhs_lower, rhs_upper, lower_result, upper_result) + // ((lhs_lower, lhs_upper), (rhs_lower, rhs_upper), (lower_result, upper_result)) let cases = vec![ - (false, true, false, false, true, false), - (false, false, false, true, false, false), - (false, true, false, true, false, true), - (false, true, true, true, false, true), - (false, false, false, false, false, false), - (true, true, true, true, true, true), + // Note there are only three valid boolean intervals: [false, false], [false, true] and [true, true], + ((false, false), (false, false), (false, false)), + ((false, false), (false, true), (false, true)), + ((false, false), (true, true), (true, true)), + ((false, true), (false, false), (false, true)), + ((false, true), (false, true), (false, true)), + ((false, true), (true, true), (true, true)), + ((true, true), (false, false), (true, true)), + ((true, true), (false, true), (true, true)), + ((true, true), (true, true), (true, true)), ]; - // Something is not right here -- lhs, rhs and expected are always the - // same Interval (so the test is useless as it always passes) - // Interval { lower: IntervalBound { value: Boolean(true), open: false }, upper: IntervalBound { value: Boolean(false), open: false } } for case in cases { + let (lhs, rhs, expected) = case; println!("case: {:?}", case); - let lhs = open_open(Some(case.0), Some(case.1)); - let rhs = open_open(Some(case.2), Some(case.3)); - let expected = open_open(Some(case.4), Some(case.5)); + let lhs = closed_closed(Some(lhs.0), Some(lhs.1)); + let rhs = closed_closed(Some(rhs.0), Some(rhs.1)); + let expected = closed_closed(Some(expected.0), Some(expected.1)); println!("lhs: {:?}, rhs: {:?}, expected: {:?}", lhs, rhs, expected); assert_eq!(lhs.or(rhs)?, expected,); } From d846ac3b950614670db38bfc4ddeea5d3fe7c27b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 20 Oct 2023 12:58:16 -0400 Subject: [PATCH 5/6] Update datafusion/physical-expr/src/intervals/interval_aritmetic.rs Co-authored-by: Alex Huang --- datafusion/physical-expr/src/intervals/interval_aritmetic.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs index 56815a12e31b..5e6ff0fc04d7 100644 --- a/datafusion/physical-expr/src/intervals/interval_aritmetic.rs +++ b/datafusion/physical-expr/src/intervals/interval_aritmetic.rs @@ -436,7 +436,7 @@ impl Interval { pub(crate) fn or>(&self, other: T) -> Result { let rhs = other.borrow(); if self.get_datatype()? != DataType::Boolean - && rhs.get_datatype()? != DataType::Boolean + || rhs.get_datatype()? != DataType::Boolean { return internal_err!( "Incompatible types for logical disjunction: {self:?}, {rhs:?}" From 072588a17c3afb344093c6004470adc46119bb7b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 21 Oct 2023 16:14:45 -0400 Subject: [PATCH 6/6] Update datafusion/physical-expr/src/expressions/binary.rs --- datafusion/physical-expr/src/expressions/binary.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 6e67189bdabd..977ac74ab043 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -345,7 +345,7 @@ impl PhysicalExpr for BinaryExpr { let (left, right) = if self.op.is_logic_operator() { // TODO: Currently, this implementation only supports the AND/OR operator - // and does not require any further propagation (??). In the future, + // and does not require any further propagation. In the future, // upon adding support for additional logical operators, this // method will require modification to support propagating the // changes accordingly.