From bb6e0dabe292df216f8783766a9dfbb0dc5f14e1 Mon Sep 17 00:00:00 2001 From: Florent Poinsard <35779988+frouioui@users.noreply.github.com> Date: Tue, 28 Mar 2023 17:51:06 +0200 Subject: [PATCH] planner: fix predicate simplifier (#12650) (#12715) * planner: fix predicate simplifier * predicate rewriting: add debug logging * add debug logging to make the code easier to understand * address review feedback * Update go/vt/vtgate/planbuilder/predicate_rewrite_test.go Nicer panic message --------- Signed-off-by: Andres Taylor Signed-off-by: Andres Taylor Signed-off-by: Florent Poinsard Co-authored-by: Andres Taylor --- go/vt/sqlparser/predicate_rewriting.go | 261 ++++++++++++++------ go/vt/sqlparser/predicate_rewriting_test.go | 11 +- 2 files changed, 191 insertions(+), 81 deletions(-) diff --git a/go/vt/sqlparser/predicate_rewriting.go b/go/vt/sqlparser/predicate_rewriting.go index eb772191a13..0348f95f115 100644 --- a/go/vt/sqlparser/predicate_rewriting.go +++ b/go/vt/sqlparser/predicate_rewriting.go @@ -16,41 +16,47 @@ limitations under the License. package sqlparser -const ( - Changed RewriteState = true - NoChange RewriteState = false +import ( + "vitess.io/vitess/go/vt/log" ) -type RewriteState bool - // RewritePredicate walks the input AST and rewrites any boolean logic into a simpler form // This simpler form is CNF plus logic for extracting predicates from OR, plus logic for turning ORs into IN // Note: In order to re-plan, we need to empty the accumulated metadata in the AST, // so ColName.Metadata will be nil:ed out as part of this rewrite func RewritePredicate(ast SQLNode) SQLNode { for { - finishedRewrite := true - ast = SafeRewrite(ast, nil, func(cursor *Cursor) bool { - if e, isExpr := cursor.node.(Expr); isExpr { - rewritten, state := simplifyExpression(e) - if state == Changed { - finishedRewrite = false - cursor.Replace(rewritten) - } + printExpr(ast) + exprChanged := false + stopOnChange := func(SQLNode, SQLNode) bool { + return !exprChanged + } + ast = SafeRewrite(ast, stopOnChange, func(cursor *Cursor) bool { + e, isExpr := cursor.node.(Expr) + if !isExpr { + return true } + + rewritten, state := simplifyExpression(e) + if ch, isChange := state.(changed); isChange { + printRule(ch.rule, ch.exprMatched) + exprChanged = true + cursor.Replace(rewritten) + } + if col, isCol := cursor.node.(*ColName); isCol { col.Metadata = nil } - return true + return !exprChanged }) - if finishedRewrite { + if !exprChanged { return ast } } } -func simplifyExpression(expr Expr) (Expr, RewriteState) { +func simplifyExpression(expr Expr) (Expr, rewriteState) { switch expr := expr.(type) { case *NotExpr: return simplifyNot(expr) @@ -61,24 +67,22 @@ func simplifyExpression(expr Expr) (Expr, RewriteState) { case *AndExpr: return simplifyAnd(expr) } - return expr, NoChange + return expr, noChange{} } -func simplifyNot(expr *NotExpr) (Expr, RewriteState) { +func simplifyNot(expr *NotExpr) (Expr, rewriteState) { switch child := expr.Expr.(type) { case *NotExpr: - // NOT NOT A => A - return child.Expr, Changed + return child.Expr, + newChange("NOT NOT A => A", f(expr)) case *OrExpr: - // DeMorgan Rewriter - // NOT (A OR B) => NOT A AND NOT B - return &AndExpr{Right: &NotExpr{Expr: child.Right}, Left: &NotExpr{Expr: child.Left}}, Changed + return &AndExpr{Right: &NotExpr{Expr: child.Right}, Left: &NotExpr{Expr: child.Left}}, + newChange("NOT (A OR B) => NOT A AND NOT B", f(expr)) case *AndExpr: - // DeMorgan Rewriter - // NOT (A AND B) => NOT A OR NOT B - return &OrExpr{Right: &NotExpr{Expr: child.Right}, Left: &NotExpr{Expr: child.Left}}, Changed + return &OrExpr{Right: &NotExpr{Expr: child.Right}, Left: &NotExpr{Expr: child.Left}}, + newChange("NOT (A AND B) => NOT A OR NOT B", f(expr)) } - return expr, NoChange + return expr, noChange{} } // ExtractINFromOR will add additional predicated to an OR. @@ -104,16 +108,16 @@ func ExtractINFromOR(expr *OrExpr) []Expr { continue } in, state := tryTurningOrIntoIn(l, r) - if state == Changed { + if state.changed() { ins = append(ins, in) } } } - return ins + return uniquefy(ins) } -func simplifyOr(expr *OrExpr) (Expr, RewriteState) { +func simplifyOr(expr *OrExpr) (Expr, rewriteState) { or := expr // first we search for ANDs and see how they can be simplified @@ -121,42 +125,47 @@ func simplifyOr(expr *OrExpr) (Expr, RewriteState) { rand, rok := or.Right.(*AndExpr) switch { case lok && rok: + // (<> AND <>) OR (<> AND <>) var a, b, c Expr + var change changed switch { - // (A and B) or (A and C) => A AND (B OR C) case Equals.Expr(land.Left, rand.Left): + change = newChange("(A and B) or (A and C) => A AND (B OR C)", f(expr)) a, b, c = land.Left, land.Right, rand.Right - // (A and B) or (C and A) => A AND (B OR C) case Equals.Expr(land.Left, rand.Right): + change = newChange("(A and B) or (C and A) => A AND (B OR C)", f(expr)) a, b, c = land.Left, land.Right, rand.Left - // (B and A) or (A and C) => A AND (B OR C) case Equals.Expr(land.Right, rand.Left): + change = newChange("(B and A) or (A and C) => A AND (B OR C)", f(expr)) a, b, c = land.Right, land.Left, rand.Right - // (B and A) or (C and A) => A AND (B OR C) case Equals.Expr(land.Right, rand.Right): + change = newChange("(B and A) or (C and A) => A AND (B OR C)", f(expr)) a, b, c = land.Right, land.Left, rand.Left default: - return expr, NoChange + return expr, noChange{} } - return &AndExpr{Left: a, Right: &OrExpr{Left: b, Right: c}}, Changed + return &AndExpr{Left: a, Right: &OrExpr{Left: b, Right: c}}, change case lok: + // (<> AND <>) OR <> // Simplification - // (A AND B) OR A => A if Equals.Expr(or.Right, land.Left) || Equals.Expr(or.Right, land.Right) { - return or.Right, Changed + return or.Right, newChange("(A AND B) OR A => A", f(expr)) } // Distribution Law - // (A AND B) OR C => (A OR C) AND (B OR C) - return &AndExpr{Left: &OrExpr{Left: land.Left, Right: or.Right}, Right: &OrExpr{Left: land.Right, Right: or.Right}}, Changed + return &AndExpr{Left: &OrExpr{Left: land.Left, Right: or.Right}, Right: &OrExpr{Left: land.Right, Right: or.Right}}, + newChange("(A AND B) OR C => (A OR C) AND (B OR C)", f(expr)) case rok: + // <> OR (<> AND <>) // Simplification - // A OR (A AND B) => A if Equals.Expr(or.Left, rand.Left) || Equals.Expr(or.Left, rand.Right) { - return or.Left, Changed + return or.Left, newChange("A OR (A AND B) => A", f(expr)) } // Distribution Law - // C OR (A AND B) => (C OR A) AND (C OR B) - return &AndExpr{Left: &OrExpr{Left: or.Left, Right: rand.Left}, Right: &OrExpr{Left: or.Left, Right: rand.Right}}, Changed + return &AndExpr{ + Left: &OrExpr{Left: or.Left, Right: rand.Left}, + Right: &OrExpr{Left: or.Left, Right: rand.Right}, + }, + newChange("C OR (A AND B) => (C OR A) AND (C OR B)", f(expr)) } // next, we want to try to turn multiple ORs into an IN when possible @@ -164,8 +173,8 @@ func simplifyOr(expr *OrExpr) (Expr, RewriteState) { rgtCmp, rok := or.Right.(*ComparisonExpr) if lok && rok { newExpr, rewritten := tryTurningOrIntoIn(lftCmp, rgtCmp) - if rewritten { - return newExpr, Changed + if rewritten.changed() { + return newExpr, rewritten } } @@ -173,46 +182,54 @@ func simplifyOr(expr *OrExpr) (Expr, RewriteState) { return distinctOr(expr) } -func tryTurningOrIntoIn(l, r *ComparisonExpr) (Expr, RewriteState) { +func tryTurningOrIntoIn(l, r *ComparisonExpr) (Expr, rewriteState) { // looks for A = X OR A = Y and turns them into A IN (X, Y) col, ok := l.Left.(*ColName) if !ok || !Equals.Expr(col, r.Left) { - return nil, NoChange + return nil, noChange{} } var tuple ValTuple - + var ruleStr string switch l.Operator { case EqualOp: tuple = ValTuple{l.Right} + ruleStr = "A = <>" case InOp: lft, ok := l.Right.(ValTuple) if !ok { - return nil, NoChange + return nil, noChange{} } tuple = lft + ruleStr = "A IN (<>, <>)" default: - return nil, NoChange + return nil, noChange{} } + ruleStr += " OR " + switch r.Operator { case EqualOp: tuple = append(tuple, r.Right) + ruleStr += "A = <>" case InOp: lft, ok := r.Right.(ValTuple) if !ok { - return nil, NoChange + return nil, noChange{} } tuple = append(tuple, lft...) + ruleStr += "A IN (<>, <>)" default: - return nil, NoChange + return nil, noChange{} } + ruleStr += " => A IN (<>, <>)" + return &ComparisonExpr{ Operator: InOp, Left: col, Right: uniquefy(tuple), - }, Changed + }, newChange(ruleStr, f(&OrExpr{Left: l, Right: r})) } func uniquefy(tuple ValTuple) (output ValTuple) { @@ -228,37 +245,45 @@ outer: return } -func simplifyXor(expr *XorExpr) (Expr, RewriteState) { +func simplifyXor(expr *XorExpr) (Expr, rewriteState) { // DeMorgan Rewriter - // (A XOR B) => (A OR B) AND NOT (A AND B) - return &AndExpr{Left: &OrExpr{Left: expr.Left, Right: expr.Right}, Right: &NotExpr{Expr: &AndExpr{Left: expr.Left, Right: expr.Right}}}, Changed + return &AndExpr{ + Left: &OrExpr{Left: expr.Left, Right: expr.Right}, + Right: &NotExpr{Expr: &AndExpr{Left: expr.Left, Right: expr.Right}}, + }, newChange("(A XOR B) => (A OR B) AND NOT (A AND B)", f(expr)) } -func simplifyAnd(expr *AndExpr) (Expr, RewriteState) { +func simplifyAnd(expr *AndExpr) (Expr, rewriteState) { res, rewritten := distinctAnd(expr) - if rewritten { + if rewritten.changed() { return res, rewritten } and := expr if or, ok := and.Left.(*OrExpr); ok { // Simplification - // (A OR B) AND A => A - if Equals.Expr(or.Left, and.Right) || Equals.Expr(or.Right, and.Right) { - return and.Right, Changed + + if Equals.Expr(or.Left, and.Right) { + return and.Right, newChange("(A OR B) AND A => A", f(expr)) + } + if Equals.Expr(or.Right, and.Right) { + return and.Right, newChange("(A OR B) AND B => B", f(expr)) } } if or, ok := and.Right.(*OrExpr); ok { // Simplification - // A OR (A AND B) => A - if Equals.Expr(or.Left, and.Left) || Equals.Expr(or.Right, and.Left) { - return or.Left, Changed + if Equals.Expr(or.Left, and.Left) { + return and.Left, newChange("A AND (A OR B) => A", f(expr)) + } + if Equals.Expr(or.Right, and.Left) { + return and.Left, newChange("A AND (B OR A) => A", f(expr)) } } - return expr, NoChange + return expr, noChange{} } -func distinctOr(in *OrExpr) (Expr, RewriteState) { +func distinctOr(in *OrExpr) (Expr, rewriteState) { + var skipped []*OrExpr todo := []*OrExpr{in} var leaves []Expr for len(todo) > 0 { @@ -284,13 +309,16 @@ outer1: leaves = leaves[1:] for _, alreadyIn := range predicates { if Equals.Expr(alreadyIn, curr) { + if log.V(0) { + skipped = append(skipped, &OrExpr{Left: alreadyIn, Right: curr}) + } continue outer1 } } predicates = append(predicates, curr) } if original == len(predicates) { - return in, NoChange + return in, noChange{} } var result Expr for i, curr := range predicates { @@ -300,42 +328,58 @@ outer1: } result = &OrExpr{Left: result, Right: curr} } - return result, Changed + + return result, newChange("A OR A => A", func() Expr { + var result Expr + for _, orExpr := range skipped { + if result == nil { + result = orExpr + continue + } + + result = &OrExpr{ + Left: result, + Right: orExpr, + } + } + return result + }) } -func distinctAnd(in *AndExpr) (Expr, RewriteState) { +func distinctAnd(in *AndExpr) (Expr, rewriteState) { + var skipped []*AndExpr todo := []*AndExpr{in} var leaves []Expr for len(todo) > 0 { curr := todo[0] todo = todo[1:] - addAnd := func(in Expr) { - and, ok := in.(*AndExpr) - if ok { + addExpr := func(in Expr) { + if and, ok := in.(*AndExpr); ok { todo = append(todo, and) } else { leaves = append(leaves, in) } } - addAnd(curr.Left) - addAnd(curr.Right) + addExpr(curr.Left) + addExpr(curr.Right) } original := len(leaves) var predicates []Expr outer1: - for len(leaves) > 0 { - curr := leaves[0] - leaves = leaves[1:] + for _, curr := range leaves { for _, alreadyIn := range predicates { if Equals.Expr(alreadyIn, curr) { + if log.V(0) { + skipped = append(skipped, &AndExpr{Left: alreadyIn, Right: curr}) + } continue outer1 } } predicates = append(predicates, curr) } if original == len(predicates) { - return in, NoChange + return in, noChange{} } var result Expr for i, curr := range predicates { @@ -345,5 +389,62 @@ outer1: } result = &AndExpr{Left: result, Right: curr} } - return result, Changed + return AndExpressions(leaves...), newChange("A AND A => A", func() Expr { + var result Expr + for _, andExpr := range skipped { + if result == nil { + result = andExpr + continue + } + + result = &AndExpr{ + Left: result, + Right: andExpr, + } + } + return result + }) +} + +type ( + rewriteState interface { + changed() bool + } + noChange struct{} + + // changed makes it possible to make sure we have a rule string for each change we do in the expression tree + changed struct { + rule string + + // ExprMatched is a function here so building of this expression can be paid only when we are debug logging + exprMatched func() Expr + } +) + +func (noChange) changed() bool { return false } +func (changed) changed() bool { return true } + +// f returns a function that returns the expression. It's short by design, so it interferes minimally +// used for logging +func f(e Expr) func() Expr { + return func() Expr { return e } +} + +func printRule(rule string, expr func() Expr) { + if log.V(10) { + log.Infof("Rule: %s ON %s", rule, String(expr())) + } +} + +func printExpr(expr SQLNode) { + if log.V(10) { + log.Infof("Current: %s", String(expr)) + } +} + +func newChange(rule string, exprMatched func() Expr) changed { + return changed{ + rule: rule, + exprMatched: exprMatched, + } } diff --git a/go/vt/sqlparser/predicate_rewriting_test.go b/go/vt/sqlparser/predicate_rewriting_test.go index 1c86dec61a2..34e23597894 100644 --- a/go/vt/sqlparser/predicate_rewriting_test.go +++ b/go/vt/sqlparser/predicate_rewriting_test.go @@ -92,7 +92,7 @@ func TestSimplifyExpression(in *testing.T) { require.NoError(t, err) expr, didRewrite := simplifyExpression(expr) - assert.True(t, didRewrite == Changed) + assert.True(t, didRewrite.changed()) assert.Equal(t, tc.expected, String(expr)) }) } @@ -114,12 +114,21 @@ func TestRewritePredicate(in *testing.T) { }, { in: "(A and B) OR (A and C)", expected: "A and (B or C)", + }, { + in: "(A and B) OR (C and A)", + expected: "A and (B or C)", + }, { + in: "(B and A) OR (A and C)", + expected: "A and (B or C)", }, { in: "(A and B) or (A and C) or (A and D)", expected: "A and (B or C or D)", }, { in: "(a=1 or a IN (1,2)) or (a = 2 or a = 3)", expected: "a in (1, 2, 3)", + }, { + in: "A and (B or A)", + expected: "A", }} for _, tc := range tests {