diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index e3cd9d9be391..f5e0b29cb9ed 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -317,6 +317,7 @@ ALL_TESTS = [ "//pkg/sql/opt/bench:bench_test", "//pkg/sql/opt/cat:cat_test", "//pkg/sql/opt/constraint:constraint_test", + "//pkg/sql/opt/cycle:cycle_test", "//pkg/sql/opt/distribution:distribution_test", "//pkg/sql/opt/exec/execbuilder:execbuilder_test", "//pkg/sql/opt/exec/explain:explain_test", diff --git a/pkg/sql/opt/cycle/BUILD.bazel b/pkg/sql/opt/cycle/BUILD.bazel new file mode 100644 index 000000000000..b27fd3315666 --- /dev/null +++ b/pkg/sql/opt/cycle/BUILD.bazel @@ -0,0 +1,14 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "cycle", + srcs = ["detector.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/sql/opt/cycle", + visibility = ["//visibility:public"], +) + +go_test( + name = "cycle_test", + srcs = ["detector_test.go"], + embed = [":cycle"], +) diff --git a/pkg/sql/opt/cycle/detector.go b/pkg/sql/opt/cycle/detector.go new file mode 100644 index 000000000000..843545e5acb7 --- /dev/null +++ b/pkg/sql/opt/cycle/detector.go @@ -0,0 +1,127 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package cycle + +// Detector finds cycles in directed graphs. Vertices are identified as unique +// integers by the caller. +// +// Example usage: +// +// var d Detector +// d.AddEdge(Vertex(0), Vertex(1)) +// d.AddEdge(Vertex(1), Vertex(2)) +// d.AddEdge(Vertex(2), Vertex(0)) +// d.FindCycleStartingAtVertex(Vertex(0)) +// => [0, 1, 2, 0], true +// +type Detector struct { + // edges are the directed edges in the graph. + edges []edge +} + +// Vertex is a vertex in the graph. +type Vertex int + +// edge is a directed edge between two vertices. +type edge struct { + from Vertex + to Vertex +} + +type status int + +const ( + // statusNotVisited indicates that a vertex has not been previously visited. + statusNotVisited status = iota + // statusVisited indicates that a vertex has been visited but a verdict on + // its involvement in a cycle has not yet been reached. + statusVisited + // statusDone indicates that a vertex has been visited and has been proven + // innocent of being involved in a cycle. + statusDone +) + +// AddEdge adds a directed edge to the graph. +func (cd *Detector) AddEdge(from, to Vertex) { + cd.edges = append(cd.edges, edge{from, to}) +} + +// FindCycleStartingAtVertex searches for a cycle starting from v. If one is +// found, a path containing that cycle is returned. After finding the first +// cycle, searching ceases. If no cycle is found, ok=false is returned. +func (cd *Detector) FindCycleStartingAtVertex(v Vertex) (cycle []Vertex, ok bool) { + vertices := make(map[Vertex]status) + var s stack + if ok := cd.hasCycle(v, vertices, &s); ok { + // If a cycle was found, s will contain a path that includes the cycle. + return s, true + } + return nil, false +} + +// hasCycle performs a depth-first search of the graph starting at v in search +// of a cycle. It returns true when the first cycle is found. If a cycle was +// found, s contains a path that includes the cycle. Cycles are detected by +// finding edges that backtrack to a previously visited vertex. +func (cd *Detector) hasCycle(v Vertex, vertices map[Vertex]status, s *stack) bool { + // Mark the given Vertex as visited and push it onto the stack. + vertices[v] = statusVisited + s.push(v) + + // Search all children of v for cycles. + for i := range cd.edges { + edge := &cd.edges[i] + + // Ignore edges that do not originate from v. + if edge.from != v { + continue + } + + child := edge.to + switch vertices[child] { + case statusVisited: + // A cycle has been found. The current stack is a path to the cycle. + // Push the child onto the stack so that the cycle is more obvious + // in the path. + s.push(child) + return true + case statusNotVisited: + // Recurse if this child of v has not yet been visited. + if ok := cd.hasCycle(child, vertices, s); ok { + // If a cycle was found deeper down, propagate the result and + // halt searching other children of v. + return true + } + case statusDone: + // We have already proven that this child does not backtrack to + // anywhere further up the stack, so we do not need to recurse into + // it. + } + } + + // If we didn't return early from the loop, we did not find a cycle below + // this vertex, so we can mark the vertex as done and pop it off the stack. + vertices[v] = statusDone + s.pop() + return false +} + +// stack is a simple implementation of a stack of vertices. It allows more +// ergonomic mutation by callees than a slice. +type stack []Vertex + +func (s *stack) push(i Vertex) { + *s = append(*s, i) +} + +func (s *stack) pop() { + *s = (*s)[:len(*s)-1] +} diff --git a/pkg/sql/opt/cycle/detector_test.go b/pkg/sql/opt/cycle/detector_test.go new file mode 100644 index 000000000000..0ac3e9744015 --- /dev/null +++ b/pkg/sql/opt/cycle/detector_test.go @@ -0,0 +1,183 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package cycle + +import ( + "reflect" + "testing" +) + +func TestDetector(t *testing.T) { + testCases := []struct { + edges []edge + start Vertex + expected []Vertex + }{ + // No cycles. + { + edges: []edge{}, + start: 0, + expected: nil, + }, + { + edges: []edge{ + {0, 1}, + {1, 2}, + {2, 3}, + }, + start: 0, + expected: nil, + }, + { + edges: []edge{ + {0, 1}, + {0, 2}, + {0, 3}, + {1, 2}, + {2, 3}, + }, + start: 0, + expected: nil, + }, + { + edges: []edge{ + {0, 1}, + {0, 2}, + {0, 3}, + {1, 2}, + {3, 1}, + }, + start: 0, + expected: nil, + }, + { + edges: []edge{ + {0, 1}, + {0, 4}, + {1, 2}, + {1, 3}, + {4, 3}, + {4, 5}, + {5, 3}, + }, + start: 0, + expected: nil, + }, + { + edges: []edge{ + {1, 2}, + {1, 3}, + {1, 4}, + {1, 7}, + {4, 5}, + {5, 6}, + {6, 7}, + {7, 4}, + }, + start: 0, + expected: nil, + }, + + // Cycles. + { + edges: []edge{ + {0, 0}, + }, + start: 0, + expected: []Vertex{0, 0}, + }, + { + edges: []edge{ + {0, 1}, + {1, 0}, + }, + start: 0, + expected: []Vertex{0, 1, 0}, + }, + { + edges: []edge{ + {0, 1}, + {1, 0}, + }, + start: 1, + expected: []Vertex{1, 0, 1}, + }, + { + edges: []edge{ + {0, 1}, + {1, 2}, + {2, 3}, + {3, 4}, + {4, 1}, + }, + start: 0, + expected: []Vertex{0, 1, 2, 3, 4, 1}, + }, + { + edges: []edge{ + {0, 1}, + {1, 3}, + {3, 2}, + {2, 1}, + }, + start: 0, + expected: []Vertex{0, 1, 3, 2, 1}, + }, + { + edges: []edge{ + {0, 1}, + {0, 4}, + {1, 2}, + {1, 3}, + {3, 5}, + {4, 3}, + {4, 5}, + {5, 1}, + }, + start: 0, + expected: []Vertex{0, 1, 3, 5, 1}, + }, + { + edges: []edge{ + {1, 2}, + {1, 3}, + {1, 4}, + {1, 7}, + {4, 5}, + {5, 6}, + {6, 7}, + {7, 4}, + }, + start: 1, + expected: []Vertex{1, 4, 5, 6, 7, 4}, + }, + { + edges: []edge{ + {0, 1}, + {1, 2}, + {2, 3}, + {3, 3}, + }, + start: 1, + expected: []Vertex{1, 2, 3, 3}, + }, + } + for _, tc := range testCases { + var d Detector + for _, edge := range tc.edges { + d.AddEdge(edge.from, edge.to) + } + res, _ := d.FindCycleStartingAtVertex(tc.start) + if !reflect.DeepEqual(res, tc.expected) { + t.Errorf("expected %v, got %v", tc.expected, res) + } + } +} diff --git a/pkg/sql/opt/xform/BUILD.bazel b/pkg/sql/opt/xform/BUILD.bazel index 70cf0cbe8810..243cba7670d1 100644 --- a/pkg/sql/opt/xform/BUILD.bazel +++ b/pkg/sql/opt/xform/BUILD.bazel @@ -30,6 +30,7 @@ go_library( "//pkg/sql/opt", "//pkg/sql/opt/cat", "//pkg/sql/opt/constraint", + "//pkg/sql/opt/cycle", "//pkg/sql/opt/distribution", "//pkg/sql/opt/idxconstraint", "//pkg/sql/opt/invertedexpr", diff --git a/pkg/sql/opt/xform/memo_format.go b/pkg/sql/opt/xform/memo_format.go index 7a4988326738..e8fe1c554145 100644 --- a/pkg/sql/opt/xform/memo_format.go +++ b/pkg/sql/opt/xform/memo_format.go @@ -16,6 +16,7 @@ import ( "sort" "github.com/cockroachdb/cockroach/pkg/sql/opt" + "github.com/cockroachdb/cockroach/pkg/sql/opt/cycle" "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" "github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical" "github.com/cockroachdb/cockroach/pkg/util/treeprinter" @@ -29,6 +30,9 @@ const ( // FmtPretty performs a breadth-first topological sort on the memo groups, // and shows the root group at the top of the memo. FmtPretty FmtFlags = iota + // FmtCycle formats a memo like FmtPretty, but attempts to detect a cycle in + // the memo and include it in the formatted output. + FmtCycle ) type group struct { @@ -71,8 +75,8 @@ func (mf *memoFormatter) format() string { desc = "optimized" } tpRoot := tp.Childf( - "memo (%s, ~%dKB, required=%s)", - desc, mf.o.mem.MemoryEstimate()/1024, m.RootProps(), + "memo (%s, ~%dKB, required=%s%s)", + desc, mf.o.mem.MemoryEstimate()/1024, m.RootProps(), mf.formatCycles(), ) for i, e := range mf.groups { @@ -317,3 +321,57 @@ func firstExpr(expr opt.Expr) opt.Expr { } return expr } + +func (mf *memoFormatter) formatCycles() string { + m := mf.o.mem + var d cycle.Detector + + if mf.flags != FmtCycle { + return "" + } + + addExpr := func(from cycle.Vertex, e opt.Expr) { + for i := 0; i < e.ChildCount(); i++ { + child := e.Child(i) + if opt.IsListItemOp(child) { + child = child.Child(0) + } + to := cycle.Vertex(mf.group(child)) + d.AddEdge(from, to) + } + } + + addGroup := func(from cycle.Vertex, first memo.RelExpr) { + for member := first; member != nil; member = member.NextExpr() { + addExpr(from, member) + } + } + + // Add an edge to the cycle detector from a group to each of the groups it + // references. + for i, e := range mf.groups { + from := cycle.Vertex(i) + rel, ok := e.first.(memo.RelExpr) + if !ok { + addExpr(from, e.first) + continue + } + addGroup(from, rel) + } + + // Search for a cycle from the root expression group. + root := cycle.Vertex(mf.group(m.RootExpr())) + if cyclePath, ok := d.FindCycleStartingAtVertex(root); ok { + mf.buf.Reset() + mf.buf.WriteString(", cycle=[") + for i, group := range cyclePath { + if i > 0 { + mf.buf.WriteString("->") + } + fmt.Fprintf(mf.buf, "G%d", group+1) + } + mf.buf.WriteString("]") + return mf.buf.String() + } + return "" +} diff --git a/pkg/sql/opt/xform/optimizer.go b/pkg/sql/opt/xform/optimizer.go index 916fa2283d82..2193d6829f34 100644 --- a/pkg/sql/opt/xform/optimizer.go +++ b/pkg/sql/opt/xform/optimizer.go @@ -470,7 +470,7 @@ func (o *Optimizer) optimizeGroup(grp memo.RelExpr, required *physical.Required) // times, there is likely a cycle in the memo. To avoid a stack // overflow, throw an internal error. The formatted memo is included as // an error detail to aid in debugging the cycle. - mf := makeMemoFormatter(o, FmtPretty) + mf := makeMemoFormatter(o, FmtCycle) panic(errors.WithDetail( errors.AssertionFailedf( "memo group optimization passes surpassed limit of %v; "+ diff --git a/pkg/sql/opt/xform/testdata/rules/cycle b/pkg/sql/opt/xform/testdata/rules/cycle index 7d9645917447..2529ac976fe9 100644 --- a/pkg/sql/opt/xform/testdata/rules/cycle +++ b/pkg/sql/opt/xform/testdata/rules/cycle @@ -6,6 +6,22 @@ CREATE TABLE ab ( ) ---- +exec-ddl +CREATE TABLE uv ( + u INT PRIMARY KEY, + v INT, + INDEX (v) +) +---- + +exec-ddl +CREATE TABLE xy ( + x INT PRIMARY KEY, + y INT, + INDEX (y) +) +---- + # This test ensures that a memo cycle does not cause a stack overflow. Instead, # the cycle is detected and the optimizer throws an internal error. The cycle is # created by the test-only exploration rule MemoCycleTestRelRule. @@ -17,7 +33,7 @@ expropt ---- error: memo group optimization passes surpassed limit of 100000; there may be a cycle in the memo details: -memo (not optimized, ~3KB, required=[]) +memo (not optimized, ~3KB, required=[], cycle=[G1->G1]) ├── G1: (memo-cycle-test-rel G2 G3) (memo-cycle-test-rel G1 G3) ├── G2: (scan ab,cols=(1,2)) (scan ab@ab_b_idx,cols=(1,2)) │ └── [] @@ -27,3 +43,40 @@ memo (not optimized, ~3KB, required=[]) ├── G4: (eq G5 G6) ├── G5: (variable b) └── G6: (const 1) + +expropt +(LeftJoin + (Scan [ (Table "ab") (Cols "a,b") ]) + (LeftJoin + (MemoCycleTestRel + (Scan [ (Table "uv") (Cols "u,v") ]) + [ (Eq (Var "v") (Const 1 "int")) ] + ) + (Scan [ (Table "xy") (Cols "x,y") ]) + [ ] + [ ] + ) + [ ] + [ ] +) +---- +error: memo group optimization passes surpassed limit of 100000; there may be a cycle in the memo +details: +memo (not optimized, ~15KB, required=[], cycle=[G1->G3->G5->G5]) + ├── G1: (left-join G2 G3 G4) + ├── G2: (scan ab,cols=()) (scan ab@ab_b_idx,cols=()) + │ └── [] + │ ├── best: (scan ab,cols=()) + │ └── cost: 1044.22 + ├── G3: (left-join G5 G6 G4) + ├── G4: (filters) + ├── G5: (memo-cycle-test-rel G7 G8) (memo-cycle-test-rel G5 G8) + ├── G6: (scan xy,cols=()) + ├── G7: (scan uv,cols=(5,6)) (scan uv@uv_v_idx,cols=(5,6)) + │ └── [] + │ ├── best: (scan uv,cols=(5,6)) + │ └── cost: 1064.42 + ├── G8: (filters G9) + ├── G9: (eq G10 G11) + ├── G10: (variable v) + └── G11: (const 1)