From 1654f358d40b2d611afdffb48d95045fc0bc6461 Mon Sep 17 00:00:00 2001 From: Asdine El Hrychy Date: Fri, 30 Jul 2021 17:11:44 -0400 Subject: [PATCH 1/2] Parse optional parentheses on DEFAULT clause --- internal/sql/parser/create.go | 12 ++++++++++++ internal/sql/parser/create_test.go | 10 ++++++++++ 2 files changed, 22 insertions(+) diff --git a/internal/sql/parser/create.go b/internal/sql/parser/create.go index f946973df..c17da88f0 100644 --- a/internal/sql/parser/create.go +++ b/internal/sql/parser/create.go @@ -184,6 +184,11 @@ func (p *Parser) parseFieldConstraint(fc *database.FieldConstraint) error { return newParseError(scanner.Tokstr(tok, lit), []string{"CONSTRAINT", ")"}, pos) } + withParentheses, err := p.parseOptional(scanner.LPAREN) + if err != nil { + return err + } + // Parse default value expression. // Only a few tokens are allowed. e, err := p.parseExprWithMinPrecedence(scanner.EQ.Precedence(), @@ -217,6 +222,13 @@ func (p *Parser) parseFieldConstraint(fc *database.FieldConstraint) error { } fc.DefaultValue = expr.Constraint(e) + + if withParentheses { + _, err = p.parseOptional(scanner.RPAREN) + if err != nil { + return err + } + } case scanner.UNIQUE: // if it's already unique we return an error if fc.IsUnique { diff --git a/internal/sql/parser/create_test.go b/internal/sql/parser/create_test.go index 49bc4782c..488a41d12 100644 --- a/internal/sql/parser/create_test.go +++ b/internal/sql/parser/create_test.go @@ -61,7 +61,17 @@ func TestParserCreateTable(t *testing.T) { }, }, }, false}, + {"With default", "CREATE TABLE test(foo DEFAULT (\"10\"))", + &statement.CreateTableStmt{ + Info: database.TableInfo{ + TableName: "test", + FieldConstraints: []*database.FieldConstraint{ + {Path: document.Path(testutil.ParsePath(t, "foo")), DefaultValue: expr.Constraint(expr.LiteralValue{Value: types.NewTextValue("10")})}, + }, + }, + }, false}, {"With default twice", "CREATE TABLE test(foo DEFAULT 10 DEFAULT 10)", nil, true}, + {"With default and no 
parentheses", "CREATE TABLE test(foo DEFAULT (10)", nil, true}, {"With forbidden tokens", "CREATE TABLE test(foo DEFAULT a)", nil, true}, {"With forbidden tokens", "CREATE TABLE test(foo DEFAULT 1 AND 2)", nil, true}, {"With unique", "CREATE TABLE test(foo UNIQUE)", From c1cc69d1bef4b4f82e66fa6d12e4aa89cbcbdbdd Mon Sep 17 00:00:00 2001 From: Asdine El Hrychy Date: Thu, 5 Aug 2021 20:34:33 -0400 Subject: [PATCH 2/2] Add new query plan logic --- internal/expr/operator.go | 10 - internal/planner/index_selection.go | 523 ++++++++++++++++++++++++++++ internal/planner/optimizer.go | 424 +--------------------- internal/planner/optimizer_test.go | 72 ++-- 4 files changed, 560 insertions(+), 469 deletions(-) create mode 100644 internal/planner/index_selection.go diff --git a/internal/expr/operator.go b/internal/expr/operator.go index 35f1f25a9..fc580f178 100644 --- a/internal/expr/operator.go +++ b/internal/expr/operator.go @@ -91,16 +91,6 @@ type Operator interface { Token() scanner.Token } -// OperatorIsIndexCompatible returns whether the operator can be used to read from an index. -func OperatorIsIndexCompatible(op Operator) bool { - switch op.Token() { - case scanner.EQ, scanner.GT, scanner.GTE, scanner.LT, scanner.LTE, scanner.IN: - return true - } - - return false -} - type ConcatOperator struct { *simpleOperator } diff --git a/internal/planner/index_selection.go b/internal/planner/index_selection.go new file mode 100644 index 000000000..babbda79c --- /dev/null +++ b/internal/planner/index_selection.go @@ -0,0 +1,523 @@ +package planner + +import ( + "github.com/genjidb/genji/document" + "github.com/genjidb/genji/internal/database" + "github.com/genjidb/genji/internal/expr" + "github.com/genjidb/genji/internal/sql/scanner" + "github.com/genjidb/genji/internal/stream" + "github.com/genjidb/genji/internal/stringutil" +) + +// SelectIndex attempts to replace a sequential scan by an index scan or a pk scan by +// analyzing the stream for indexable filter nodes. 
+// It expects the first node of the stream to be a seqScan. +// +// Compatibility of filter nodes. +// +// For a filter node to be selected it must be of the following form: +// [path] [compatible operator] [expression] +// or +// [expression] [compatible operator] [path] +// path: path of a document +// compatible operator: one of =, >, >=, <, <=, IN +// expression: any expression +// +// Index compatibility. +// +// Once we have a list of all compatible filter nodes, we try to associate +// indexes with them. +// Given the following index: +// CREATE INDEX foo_a_idx ON foo (a) +// and this query: +// SELECT * FROM foo WHERE a > 5 AND b > 10 +// seqScan('foo') | filter(a > 5) | filter(b > 10) | project(*) +// foo_a_idx matches filter(a > 5) and can be selected. +// Now, with a different index: +// CREATE INDEX foo_a_b_c_idx ON foo(a, b, c) +// and this query: +// SELECT * FROM foo WHERE a > 5 AND c > 20 +// seqScan('foo') | filter(a > 5) | filter(c > 20) | project(*) +// foo_a_b_c_idx matches with the first filter because a is the leftmost path indexed by it. +// The second filter is not selected because it is not the second leftmost path. +// For composite indexes, filter nodes can be selected if they match with one or more indexed paths +// consecutively, from left to right. +// Now, let's have a look at this query: +// SELECT * FROM foo WHERE a = 5 AND b = 10 AND c > 15 AND d > 20 +// seqScan('foo') | filter(a = 5) | filter(b = 10) | filter(c > 15) | filter(d > 20) | project(*) +// foo_a_b_c_idx matches with the first three filters because they satisfy several conditions: +// - each of them matches with the first 3 indexed paths, consecutively. 
+// - the first 2 filters use the equal operator +// A counter-example: +// SELECT * FROM foo WHERE a = 5 AND b > 10 AND c > 15 AND d > 20 +// seqScan('foo') | filter(a = 5) | filter(b > 10) | filter(c > 15) | filter(d > 20) | project(*) +// foo_a_b_c_idx only matches with the first two filter nodes because while the first node uses the equal +// operator, the second one doesn't, and thus the third node cannot be selected as well. +// +// Candidates and cost +// +// Because a table can have multiple indexes, we need to establish which of these +// indexes should be used to run the query, if not all of them. +// For that we generate a cost for each selected index and return the one with the cheapest cost. +func SelectIndex(s *stream.Stream, catalog database.Catalog) (*stream.Stream, error) { + // first we lookup for the seq scan node. + // Here we will assume that at this point + // if there is one it has to be the + // first node of the stream. + firstNode := s.First() + if firstNode == nil { + return s, nil + } + seq, ok := firstNode.(*stream.SeqScanOperator) + if !ok { + return s, nil + } + + is := indexSelector{ + seqScan: seq, + catalog: catalog, + } + + return is.SelectIndex(s) +} + +// indexSelector analyses a stream and generates a plan for each of them that +// can benefit from using an index. +// It then compares the cost of each plan and returns the cheapest stream. 
+type indexSelector struct { + seqScan *stream.SeqScanOperator + catalog database.Catalog +} + +func (i *indexSelector) SelectIndex(s *stream.Stream) (*stream.Stream, error) { + // get the list of all filter nodes + var filterNodes []*stream.FilterOperator + for op := s.Op; op != nil; op = op.GetPrev() { + if f, ok := op.(*stream.FilterOperator); ok { + filterNodes = append(filterNodes, f) + } + } + + // if there are no filter, return the stream untouched + if len(filterNodes) == 0 { + return s, nil + } + + return i.selectIndex(s, filterNodes) +} + +func (i *indexSelector) selectIndex(s *stream.Stream, filters []*stream.FilterOperator) (*stream.Stream, error) { + // generate a list of candidates from all the filter nodes that + // can benefit from reading from an index or the table pk + nodes := make(filterNodes, 0, len(filters)) + for _, f := range filters { + filter := i.isFilterIndexable(f) + if filter == nil { + continue + } + + nodes = append(nodes, filter) + } + + // select the cheapest plan + var selected *candidate + var cost int + + // start with the primary key of the table + tb, err := i.catalog.GetTableInfo(i.seqScan.TableName) + if err != nil { + return nil, err + } + selected = i.associatePkWithNodes(tb, nodes) + if selected != nil { + cost = selected.Cost() + } + + // get all the indexes for this table and associate them + // with compatible candidates + for _, idxName := range i.catalog.ListIndexes(i.seqScan.TableName) { + idxInfo, err := i.catalog.GetIndexInfo(idxName) + if err != nil { + return nil, err + } + + candidate := i.associateIndexWithNodes(idxInfo, nodes) + + if candidate == nil { + continue + } + + if selected == nil { + selected = candidate + cost = selected.Cost() + continue + } + + c := candidate.Cost() + + if len(selected.nodes) < len(candidate.nodes) || (len(selected.nodes) == len(candidate.nodes) && c < cost) { + cost = c + selected = candidate + } + } + + if selected == nil { + return s, nil + } + + // remove the filter nodes 
from the tree + for _, f := range selected.nodes { + s.Remove(f.node) + } + + // we replace the seq scan node by the selected root + s.Remove(s.First()) + for i := len(selected.replaceRootBy) - 1; i >= 0; i-- { + if s.Op == nil { + s.Op = selected.replaceRootBy[i] + continue + } + stream.InsertBefore(s.First(), selected.replaceRootBy[i]) + } + + return s, nil +} + +func (i *indexSelector) isFilterIndexable(f *stream.FilterOperator) *filterNode { + // only operators can associate this node to an index + op, ok := f.E.(expr.Operator) + if !ok { + return nil + } + + // ensure the operator is compatible + if !operatorIsIndexCompatible(op) { + return nil + } + + // determine if the operator could benefit from an index + ok, path, e := operatorCanUseIndex(op) + if !ok { + return nil + } + + node := filterNode{ + node: f, + path: path, + operator: op.Token(), + operand: e, + } + + return &node +} + +func (i *indexSelector) associatePkWithNodes(tb *database.TableInfo, nodes filterNodes) *candidate { + // TODO: add support for pk() + pk := tb.FieldConstraints.GetPrimaryKey() + + if pk == nil { + return nil + } + + n := nodes.getByPath(pk.Path) + if n == nil { + return nil + } + + ranges := getRangesFromOp(n.operator, n.operand) + + return &candidate{ + nodes: filterNodes{n}, + rangesCost: ranges.Cost(), + replaceRootBy: []stream.Operator{ + stream.PkScan(tb.TableName, ranges...), + }, + } +} + +// for a given index, select all filter nodes that match according to the following rules: +// - from left to right, associate each indexed path to a filter node and stop when there is no +// node available or the node is not compatible +// - for n associated nodes, the n - 1 first must all use the = operator, only the last one +// can be any of =, >, >=, <, <= +// - transform all associated nodes into an index range +// If not all indexed paths have an associated filter node, return whatever has been associated +// A few examples for this index: CREATE INDEX ON foo(a, b, c) +// 
+// filter(a = 3) | filter(b = 10) | filter(c > 20) +// -> range = {min: [3, 10, 20], exclusive: true} +// filter(a = 3) | filter(b > 10) | filter(c > 20) +// -> range = {min: [3, 10], exclusive: true} +// filter(a IN (1, 2)) +// -> ranges = [1], [2] +func (i *indexSelector) associateIndexWithNodes(idx *database.IndexInfo, nodes filterNodes) *candidate { + found := make([]*filterNode, 0, len(idx.Paths)) + + var hasIn bool + for _, p := range idx.Paths { + n := nodes.getByPath(p) + if n == nil { + break + } + + if n.operator == scanner.IN { + hasIn = true + } + + // in the case there is an IN operator somewhere + // we only select additional IN or = operators. + // Otherwise, any operator is accepted + if !hasIn || (n.operator == scanner.EQ || n.operator == scanner.IN) { + found = append(found, n) + } + + // we must stop at the first operator that is not an IN or a = + if n.operator != scanner.EQ && n.operator != scanner.IN { + break + } + } + + if len(found) == 0 { + return nil + } + + // in case there is an IN operator in the list, we need to generate multiple ranges. + // If not, we only need one range. 
+ var ranges stream.IndexRanges + + if !hasIn { + ranges = stream.IndexRanges{i.buildRangeFromFilterNodes(idx, found...)} + } else { + ranges = i.buildRangesFromFilterNodes(idx, found) + } + + return &candidate{ + nodes: found, + rangesCost: ranges.Cost(), + isIndex: true, + isUnique: idx.Unique, + replaceRootBy: []stream.Operator{ + stream.IndexScan(idx.IndexName, ranges...), + }, + } +} + +func (i *indexSelector) buildRangesFromFilterNodes(idx *database.IndexInfo, filters []*filterNode) stream.IndexRanges { + // build a 2 dimensional list of all expressions + // so that: filter(a IN (10, 11)) | filter(b = 20) | filter(c IN (30, 31)) + // becomes: + // [10, 11] + // [20] + // [30, 31] + + l := make([][]expr.Expr, 0, len(filters)) + + for _, f := range filters { + var row []expr.Expr + if f.operator != scanner.IN { + row = []expr.Expr{f.operand} + } else { + row = f.operand.(expr.LiteralExprList) + } + + l = append(l, row) + } + + // generate the list of combinations between each row of the list + // Example for the list above: + // 10, 20, 30 + // 10, 20, 31 + // 11, 20, 30 + // 11, 20, 31 + + var ranges stream.IndexRanges + + i.walkExpr(l, func(row []expr.Expr) { + ranges = append(ranges, i.buildRangeFromOperator(scanner.EQ, idx.Paths[:len(row)], idx, row...)) + }) + + return ranges +} + +// walkExpr calls fn with every combination made of one expression per row of l, in row order. +// The emptiness check runs before l[0] is read, so an empty list is a no-op, not a panic. +func (i *indexSelector) walkExpr(l [][]expr.Expr, fn func(row []expr.Expr)) { + if len(l) == 0 { + return + } + curLine := l[0] + if len(l) == 1 { + for _, e := range curLine { + fn([]expr.Expr{e}) + } + + return + } + + for _, e := range curLine { + i.walkExpr(l[1:], func(row []expr.Expr) { + fn(append([]expr.Expr{e}, row...)) + }) + } +} + +func (i *indexSelector) buildRangeFromFilterNodes(idx *database.IndexInfo, filters ...*filterNode) stream.IndexRange { + // first, generate a list of paths and a list of expressions + paths := make([]document.Path, 0, len(filters)) + el := make(expr.LiteralExprList, 0, len(filters)) + for i := range filters { + paths = append(paths, 
filters[i].path) + el = append(el, filters[i].operand) + } + + // use last filter node to determine the direction of the range + filter := filters[len(filters)-1] + + return i.buildRangeFromOperator(filter.operator, paths, idx, el...) +} + +func (i *indexSelector) buildRangeFromOperator(op scanner.Token, paths []document.Path, idx *database.IndexInfo, operands ...expr.Expr) stream.IndexRange { + rng := stream.IndexRange{ + Paths: paths, + IndexArity: len(idx.Paths), + } + + el := expr.LiteralExprList(operands) + + switch op { + case scanner.EQ, scanner.IN: + rng.Exact = true + rng.Min = el + case scanner.GT: + rng.Exclusive = true + rng.Min = el + case scanner.GTE: + rng.Min = el + case scanner.LT: + rng.Exclusive = true + rng.Max = el + case scanner.LTE: + rng.Max = el + } + + return rng +} + +type filterNode struct { + // associated stream node + node stream.Operator + + // the expression of the node + // has been broken into + // + // Ex: a.b[0] > 5 + 5 + // Gives: + // - path: a.b[0] + // - operator: scanner.GT + // - operand: 5 + 5 + path document.Path + operator scanner.Token + operand expr.Expr +} + +type filterNodes []*filterNode + +// getByPath returns the first filter for the given path. +// TODO(asdine): add a rule that merges filter nodes that point to the +// same path. +func (f filterNodes) getByPath(p document.Path) *filterNode { + for _, fn := range f { + if fn.path.IsEqual(p) { + return fn + } + } + + return nil +} + +type candidate struct { + // filter operators to remove and replace by either an indexScan + // or pkScan operators. + nodes filterNodes + + // replace the seqScan by these nodes + replaceRootBy []stream.Operator + + // cost of the associated ranges + rangesCost int + + // is this candidate reading from an index. + // if false, we are reading from the table + // primary key. 
+ isIndex bool + // if it's an index, does it have a unique constraint + isUnique bool +} + +func (c *candidate) Cost() int { + // we start with the cost of ranges + cost := c.rangesCost + + if c.isIndex { + cost += 20 + } + if c.isUnique { + cost -= 10 + } + + cost -= len(c.nodes) + + return cost +} + +// operatorIsIndexCompatible returns whether the operator can be used to read from an index. +func operatorIsIndexCompatible(op expr.Operator) bool { + switch op.Token() { + case scanner.EQ, scanner.GT, scanner.GTE, scanner.LT, scanner.LTE, scanner.IN: + return true + } + + return false +} + +func getRangesFromOp(op scanner.Token, e expr.Expr) stream.ValueRanges { + var ranges stream.ValueRanges + + switch op { + case scanner.EQ: + ranges = ranges.Append(stream.ValueRange{ + Min: e, + Exact: true, + }) + case scanner.GT: + ranges = ranges.Append(stream.ValueRange{ + Min: e, + Exclusive: true, + }) + case scanner.GTE: + ranges = ranges.Append(stream.ValueRange{ + Min: e, + }) + case scanner.LT: + ranges = ranges.Append(stream.ValueRange{ + Max: e, + Exclusive: true, + }) + case scanner.LTE: + ranges = ranges.Append(stream.ValueRange{ + Max: e, + }) + case scanner.IN: + // operatorCanUseIndex made sure e is a expression list. 
+ el := e.(expr.LiteralExprList) + for i := range el { + ranges = ranges.Append(stream.ValueRange{ + Min: el[i], + Exact: true, + }) + } + default: + panic(stringutil.Sprintf("unknown operator %#v", op)) + } + + return ranges +} diff --git a/internal/planner/optimizer.go b/internal/planner/optimizer.go index 6462180c5..76f070f66 100644 --- a/internal/planner/optimizer.go +++ b/internal/planner/optimizer.go @@ -8,7 +8,6 @@ import ( "github.com/genjidb/genji/internal/expr/functions" "github.com/genjidb/genji/internal/sql/scanner" "github.com/genjidb/genji/internal/stream" - "github.com/genjidb/genji/internal/stringutil" "github.com/genjidb/genji/types" ) @@ -17,7 +16,7 @@ var optimizerRules = []func(s *stream.Stream, catalog database.Catalog) (*stream RemoveUnnecessaryProjection, RemoveUnnecessaryDistinctNodeRule, RemoveUnnecessaryFilterNodesRule, - UseIndexBasedOnFilterNodeRule, + SelectIndex, PrecalculateExprRule, } @@ -395,289 +394,6 @@ func isProjectionUnique(indexes []*database.IndexInfo, po *stream.ProjectOperato return true } -type filterNode struct { - path document.Path - e expr.Expr - f *stream.FilterOperator -} - -// UseIndexBasedOnFilterNodeRule scans the tree for filter nodes whose conditions are -// operators that satisfies the following criterias: -// - is a comparison operator -// - one of its operands is a path expression that is indexed -// - the other operand is a literal value or a parameter -// -// If one or many are found, it will replace the input node by an indexInputNode using this index, -// removing the now irrelevant filter nodes. -// -// TODO(asdine): add support for ORDER BY -// TODO(jh): clarify cost code in composite indexes case -func UseIndexBasedOnFilterNodeRule(s *stream.Stream, catalog database.Catalog) (*stream.Stream, error) { - // first we lookup for the seq scan node. - // Here we will assume that at this point - // if there is one it has to be the - // first node of the stream. 
- firstNode := s.First() - if firstNode == nil { - return s, nil - } - st, ok := firstNode.(*stream.SeqScanOperator) - if !ok { - return s, nil - } - info, err := catalog.GetTableInfo(st.TableName) - if err != nil { - return nil, err - } - - var candidates []*candidate - var filterNodes []filterNode - - // then we collect all usable filter nodes, in order to see what index (or PK) can be - // used to replace them. - for n := s.Op; n != nil; n = n.GetPrev() { - if f, ok := n.(*stream.FilterOperator); ok { - if f.E == nil { - continue - } - - op, ok := f.E.(expr.Operator) - if !ok { - continue - } - - if !expr.OperatorIsIndexCompatible(op) { - continue - } - - // determine if the operator could benefit from an index - ok, path, e := operatorCanUseIndex(op) - if !ok { - continue - } - - filterNodes = append(filterNodes, filterNode{path: path, e: e, f: f}) - - // check for primary keys scan while iterating on the filter nodes - if pk := info.FieldConstraints.GetPrimaryKey(); pk != nil && pk.Path.IsEqual(path) { - // // if both types are different, don't select this scanner - // v, ok, err := operandCanUseIndex(pk.Type, pk.Path, t.Info.FieldConstraints, v) - // if err != nil { - // return nil, err - // } - - if !ok { - continue - } else { - cd := candidate{ - filterOps: []*stream.FilterOperator{f}, - isPk: true, - priority: 3, - } - - ranges, err := getRangesFromOp(op, e) - if err != nil { - return nil, err - } - - cd.newOp = stream.PkScan(st.TableName, ranges...) 
- cd.cost = ranges.Cost() - - candidates = append(candidates, &cd) - } - } - } - } - - findByPath := func(path document.Path) *filterNode { - for _, fno := range filterNodes { - if fno.path.IsEqual(path) { - return &fno - } - } - - return nil - } - - isNodeEq := func(fno *filterNode) bool { - op := fno.f.E.(expr.Operator) - return op.Token() == scanner.EQ || op.Token() == scanner.IN - } - isNodeComp := func(fno *filterNode) bool { - op := fno.f.E.(expr.Operator) - return expr.IsComparisonOperator(op) - } - - // iterate on all indexes for that table, checking for each of them if its paths are matching - // the filter nodes of the given query. The resulting nodes are ordered like the index paths. -outer: - - for _, idxName := range catalog.ListIndexes(st.TableName) { - idxInfo, err := catalog.GetIndexInfo(idxName) - if err != nil { - return nil, err - } - // order filter nodes by how the index paths order them; if absent, nil in still inserted - found := make([]*filterNode, len(idxInfo.Paths)) - for i, path := range idxInfo.Paths { - fno := findByPath(path) - - if fno != nil { - // mark this path from the index as found - found[i] = fno - } - } - - // Iterate on all the nodes for the given index, checking for each of its path, their is a corresponding node. - // It's possible for an index to be selected if not all of its paths are covered by the nodes, if and only if - // those are contiguous, relatively to the paths, i.e: - // - given idx_foo_abc(a, b, c) - // - given a query SELECT ... WHERE a = 1 AND b > 2 - // - the paths a and b are contiguous in the index definition, this index can be used - // - given a query SELECT ... WHERE a = 1 AND c > 2 - // - the paths a and c are not contiguous in the index definition, this index cannot be used for both values - // but it will be used with a and c with a normal filter node. 
- var fops []*stream.FilterOperator - var usableFilterNodes []*filterNode - contiguous := true - for i, fno := range found { - if contiguous { - if fno == nil { - contiguous = false - continue - } - - // is looking ahead at the next node possible? - if i+1 < len(found) { - // is there another node found after this one? - if found[i+1] != nil { - // current one must be an eq node then - if !isNodeEq(fno) { - continue outer - } - } else { - // the next node is the last one found, so the current one can also be a comparison and not just eq - if !isNodeComp(fno) { - continue outer - } - } - } else { - // that's the last filter node, it can be a comparison, - if !isNodeComp(fno) { - continue outer - } - } - } else { - // if on the index idx_abc(a,b,c), a is found, b isn't but c is - // then idx_abc is valid but just with a, c will use a filter node instead - continue - } - - usableFilterNodes = append(usableFilterNodes, fno) - fops = append(fops, fno.f) - } - - // no nodes for the index has been found - if found[0] == nil { - continue outer - } - - cd := candidate{ - filterOps: fops, - isIndex: true, - } - - // there are probably less values to iterate on if the index is unique - if idxInfo.Unique { - cd.priority = 2 - } else { - cd.priority = 1 - } - - ranges, err := getRangesFromFilterNodes(usableFilterNodes) - if err != nil { - return nil, err - } - - cd.newOp = stream.IndexScan(idxInfo.IndexName, ranges...) - cd.cost = ranges.Cost() - - candidates = append(candidates, &cd) - } - - // determine which index is the most interesting and replace it in the tree. - // we will assume that unique indexes are more interesting than list indexes - // because they usually have less elements. 
- var selectedCandidate *candidate - var cost int - - for i, candidate := range candidates { - currentCost := candidate.cost - - if selectedCandidate == nil { - selectedCandidate = candidates[i] - cost = currentCost - continue - } - - // With the current cost be computing on ranges, it's a bit hard to know what's best in - // between indexes. So, before looking at the cost, we look at how many filter ops would - // be replaced. - if len(selectedCandidate.filterOps) < len(candidate.filterOps) { - selectedCandidate = candidates[i] - cost = currentCost - continue - } else if len(selectedCandidate.filterOps) == len(candidate.filterOps) { - if currentCost < cost { - selectedCandidate = candidates[i] - cost = currentCost - continue - } - - // if the cost is the same and the candidate's related index has a higher priority, - // select it. - if currentCost == cost { - if selectedCandidate.priority < candidate.priority { - selectedCandidate = candidates[i] - } - } - } - } - - if selectedCandidate == nil { - return s, nil - } - - // remove the selection node from the tree - for _, f := range selectedCandidate.filterOps { - s.Remove(f) - } - - // we replace the seq scan node by the selected index scan node - stream.InsertBefore(s.First(), selectedCandidate.newOp) - - s.Remove(s.First().GetNext()) - - return s, nil -} - -type candidate struct { - // filter operators to remove and replace by either an indexScan - // or pkScan operators. 
- filterOps []*stream.FilterOperator - // the candidate indexScan or pkScan operator - newOp stream.Operator - // the cost of the candidate - cost int - // is this candidate reading from an index - isIndex bool - // is this candidate reading primary key ranges - isPk bool - // if the costs of two candidates are equal, - // this number determines which node will be prioritized - priority int -} - func operatorCanUseIndex(op expr.Operator) (bool, document.Path, expr.Expr) { lf, leftIsPath := op.LeftHand().(expr.Path) rf, rightIsPath := op.RightHand().(expr.Path) @@ -710,141 +426,3 @@ func operatorCanUseIndex(op expr.Operator) (bool, document.Path, expr.Expr) { return false, nil, nil } - -func getRangesFromFilterNodes(fnodes []*filterNode) (stream.IndexRanges, error) { - var ranges stream.IndexRanges - var el expr.LiteralExprList - // store IN operands with their position (in the index paths) as a key - inOperands := make(map[int]expr.LiteralExprList) - - for i, fno := range fnodes { - op := fno.f.E.(expr.Operator) - e := fno.e - - switch { - case op.Token() == scanner.IN: - // mark where the IN operator values are supposed to go is in the buffer - // and what are the value needed to generate the ranges. - // operatorCanUseIndex made sure v is an array. 
- inOperands[i] = e.(expr.LiteralExprList) - - // placeholder for when we'll explode the IN operands in multiple ranges - el = append(el, expr.LiteralValue{}) - case expr.IsComparisonOperator(op): - el = append(el, e) - default: - panic(stringutil.Sprintf("unknown operator %#v", op)) - } - } - - if len(inOperands) > 1 { - // TODO FEATURE https://github.com/genjidb/genji/issues/392 - panic("unsupported operation: multiple IN operators on a composite index") - } - - // a small helper func to create a range based on an operator type - buildRange := func(op expr.Operator, el expr.LiteralExprList) stream.IndexRange { - var paths []document.Path - for i := range el { - paths = append(paths, fnodes[i].path) - } - rng := stream.IndexRange{ - Paths: paths, - } - - switch op.Token() { - case scanner.EQ, scanner.IN: - rng.Exact = true - rng.Min = el - case scanner.GT: - rng.Exclusive = true - rng.Min = el - case scanner.GTE: - rng.Min = el - case scanner.LT: - rng.Exclusive = true - rng.Max = el - case scanner.LTE: - rng.Max = el - } - - return rng - } - - // explode the IN operator values in multiple ranges - for pos, operands := range inOperands { - for i := range operands { - newVB := make(expr.LiteralExprList, len(el)) - copy(newVB, el) - - // insert IN operand at the right position, replacing the placeholder value - newVB[pos] = operands[i] - - // the last node is the only one that can be a comparison operator, so - // it's the one setting the range behaviour - last := fnodes[len(fnodes)-1] - op := last.f.E.(expr.Operator) - - rng := buildRange(op, newVB) - - ranges = ranges.Append(rng) - } - } - - // Were there any IN operators requiring multiple ranges? - // If yes, we're done here. 
- if len(ranges) > 0 { - return ranges, nil - } - - // the last node is the only one that can be a comparison operator, so - // it's the one setting the range behaviour - last := fnodes[len(fnodes)-1] - op := last.f.E.(expr.Operator) - rng := buildRange(op, el) - - return stream.IndexRanges{rng}, nil -} - -func getRangesFromOp(op expr.Operator, e expr.Expr) (stream.ValueRanges, error) { - var ranges stream.ValueRanges - - switch op.Token() { - case scanner.EQ: - ranges = ranges.Append(stream.ValueRange{ - Min: e, - Exact: true, - }) - case scanner.GT: - ranges = ranges.Append(stream.ValueRange{ - Min: e, - Exclusive: true, - }) - case scanner.GTE: - ranges = ranges.Append(stream.ValueRange{ - Min: e, - }) - case scanner.LT: - ranges = ranges.Append(stream.ValueRange{ - Max: e, - Exclusive: true, - }) - case scanner.LTE: - ranges = ranges.Append(stream.ValueRange{ - Max: e, - }) - case scanner.IN: - // operatorCanUseIndex made sure e is a expression list. - el := e.(expr.LiteralExprList) - for i := range el { - ranges = ranges.Append(stream.ValueRange{ - Min: el[i], - Exact: true, - }) - } - default: - panic(stringutil.Sprintf("unknown operator %#v", op)) - } - - return ranges, nil -} diff --git a/internal/planner/optimizer_test.go b/internal/planner/optimizer_test.go index 02663340f..300d4db37 100644 --- a/internal/planner/optimizer_test.go +++ b/internal/planner/optimizer_test.go @@ -288,7 +288,7 @@ func exprList(list ...expr.Expr) expr.LiteralExprList { return expr.LiteralExprList(list) } -func TestUseIndexBasedOnSelectionNodeRule_Simple(t *testing.T) { +func TestSelectIndex_Simple(t *testing.T) { tests := []struct { name string root, expected *st.Stream @@ -437,7 +437,7 @@ func TestUseIndexBasedOnSelectionNodeRule_Simple(t *testing.T) { (3, 3, 3, 3, 3) `) - res, err := planner.UseIndexBasedOnFilterNodeRule(test.root, db.Catalog) + res, err := planner.SelectIndex(test.root, db.Catalog) require.NoError(t, err) require.Equal(t, test.expected.String(), res.String()) 
}) @@ -498,7 +498,7 @@ func TestUseIndexBasedOnSelectionNodeRule_Simple(t *testing.T) { res, err := planner.PrecalculateExprRule(test.root, db.Catalog) require.NoError(t, err) - res, err = planner.UseIndexBasedOnFilterNodeRule(res, db.Catalog) + res, err = planner.SelectIndex(res, db.Catalog) require.NoError(t, err) require.Equal(t, test.expected.String(), res.String()) }) @@ -506,7 +506,7 @@ func TestUseIndexBasedOnSelectionNodeRule_Simple(t *testing.T) { }) } -func TestUseIndexBasedOnSelectionNodeRule_Composite(t *testing.T) { +func TestSelectIndex_Composite(t *testing.T) { tests := []struct { name string root, expected *st.Stream @@ -680,9 +680,9 @@ func TestUseIndexBasedOnSelectionNodeRule_Composite(t *testing.T) { Pipe(st.Filter(parser.MustParseExpr("b = 3"))). Pipe(st.Filter(parser.MustParseExpr("c > 4"))), st.New(st.IndexScan("idx_foo_a_b_c", - st.IndexRange{Min: testutil.ExprList(t, `[1, 3, 4]`), Exclusive: true}, - st.IndexRange{Min: testutil.ExprList(t, `[2, 3, 4]`), Exclusive: true}, - )), + st.IndexRange{Min: testutil.ExprList(t, `[1, 3]`), Exact: true}, + st.IndexRange{Min: testutil.ExprList(t, `[2, 3]`), Exact: true}, + )).Pipe(st.Filter(parser.MustParseExpr("c > 4"))), }, { "FROM foo WHERE a IN [1, 2] AND b = 3 AND c < 4", @@ -696,33 +696,33 @@ func TestUseIndexBasedOnSelectionNodeRule_Composite(t *testing.T) { Pipe(st.Filter(parser.MustParseExpr("b = 3"))). Pipe(st.Filter(parser.MustParseExpr("c < 4"))), st.New(st.IndexScan("idx_foo_a_b_c", - st.IndexRange{Max: testutil.ExprList(t, `[1, 3, 4]`), Exclusive: true}, - st.IndexRange{Max: testutil.ExprList(t, `[2, 3, 4]`), Exclusive: true}, - )), + st.IndexRange{Min: testutil.ExprList(t, `[1, 3]`), Exact: true}, + st.IndexRange{Min: testutil.ExprList(t, `[2, 3]`), Exact: true}, + )).Pipe(st.Filter(parser.MustParseExpr("c < 4"))), + }, + { + "FROM foo WHERE a IN [1, 2] AND b IN [3, 4] AND c > 5", + st.New(st.SeqScan("foo")). 
+ Pipe(st.Filter( + expr.In( + parser.MustParseExpr("a"), + testutil.ExprList(t, `[1, 2]`), + ), + )). + Pipe(st.Filter( + expr.In( + parser.MustParseExpr("b"), + testutil.ExprList(t, `[3, 4]`), + ), + )). + Pipe(st.Filter(parser.MustParseExpr("c > 5"))), + st.New(st.IndexScan("idx_foo_a_b_c", + st.IndexRange{Min: testutil.ExprList(t, `[1, 3]`), Exact: true}, + st.IndexRange{Min: testutil.ExprList(t, `[1, 4]`), Exact: true}, + st.IndexRange{Min: testutil.ExprList(t, `[2, 3]`), Exact: true}, + st.IndexRange{Min: testutil.ExprList(t, `[2, 4]`), Exact: true}, + )).Pipe(st.Filter(parser.MustParseExpr("c > 5"))), }, - // { - // "FROM foo WHERE a IN [1, 2] AND b IN [3, 4] AND c > 5", - // st.New(st.SeqScan("foo")). - // Pipe(st.Filter( - // expr.In( - // parser.MustParseExpr("a"), - // testutil.ArrayValue(document.NewValueBuffer(types.NewIntegerValue(1), types.NewIntegerValue(2))), - // ), - // )). - // Pipe(st.Filter( - // expr.In( - // parser.MustParseExpr("b"), - // testutil.ArrayValue(document.NewValueBuffer(types.NewIntegerValue(3), types.NewIntegerValue(4))), - // ), - // )). - // Pipe(st.Filter(parser.MustParseExpr("c < 5"))), - // st.New(st.IndexScan("idx_foo_a_b_c", - // st.IndexRange{Max: testutil.ExprList(t, `[1, 3, 5]`), Exclusive: true}, - // st.IndexRange{Max: testutil.ExprList(t, `[2, 3, 5]`), Exclusive: true}, - // st.IndexRange{Max: testutil.ExprList(t, `[1, 4, 5]`), Exclusive: true}, - // st.IndexRange{Max: testutil.ExprList(t, `[2, 4, 5]`), Exclusive: true}, - // )), - // }, { "FROM foo WHERE 1 IN a AND d = 2", st.New(st.SeqScan("foo")). 
@@ -753,7 +753,7 @@ func TestUseIndexBasedOnSelectionNodeRule_Composite(t *testing.T) { (3, 3, 3, 3, 3) `) - res, err := planner.UseIndexBasedOnFilterNodeRule(test.root, db.Catalog) + res, err := planner.SelectIndex(test.root, db.Catalog) require.NoError(t, err) require.Equal(t, test.expected.String(), res.String()) }) @@ -805,7 +805,7 @@ func TestUseIndexBasedOnSelectionNodeRule_Composite(t *testing.T) { res, err := planner.PrecalculateExprRule(test.root, db.Catalog) require.NoError(t, err) - res, err = planner.UseIndexBasedOnFilterNodeRule(res, db.Catalog) + res, err = planner.SelectIndex(res, db.Catalog) require.NoError(t, err) require.Equal(t, test.expected.String(), res.String()) }) @@ -900,7 +900,7 @@ func TestOptimize(t *testing.T) { }) }) - t.Run("UseIndexBasedOnSelectionNodeRule", func(t *testing.T) { + t.Run("SelectIndex", func(t *testing.T) { db, tx, cleanup := testutil.NewTestTx(t) defer cleanup() testutil.MustExec(t, db, tx, `