From e572e6aec4d325fcbe168cf54dca028f172b3963 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Fri, 16 Oct 2020 00:27:08 +0300 Subject: [PATCH 01/10] feat: add SELECT DISTINCT queries support --- sql/parser/select.go | 21 ++++++++++++- sql/planner/hash_set.go | 61 +++++++++++++++++++++++++++++++++++++ sql/planner/projection.go | 22 +++++++++++-- sql/query/select_test.go | 42 +++++++++++++++++++++++++ sql/scanner/scanner_test.go | 1 + sql/scanner/token.go | 2 ++ 6 files changed, 146 insertions(+), 3 deletions(-) create mode 100644 sql/planner/hash_set.go diff --git a/sql/parser/select.go b/sql/parser/select.go index 6a760fc3b..bc11311f4 100644 --- a/sql/parser/select.go +++ b/sql/parser/select.go @@ -14,6 +14,11 @@ func (p *Parser) parseSelectStatement() (*planner.Tree, error) { var cfg selectConfig var err error + cfg.Distinct, err = p.parseDistinct() + if err != nil { + return nil, err + } + // Parse path list or query.Wildcard cfg.ProjectionExprs, err = p.parseResultFields() if err != nil { @@ -122,6 +127,15 @@ func (p *Parser) parseResultField() (planner.ProjectedField, error) { return rf, nil } +func (p *Parser) parseDistinct() (bool, error) { + if tok, _, _ := p.ScanIgnoreWhitespace(); tok != scanner.DISTINCT { + p.Unscan() + return false, nil + } + + return true, nil +} + func (p *Parser) parseFrom() (string, bool, error) { if tok, _, _ := p.ScanIgnoreWhitespace(); tok != scanner.FROM { p.Unscan() @@ -208,6 +222,7 @@ func (p *Parser) parseOffset() (expr.Expr, error) { // SelectConfig holds SELECT configuration. type selectConfig struct { TableName string + Distinct bool WhereExpr expr.Expr GroupByExpr expr.Expr OrderBy expr.Path @@ -233,7 +248,11 @@ func (cfg selectConfig) ToTree() (*planner.Tree, error) { n = planner.NewGroupingNode(n, cfg.GroupByExpr) } - n = planner.NewProjectionNode(n, cfg.ProjectionExprs, cfg.TableName) + if cfg.Distinct { + n = planner.NewDistinctProjectionNode(n, cfg.ProjectionExprs, cfg.TableName) + } else { + n = planner.NewProjectionNode(n, cfg.ProjectionExprs, cfg.TableName) + } if cfg.OrderBy != nil { n = planner.NewSortNode(n, cfg.OrderBy, cfg.OrderByDirection) diff --git a/sql/planner/hash_set.go b/sql/planner/hash_set.go new file mode 100644 index 000000000..2adbd090f --- /dev/null +++ b/sql/planner/hash_set.go @@ -0,0 +1,61 @@ +package planner + +import ( + "hash" + "hash/maphash" + + "github.com/genjidb/genji/document" + "github.com/genjidb/genji/key" +) + +type documentHashSet struct { + hash hash.Hash64 + set map[uint64]struct{} +} + +func newDocumentHashSet(hash hash.Hash64) *documentHashSet { + if hash == nil { + hash = &maphash.Hash{} + } + + return &documentHashSet{ + hash: hash, + set: map[uint64]struct{}{}, + } +} + +func (s documentHashSet) generateKey(d document.Document) (uint64, error) { + defer s.hash.Reset() + + err := d.Iterate(func(field string, value document.Value) error { + var buf []byte + buf, err := key.AppendValue(buf, value) + if err != nil { + return err + } + + _, err = s.hash.Write(buf) + if err != nil { + return err + } + + return nil + }) + + return s.hash.Sum64(), err +} + +func (s documentHashSet) Filter(d document.Document) (bool, error) { + k, err := s.generateKey(d) + if err != nil { + return false, err + } + + _, ok := s.set[k] + if ok { + return false, nil + } + + s.set[k] = struct{}{} + return true, nil +} diff --git a/sql/planner/projection.go b/sql/planner/projection.go index 77ef8741f..91e15d4b0 100644 --- a/sql/planner/projection.go +++ b/sql/planner/projection.go @@ -19,8 +19,9 @@ type ProjectionNode struct { Expressions []ProjectedField tableName string - info *database.TableInfo - tx *database.Transaction + info *database.TableInfo + tx *database.Transaction + distinct bool } var _ operationNode = (*ProjectionNode)(nil) @@ -37,6 +38,18 @@ func NewProjectionNode(n Node, expressions []ProjectedField, tableName string) N } } +func NewDistinctProjectionNode(n Node, expressions []ProjectedField, tableName string) Node { + return &ProjectionNode{ + node: node{ + op: Projection, + left: n, + }, + Expressions: expressions, + tableName: tableName, + distinct: true, + } +} + // Bind database resources to this node. func (n *ProjectionNode) Bind(tx *database.Transaction, params []expr.Param) (err error) { n.tx = tx @@ -100,6 +113,11 @@ func (n *ProjectionNode) toStream(st document.Stream) (document.Stream, error) { return &dm, nil }) + + if n.distinct { + set := newDocumentHashSet(nil) // use default hashing algorithm + st = st.Filter(set.Filter) + } } return st, nil diff --git a/sql/query/select_test.go b/sql/query/select_test.go index 993e904a7..6e14983ef 100644 --- a/sql/query/select_test.go +++ b/sql/query/select_test.go @@ -33,6 +33,8 @@ func TestSelectStmt(t *testing.T) { {"No table, wildcard", "SELECT *", true, ``, nil}, {"No table, document", "SELECT {a: 1, b: 2 + 1}", false, `[{"{a: 1, b: 2 + 1}":{"a":1,"b":3}}]`, nil}, {"No cond", "SELECT * FROM test", false, `[{"k":1,"color":"red","size":10,"shape":"square"},{"k":2,"color":"blue","size":10,"weight":100},{"k":3,"height":100,"weight":200}]`, nil}, + {"With DISTINCT", "SELECT DISTINCT * FROM test", false, `[{"k":1,"color":"red","size":10,"shape":"square"},{"k":2,"color":"blue","size":10,"weight":100},{"k":3,"height":100,"weight":200}]`, nil}, + {"With DISTINCT and expr", "SELECT DISTINCT 'a' FROM test", false, `[{"'a'":"a"}]`, nil}, {"Multiple wildcards cond", "SELECT *, *, color FROM test", false, `[{"k":1,"color":"red","size":10,"shape":"square","k":1,"color":"red","size":10,"shape":"square","color":"red"},{"k":2,"color":"blue","size":10,"weight":100,"k":2,"color":"blue","size":10,"weight":100,"color":"blue"},{"k":3,"height":100,"weight":200,"k":3,"height":100,"weight":200,"color":null}]`, nil}, {"With fields", "SELECT color, shape FROM test", false, `[{"color":"red","shape":"square"},{"color":"blue","shape":null},{"color":null,"shape":null}]`, nil}, {"With expr fields", "SELECT color, color != 'red' AS notred FROM test", false, `[{"color":"red","notred":false},{"color":"blue","notred":true},{"color":null,"notred":null}]`, nil}, @@ -222,3 +224,43 @@ func TestSelectStmt(t *testing.T) { require.JSONEq(t, `[{"foo": true},{"foo": 1}, {"foo": 2},{"foo": "hello"}]`, buf.String()) }) } + +func TestDistinct(t *testing.T) { + ctx := context.Background() + + db, err := genji.Open(":memory:") + require.NoError(t, err) + defer db.Close() + + err = db.Exec(ctx, "CREATE TABLE test(a integer)") + require.NoError(t, err) + + for i := 0; i < 100; i++ { + err = db.Exec(ctx, `INSERT INTO test VALUES {a: ?, b: ?, c: {a: ?, b: ?}}`, i%10, i, i%10, i%10+1) + require.NoError(t, err) + } + + tests := []struct { + name string + query string + expectedCount int + }{ + {`non-unique`, `SELECT DISTINCT a FROM test`, 10}, + {`non-unique-documents`, `SELECT DISTINCT c FROM test`, 10}, + {`unique`, `SELECT DISTINCT b FROM test`, 100}, + {`wildcard`, `SELECT DISTINCT * FROM test`, 100}, + {`literal`, `SELECT DISTINCT 'a' FROM test`, 1}, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + q, err := db.Query(ctx, test.query) + require.NoError(t, err) + defer q.Close() + + c, err := q.Count() + require.NoError(t, err) + require.Equal(t, test.expectedCount, c) + }) + } +} diff --git a/sql/scanner/scanner_test.go b/sql/scanner/scanner_test.go index 5e661cfe3..18eadacf1 100644 --- a/sql/scanner/scanner_test.go +++ b/sql/scanner/scanner_test.go @@ -130,6 +130,7 @@ func TestScanner_Scan(t *testing.T) { {s: `DEFAULT`, tok: scanner.DEFAULT, raw: `DEFAULT`}, {s: `DELETE`, tok: scanner.DELETE, raw: `DELETE`}, {s: `DESC`, tok: scanner.DESC, raw: `DESC`}, + {s: `DISTINCT`, tok: scanner.DISTINCT, raw: `DISTINCT`}, {s: `DROP`, tok: scanner.DROP, raw: `DROP`}, {s: `FIELD`, tok: scanner.FIELD, raw: `FIELD`}, {s: `FROM`, tok: scanner.FROM, raw: `FROM`}, diff --git a/sql/scanner/token.go b/sql/scanner/token.go index 695733d1f..dee2cac73 100644 --- a/sql/scanner/token.go +++ b/sql/scanner/token.go @@ -85,6 +85,7 @@ const ( DEFAULT DELETE DESC + DISTINCT DROP EXISTS EXPLAIN @@ -208,6 +209,7 @@ var tokens = [...]string{ DEFAULT: "DEFAULT", DELETE: "DELETE", DESC: "DESC", + DISTINCT: "DISTINCT", DROP: "DROP", EXISTS: "EXISTS", EXPLAIN: "EXPLAIN", From ee88f44904bc0b33d923c6184f0b46cd73d349b6 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Fri, 23 Oct 2020 21:53:39 +0300 Subject: [PATCH 02/10] fix: move SELECT DISTINCT query logic to dedupNode --- sql/parser/select.go | 6 +++--- sql/planner/distinct.go | 33 +++++++++++++++++++++++++++++++++ sql/planner/hash_set.go | 6 +++++- sql/planner/projection.go | 22 ++-------------------- sql/planner/tree.go | 3 +++ 5 files changed, 46 insertions(+), 24 deletions(-) create mode 100644 sql/planner/distinct.go diff --git a/sql/parser/select.go b/sql/parser/select.go index bc11311f4..149f1bb75 100644 --- a/sql/parser/select.go +++ b/sql/parser/select.go @@ -248,10 +248,10 @@ func (cfg selectConfig) ToTree() (*planner.Tree, error) { n = planner.NewGroupingNode(n, cfg.GroupByExpr) } + n = planner.NewProjectionNode(n, cfg.ProjectionExprs, cfg.TableName) + if cfg.Distinct { - n = planner.NewDistinctProjectionNode(n, cfg.ProjectionExprs, cfg.TableName) - } else { - n = planner.NewProjectionNode(n, cfg.ProjectionExprs, cfg.TableName) + n = planner.NewDistinctNode(n) } if cfg.OrderBy != nil { diff --git a/sql/planner/distinct.go b/sql/planner/distinct.go new file mode 100644 index 000000000..32f881556 --- /dev/null +++ b/sql/planner/distinct.go @@ -0,0 +1,33 @@ +package planner + +import ( + "github.com/genjidb/genji/database" + "github.com/genjidb/genji/document" + "github.com/genjidb/genji/sql/query/expr" +) + +type dedupNode struct { + node +} + +func NewDistinctNode(n Node) Node { + return &dedupNode{ + node{ + op: Dedup, + left: n, + }, + } +} + +func (n *dedupNode) Bind(tx *database.Transaction, params []expr.Param) error { + return nil +} + +func (n *dedupNode) toStream(st document.Stream) (document.Stream, error) { + set := newDocumentHashSet(nil) // use default hashing algorithm + return st.Filter(set.Filter), nil +} + +func (n *dedupNode) String() string { + return "Dedup()" +} diff --git a/sql/planner/hash_set.go b/sql/planner/hash_set.go index 2adbd090f..6485a6779 100644 --- a/sql/planner/hash_set.go +++ b/sql/planner/hash_set.go @@ -39,10 +39,14 @@ func (s documentHashSet) generateKey(d document.Document) (uint64, error) { return err } + buf = buf[0:] return nil }) + if err != nil { + return 0, err + } - return s.hash.Sum64(), err + return s.hash.Sum64(), nil } func (s documentHashSet) Filter(d document.Document) (bool, error) { diff --git a/sql/planner/projection.go b/sql/planner/projection.go index 91e15d4b0..77ef8741f 100644 --- a/sql/planner/projection.go +++ b/sql/planner/projection.go @@ -19,9 +19,8 @@ type ProjectionNode struct { Expressions []ProjectedField tableName string - info *database.TableInfo - tx *database.Transaction - distinct bool + info *database.TableInfo + tx *database.Transaction } var _ operationNode = (*ProjectionNode)(nil) @@ -38,18 +37,6 @@ func NewProjectionNode(n Node, expressions []ProjectedField, tableName string) N } } -func NewDistinctProjectionNode(n Node, expressions []ProjectedField, tableName string) Node { - return &ProjectionNode{ - node: node{ - op: Projection, - left: n, - }, - Expressions: expressions, - tableName: tableName, - distinct: true, - } -} - // Bind database resources to this node. func (n *ProjectionNode) Bind(tx *database.Transaction, params []expr.Param) (err error) { n.tx = tx @@ -113,11 +100,6 @@ func (n *ProjectionNode) toStream(st document.Stream) (document.Stream, error) { return &dm, nil }) - - if n.distinct { - set := newDocumentHashSet(nil) // use default hashing algorithm - st = st.Filter(set.Filter) - } } return st, nil diff --git a/sql/planner/tree.go b/sql/planner/tree.go index 79764f685..800998895 100644 --- a/sql/planner/tree.go +++ b/sql/planner/tree.go @@ -43,6 +43,9 @@ const ( // Unset is an operation that removes a value at a given path from every document of a stream Unset // Group is an operation that groups documents based on a given path. + _ + // Dedup + Dedup ) // A Tree describes the flow of a stream of documents. From 72d0d39e96a3f832f8ecc6f0a8d4cf813ff9f5e5 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Sat, 24 Oct 2020 00:33:06 +0300 Subject: [PATCH 03/10] test: add some SELECT DISTINCT tests --- sql/query/select_test.go | 90 ++++++++++++++++++++++++++++------------ 1 file changed, 64 insertions(+), 26 deletions(-) diff --git a/sql/query/select_test.go b/sql/query/select_test.go index 6e14983ef..6218f61f2 100644 --- a/sql/query/select_test.go +++ b/sql/query/select_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "database/sql" + "strconv" "testing" "github.com/genjidb/genji" @@ -228,39 +229,76 @@ func TestSelectStmt(t *testing.T) { func TestDistinct(t *testing.T) { ctx := context.Background() - db, err := genji.Open(":memory:") - require.NoError(t, err) - defer db.Close() - - err = db.Exec(ctx, "CREATE TABLE test(a integer)") - require.NoError(t, err) - - for i := 0; i < 100; i++ { - err = db.Exec(ctx, `INSERT INTO test VALUES {a: ?, b: ?, c: {a: ?, b: ?}}`, i%10, i, i%10, i%10+1) - require.NoError(t, err) - } - - tests := []struct { + types := []struct { name string - query string - expectedCount int + generateValue func(i, notUniqueCount int) (unique interface{}, notunique interface{}) }{ - {`non-unique`, `SELECT DISTINCT a FROM test`, 10}, - {`non-unique-documents`, `SELECT DISTINCT c FROM test`, 10}, - {`unique`, `SELECT DISTINCT b FROM test`, 100}, - {`wildcard`, `SELECT DISTINCT * FROM test`, 100}, - {`literal`, `SELECT DISTINCT 'a' FROM test`, 1}, + {`integer`, func(i, notUniqueCount int) (unique interface{}, notunique interface{}) { + return i, i % notUniqueCount + }}, + {`double`, func(i, notUniqueCount int) (unique interface{}, notunique interface{}) { + return float64(i), float64(i % notUniqueCount) + }}, + {`text`, func(i, notUniqueCount int) (unique interface{}, notunique interface{}) { + return strconv.Itoa(i), strconv.Itoa(i % notUniqueCount) + }}, + {`array`, func(i, notUniqueCount int) (unique interface{}, notunique interface{}) { + return []interface{}{i}, []interface{}{i % notUniqueCount} + }}, } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - q, err := db.Query(ctx, test.query) + for _, typ := range types { + total := 100 + notUnique := total / 10 + + t.Run(typ.name, func(t *testing.T) { + db, err := genji.Open(":memory:") + require.NoError(t, err) + defer db.Close() + + tx, err := db.Begin(true) + require.NoError(t, err) + defer tx.Rollback() + + err = tx.Exec(ctx, "CREATE TABLE test(a "+typ.name+" PRIMARY KEY, b "+typ.name+", doc DOCUMENT, nullable "+typ.name+");") require.NoError(t, err) - defer q.Close() - c, err := q.Count() + err = tx.Exec(ctx, "CREATE UNIQUE INDEX test_doc_index ON test(doc);") require.NoError(t, err) - require.Equal(t, test.expectedCount, c) + + for i := 0; i < total; i++ { + unique, nonunique := typ.generateValue(i, notUnique) + err = tx.Exec(ctx, `INSERT INTO test VALUES {a: ?, b: ?, doc: {a: ?, b: ?}, nullable: null}`, unique, nonunique, unique, nonunique) + require.NoError(t, err) + } + err = tx.Commit() + require.NoError(t, err) + + tests := []struct { + name string + query string + expectedCount int + }{ + {`unique`, `SELECT DISTINCT a FROM test`, total}, + {`non-unique`, `SELECT DISTINCT b FROM test`, notUnique}, + {`documents`, `SELECT DISTINCT doc FROM test`, total}, + {`null`, `SELECT DISTINCT nullable FROM test`, 1}, + {`wildcard`, `SELECT DISTINCT * FROM test`, total}, + {`literal`, `SELECT DISTINCT 'a' FROM test`, 1}, + {`pk()`, `SELECT DISTINCT pk() FROM test`, total}, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + q, err := db.Query(ctx, test.query) + require.NoError(t, err) + defer q.Close() + + c, err := q.Count() + require.NoError(t, err) + require.Equal(t, test.expectedCount, c) + }) + } }) } } From 5c9956d37c66c9b2919b08ffcd15cb2f495cc665 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Sat, 24 Oct 2020 00:34:38 +0300 Subject: [PATCH 04/10] feat: add RemoveUnnecessarySelectionNodesRule --- sql/planner/optimizer.go | 73 ++++++++++++++++++++++ sql/planner/optimizer_test.go | 114 ++++++++++++++++++++++++++++++++++ 2 files changed, 187 insertions(+) diff --git a/sql/planner/optimizer.go b/sql/planner/optimizer.go index 799abe695..d2de8c44b 100644 --- a/sql/planner/optimizer.go +++ b/sql/planner/optimizer.go @@ -11,6 +11,7 @@ var optimizerRules = []func(t *Tree) (*Tree, error){ SplitANDConditionRule, PrecalculateExprRule, RemoveUnnecessarySelectionNodesRule, + RemoveUnnecessaryDedupNodeRule, UseIndexBasedOnSelectionNodeRule, } @@ -248,6 +249,78 @@ func RemoveUnnecessarySelectionNodesRule(t *Tree) (*Tree, error) { return t, nil } +// RemoveUnnecessaryDedupNodeRule removes any Dedup nodes +// where projection is already unique. +func RemoveUnnecessaryDedupNodeRule(t *Tree) (*Tree, error) { + n := t.Root + var prev Node + + for n != nil { + if n.Operation() == Dedup { + d, ok := n.(*dedupNode) + if !ok { + continue + } + + pn, ok := d.left.(*ProjectionNode) + if !ok { + continue + } + + table, err := pn.tx.GetTable(pn.tableName) + if err != nil { + return nil, err + } + + indexes, err := table.Indexes() + if err != nil { + return nil, err + } + + // if the projection is unique, we remove the node from the tree + if isProjectionUnique(indexes, pn) { + if prev != nil { + prev.SetLeft(n.Left()) + } else { + t.Root = n.Left() + } + } + } + + prev = n + n = n.Left() + } + + return t, nil +} + +func isProjectionUnique(indexes map[string]database.Index, pn *ProjectionNode) bool { + pk := pn.info.GetPrimaryKey() + for _, field := range pn.Expressions { + e, ok := field.(ProjectedExpr) + if !ok { + return false + } + + switch v := e.Expr.(type) { + case expr.FieldSelector: + if pk != nil && pk.Path.IsEqual(document.ValuePath(v)) { + continue + } + + if idx, ok := indexes[v.String()]; ok && idx.Unique { + continue + } + case expr.PKFunc: + continue + } + + return false // if one field is not unique, so projection is not unique too. + } + + return true +} + // UseIndexBasedOnSelectionNodeRule scans the tree for the first selection node whose condition is an // operator that satisfies the following criterias: // - implements the indexIteratorOperator interface diff --git a/sql/planner/optimizer_test.go b/sql/planner/optimizer_test.go index b0d1001dc..71b6662ea 100644 --- a/sql/planner/optimizer_test.go +++ b/sql/planner/optimizer_test.go @@ -206,6 +206,120 @@ func TestRemoveUnnecessarySelectionNodesRule(t *testing.T) { } } +func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { + tests := []struct { + name string + root, expected planner.Node + }{ + { + "non-unique key", + planner.NewDistinctNode( + planner.NewProjectionNode( + planner.NewTableInputNode("foo"), + []planner.ProjectedField{planner.ProjectedExpr{ + Expr: expr.FieldSelector{document.ValuePathFragment{FieldName: "b"}}, + ExprName: "b", + }}, + "foo", + )), + nil, + }, + { + "primary key", + planner.NewDistinctNode( + planner.NewProjectionNode( + planner.NewTableInputNode("foo"), + []planner.ProjectedField{planner.ProjectedExpr{ + Expr: expr.FieldSelector{document.ValuePathFragment{FieldName: "a"}}, + ExprName: "a", + }}, + "foo", + )), + planner.NewProjectionNode( + planner.NewTableInputNode("foo"), + []planner.ProjectedField{planner.ProjectedExpr{ + Expr: expr.FieldSelector{document.ValuePathFragment{FieldName: "a"}}, + ExprName: "a", + }}, + "foo", + ), + }, + { + "unique index", + planner.NewDistinctNode( + planner.NewProjectionNode( + planner.NewTableInputNode("foo"), + []planner.ProjectedField{planner.ProjectedExpr{ + Expr: expr.FieldSelector{document.ValuePathFragment{FieldName: "c"}}, + ExprName: "c", + }}, + "foo", + )), + planner.NewProjectionNode( + planner.NewTableInputNode("foo"), + []planner.ProjectedField{planner.ProjectedExpr{ + Expr: expr.FieldSelector{document.ValuePathFragment{FieldName: "c"}}, + ExprName: "c", + }}, + "foo", + ), + }, + { + "pk() function", + planner.NewDistinctNode( + planner.NewProjectionNode( + planner.NewTableInputNode("foo"), + []planner.ProjectedField{planner.ProjectedExpr{ + Expr: expr.PKFunc{}, + ExprName: "pk()", + }}, + "foo", + )), + planner.NewProjectionNode( + planner.NewTableInputNode("foo"), + []planner.ProjectedField{planner.ProjectedExpr{ + Expr: expr.PKFunc{}, + ExprName: "pk()", + }}, + "foo", + ), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + db, err := genji.Open(":memory:") + require.NoError(t, err) + defer db.Close() + + tx, err := db.Begin(true) + require.NoError(t, err) + defer tx.Rollback() + + err = tx.Exec(context.Background(), ` + CREATE TABLE foo(a integer PRIMARY KEY, b integer, c integer); + CREATE UNIQUE INDEX idx_foo_idx ON foo(c); + INSERT INTO foo (a, b, c) VALUES + (1, 1, 1), + (2, 2, 2), + (3, 3, 3) + `) + require.NoError(t, err) + + err = planner.Bind(planner.NewTree(test.root), tx.Transaction, nil) + require.NoError(t, err) + + res, err := planner.RemoveUnnecessaryDedupNodeRule(planner.NewTree(test.root)) + require.NoError(t, err) + if test.expected != nil { + require.Equal(t, planner.NewTree(test.expected).String(), res.String()) + } else { + require.Equal(t, test.root, res.Root) + } + }) + } +} + func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { tests := []struct { name string From 4993f2ab854a6ead8b9ffab4f4cea9c1cb895335 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Sat, 24 Oct 2020 03:13:18 +0300 Subject: [PATCH 05/10] fix: resolve comments --- sql/parser/select.go | 2 +- sql/planner/distinct.go | 18 ++++++++++++++---- sql/planner/hash_set.go | 1 - sql/planner/optimizer.go | 12 +----------- sql/planner/optimizer_test.go | 8 ++++---- 5 files changed, 20 insertions(+), 21 deletions(-) diff --git a/sql/parser/select.go b/sql/parser/select.go index 149f1bb75..7c20fb8f7 100644 --- a/sql/parser/select.go +++ b/sql/parser/select.go @@ -251,7 +251,7 @@ func (cfg selectConfig) ToTree() (*planner.Tree, error) { n = planner.NewProjectionNode(n, cfg.ProjectionExprs, cfg.TableName) if cfg.Distinct { - n = planner.NewDistinctNode(n) + n = planner.NewDistinctNode(n, cfg.TableName) } if cfg.OrderBy != nil { diff --git a/sql/planner/distinct.go b/sql/planner/distinct.go index 32f881556..93bf144fd 100644 --- a/sql/planner/distinct.go +++ b/sql/planner/distinct.go @@ -8,19 +8,29 @@ import ( type dedupNode struct { node + + tableName string + indexes map[string]database.Index } -func NewDistinctNode(n Node) Node { +func NewDistinctNode(n Node, tableName string) Node { return &dedupNode{ - node{ + node: node{ op: Dedup, left: n, }, + tableName: tableName, } } -func (n *dedupNode) Bind(tx *database.Transaction, params []expr.Param) error { - return nil +func (n *dedupNode) Bind(tx *database.Transaction, params []expr.Param) (err error) { + table, err := tx.GetTable(n.tableName) + if err != nil { + return + } + + n.indexes, err = table.Indexes() + return } func (n *dedupNode) toStream(st document.Stream) (document.Stream, error) { diff --git a/sql/planner/hash_set.go b/sql/planner/hash_set.go index 6485a6779..bddf54e24 100644 --- a/sql/planner/hash_set.go +++ b/sql/planner/hash_set.go @@ -39,7 +39,6 @@ func (s documentHashSet) generateKey(d document.Document) (uint64, error) { return err } - buf = buf[0:] return nil }) if err != nil { diff --git a/sql/planner/optimizer.go b/sql/planner/optimizer.go index d2de8c44b..2b7c1eaaf 100644 --- a/sql/planner/optimizer.go +++ b/sql/planner/optimizer.go @@ -267,18 +267,8 @@ func RemoveUnnecessaryDedupNodeRule(t *Tree) (*Tree, error) { continue } - table, err := pn.tx.GetTable(pn.tableName) - if err != nil { - return nil, err - } - - indexes, err := table.Indexes() - if err != nil { - return nil, err - } - // if the projection is unique, we remove the node from the tree - if isProjectionUnique(indexes, pn) { + if isProjectionUnique(d.indexes, pn) { if prev != nil { prev.SetLeft(n.Left()) } else { diff --git a/sql/planner/optimizer_test.go b/sql/planner/optimizer_test.go index 71b6662ea..884f47bab 100644 --- a/sql/planner/optimizer_test.go +++ b/sql/planner/optimizer_test.go @@ -221,7 +221,7 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { ExprName: "b", }}, "foo", - )), + ), "foo"), nil, }, { @@ -234,7 +234,7 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { ExprName: "a", }}, "foo", - )), + ), "foo"), planner.NewProjectionNode( planner.NewTableInputNode("foo"), []planner.ProjectedField{planner.ProjectedExpr{ @@ -254,7 +254,7 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { ExprName: "c", }}, "foo", - )), + ), "foo"), planner.NewProjectionNode( planner.NewTableInputNode("foo"), []planner.ProjectedField{planner.ProjectedExpr{ @@ -274,7 +274,7 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { ExprName: "pk()", }}, "foo", - )), + ), "foo"), planner.NewProjectionNode( planner.NewTableInputNode("foo"), []planner.ProjectedField{planner.ProjectedExpr{ From 2323fb45b1d9616c6c06381eab5af836a82aae7d Mon Sep 17 00:00:00 2001 From: tdakkota Date: Sun, 25 Oct 2020 04:11:42 +0300 Subject: [PATCH 06/10] fix: resolve comments --- sql/parser/select.go | 2 +- sql/planner/distinct.go | 2 +- sql/planner/hash_set.go | 3 +-- sql/planner/optimizer_test.go | 8 ++++---- sql/planner/tree.go | 2 +- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/sql/parser/select.go b/sql/parser/select.go index 7c20fb8f7..3315f13a6 100644 --- a/sql/parser/select.go +++ b/sql/parser/select.go @@ -251,7 +251,7 @@ func (cfg selectConfig) ToTree() (*planner.Tree, error) { n = planner.NewProjectionNode(n, cfg.ProjectionExprs, cfg.TableName) if cfg.Distinct { - n = planner.NewDistinctNode(n, cfg.TableName) + n = planner.NewDedupNode(n, cfg.TableName) } if cfg.OrderBy != nil { diff --git a/sql/planner/distinct.go b/sql/planner/distinct.go index 93bf144fd..3d4b8b180 100644 --- a/sql/planner/distinct.go +++ b/sql/planner/distinct.go @@ -13,7 +13,7 @@ type dedupNode struct { indexes map[string]database.Index } -func NewDistinctNode(n Node, tableName string) Node { +func NewDedupNode(n Node, tableName string) Node { return &dedupNode{ node: node{ op: Dedup, diff --git a/sql/planner/hash_set.go b/sql/planner/hash_set.go index bddf54e24..a5e2a63ca 100644 --- a/sql/planner/hash_set.go +++ b/sql/planner/hash_set.go @@ -28,8 +28,7 @@ func (s documentHashSet) generateKey(d document.Document) (uint64, error) { defer s.hash.Reset() err := d.Iterate(func(field string, value document.Value) error { - var buf []byte - buf, err := key.AppendValue(buf, value) + buf, err := key.AppendValue(nil, value) if err != nil { return err } diff --git a/sql/planner/optimizer_test.go b/sql/planner/optimizer_test.go index 884f47bab..f85dd9c74 100644 --- a/sql/planner/optimizer_test.go +++ b/sql/planner/optimizer_test.go @@ -213,7 +213,7 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { }{ { "non-unique key", - planner.NewDistinctNode( + planner.NewDedupNode( planner.NewProjectionNode( planner.NewTableInputNode("foo"), []planner.ProjectedField{planner.ProjectedExpr{ @@ -226,7 +226,7 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { }, { "primary key", - planner.NewDistinctNode( + planner.NewDedupNode( planner.NewProjectionNode( planner.NewTableInputNode("foo"), []planner.ProjectedField{planner.ProjectedExpr{ @@ -246,7 +246,7 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { }, { "unique index", - planner.NewDistinctNode( + planner.NewDedupNode( planner.NewProjectionNode( planner.NewTableInputNode("foo"), []planner.ProjectedField{planner.ProjectedExpr{ @@ -266,7 +266,7 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { }, { "pk() function", - planner.NewDistinctNode( + planner.NewDedupNode( planner.NewProjectionNode( planner.NewTableInputNode("foo"), []planner.ProjectedField{planner.ProjectedExpr{ diff --git a/sql/planner/tree.go b/sql/planner/tree.go index 800998895..85705fa9e 100644 --- a/sql/planner/tree.go +++ b/sql/planner/tree.go @@ -44,7 +44,7 @@ const ( Unset // Group is an operation that groups documents based on a given path. _ - // Dedup + // Dedup is an operation that removes duplicate documents from a stream Dedup ) From 350a1450c83aa68a29b2619334f85f44b90af08f Mon Sep 17 00:00:00 2001 From: tdakkota Date: Mon, 26 Oct 2020 09:00:48 +0300 Subject: [PATCH 07/10] feat: add IterateInOrder function --- document/document.go | 28 ++++++++++++++++++++++++++++ document/document_test.go | 16 ++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/document/document.go b/document/document.go index 60ab26466..810862f4b 100644 --- a/document/document.go +++ b/document/document.go @@ -75,6 +75,34 @@ func Fields(d Document) ([]string, error) { return fields, nil } +func IterateInOrder(d Document, fn func(string, Value) error) error { + type pair struct { + field string + value Value + } + var pairs []pair + + err := d.Iterate(func(field string, value Value) error { + pairs = append(pairs, pair{field, value}) + return nil + }) + if err != nil { + return err + } + + sort.Slice(pairs, func(i, j int) bool { + return strings.Compare(pairs[i].field, pairs[j].field) == -1 + }) + + for _, p := range pairs { + err := fn(p.field, p.value) + if err != nil { + return err + } + } + return nil +} + // FieldBuffer stores a group of fields in memory. It implements the Document interface. type FieldBuffer struct { fields []fieldValue diff --git a/document/document_test.go b/document/document_test.go index fd6b098ff..6ce2a71aa 100644 --- a/document/document_test.go +++ b/document/document_test.go @@ -606,3 +606,19 @@ func BenchmarkDocumentIterate(b *testing.B) { } }) } + +func TestIterateInOrder(t *testing.T) { + fb := new(document.FieldBuffer) + + fb.Add("zyx", document.NewIntegerValue(3)) + fb.Add("abc", document.NewIntegerValue(1)) + fb.Add("cba", document.NewIntegerValue(2)) + + i := int64(0) + err := document.IterateInOrder(fb, func(_ string, value document.Value) error { + i++ + require.Equal(t, i, value.V.(int64)) + return nil + }) + require.NoError(t, err) +} From 4830ce8480f0f9f8a7a0da5f92ff44ce2482a82a Mon Sep 17 00:00:00 2001 From: tdakkota Date: Mon, 26 Oct 2020 09:02:08 +0300 Subject: [PATCH 08/10] fix: make document hash function iterating in order --- sql/planner/hash_set.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sql/planner/hash_set.go b/sql/planner/hash_set.go index a5e2a63ca..de01b1daa 100644 --- a/sql/planner/hash_set.go +++ b/sql/planner/hash_set.go @@ -27,18 +27,14 @@ func newDocumentHashSet(hash hash.Hash64) *documentHashSet { func (s documentHashSet) generateKey(d document.Document) (uint64, error) { defer s.hash.Reset() - err := d.Iterate(func(field string, value document.Value) error { + err := document.IterateInOrder(d, func(field string, value document.Value) error { buf, err := key.AppendValue(nil, value) if err != nil { return err } _, err = s.hash.Write(buf) - if err != nil { - return err - } - - return nil + return err }) if err != nil { return 0, err From 0a8c1a2c801f3b379485e62b861b01febe85a1a3 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Mon, 26 Oct 2020 09:23:22 +0300 Subject: [PATCH 09/10] rebase --- sql/planner/optimizer.go | 4 ++-- sql/planner/optimizer_test.go | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/planner/optimizer.go b/sql/planner/optimizer.go index 2b7c1eaaf..45ba776c7 100644 --- a/sql/planner/optimizer.go +++ b/sql/planner/optimizer.go @@ -293,8 +293,8 @@ func isProjectionUnique(indexes map[string]database.Index, pn *ProjectionNode) b } switch v := e.Expr.(type) { - case expr.FieldSelector: - if pk != nil && pk.Path.IsEqual(document.ValuePath(v)) { + case expr.Path: + if pk != nil && pk.Path.IsEqual(document.Path(v)) { continue } diff --git a/sql/planner/optimizer_test.go b/sql/planner/optimizer_test.go index f85dd9c74..58e909de9 100644 --- a/sql/planner/optimizer_test.go +++ b/sql/planner/optimizer_test.go @@ -217,7 +217,7 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { planner.NewProjectionNode( planner.NewTableInputNode("foo"), []planner.ProjectedField{planner.ProjectedExpr{ - Expr: expr.FieldSelector{document.ValuePathFragment{FieldName: "b"}}, + Expr: expr.Path{document.PathFragment{FieldName: "b"}}, ExprName: "b", }}, "foo", @@ -230,7 +230,7 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { planner.NewProjectionNode( planner.NewTableInputNode("foo"), []planner.ProjectedField{planner.ProjectedExpr{ - Expr: expr.FieldSelector{document.ValuePathFragment{FieldName: "a"}}, + Expr: expr.Path{document.PathFragment{FieldName: "a"}}, ExprName: "a", }}, "foo", @@ -238,7 +238,7 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { planner.NewProjectionNode( planner.NewTableInputNode("foo"), []planner.ProjectedField{planner.ProjectedExpr{ - Expr: expr.FieldSelector{document.ValuePathFragment{FieldName: "a"}}, + Expr: expr.Path{document.PathFragment{FieldName: "a"}}, ExprName: "a", }}, "foo", @@ -250,7 +250,7 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { planner.NewProjectionNode( planner.NewTableInputNode("foo"), []planner.ProjectedField{planner.ProjectedExpr{ - Expr: expr.FieldSelector{document.ValuePathFragment{FieldName: "c"}}, + Expr: expr.Path{document.PathFragment{FieldName: "c"}}, ExprName: "c", }}, "foo", @@ -258,7 +258,7 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { planner.NewProjectionNode( planner.NewTableInputNode("foo"), []planner.ProjectedField{planner.ProjectedExpr{ - Expr: expr.FieldSelector{document.ValuePathFragment{FieldName: "c"}}, + Expr: expr.Path{document.PathFragment{FieldName: "c"}}, ExprName: "c", }}, "foo", From c8f7bc239e62a8a602f842b46018e939407b9aaf Mon Sep 17 00:00:00 2001 From: tdakkota Date: Mon, 26 Oct 2020 12:20:56 +0300 Subject: [PATCH 10/10] fix: documentMask.GetByField function, remove IterateInOrder function --- document/document.go | 28 ---------------------------- document/document_test.go | 16 ---------------- sql/planner/hash_set.go | 21 +++++++++++++++------ sql/planner/projection.go | 27 ++++++++++++++++++++++++--- 4 files changed, 39 insertions(+), 53 deletions(-) diff --git a/document/document.go b/document/document.go index 810862f4b..60ab26466 100644 --- a/document/document.go +++ b/document/document.go @@ -75,34 +75,6 @@ func Fields(d Document) ([]string, error) { return fields, nil } -func IterateInOrder(d Document, fn func(string, Value) error) error { - type pair struct { - field string - value Value - } - var pairs []pair - - err := d.Iterate(func(field string, value Value) error { - pairs = append(pairs, pair{field, value}) - return nil - }) - if err != nil { - return err - } - - sort.Slice(pairs, func(i, j int) bool { - return strings.Compare(pairs[i].field, pairs[j].field) == -1 - }) - - for _, p := range pairs { - err := fn(p.field, p.value) - if err != nil { - return err - } - } - return nil -} - // FieldBuffer stores a group of fields in memory. It implements the Document interface. type FieldBuffer struct { fields []fieldValue diff --git a/document/document_test.go b/document/document_test.go index 6ce2a71aa..fd6b098ff 100644 --- a/document/document_test.go +++ b/document/document_test.go @@ -606,19 +606,3 @@ func BenchmarkDocumentIterate(b *testing.B) { } }) } - -func TestIterateInOrder(t *testing.T) { - fb := new(document.FieldBuffer) - - fb.Add("zyx", document.NewIntegerValue(3)) - fb.Add("abc", document.NewIntegerValue(1)) - fb.Add("cba", document.NewIntegerValue(2)) - - i := int64(0) - err := document.IterateInOrder(fb, func(_ string, value document.Value) error { - i++ - require.Equal(t, i, value.V.(int64)) - return nil - }) - require.NoError(t, err) -} diff --git a/sql/planner/hash_set.go b/sql/planner/hash_set.go index de01b1daa..ccf133fb1 100644 --- a/sql/planner/hash_set.go +++ b/sql/planner/hash_set.go @@ -27,17 +27,26 @@ func newDocumentHashSet(hash hash.Hash64) *documentHashSet { func (s documentHashSet) generateKey(d document.Document) (uint64, error) { defer s.hash.Reset() - err := document.IterateInOrder(d, func(field string, value document.Value) error { + fields, err := document.Fields(d) + if err != nil { + return 0, err + } + + for _, field := range fields { + value, err := d.GetByField(field) + if err != nil { + return 0, err + } + buf, err := key.AppendValue(nil, value) if err != nil { - return err + return 0, err } _, err = s.hash.Write(buf) - return err - }) - if err != nil { - return 0, err + if err != nil { + return 0, err + } } return s.hash.Sum64(), nil diff --git a/sql/planner/projection.go b/sql/planner/projection.go index 77ef8741f..ecede40aa 100644 --- a/sql/planner/projection.go +++ b/sql/planner/projection.go @@ -126,14 +126,35 @@ type documentMask struct { var _ document.Document = documentMask{} -func (r documentMask) GetByField(field string) (document.Value, error) { +func (r documentMask) GetByField(field string) (v document.Value, err error) { for _, rf := range r.resultFields { if rf.Name() == field || rf.Name() == "*" { - return r.d.GetByField(field) + v, err = r.d.GetByField(field) + if err != document.ErrFieldNotFound { + return + } + + stack := expr.EvalStack{ + Document: r.d, + Info: r.info, + } + var found bool + err = rf.Iterate(stack, func(f string, value document.Value) error { + if f == field { + v = value + found = true + } + return nil + }) + + if found || err != nil { + return + } } } - return document.Value{}, document.ErrFieldNotFound + err = document.ErrFieldNotFound + return } func (r documentMask) Iterate(fn func(field string, value document.Value) error) error {