Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SELECT DISTINCT queries support #264

Merged
merged 10 commits into from
Oct 31, 2020
28 changes: 28 additions & 0 deletions document/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,34 @@ func Fields(d Document) ([]string, error) {
return fields, nil
}

func IterateInOrder(d Document, fn func(string, Value) error) error {
tdakkota marked this conversation as resolved.
Show resolved Hide resolved
type pair struct {
field string
value Value
}
var pairs []pair

err := d.Iterate(func(field string, value Value) error {
pairs = append(pairs, pair{field, value})
return nil
})
if err != nil {
return err
}

sort.Slice(pairs, func(i, j int) bool {
return strings.Compare(pairs[i].field, pairs[j].field) == -1
})

for _, p := range pairs {
err := fn(p.field, p.value)
if err != nil {
return err
}
}
return nil
}

// FieldBuffer stores a group of fields in memory. It implements the Document interface.
type FieldBuffer struct {
fields []fieldValue
Expand Down
16 changes: 16 additions & 0 deletions document/document_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -606,3 +606,19 @@ func BenchmarkDocumentIterate(b *testing.B) {
}
})
}

// TestIterateInOrder checks that IterateInOrder visits the fields of a
// FieldBuffer sorted by name, regardless of the order in which they were added.
func TestIterateInOrder(t *testing.T) {
	fb := new(document.FieldBuffer)

	// Each value is the 1-based position its field name is expected to be
	// visited at.
	fb.Add("zyx", document.NewIntegerValue(3))
	fb.Add("abc", document.NewIntegerValue(1))
	fb.Add("cba", document.NewIntegerValue(2))

	visited := int64(0)
	err := document.IterateInOrder(fb, func(_ string, value document.Value) error {
		visited++
		require.Equal(t, visited, value.V.(int64))
		return nil
	})
	require.NoError(t, err)

	// Without this check the test would also pass if the callback never ran.
	require.Equal(t, int64(3), visited)
}
19 changes: 19 additions & 0 deletions sql/parser/select.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ func (p *Parser) parseSelectStatement() (*planner.Tree, error) {
var cfg selectConfig
var err error

cfg.Distinct, err = p.parseDistinct()
if err != nil {
return nil, err
}

// Parse path list or query.Wildcard
cfg.ProjectionExprs, err = p.parseResultFields()
if err != nil {
Expand Down Expand Up @@ -122,6 +127,15 @@ func (p *Parser) parseResultField() (planner.ProjectedField, error) {
return rf, nil
}

// parseDistinct reports whether the next non-whitespace token is DISTINCT.
// When it is, the token is consumed; otherwise the scanned token is pushed
// back with Unscan. The returned error is always nil.
func (p *Parser) parseDistinct() (bool, error) {
	tok, _, _ := p.ScanIgnoreWhitespace()
	if tok == scanner.DISTINCT {
		return true, nil
	}

	// Not a DISTINCT keyword: leave the token for the next parsing step.
	p.Unscan()
	return false, nil
}

func (p *Parser) parseFrom() (string, bool, error) {
if tok, _, _ := p.ScanIgnoreWhitespace(); tok != scanner.FROM {
p.Unscan()
Expand Down Expand Up @@ -208,6 +222,7 @@ func (p *Parser) parseOffset() (expr.Expr, error) {
// selectConfig holds SELECT configuration.
type selectConfig struct {
TableName string
Distinct bool
WhereExpr expr.Expr
GroupByExpr expr.Expr
OrderBy expr.Path
Expand Down Expand Up @@ -235,6 +250,10 @@ func (cfg selectConfig) ToTree() (*planner.Tree, error) {

n = planner.NewProjectionNode(n, cfg.ProjectionExprs, cfg.TableName)

if cfg.Distinct {
n = planner.NewDedupNode(n, cfg.TableName)
}

if cfg.OrderBy != nil {
n = planner.NewSortNode(n, cfg.OrderBy, cfg.OrderByDirection)
}
Expand Down
43 changes: 43 additions & 0 deletions sql/planner/distinct.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package planner

import (
"github.com/genjidb/genji/database"
"github.com/genjidb/genji/document"
"github.com/genjidb/genji/sql/query/expr"
)

// dedupNode is the planner node built by NewDedupNode with the Dedup
// operation. Its toStream method filters the incoming stream through a
// document hash set.
type dedupNode struct {
	node

	// tableName is the name of the table looked up by Bind.
	tableName string
	// indexes holds the result of table.Indexes() and is populated by Bind;
	// it is read by RemoveUnnecessaryDedupNodeRule.
	indexes map[string]database.Index
}

// NewDedupNode returns a node with the Dedup operation whose input (left
// child) is n. tableName is stored on the node and used later by Bind.
func NewDedupNode(n Node, tableName string) Node {
	dn := &dedupNode{tableName: tableName}
	dn.op = Dedup
	dn.left = n
	return dn
}

// Bind looks up the node's table in tx and stores the indexes reported by
// that table on the node. params is not used. The error of the table lookup
// or of the index listing, if any, is returned.
func (n *dedupNode) Bind(tx *database.Transaction, params []expr.Param) (err error) {
	table, err := tx.GetTable(n.tableName)
	if err != nil {
		return err
	}

	// The result is stored on the node even when Indexes reports an error.
	indexes, err := table.Indexes()
	n.indexes = indexes
	return err
}

// toStream returns st filtered through a fresh document hash set, whose
// Filter method accepts a document only the first time its hash is seen.
// The returned error is always nil.
func (n *dedupNode) toStream(st document.Stream) (document.Stream, error) {
	// Passing nil selects the default hashing algorithm.
	seen := newDocumentHashSet(nil)
	deduped := st.Filter(seen.Filter)
	return deduped, nil
}

// String returns the textual representation of the node, which is always
// "Dedup()".
func (n *dedupNode) String() string {
	const repr = "Dedup()"
	return repr
}
59 changes: 59 additions & 0 deletions sql/planner/hash_set.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package planner

import (
"hash"
"hash/maphash"

"github.com/genjidb/genji/document"
"github.com/genjidb/genji/key"
)

// documentHashSet remembers the 64-bit hashes of the documents passed to its
// Filter method.
type documentHashSet struct {
	// hash computes document keys; it is reset after every key generation.
	hash hash.Hash64
	// set contains the keys of the documents seen so far.
	set map[uint64]struct{}
}

// newDocumentHashSet returns an empty set that computes document keys with
// the given hash. When hash is nil, a zero-value maphash.Hash is used.
func newDocumentHashSet(hash hash.Hash64) *documentHashSet {
	s := &documentHashSet{
		hash: hash,
		set:  make(map[uint64]struct{}),
	}
	if s.hash == nil {
		s.hash = new(maphash.Hash)
	}
	return s
}

// generateKey computes the 64-bit key identifying d in the set.
//
// Fields are visited in name order (document.IterateInOrder), and for each
// field both its name and its value encoded with key.AppendValue are fed to
// the hasher. Every chunk is prefixed with its length, so documents that
// differ only in field names, or in where one value ends and the next one
// begins, do not feed identical byte streams to the hasher.
//
// It returns the first error reported while iterating, encoding or hashing.
// The hasher is reset before returning, whatever the outcome.
func (s documentHashSet) generateKey(d document.Document) (uint64, error) {
	// The hasher is shared by all calls: always leave it empty for the next one.
	defer s.hash.Reset()

	// writeChunk feeds b to the hasher, preceded by its length encoded as
	// 8 little-endian bytes.
	var size [8]byte
	writeChunk := func(b []byte) error {
		n := uint64(len(b))
		for i := range size {
			size[i] = byte(n >> (8 * uint(i)))
		}
		if _, err := s.hash.Write(size[:]); err != nil {
			return err
		}
		_, err := s.hash.Write(b)
		return err
	}

	err := document.IterateInOrder(d, func(field string, value document.Value) error {
		buf, err := key.AppendValue(nil, value)
		if err != nil {
			return err
		}

		// The field name is part of the document identity: {a: 1} and
		// {b: 1} must not produce the same key.
		if err := writeChunk([]byte(field)); err != nil {
			return err
		}
		return writeChunk(buf)
	})
	if err != nil {
		return 0, err
	}

	return s.hash.Sum64(), nil
}

// Filter reports whether d is seen for the first time. It returns true and
// records the document's key when the key is not in the set yet, and false
// when it already is. Errors from the key generation are returned with false.
//
// Only the 64-bit key is stored, so two different documents whose keys
// collide are treated as the same document.
func (s documentHashSet) Filter(d document.Document) (bool, error) {
	k, err := s.generateKey(d)
	if err != nil {
		return false, err
	}

	if _, seen := s.set[k]; seen {
		return false, nil
	}

	s.set[k] = struct{}{}
	return true, nil
}
63 changes: 63 additions & 0 deletions sql/planner/optimizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ var optimizerRules = []func(t *Tree) (*Tree, error){
SplitANDConditionRule,
PrecalculateExprRule,
RemoveUnnecessarySelectionNodesRule,
RemoveUnnecessaryDedupNodeRule,
UseIndexBasedOnSelectionNodeRule,
}

Expand Down Expand Up @@ -248,6 +249,68 @@ func RemoveUnnecessarySelectionNodesRule(t *Tree) (*Tree, error) {
return t, nil
}

// RemoveUnnecessaryDedupNodeRule removes any Dedup nodes
// where projection is already unique.
func RemoveUnnecessaryDedupNodeRule(t *Tree) (*Tree, error) {
tdakkota marked this conversation as resolved.
Show resolved Hide resolved
n := t.Root
var prev Node

for n != nil {
if n.Operation() == Dedup {
d, ok := n.(*dedupNode)
if !ok {
continue
}

pn, ok := d.left.(*ProjectionNode)
if !ok {
continue
}

// if the projection is unique, we remove the node from the tree
if isProjectionUnique(d.indexes, pn) {
if prev != nil {
prev.SetLeft(n.Left())
} else {
t.Root = n.Left()
}
}
}

prev = n
n = n.Left()
}

return t, nil
}

// isProjectionUnique reports whether every expression projected by pn is
// known to be unique. An expression qualifies when it is:
//   - a call to the PK function,
//   - a path equal to the path of the primary key reported by pn.info, or
//   - a path for which indexes holds an index marked Unique, looked up by the
//     path's string form.
//
// Any other projected field, including one that is not a ProjectedExpr,
// makes the function return false. A projection without expressions is
// reported as unique.
func isProjectionUnique(indexes map[string]database.Index, pn *ProjectionNode) bool {
	pk := pn.info.GetPrimaryKey()

	for _, field := range pn.Expressions {
		projected, ok := field.(ProjectedExpr)
		if !ok {
			return false
		}

		switch e := projected.Expr.(type) {
		case expr.Path:
			if pk != nil && pk.Path.IsEqual(document.Path(e)) {
				// The primary key path: move on to the next field.
				break
			}

			idx, found := indexes[e.String()]
			if !found || !idx.Unique {
				return false
			}
		case expr.PKFunc:
			// Accepted as unique without further checks.
		default:
			// The rule is conservative: a single expression that is not
			// known to be unique rejects the whole projection.
			return false
		}
	}

	return true
}

// UseIndexBasedOnSelectionNodeRule scans the tree for the first selection node whose condition is an
// operator that satisfies the following criteria:
// - implements the indexIteratorOperator interface
Expand Down
Loading