From d05f25397bc6205040485af6ffaec8935f531e11 Mon Sep 17 00:00:00 2001 From: srfrog Date: Sat, 5 Jan 2019 00:02:57 -0700 Subject: [PATCH 01/23] saving state --- worker/tokens.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/worker/tokens.go b/worker/tokens.go index d8c3be84c21..cc937e387ca 100644 --- a/worker/tokens.go +++ b/worker/tokens.go @@ -178,21 +178,23 @@ func getInequalityTokens(readTs uint64, attr, f string, continue } // if its lossy then we handle inequality comparison later - // on in handleCompareAttr + // on in handleCompareFunction if tokenizer.IsLossy() { out = append(out, k.Term) } else { // for non Lossy lets compare for inequality (gt & lt) // to see if key needs to be included - if f == "gt" { + switch { + case f == "gt": if bytes.Compare([]byte(k.Term), ineqTokenInBytes) > 0 { out = append(out, k.Term) } - } else if f == "lt" { + case f == "lt": if bytes.Compare([]byte(k.Term), ineqTokenInBytes) < 0 { out = append(out, k.Term) } - } else { //for le or ge or any other fn consider the key + default: + // for le or ge or any other fn consider the key out = append(out, k.Term) } } From 0043d6c329afe530976bea3a6febf7ff33666792 Mon Sep 17 00:00:00 2001 From: srfrog Date: Mon, 7 Jan 2019 20:03:59 -0700 Subject: [PATCH 02/23] added new fingerprint func using BLAKE2b --- x/hash.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 x/hash.go diff --git a/x/hash.go b/x/hash.go new file mode 100644 index 00000000000..a463fee5624 --- /dev/null +++ b/x/hash.go @@ -0,0 +1,24 @@ +/* + * Copyright 2017-2018 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package x + +import "golang.org/x/crypto/blake2b" + +func Fingerprint256(data []byte) []byte { + h := blake2b.Sum256(data) + return h[:] +} From 437ed9609a01d814e15716ef9f1e718dd2bdca37 Mon Sep 17 00:00:00 2001 From: srfrog Date: Tue, 8 Jan 2019 15:20:30 -0700 Subject: [PATCH 03/23] renamed function to Hash256 for clarity. 
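The helper added in the previous patch (and renamed in this one) is a thin wrapper over blake2b.Sum256. A minimal standalone illustration of what it computes follows; the hex printing is for demonstration only and is not part of the series:

    package main

    import (
        "encoding/hex"
        "fmt"

        "golang.org/x/crypto/blake2b"
    )

    func main() {
        // blake2b.Sum256 returns a fixed-size [32]byte array; slicing it
        // yields the 256-bit digest the helper hands back as []byte.
        h := blake2b.Sum256([]byte("srfrog"))
        fmt.Println(hex.EncodeToString(h[:])) // 64 hex characters
    }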
--- x/hash.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x/hash.go b/x/hash.go index a463fee5624..76b6a86009e 100644 --- a/x/hash.go +++ b/x/hash.go @@ -18,7 +18,7 @@ package x import "golang.org/x/crypto/blake2b" -func Fingerprint256(data []byte) []byte { +func Hash256(data []byte) []byte { h := blake2b.Sum256(data) return h[:] } From 5617e043433891230d4eb0222f5b19552b8869cd Mon Sep 17 00:00:00 2001 From: srfrog Date: Tue, 8 Jan 2019 15:20:56 -0700 Subject: [PATCH 04/23] replaced 64 fingerprint hash with Hash256 --- tok/tok.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tok/tok.go b/tok/tok.go index 5b1c9e32cdf..89d0fd13b1d 100644 --- a/tok/tok.go +++ b/tok/tok.go @@ -21,7 +21,6 @@ import ( "plugin" "time" - farm "github.com/dgryski/go-farm" "github.com/golang/glog" geom "github.com/twpayne/go-geom" @@ -344,9 +343,11 @@ func (t HashTokenizer) Tokens(v interface{}) ([]string, error) { if !ok { return nil, x.Errorf("Hash tokenizer only supported for string types") } - var hash [8]byte - binary.BigEndian.PutUint64(hash[:], farm.Hash64([]byte(term))) - return []string{string(hash[:])}, nil + hash := x.Hash256([]byte(term)) + if len(hash) == 0 { + return nil, x.Errorf("Hash tokenizer failed to create hash") + } + return []string{string(hash)}, nil } func (t HashTokenizer) Identifier() byte { return 0xB } func (t HashTokenizer) IsSortable() bool { return false } From 89eeabb2bf98be67fbbc4f1fa0f59b9ec1aff743 Mon Sep 17 00:00:00 2001 From: srfrog Date: Wed, 9 Jan 2019 20:18:53 -0700 Subject: [PATCH 05/23] pickTokenizer use hash tokenizer when list is lossy. --- worker/tokens.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/worker/tokens.go b/worker/tokens.go index cc937e387ca..9773f201217 100644 --- a/worker/tokens.go +++ b/worker/tokens.go @@ -87,18 +87,22 @@ func pickTokenizer(attr string, f string) (tok.Tokenizer, error) { tokenizers := schema.State().Tokenizer(attr) - var tokenizer tok.Tokenizer - for _, t := range tokenizers { + tokIdx := -1 + for i, t := range tokenizers { if !t.IsLossy() { - tokenizer = t + tokIdx = i break } + // prefer hash over other lossy tokenizers. + if t.Identifier() == tok.IdentHash { + tokIdx = i + } } // If function is eq and we found a tokenizer thats !Lossy(), lets return // it to avoid the second lookup. - if f == "eq" && tokenizer != nil { - return tokenizer, nil + if f == "eq" && tokIdx != -1 { + return tokenizers[tokIdx], nil } // Lets try to find a sortable tokenizer. From 4750b6486ea2e2a12651009141ee415173f57a4c Mon Sep 17 00:00:00 2001 From: srfrog Date: Wed, 9 Jan 2019 20:19:46 -0700 Subject: [PATCH 06/23] added tokenizer identifier list for enforcing tokenizer. --- tok/tok.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tok/tok.go b/tok/tok.go index 89d0fd13b1d..e3866b3e676 100644 --- a/tok/tok.go +++ b/tok/tok.go @@ -28,6 +28,27 @@ import ( "github.com/dgraph-io/dgraph/x" ) +// Tokenizer identifiers are unique and can't be reused. +// The range 0x00 - 0x79 is system reserved. +// The range 0x80 - 0xff is for custom tokenizers. +// TODO: use these everywhere where we must ensure a system tokenizer. 
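+// For example, a hypothetical plugin tokenizer would have to report an
+// identifier from the custom range:
+//
+//	func (t RuneTokenizer) Identifier() byte { return 0x81 }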
+const ( + IdentNone = 0x0 + IdentTerm = 0x1 + IdentExact = 0x2 + IdentYear = 0x4 + IdentGeo = 0x5 + IdentInt = 0x6 + IdentFloat = 0x7 + IdentFullText = 0x8 + IdentBool = 0x9 + IdentTrigram = 0xA + IdentHash = 0xB + IdentMonth = 0x41 + IdentDay = 0x42 + IdentHour = 0x43 +) + // Tokenizer defines what a tokenizer must provide. type Tokenizer interface { From 8433ccca54e3a5410dd408d6ac6f018c6416c7a3 Mon Sep 17 00:00:00 2001 From: srfrog Date: Wed, 9 Jan 2019 20:20:46 -0700 Subject: [PATCH 07/23] compare func using hash index if available and eq won't compare values --- worker/task.go | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/worker/task.go b/worker/task.go index a88b2617c78..10f033b74ad 100644 --- a/worker/task.go +++ b/worker/task.go @@ -964,12 +964,21 @@ func (qs *queryState) handleRegexFunction(ctx context.Context, arg funcArgs) err } func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) error { + span := otrace.FromContext(ctx) + stop := x.SpanTimer(span, "handleCompareFunction") + defer stop() + if span != nil { + span.Annotatef(nil, "Number of uids: %d. args.srcFn: %+v", arg.srcFn.n, arg.srcFn) + } + attr := arg.q.Attr + span.Annotatef(nil, "Attr: %s. Fname: %s", attr, arg.srcFn.fname) tokenizer, err := pickTokenizer(attr, arg.srcFn.fname) // We should already have checked this in getInequalityTokens. x.Check(err) // Only if the tokenizer that we used IsLossy, then we need to fetch // and compare the actual values. + span.Annotatef(nil, "Tokenizer: %s, Lossy: %t", tokenizer.Name(), tokenizer.IsLossy()) if tokenizer.IsLossy() { // Need to evaluate inequality for entries in the first bucket. typ, err := schema.State().TypeOf(attr) @@ -977,6 +986,17 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e return x.Errorf("Attribute not scalar: %s %v", attr, typ) } + var keyFn func(int, uint64) []byte + if tokenizer.Identifier() == tok.IdentHash { + keyFn = func(row int, _ uint64) []byte { + return x.IndexKey(attr, arg.srcFn.tokens[row]) + } + } else { + keyFn = func(_ int, uid uint64) []byte { + return x.DataKey(attr, uid) + } + } + x.AssertTrue(len(arg.out.UidMatrix) > 0) rowsToFilter := 0 if arg.srcFn.fname == eq { @@ -1000,8 +1020,9 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e algo.ApplyFilter(arg.out.UidMatrix[row], func(uid uint64, i int) bool { switch lang { case "": + // TODO: use hash index in list if isList { - pl, err := posting.GetNoStore(x.DataKey(attr, uid)) + pl, err := posting.GetNoStore(keyFn(row, uid)) if err != nil { filterErr = err return false @@ -1023,11 +1044,15 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e return false } - pl, err := posting.GetNoStore(x.DataKey(attr, uid)) + pl, err := posting.GetNoStore(keyFn(row, uid)) if err != nil { filterErr = err return false } + if arg.q.SrcFunc.Name == "eq" { + span.Annotate(nil, fmt.Sprintf("--- eq token: %d:%s", row, arg.srcFn.eqTokens[row].Value)) + return true + } sv, err := pl.Value(arg.q.ReadTs) if err != nil { if err != posting.ErrNoValue { @@ -1039,7 +1064,7 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e return err == nil && types.CompareVals(arg.q.SrcFunc.Name, dst, arg.srcFn.eqTokens[row]) case ".": - pl, err := posting.GetNoStore(x.DataKey(attr, uid)) + pl, err := posting.GetNoStore(keyFn(row, uid)) if err != nil { filterErr = err return false @@ -1058,17 +1083,24 @@ func (qs 
*queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e } return false default: - sv, err := fetchValue(uid, attr, arg.q.Langs, typ, arg.q.ReadTs) + pl, err := posting.GetNoStore(keyFn(row, uid)) if err != nil { if err != posting.ErrNoValue { filterErr = err } return false } - if sv.Value == nil { + src, err := pl.ValueFor(arg.q.ReadTs, arg.q.Langs) + if err != nil { + filterErr = err + return false + } + dst, err := types.Convert(src, typ) + if err != nil { + filterErr = err return false } - return types.CompareVals(arg.q.SrcFunc.Name, sv, arg.srcFn.eqTokens[row]) + return types.CompareVals(arg.q.SrcFunc.Name, dst, arg.srcFn.eqTokens[row]) } }) if filterErr != nil { From 7682c55450f2f92775d629ecbbf4ad6366fc976a Mon Sep 17 00:00:00 2001 From: srfrog Date: Wed, 9 Jan 2019 20:22:12 -0700 Subject: [PATCH 08/23] fixed minor comment glitches --- posting/lists.go | 2 +- x/hash.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/posting/lists.go b/posting/lists.go index 7757520275b..efa13fde8a7 100644 --- a/posting/lists.go +++ b/posting/lists.go @@ -159,7 +159,7 @@ func Cleanup() { // to lru cache and returns it. // // plist := Get(key, group) -// ... // Use plist +// ... Use plist // TODO: This should take a node id and index. And just append all indices to a list. // When doing a commit, it should update all the sync index watermarks. // worker pkg would push the indices to the watermarks held by lists. diff --git a/x/hash.go b/x/hash.go index 76b6a86009e..5b2d5243ca0 100644 --- a/x/hash.go +++ b/x/hash.go @@ -1,5 +1,5 @@ /* - * Copyright 2017-2018 Dgraph Labs, Inc. and Contributors + * Copyright 2018 Dgraph Labs, Inc. and Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 47fee98700be32debf7cb25f2eaf3724b0538b58 Mon Sep 17 00:00:00 2001 From: srfrog Date: Thu, 10 Jan 2019 07:59:34 -0700 Subject: [PATCH 09/23] use tokenizer identifier consts, change hash to non-lossy. --- tok/tok.go | 39 ++++++++++++++++++++------------------- x/hash.go | 24 ------------------------ 2 files changed, 20 insertions(+), 43 deletions(-) delete mode 100644 x/hash.go diff --git a/tok/tok.go b/tok/tok.go index e3866b3e676..9777326dbf0 100644 --- a/tok/tok.go +++ b/tok/tok.go @@ -23,6 +23,7 @@ import ( "github.com/golang/glog" geom "github.com/twpayne/go-geom" + "golang.org/x/crypto/blake2b" "github.com/dgraph-io/dgraph/types" "github.com/dgraph-io/dgraph/x" @@ -37,6 +38,9 @@ const ( IdentTerm = 0x1 IdentExact = 0x2 IdentYear = 0x4 + IdentMonth = 0x41 + IdentDay = 0x42 + IdentHour = 0x43 IdentGeo = 0x5 IdentInt = 0x6 IdentFloat = 0x7 @@ -44,9 +48,6 @@ const ( IdentBool = 0x9 IdentTrigram = 0xA IdentHash = 0xB - IdentMonth = 0x41 - IdentDay = 0x42 - IdentHour = 0x43 ) // Tokenizer defines what a tokenizer must provide. 
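The identifier bytes above are meant to be stable ("unique and can't be reused", per the comment block), so a small guard test along these lines (illustrative only; not part of the series) would catch an accidental collision when new constants are added:

    package tok

    import "testing"

    // TestIdentifiersAreUnique guards the "identifiers are unique" invariant.
    func TestIdentifiersAreUnique(t *testing.T) {
        ids := map[string]byte{
            "term": IdentTerm, "exact": IdentExact, "year": IdentYear,
            "month": IdentMonth, "day": IdentDay, "hour": IdentHour,
            "geo": IdentGeo, "int": IdentInt, "float": IdentFloat,
            "fulltext": IdentFullText, "bool": IdentBool,
            "trigram": IdentTrigram, "hash": IdentHash,
        }
        seen := make(map[byte]string)
        for name, id := range ids {
            if prev, ok := seen[id]; ok {
                t.Fatalf("identifier %#x reused by %q and %q", id, name, prev)
            }
            seen[id] = name
        }
    }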
@@ -148,7 +149,7 @@ func (t GeoTokenizer) Type() string { return "geo" } func (t GeoTokenizer) Tokens(v interface{}) ([]string, error) { return types.IndexGeoTokens(v.(geom.T)) } -func (t GeoTokenizer) Identifier() byte { return 0x5 } +func (t GeoTokenizer) Identifier() byte { return IdentGeo } func (t GeoTokenizer) IsSortable() bool { return false } func (t GeoTokenizer) IsLossy() bool { return true } @@ -159,7 +160,7 @@ func (t IntTokenizer) Type() string { return "int" } func (t IntTokenizer) Tokens(v interface{}) ([]string, error) { return []string{encodeInt(v.(int64))}, nil } -func (t IntTokenizer) Identifier() byte { return 0x6 } +func (t IntTokenizer) Identifier() byte { return IdentInt } func (t IntTokenizer) IsSortable() bool { return true } func (t IntTokenizer) IsLossy() bool { return false } @@ -170,7 +171,7 @@ func (t FloatTokenizer) Type() string { return "float" } func (t FloatTokenizer) Tokens(v interface{}) ([]string, error) { return []string{encodeInt(int64(v.(float64)))}, nil } -func (t FloatTokenizer) Identifier() byte { return 0x7 } +func (t FloatTokenizer) Identifier() byte { return IdentFloat } func (t FloatTokenizer) IsSortable() bool { return true } func (t FloatTokenizer) IsLossy() bool { return true } @@ -184,7 +185,7 @@ func (t YearTokenizer) Tokens(v interface{}) ([]string, error) { binary.BigEndian.PutUint16(buf[0:2], uint16(tval.Year())) return []string{string(buf)}, nil } -func (t YearTokenizer) Identifier() byte { return 0x4 } +func (t YearTokenizer) Identifier() byte { return IdentYear } func (t YearTokenizer) IsSortable() bool { return true } func (t YearTokenizer) IsLossy() bool { return true } @@ -199,7 +200,7 @@ func (t MonthTokenizer) Tokens(v interface{}) ([]string, error) { binary.BigEndian.PutUint16(buf[2:4], uint16(tval.Month())) return []string{string(buf)}, nil } -func (t MonthTokenizer) Identifier() byte { return 0x41 } +func (t MonthTokenizer) Identifier() byte { return IdentMonth } func (t MonthTokenizer) IsSortable() bool { return true } func (t MonthTokenizer) IsLossy() bool { return true } @@ -215,7 +216,7 @@ func (t DayTokenizer) Tokens(v interface{}) ([]string, error) { binary.BigEndian.PutUint16(buf[4:6], uint16(tval.Day())) return []string{string(buf)}, nil } -func (t DayTokenizer) Identifier() byte { return 0x42 } +func (t DayTokenizer) Identifier() byte { return IdentDay } func (t DayTokenizer) IsSortable() bool { return true } func (t DayTokenizer) IsLossy() bool { return true } @@ -232,7 +233,7 @@ func (t HourTokenizer) Tokens(v interface{}) ([]string, error) { binary.BigEndian.PutUint16(buf[6:8], uint16(tval.Hour())) return []string{string(buf)}, nil } -func (t HourTokenizer) Identifier() byte { return 0x43 } +func (t HourTokenizer) Identifier() byte { return IdentHour } func (t HourTokenizer) IsSortable() bool { return true } func (t HourTokenizer) IsLossy() bool { return true } @@ -248,7 +249,7 @@ func (t TermTokenizer) Tokens(v interface{}) ([]string, error) { tokens := termAnalyzer.Analyze([]byte(str)) return uniqueTerms(tokens), nil } -func (t TermTokenizer) Identifier() byte { return 0x1 } +func (t TermTokenizer) Identifier() byte { return IdentTerm } func (t TermTokenizer) IsSortable() bool { return false } func (t TermTokenizer) IsLossy() bool { return true } @@ -262,7 +263,7 @@ func (t ExactTokenizer) Tokens(v interface{}) ([]string, error) { } return nil, x.Errorf("Exact indices only supported for string types") } -func (t ExactTokenizer) Identifier() byte { return 0x2 } +func (t ExactTokenizer) Identifier() byte { 
return IdentExact } func (t ExactTokenizer) IsSortable() bool { return true } func (t ExactTokenizer) IsLossy() bool { return false } @@ -285,7 +286,7 @@ func (t FullTextTokenizer) Tokens(v interface{}) ([]string, error) { // finally, return the terms. return uniqueTerms(tokens), nil } -func (t FullTextTokenizer) Identifier() byte { return 0x8 } +func (t FullTextTokenizer) Identifier() byte { return IdentFullText } func (t FullTextTokenizer) IsSortable() bool { return false } func (t FullTextTokenizer) IsLossy() bool { return true } @@ -327,7 +328,7 @@ func (t BoolTokenizer) Tokens(v interface{}) ([]string, error) { } return []string{encodeInt(b)}, nil } -func (t BoolTokenizer) Identifier() byte { return 0x9 } +func (t BoolTokenizer) Identifier() byte { return IdentBool } func (t BoolTokenizer) IsSortable() bool { return false } func (t BoolTokenizer) IsLossy() bool { return false } @@ -351,7 +352,7 @@ func (t TrigramTokenizer) Tokens(v interface{}) ([]string, error) { } return nil, nil } -func (t TrigramTokenizer) Identifier() byte { return 0xA } +func (t TrigramTokenizer) Identifier() byte { return IdentTrigram } func (t TrigramTokenizer) IsSortable() bool { return false } func (t TrigramTokenizer) IsLossy() bool { return true } @@ -364,15 +365,15 @@ func (t HashTokenizer) Tokens(v interface{}) ([]string, error) { if !ok { return nil, x.Errorf("Hash tokenizer only supported for string types") } - hash := x.Hash256([]byte(term)) + hash := blake2b.Sum256([]byte(term)) if len(hash) == 0 { return nil, x.Errorf("Hash tokenizer failed to create hash") } - return []string{string(hash)}, nil + return []string{string(hash[:])}, nil } -func (t HashTokenizer) Identifier() byte { return 0xB } +func (t HashTokenizer) Identifier() byte { return IdentHash } func (t HashTokenizer) IsSortable() bool { return false } -func (t HashTokenizer) IsLossy() bool { return true } +func (t HashTokenizer) IsLossy() bool { return false } // PluginTokenizer is implemented by external plugins loaded dynamically via // *.so files. It follows the implementation semantics of the Tokenizer diff --git a/x/hash.go b/x/hash.go deleted file mode 100644 index 5b2d5243ca0..00000000000 --- a/x/hash.go +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2018 Dgraph Labs, Inc. and Contributors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package x - -import "golang.org/x/crypto/blake2b" - -func Hash256(data []byte) []byte { - h := blake2b.Sum256(data) - return h[:] -} From 4cf914bd6b2b295b93f85e178b2ef09261320782 Mon Sep 17 00:00:00 2001 From: srfrog Date: Thu, 10 Jan 2019 08:05:10 -0700 Subject: [PATCH 10/23] using non-lossy hash so no need for extra logic in handleCompareFunction --- worker/task.go | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/worker/task.go b/worker/task.go index 10f033b74ad..648cf6e0423 100644 --- a/worker/task.go +++ b/worker/task.go @@ -986,17 +986,6 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e return x.Errorf("Attribute not scalar: %s %v", attr, typ) } - var keyFn func(int, uint64) []byte - if tokenizer.Identifier() == tok.IdentHash { - keyFn = func(row int, _ uint64) []byte { - return x.IndexKey(attr, arg.srcFn.tokens[row]) - } - } else { - keyFn = func(_ int, uid uint64) []byte { - return x.DataKey(attr, uid) - } - } - x.AssertTrue(len(arg.out.UidMatrix) > 0) rowsToFilter := 0 if arg.srcFn.fname == eq { @@ -1020,9 +1009,8 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e algo.ApplyFilter(arg.out.UidMatrix[row], func(uid uint64, i int) bool { switch lang { case "": - // TODO: use hash index in list if isList { - pl, err := posting.GetNoStore(keyFn(row, uid)) + pl, err := posting.GetNoStore(x.DataKey(attr, uid)) if err != nil { filterErr = err return false @@ -1044,15 +1032,11 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e return false } - pl, err := posting.GetNoStore(keyFn(row, uid)) + pl, err := posting.GetNoStore(x.DataKey(attr, uid)) if err != nil { filterErr = err return false } - if arg.q.SrcFunc.Name == "eq" { - span.Annotate(nil, fmt.Sprintf("--- eq token: %d:%s", row, arg.srcFn.eqTokens[row].Value)) - return true - } sv, err := pl.Value(arg.q.ReadTs) if err != nil { if err != posting.ErrNoValue { @@ -1064,7 +1048,7 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e return err == nil && types.CompareVals(arg.q.SrcFunc.Name, dst, arg.srcFn.eqTokens[row]) case ".": - pl, err := posting.GetNoStore(keyFn(row, uid)) + pl, err := posting.GetNoStore(x.DataKey(attr, uid)) if err != nil { filterErr = err return false @@ -1083,24 +1067,17 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e } return false default: - pl, err := posting.GetNoStore(keyFn(row, uid)) + sv, err := fetchValue(uid, attr, arg.q.Langs, typ, arg.q.ReadTs) if err != nil { if err != posting.ErrNoValue { filterErr = err } return false } - src, err := pl.ValueFor(arg.q.ReadTs, arg.q.Langs) - if err != nil { - filterErr = err - return false - } - dst, err := types.Convert(src, typ) - if err != nil { - filterErr = err + if sv.Value == nil { return false } - return types.CompareVals(arg.q.SrcFunc.Name, dst, arg.srcFn.eqTokens[row]) + return types.CompareVals(arg.q.SrcFunc.Name, sv, arg.srcFn.eqTokens[row]) } }) if filterErr != nil { From 990c9bc58ecab6cfa8c176d59ff0cf8f4639da8a Mon Sep 17 00:00:00 2001 From: srfrog Date: Thu, 10 Jan 2019 08:16:48 -0700 Subject: [PATCH 11/23] simplify pickTokenizer and --- worker/tokens.go | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/worker/tokens.go b/worker/tokens.go index 9773f201217..8df41430713 100644 --- a/worker/tokens.go +++ b/worker/tokens.go @@ -89,29 +89,20 @@ func pickTokenizer(attr string, f string) 
(tok.Tokenizer, error) { tokIdx := -1 for i, t := range tokenizers { - if !t.IsLossy() { - tokIdx = i - break + // If function is eq and we found a tokenizer thats !Lossy(), lets return it + if f == "eq" && !t.IsLossy() { + return t, nil } - // prefer hash over other lossy tokenizers. - if t.Identifier() == tok.IdentHash { + if t.IsSortable() && tokIdx == -1 { tokIdx = i } } - // If function is eq and we found a tokenizer thats !Lossy(), lets return - // it to avoid the second lookup. - if f == "eq" && tokIdx != -1 { + // Check if we found a sortable tokenizer and return that. + if tokIdx != -1 { return tokenizers[tokIdx], nil } - // Lets try to find a sortable tokenizer. - for _, t := range tokenizers { - if t.IsSortable() { - return t, nil - } - } - // rest of the cases, ge, gt , le , lt require a sortable tokenizer. if f != "eq" { return nil, x.Errorf("Attribute:%s does not have proper index for comparison", From 2af6293bf9dced5e367978d4ff7624c078921b49 Mon Sep 17 00:00:00 2001 From: srfrog Date: Thu, 10 Jan 2019 08:24:48 -0700 Subject: [PATCH 12/23] simplify pickTokenizer --- worker/tokens.go | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/worker/tokens.go b/worker/tokens.go index 8df41430713..7c8aa6e7524 100644 --- a/worker/tokens.go +++ b/worker/tokens.go @@ -87,20 +87,20 @@ func pickTokenizer(attr string, f string) (tok.Tokenizer, error) { tokenizers := schema.State().Tokenizer(attr) - tokIdx := -1 + sortIdx := -1 for i, t := range tokenizers { // If function is eq and we found a tokenizer thats !Lossy(), lets return it if f == "eq" && !t.IsLossy() { return t, nil } - if t.IsSortable() && tokIdx == -1 { - tokIdx = i + if t.IsSortable() && sortIdx == -1 { + sortIdx = i } } // Check if we found a sortable tokenizer and return that. - if tokIdx != -1 { - return tokenizers[tokIdx], nil + if sortIdx != -1 { + return tokenizers[sortIdx], nil } // rest of the cases, ge, gt , le , lt require a sortable tokenizer. 
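The selection policy that this and the later pickTokenizer revisions (patches 17, 20 and 21) settle on is compact enough to state on its own. A self-contained sketch with simplified stand-in types, not the exact patch code:

    package main

    import "fmt"

    // tokenizer stands in for the schema tokenizer metadata that
    // pickTokenizer consults.
    type tokenizer struct {
        name     string
        lossy    bool
        sortable bool
    }

    // pick mirrors the policy: eq prefers the first non-lossy tokenizer,
    // while ge/gt/le/lt require a sortable one. eq may still fall back to
    // a lossy index; values are then compared after retrieval.
    func pick(f string, toks []tokenizer) (tokenizer, error) {
        for _, t := range toks {
            if f == "eq" && !t.lossy {
                return t, nil
            }
            if f != "eq" && t.sortable {
                return t, nil
            }
        }
        if f != "eq" || len(toks) == 0 {
            return tokenizer{}, fmt.Errorf("no usable index for %q", f)
        }
        return toks[0], nil
    }

    func main() {
        toks := []tokenizer{{name: "term", lossy: true}, {name: "hash"}}
        t, _ := pick("eq", toks)
        fmt.Println(t.name) // hash: non-lossy, so no value fetch needed
    }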
@@ -162,7 +162,8 @@ func getInequalityTokens(readTs uint64, attr, f string, itr := txn.NewIterator(itOpt) defer itr.Close() - ineqTokenInBytes := []byte(ineqToken) //used for inequality comparison below + // used for inequality comparison below + ineqTokenInBytes := []byte(ineqToken) var out []string for itr.Seek(seekKey); itr.Valid(); itr.Next() { @@ -176,22 +177,22 @@ func getInequalityTokens(readTs uint64, attr, f string, // on in handleCompareFunction if tokenizer.IsLossy() { out = append(out, k.Term) - } else { - // for non Lossy lets compare for inequality (gt & lt) - // to see if key needs to be included - switch { - case f == "gt": - if bytes.Compare([]byte(k.Term), ineqTokenInBytes) > 0 { - out = append(out, k.Term) - } - case f == "lt": - if bytes.Compare([]byte(k.Term), ineqTokenInBytes) < 0 { - out = append(out, k.Term) - } - default: - // for le or ge or any other fn consider the key + continue + } + // for non Lossy lets compare for inequality (gt & lt) + // to see if key needs to be included + switch { + case f == "gt": + if bytes.Compare([]byte(k.Term), ineqTokenInBytes) > 0 { out = append(out, k.Term) } + case f == "lt": + if bytes.Compare([]byte(k.Term), ineqTokenInBytes) < 0 { + out = append(out, k.Term) + } + default: + // for le or ge or any other fn consider the key + out = append(out, k.Term) } } return out, ineqToken, nil From c9fa41a8fe077714113553440ebd2837cbbb3ab5 Mon Sep 17 00:00:00 2001 From: srfrog Date: Thu, 10 Jan 2019 09:37:09 -0700 Subject: [PATCH 13/23] using tokenizer id --- posting/index.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/posting/index.go b/posting/index.go index f0ca5775839..bf3001b58bc 100644 --- a/posting/index.go +++ b/posting/index.go @@ -54,7 +54,8 @@ func indexTokens(attr, lang string, src types.Val) ([]string, error) { // Schema will know the mapping from attr to tokenizer. var tokens []string for _, it := range schema.State().Tokenizer(attr) { - if it.Name() == "exact" && schemaType == types.StringID && len(sv.Value.(string)) > 100 { + if it.Identifier() == tok.IdentExact && + schemaType == types.StringID && len(sv.Value.(string)) > 100 { // Exact index can only be applied for strings so we can safely try to convert Value to // string. glog.Infof("Long term for exact index on predicate: [%s]. "+ From c46b520c19d5b44cd4c3625673c5674f07a85fff Mon Sep 17 00:00:00 2001 From: srfrog Date: Thu, 10 Jan 2019 09:37:55 -0700 Subject: [PATCH 14/23] added id value for custom tokenizers, IdentCustom --- tok/tok.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tok/tok.go b/tok/tok.go index 9777326dbf0..766d04f4983 100644 --- a/tok/tok.go +++ b/tok/tok.go @@ -30,7 +30,7 @@ import ( ) // Tokenizer identifiers are unique and can't be reused. -// The range 0x00 - 0x79 is system reserved. +// The range 0x00 - 0x7f is system reserved. // The range 0x80 - 0xff is for custom tokenizers. // TODO: use these everywhere where we must ensure a system tokenizer. const ( @@ -48,6 +48,7 @@ const ( IdentBool = 0x9 IdentTrigram = 0xA IdentHash = 0xB + IdentCustom = 0x80 ) // Tokenizer defines what a tokenizer must provide. 
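With IdentCustom marking the floor of the plugin range, a custom tokenizer shipped as a *.so plugin must report an identifier at or above 0x80. A rough sketch of such a plugin; the RuneTokenizer is hypothetical, and the exported Tokenizer symbol name is an assumption based on the func() interface{} lookup in LoadCustomTokenizer:

    // Built with: go build -buildmode=plugin -o rune.so
    package main

    type RuneTokenizer struct{}

    func (RuneTokenizer) Name() string { return "rune" }
    func (RuneTokenizer) Type() string { return "string" }
    func (RuneTokenizer) Tokens(value interface{}) ([]string, error) {
        // One token per rune; a toy scheme, just to have something lossy.
        var toks []string
        for _, r := range value.(string) {
            toks = append(toks, string(r))
        }
        return toks, nil
    }
    func (RuneTokenizer) Identifier() byte { return 0x81 } // >= IdentCustom
    func (RuneTokenizer) IsSortable() bool { return true }
    func (RuneTokenizer) IsLossy() bool    { return true }

    // Tokenizer is the symbol the loader resolves from the shared object.
    func Tokenizer() interface{} { return RuneTokenizer{} }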
@@ -123,7 +124,7 @@ func LoadCustomTokenizer(soFile string) { tokenizer := symb.(func() interface{})().(PluginTokenizer) id := tokenizer.Identifier() - x.AssertTruef(id >= 0x80, + x.AssertTruef(id < IdentCustom, "custom tokenizer identifier byte must be >= 0x80, but was %#x", id) registerTokenizer(CustomTokenizer{PluginTokenizer: tokenizer}) } From a6d461c6685a27a9e386b116ff3bb86679b9297a Mon Sep 17 00:00:00 2001 From: srfrog Date: Thu, 10 Jan 2019 09:54:23 -0700 Subject: [PATCH 15/23] using tokenizer ids when possible fixed bug in getInequalityTokens with fulltext indexes. --- worker/tokens.go | 66 +++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 37 deletions(-) diff --git a/worker/tokens.go b/worker/tokens.go index 7c8aa6e7524..824a0da9aaf 100644 --- a/worker/tokens.go +++ b/worker/tokens.go @@ -17,8 +17,6 @@ package worker import ( - "strings" - "github.com/dgraph-io/badger" "bytes" @@ -30,35 +28,32 @@ import ( ) func verifyStringIndex(attr string, funcType FuncType) (string, bool) { - var requiredTokenizer string - switch funcType { - case FullTextSearchFn: - requiredTokenizer = tok.FullTextTokenizer{}.Name() - default: - requiredTokenizer = tok.TermTokenizer{}.Name() + var requiredTokenizer tok.Tokenizer + if funcType == FullTextSearchFn { + requiredTokenizer = tok.FullTextTokenizer{} + } else { + requiredTokenizer = tok.TermTokenizer{} } if !schema.State().IsIndexed(attr) { - return requiredTokenizer, false + return requiredTokenizer.Name(), false } - tokenizers := schema.State().Tokenizer(attr) - for _, tokenizer := range tokenizers { - // check for prefix, in case of explicit usage of language specific full text tokenizer - if strings.HasPrefix(tokenizer.Name(), requiredTokenizer) { - return requiredTokenizer, true + id := requiredTokenizer.Identifier() + for _, t := range schema.State().Tokenizer(attr) { + if t.Identifier() == id { + return requiredTokenizer.Name(), true } } - - return requiredTokenizer, false + return requiredTokenizer.Name(), false } func verifyCustomIndex(attr string, tokenizerName string) bool { if !schema.State().IsIndexed(attr) { return false } - for _, tn := range schema.State().TokenizerNames(attr) { - if tn == tokenizerName { + for _, t := range schema.State().Tokenizer(attr) { + if t.Identifier() >= tok.IdentCustom && t.Name() == tokenizerName { return true } } @@ -71,12 +66,10 @@ func getStringTokens(funcArgs []string, lang string, funcType FuncType) ([]strin if lang == "." { lang = "en" } - switch funcType { - case FullTextSearchFn: + if funcType == FullTextSearchFn { return tok.GetFullTextTokens(funcArgs, lang) - default: - return tok.GetTermTokens(funcArgs) } + return tok.GetTermTokens(funcArgs) } func pickTokenizer(attr string, f string) (tok.Tokenizer, error) { @@ -85,22 +78,21 @@ func pickTokenizer(attr string, f string) (tok.Tokenizer, error) { return nil, x.Errorf("Attribute %s is not indexed.", attr) } + sortableIdx := -1 tokenizers := schema.State().Tokenizer(attr) - - sortIdx := -1 for i, t := range tokenizers { // If function is eq and we found a tokenizer thats !Lossy(), lets return it if f == "eq" && !t.IsLossy() { return t, nil } - if t.IsSortable() && sortIdx == -1 { - sortIdx = i + if t.IsSortable() && sortableIdx == -1 { + sortableIdx = i } } // Check if we found a sortable tokenizer and return that. - if sortIdx != -1 { - return tokenizers[sortIdx], nil + if sortableIdx != -1 { + return tokenizers[sortableIdx], nil } // rest of the cases, ge, gt , le , lt require a sortable tokenizer. 
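The verifyStringIndex rewrite above is what fixes the fulltext bug named in the commit message: a language-specific fulltext tokenizer registers under its own name (hence the old prefix match), but every variant reports the same identifier byte, so comparing identifiers is both simpler and exact. Roughly (the exact name spellings are assumptions):

    // Name varies with the language; Identifier() does not:
    //   "fulltext", "fulltext:en", "fulltext:it"  ->  IdentFullText (0x8)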
@@ -134,7 +126,8 @@ func getInequalityTokens(readTs uint64, attr, f string, return nil, "", nil // Allow eq with term/fulltext tokenizers, even though they give multiple tokens. - case f == "eq" && (tokenizer.Name() == "term" || tokenizer.Name() == "fulltext"): + case f == "eq" && + (tokenizer.Identifier() == tok.IdentTerm || tokenizer.Identifier() == tok.IdentFullText): break case len(ineqTokens) > 1: @@ -170,18 +163,17 @@ func getInequalityTokens(readTs uint64, attr, f string, item := itr.Item() key := item.Key() k := x.Parse(key) - if k == nil { - continue - } + + switch { + case k == nil: + // if its lossy then we handle inequality comparison later - // on in handleCompareFunction - if tokenizer.IsLossy() { + // in handleCompareFunction + case tokenizer.IsLossy(): out = append(out, k.Term) - continue - } + // for non Lossy lets compare for inequality (gt & lt) // to see if key needs to be included - switch { case f == "gt": if bytes.Compare([]byte(k.Term), ineqTokenInBytes) > 0 { out = append(out, k.Term) From 2d51a3da9c291559021289416880f0fde9245f5b Mon Sep 17 00:00:00 2001 From: srfrog Date: Thu, 10 Jan 2019 22:26:00 -0700 Subject: [PATCH 16/23] added hash index tests --- systest/queries_test.go | 114 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/systest/queries_test.go b/systest/queries_test.go index e3d647b1467..2cc9f652b21 100644 --- a/systest/queries_test.go +++ b/systest/queries_test.go @@ -46,6 +46,7 @@ func TestQuery(t *testing.T) { t.Run("schema predicate names", wrap(SchemaQueryTestPredicate1)) t.Run("schema specific predicate fields", wrap(SchemaQueryTestPredicate2)) t.Run("schema specific predicate field", wrap(SchemaQueryTestPredicate3)) + t.Run("hash index queries", wrap(QueryHashIndex)) t.Run("cleanup", wrap(SchemaQueryCleanup)) } @@ -316,3 +317,116 @@ func SchemaQueryTestHTTP(t *testing.T, c *dgo.Dgraph) { }` CompareJSON(t, js, string(m["data"])) } + +func QueryHashIndex(t *testing.T, c *dgo.Dgraph) { + ctx := context.Background() + + require.NoError(t, c.Alter(ctx, &api.Operation{ + Schema: ` + name: string @index(hash) @lang . + `, + })) + + txn := c.NewTxn() + _, err := txn.Mutate(ctx, &api.Mutation{ + SetNquads: []byte(` + _:p0 "" . + _:p1 "0" . + _:p2 "srfrog" . + _:p3 "Lorem ipsum" . + _:p4 "Lorem ipsum dolor sit amet" . + _:p5 "Lorem ipsum dolor sit amet, consectetur adipiscing elit" . + _:p6 "Lorem ipsum"@en . + _:p7 "Lorem ipsum dolor sit amet"@en . + _:p8 "Lorem ipsum dolor sit amet, consectetur adipiscing elit"@en . + _:p9 "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed varius tellus ut sem bibendum, eu tristique augue congue. Praesent eget odio tincidunt, pellentesque ante sit amet, tempus sem. Donec et tellus et diam facilisis egestas ut ac risus. Proin feugiat risus tristique erat condimentum placerat. Nulla eget ligula tempus, blandit leo vel, accumsan tortor. Phasellus et felis in diam ultricies porta nec in ipsum. Phasellus id leo sagittis, bibendum enim ut, pretium lectus. Quisque ac ex viverra, suscipit turpis sed, scelerisque metus. Sed non dui facilisis, viverra leo eget, vulputate erat. Etiam nec enim sed nisi imperdiet cursus. Suspendisse sed ligula non nisi pharetra varius." . + _:pa ""@fr . 
+ `), + }) + require.NoError(t, err) + require.NoError(t, txn.Commit(ctx)) + + tests := []struct { + in, out string + }{ + { + in: `schema(pred: [name]) {}`, + out: ` + { + "schema": [ + { + "index": true, + "lang": true, + "predicate": "name", + "tokenizer": [ + "hash" + ], + "type": "string" + } + ] + }`, + }, + { + in: `{q(func:eq(name,"")){name}}`, + out: `{"q": [{"name":""}]}`, + }, + { + in: `{q(func:eq(name,"0")){name}}`, + out: `{"q": [{"name":"0"}]}`, + }, + { + in: `{q(func:eq(name,"srfrog")){name}}`, + out: `{"q": [{"name":"srfrog"}]}`, + }, + { + in: `{q(func:eq(name,"Lorem ipsum")){name}}`, + out: `{"q": [{"name":"Lorem ipsum"}]}`, + }, + { + in: `{q(func:eq(name,"Lorem ipsum dolor sit amet")){name}}`, + out: `{"q": [{"name":"Lorem ipsum dolor sit amet"}]}`, + }, + { + in: `{q(func:eq(name@en,"Lorem ipsum")){name@en}}`, + out: `{"q": [{"name@en":"Lorem ipsum"}]}`, + }, + { + in: `{q(func:eq(name@.,"Lorem ipsum dolor sit amet")){name@en}}`, + out: `{"q": [{"name@en":"Lorem ipsum dolor sit amet"}]}`, + }, + { + in: `{q(func:eq(name,["srfrog"])){name}}`, + out: `{"q": [{"name":"srfrog"}]}`, + }, + { + in: `{q(func:eq(name,["srfrog","srf","srfrogg","sr","s"])){name}}`, + out: `{"q": [{"name":"srfrog"}]}`, + }, + { + in: `{q(func:eq(name,["Lorem ipsum","Lorem ipsum dolor sit amet, consectetur adipiscing elit",""])){name}}`, + out: `{"q": [{"name":""},{"name":"Lorem ipsum"},{"name":"Lorem ipsum dolor sit amet, consectetur adipiscing elit"}]}`, + }, + { + in: `{q(func:eq(name,["Lorem ipsum","Lorem ipsum","Lorem ipsum","Lorem ipsum","Lorem ipsum"])){name}}`, + out: `{"q": [{"name":"Lorem ipsum"}]}`, + }, + { + in: `{q(func:eq(name@en,["Lorem ipsum","Lorem ipsum dolor sit amet, consectetur adipiscing elit",""])){name@en}}`, + out: `{"q": [{"name@en":"Lorem ipsum"},{"name@en":"Lorem ipsum dolor sit amet, consectetur adipiscing elit"}]}`, + }, + { + in: `{q(func:eq(name@en,["Lorem ipsum","Lorem ipsum","Lorem ipsum","Lorem ipsum","Lorem ipsum"])){name@en}}`, + out: `{"q": [{"name@en":"Lorem ipsum"}]}`, + }, + { + in: `{q(func:eq(name@.,"")){name@fr}}`, + out: `{"q": [{"name@fr":""}]}`, + }, + } + + for _, tc := range tests { + resp, err := c.NewTxn().Query(ctx, tc.in) + require.NoError(t, err) + CompareJSON(t, tc.out, string(resp.Json)) + } +} From 16e8a9f4a929d942dcce7c3a0b960ecf82a24787 Mon Sep 17 00:00:00 2001 From: Manish R Jain Date: Mon, 14 Jan 2019 13:44:07 -0800 Subject: [PATCH 17/23] Manish's review. Fixed a new bug introduced by this PR during IdentCustom comparison. Simplify pickTokenizer. Added comments. --- tok/tok.go | 13 ++++++++++--- worker/tokens.go | 26 +++++++++++++------------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/tok/tok.go b/tok/tok.go index 766d04f4983..f35c63f679b 100644 --- a/tok/tok.go +++ b/tok/tok.go @@ -124,8 +124,8 @@ func LoadCustomTokenizer(soFile string) { tokenizer := symb.(func() interface{})().(PluginTokenizer) id := tokenizer.Identifier() - x.AssertTruef(id < IdentCustom, - "custom tokenizer identifier byte must be >= 0x80, but was %#x", id) + x.AssertTruef(id >= IdentCustom, + "custom tokenizer identifier byte must be >= %#x, but was %#x", IdentCustom, id) registerTokenizer(CustomTokenizer{PluginTokenizer: tokenizer}) } @@ -366,6 +366,8 @@ func (t HashTokenizer) Tokens(v interface{}) ([]string, error) { if !ok { return nil, x.Errorf("Hash tokenizer only supported for string types") } + // Blake2 is a hash function equivalent of SHA series, but faster. 
SHA is the best hash function + // for doing checksum of content, because they have low collision ratios. See issue #2776. hash := blake2b.Sum256([]byte(term)) if len(hash) == 0 { return nil, x.Errorf("Hash tokenizer failed to create hash") @@ -374,7 +376,12 @@ func (t HashTokenizer) Tokens(v interface{}) ([]string, error) { } func (t HashTokenizer) Identifier() byte { return IdentHash } func (t HashTokenizer) IsSortable() bool { return false } -func (t HashTokenizer) IsLossy() bool { return false } + +// We have switched HashTokenizer to be non-lossy. This allows us to avoid having to retrieve values +// for the returned results, and compare them against the value in the query, which is slow. There +// is very low probability of collisions with a 256-bit hash. We use that fact to speed up equality +// query operations using the hash index. +func (t HashTokenizer) IsLossy() bool { return false } // PluginTokenizer is implemented by external plugins loaded dynamically via // *.so files. It follows the implementation semantics of the Tokenizer diff --git a/worker/tokens.go b/worker/tokens.go index 824a0da9aaf..1c16cffdf4d 100644 --- a/worker/tokens.go +++ b/worker/tokens.go @@ -78,24 +78,24 @@ func pickTokenizer(attr string, f string) (tok.Tokenizer, error) { return nil, x.Errorf("Attribute %s is not indexed.", attr) } - sortableIdx := -1 tokenizers := schema.State().Tokenizer(attr) - for i, t := range tokenizers { + for _, t := range tokenizers { // If function is eq and we found a tokenizer thats !Lossy(), lets return it - if f == "eq" && !t.IsLossy() { - return t, nil - } - if t.IsSortable() && sortableIdx == -1 { - sortableIdx = i + switch f { + case "eq": + // For equality, find a non-lossy tokenizer. + if !t.IsLossy() { + return t, nil + } + default: + // rest of the cases: ge, gt, le, lt require a sortable tokenizer. + if t.IsSortable() { + return t, nil + } } } - // Check if we found a sortable tokenizer and return that. - if sortableIdx != -1 { - return tokenizers[sortableIdx], nil - } - - // rest of the cases, ge, gt , le , lt require a sortable tokenizer. + // TODO: Should we return an error if we don't find a non-lossy tokenizer for eq function. if f != "eq" { return nil, x.Errorf("Attribute:%s does not have proper index for comparison", attr) From 0f164a2b7ce69efba60e711816cc75ca74301a40 Mon Sep 17 00:00:00 2001 From: Manish R Jain Date: Mon, 14 Jan 2019 13:46:13 -0800 Subject: [PATCH 18/23] Remove Long term for exact index warning. --- posting/index.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/posting/index.go b/posting/index.go index b961a8393aa..38e5c6e77b9 100644 --- a/posting/index.go +++ b/posting/index.go @@ -55,13 +55,6 @@ func indexTokens(attr, lang string, src types.Val) ([]string, error) { // Schema will know the mapping from attr to tokenizer. var tokens []string for _, it := range schema.State().Tokenizer(attr) { - if it.Identifier() == tok.IdentExact && - schemaType == types.StringID && len(sv.Value.(string)) > 100 { - // Exact index can only be applied for strings so we can safely try to convert Value to - // string. - glog.Infof("Long term for exact index on predicate: [%s]. 
"+ - "Consider switching to hash for better performance.\n", attr) - } toks, err := tok.BuildTokens(sv.Value, tok.GetLangTokenizer(it, lang)) if err != nil { return tokens, err From 3db5ba5420330a9754bdd2f27fe9f629c4fff393 Mon Sep 17 00:00:00 2001 From: srfrog Date: Mon, 14 Jan 2019 15:05:42 -0700 Subject: [PATCH 19/23] fixed logic --- tok/tok.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tok/tok.go b/tok/tok.go index f35c63f679b..b937fd014f8 100644 --- a/tok/tok.go +++ b/tok/tok.go @@ -124,8 +124,8 @@ func LoadCustomTokenizer(soFile string) { tokenizer := symb.(func() interface{})().(PluginTokenizer) id := tokenizer.Identifier() - x.AssertTruef(id >= IdentCustom, - "custom tokenizer identifier byte must be >= %#x, but was %#x", IdentCustom, id) + x.AssertTruef(id < IdentCustom, + "custom tokenizer identifier byte must be >= 0x80, but was %#x", id) registerTokenizer(CustomTokenizer{PluginTokenizer: tokenizer}) } From 5658ded1e3d26bb0b3985e72ed893a1f61241cc8 Mon Sep 17 00:00:00 2001 From: srfrog Date: Mon, 14 Jan 2019 15:09:48 -0700 Subject: [PATCH 20/23] pickTokenizer return error when comparison func doesn't have non-lossy (eq) or sortable (le, ge, gt, lt) index --- worker/tokens.go | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/worker/tokens.go b/worker/tokens.go index 1c16cffdf4d..885f18cc7d8 100644 --- a/worker/tokens.go +++ b/worker/tokens.go @@ -95,14 +95,7 @@ func pickTokenizer(attr string, f string) (tok.Tokenizer, error) { } } - // TODO: Should we return an error if we don't find a non-lossy tokenizer for eq function. - if f != "eq" { - return nil, x.Errorf("Attribute:%s does not have proper index for comparison", - attr) - } - - // We didn't find a sortable or !isLossy() tokenizer, lets return the first one. - return tokenizers[0], nil + return nil, x.Errorf("Attribute:%s does not have proper index for comparison", attr) } // getInequalityTokens gets tokens ge / le compared to given token using the first sortable From 8b914de77797e8d1ea81d4426441de857f9c07f0 Mon Sep 17 00:00:00 2001 From: srfrog Date: Mon, 14 Jan 2019 16:07:11 -0700 Subject: [PATCH 21/23] added warning for eq comparison without non-lossy tokenizer --- worker/tokens.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/worker/tokens.go b/worker/tokens.go index 885f18cc7d8..8b6c11c5f32 100644 --- a/worker/tokens.go +++ b/worker/tokens.go @@ -18,6 +18,7 @@ package worker import ( "github.com/dgraph-io/badger" + "github.com/golang/glog" "bytes" @@ -95,7 +96,14 @@ func pickTokenizer(attr string, f string) (tok.Tokenizer, error) { } } - return nil, x.Errorf("Attribute:%s does not have proper index for comparison", attr) + // Should we return an error if we don't find a non-lossy tokenizer for eq function. + if f != "eq" { + return nil, x.Errorf("Attribute:%s does not have proper index for comparison", attr) + } + glog.Infof("Attribute:%s couldn't find a non-lossy tokenizer for 'eq' comparison", attr) + + // We didn't find a sortable or !isLossy() tokenizer, lets return the first one. 
+ return tokenizers[0], nil } // getInequalityTokens gets tokens ge / le compared to given token using the first sortable From 41a3d4d3494b70f392d5c45e141ca7492a995de5 Mon Sep 17 00:00:00 2001 From: srfrog Date: Mon, 14 Jan 2019 18:50:53 -0700 Subject: [PATCH 22/23] re-fixed this slippery lil bug --- tok/tok.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tok/tok.go b/tok/tok.go index b937fd014f8..f3c1546d043 100644 --- a/tok/tok.go +++ b/tok/tok.go @@ -124,7 +124,7 @@ func LoadCustomTokenizer(soFile string) { tokenizer := symb.(func() interface{})().(PluginTokenizer) id := tokenizer.Identifier() - x.AssertTruef(id < IdentCustom, + x.AssertTruef(id >= IdentCustom, "custom tokenizer identifier byte must be >= 0x80, but was %#x", id) registerTokenizer(CustomTokenizer{PluginTokenizer: tokenizer}) } From 3bf1b92bfe856b44f91f50d147b37499bd727577 Mon Sep 17 00:00:00 2001 From: srfrog Date: Tue, 15 Jan 2019 20:03:07 -0700 Subject: [PATCH 23/23] removed extra glog --- worker/tokens.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/worker/tokens.go b/worker/tokens.go index 8b6c11c5f32..c787503384e 100644 --- a/worker/tokens.go +++ b/worker/tokens.go @@ -18,7 +18,6 @@ package worker import ( "github.com/dgraph-io/badger" - "github.com/golang/glog" "bytes" @@ -100,7 +99,6 @@ func pickTokenizer(attr string, f string) (tok.Tokenizer, error) { if f != "eq" { return nil, x.Errorf("Attribute:%s does not have proper index for comparison", attr) } - glog.Infof("Attribute:%s couldn't find a non-lossy tokenizer for 'eq' comparison", attr) // We didn't find a sortable or !isLossy() tokenizer, lets return the first one. return tokenizers[0], nil
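The end state of the series is exercised by the systest in patch 16; the same behavior can be reproduced against a running Alpha with a few lines of dgo. A sketch, with the endpoint assumed and error handling kept minimal:

    package main

    import (
        "context"
        "fmt"
        "log"

        "github.com/dgraph-io/dgo"
        "github.com/dgraph-io/dgo/protos/api"
        "google.golang.org/grpc"
    )

    func main() {
        conn, err := grpc.Dial("localhost:9080", grpc.WithInsecure())
        if err != nil {
            log.Fatal(err)
        }
        defer conn.Close()
        dg := dgo.NewDgraphClient(api.NewDgraphClient(conn))
        ctx := context.Background()

        // With the hash index, eq() is answered from the 256-bit BLAKE2b
        // token alone; the tokenizer is non-lossy, so no value fetch.
        err = dg.Alter(ctx, &api.Operation{Schema: `name: string @index(hash) .`})
        if err != nil {
            log.Fatal(err)
        }

        resp, err := dg.NewTxn().Query(ctx, `{q(func: eq(name, "srfrog")) {name}}`)
        if err != nil {
            log.Fatal(err)
        }
        fmt.Println(string(resp.Json))
    }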