diff --git a/query/query3_test.go b/query/query3_test.go index f8c8284b35b..070c19f345a 100644 --- a/query/query3_test.go +++ b/query/query3_test.go @@ -1509,15 +1509,16 @@ func TestFilterRegex1(t *testing.T) { { me(func: uid(0x01)) { name - friend @filter(regexp(name, /^[a-z A-Z]+$/)) { + friend @filter(regexp(name, /^[Glen Rh]+$/)) { name } } } ` - _, err := processToFastJson(t, query) - require.Error(t, err) + js := processToFastJsonNoErr(t, query) + require.JSONEq(t, + `{"data": {"me":[{"name":"Michonne", "friend":[{"name":"Glenn Rhee"}]}]}}`, js) } func TestFilterRegex2(t *testing.T) { @@ -1533,8 +1534,9 @@ func TestFilterRegex2(t *testing.T) { } ` - _, err := processToFastJson(t, query) - require.Error(t, err) + js := processToFastJsonNoErr(t, query) + require.JSONEq(t, + `{"data": {"me":[{"name":"Michonne", "friend":[{"name":"Rick Grimes"},{"name":"Glenn Rhee"}]}]}}`, js) } func TestFilterRegex3(t *testing.T) { diff --git a/schema/schema.go b/schema/schema.go index 584eb95d591..4ae1aa2499c 100644 --- a/schema/schema.go +++ b/schema/schema.go @@ -180,6 +180,17 @@ func (s *state) TokenizerNames(pred string) []string { return names } +// HasTokenizer is a convenience func that checks if a given tokenizer is found in pred. +// Returns true if found, else false. +func (s *state) HasTokenizer(id byte, pred string) bool { + for _, t := range s.Tokenizer(pred) { + if t.Identifier() == id { + return true + } + } + return false +} + // IsReversed returns whether the predicate has reverse edge or not func (s *state) IsReversed(pred string) bool { s.RLock() diff --git a/systest/queries_test.go b/systest/queries_test.go index 0ab7dbef319..856bc1bcf67 100644 --- a/systest/queries_test.go +++ b/systest/queries_test.go @@ -50,6 +50,7 @@ func TestQuery(t *testing.T) { t.Run("multiple block eval", wrap(MultipleBlockEval)) t.Run("unmatched var assignment eval", wrap(UnmatchedVarEval)) t.Run("hash index queries", wrap(QueryHashIndex)) + t.Run("regexp with toggled trigram index", wrap(RegexpToggleTrigramIndex)) t.Run("cleanup", wrap(SchemaQueryCleanup)) } @@ -646,3 +647,76 @@ func QueryHashIndex(t *testing.T, c *dgo.Dgraph) { CompareJSON(t, tc.out, string(resp.Json)) } } + +func RegexpToggleTrigramIndex(t *testing.T, c *dgo.Dgraph) { + ctx := context.Background() + + require.NoError(t, c.Alter(ctx, &api.Operation{ + Schema: ` + name: string @index(term) @lang . + `, + })) + + txn := c.NewTxn() + _, err := txn.Mutate(ctx, &api.Mutation{ + SetNquads: []byte(` + _:x1 "The luck is in the details" . + _:x1 "The art is in the details"@en . + _:x1 "L'art est dans les détails"@fr . + + _:x2 "Detach yourself from ostentation" . + _:x2 "Detach yourself from artificiality"@en . + _:x2 "Desprenderse de la artificialidad"@es . + `), + }) + require.NoError(t, err) + require.NoError(t, txn.Commit(ctx)) + + tests := []struct { + in, out string + }{ + { + in: `{q(func:has(name)) @filter(regexp(name, /art/)) {name}}`, + out: `{"q":[]}`, + }, + { + in: `{q(func:has(name)) @filter(regexp(name@es, /art/)) {name@es}}`, + out: `{"q":[{"name@es": "Desprenderse de la artificialidad"}]}`, + }, + { + in: `{q(func:has(name)) @filter(regexp(name@fr, /art/)) {name@fr}}`, + out: `{"q":[{"name@fr": "L'art est dans les détails"}]}`, + }, + } + + t.Log("testing without trigram index") + for _, tc := range tests { + resp, err := c.NewTxn().Query(ctx, tc.in) + require.NoError(t, err) + CompareJSON(t, tc.out, string(resp.Json)) + } + + require.NoError(t, c.Alter(ctx, &api.Operation{ + Schema: ` + name: string @index(trigram) @lang . + `, + })) + + t.Log("testing with trigram index") + for _, tc := range tests { + resp, err := c.NewTxn().Query(ctx, tc.in) + require.NoError(t, err) + CompareJSON(t, tc.out, string(resp.Json)) + } + + require.NoError(t, c.Alter(ctx, &api.Operation{ + Schema: ` + name: string @index(term) @lang . + `, + })) + + t.Log("testing without trigram index at root") + _, err = c.NewTxn().Query(ctx, `{q(func:regexp(name, /art/)) {name}}`) + require.Error(t, err) + require.Contains(t, err.Error(), "Attribute name does not have trigram index for regex matching.") +} diff --git a/worker/task.go b/worker/task.go index a587b188c86..a1ab023d1a4 100644 --- a/worker/task.go +++ b/worker/task.go @@ -52,8 +52,8 @@ var ( emptyValueList = pb.ValueList{Values: []*pb.TaskValue{}} ) -func invokeNetworkRequest( - ctx context.Context, addr string, f func(context.Context, pb.WorkerClient) (interface{}, error)) (interface{}, error) { +func invokeNetworkRequest(ctx context.Context, addr string, + f func(context.Context, pb.WorkerClient) (interface{}, error)) (interface{}, error) { pl, err := conn.Get().Get(addr) if err != nil { return &emptyResult, x.Wrapf(err, "dispatchTaskOverNetwork: while retrieving connection.") @@ -270,7 +270,7 @@ func parseFuncTypeHelper(name string) (FuncType, string) { func needsIndex(fnType FuncType) bool { switch fnType { - case CompareAttrFn, GeoFn, RegexFn, FullTextSearchFn, StandardFn: + case CompareAttrFn, GeoFn, FullTextSearchFn, StandardFn: return true default: return false @@ -876,88 +876,102 @@ func (qs *queryState) handleCompareScalarFunction(arg funcArgs) error { } func (qs *queryState) handleRegexFunction(ctx context.Context, arg funcArgs) error { + span := otrace.FromContext(ctx) + stop := x.SpanTimer(span, "handleRegexFunction") + defer stop() + if span != nil { + span.Annotatef(nil, "Number of uids: %d. args.srcFn: %+v", arg.srcFn.n, arg.srcFn) + } + attr := arg.q.Attr typ, err := schema.State().TypeOf(attr) + span.Annotatef(nil, "Attr: %s. Type: %s", attr, typ.Name()) if err != nil || !typ.IsScalar() { return x.Errorf("Attribute not scalar: %s %v", attr, typ) } if typ != types.StringID { return x.Errorf("Got non-string type. Regex match is allowed only on string type.") } - tokenizers := schema.State().TokenizerNames(attr) - var found bool - for _, t := range tokenizers { - if t == "trigram" { // TODO(tzdybal) - maybe just rename to 'regex' tokenizer? - found = true - } - } - if !found { - return x.Errorf("Attribute %v does not have trigram index for regex matching.", attr) - } + useIndex := schema.State().HasTokenizer(tok.IdentTrigram, attr) + span.Annotatef(nil, "Trigram index found: %t, func at root: %t", + useIndex, arg.srcFn.isFuncAtRoot) query := cindex.RegexpQuery(arg.srcFn.regex.Syntax) empty := pb.List{} - uids, err := uidsForRegex(attr, arg, query, &empty) + uids := &pb.List{} + + // Here we determine the list of uids to match. + switch { + // If this is a filter eval, use the given uid list (good) + case arg.q.UidList != nil && len(arg.q.UidList.Uids) != 0: + uids = arg.q.UidList + + // Prefer to use an index (fast) + case useIndex: + uids, err = uidsForRegex(attr, arg, query, &empty) + if err != nil { + return err + } + + // No index and at root, return error instructing user to use `has` or index. + default: + return x.Errorf( + "Attribute %v does not have trigram index for regex matching. "+ + "Please add a trigram index or use has/uid function with regexp() as filter.", + attr) + } + + arg.out.UidMatrix = append(arg.out.UidMatrix, uids) isList := schema.State().IsList(attr) lang := langForFunc(arg.q.Langs) - if uids != nil { - arg.out.UidMatrix = append(arg.out.UidMatrix, uids) - filtered := &pb.List{} - for _, uid := range uids.Uids { - select { - case <-ctx.Done(): - return ctx.Err() - default: - } - pl, err := qs.cache.Get(x.DataKey(attr, uid)) - if err != nil { - return err - } + span.Annotatef(nil, "Total uids: %d, list: %t lang: %v", len(uids.Uids), isList, lang) - var val types.Val - if lang != "" { - val, err = pl.ValueForTag(arg.q.ReadTs, lang) - } else if isList { - vals, err := pl.AllUntaggedValues(arg.q.ReadTs) - if err == posting.ErrNoValue { - continue - } else if err != nil { - return err - } - for _, val := range vals { - // convert data from binary to appropriate format - strVal, err := types.Convert(val, types.StringID) - if err == nil && matchRegex(strVal, arg.srcFn.regex) { - filtered.Uids = append(filtered.Uids, uid) - break - } - } + filtered := &pb.List{} + for _, uid := range uids.Uids { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + pl, err := qs.cache.Get(x.DataKey(attr, uid)) + if err != nil { + return err + } - continue - } else { - val, err = pl.Value(arg.q.ReadTs) - } + vals := make([]types.Val, 1) + switch { + case lang != "": + vals[0], err = pl.ValueForTag(arg.q.ReadTs, lang) + + case isList: + vals, err = pl.AllUntaggedValues(arg.q.ReadTs) + default: + vals[0], err = pl.Value(arg.q.ReadTs) + } + if err != nil { if err == posting.ErrNoValue { continue - } else if err != nil { - return err } + return err + } + for _, val := range vals { // convert data from binary to appropriate format strVal, err := types.Convert(val, types.StringID) if err == nil && matchRegex(strVal, arg.srcFn.regex) { filtered.Uids = append(filtered.Uids, uid) + // NOTE: We only add the uid once. + break } } + } - for i := 0; i < len(arg.out.UidMatrix); i++ { - algo.IntersectWith(arg.out.UidMatrix[i], filtered, arg.out.UidMatrix[i]) - } - } else { - return err + for i := 0; i < len(arg.out.UidMatrix); i++ { + algo.IntersectWith(arg.out.UidMatrix[i], filtered, arg.out.UidMatrix[i]) } + return nil }