Skip to content

Commit

Permalink
Support filtering by facets on values. (#4217)
Browse files Browse the repository at this point in the history
  • Loading branch information
martinmr authored Nov 1, 2019
1 parent 491f6d4 commit 07ed842
Show file tree
Hide file tree
Showing 4 changed files with 241 additions and 44 deletions.
7 changes: 7 additions & 0 deletions posting/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -1021,6 +1021,13 @@ func (l *List) ValueFor(readTs uint64, langs []string) (rval types.Val, rerr err
return valueToTypesVal(p), nil
}

// PostingFor returns the posting according to the preferred language list.
func (l *List) PostingFor(readTs uint64, langs []string) (p *pb.Posting, rerr error) {
l.RLock()
defer l.RUnlock()
return l.postingFor(readTs, langs)
}

func (l *List) postingFor(readTs uint64, langs []string) (p *pb.Posting, rerr error) {
l.AssertRLock() // Avoid recursive locking by asserting a lock here.
return l.postingForLangs(readTs, langs)
Expand Down
1 change: 1 addition & 0 deletions query/common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ type Node {
}
name : string @index(term, exact, trigram) @count @lang .
alt_name : [string] @index(term, exact, trigram) @count .
alias : string @index(exact, term, fulltext) .
abbr : string .
dob : dateTime @index(year) .
Expand Down
161 changes: 145 additions & 16 deletions query/query_facets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
package query

import (
"context"
"fmt"
"testing"

Expand All @@ -35,8 +34,11 @@ func populateClusterWithFacets() {
}

triples := `
<1> <name> "Michelle"@en (origin = "french") .
<25> <name> "Daryl Dixon" .
<25> <alt_name> "Daryl Dick" .
<31> <name> "Andrea" .
<31> <alt_name> "Andy" .
<33> <name> "Michale" .
<320> <name> "Test facet"@en (type = "Test facet with lang") .
Expand Down Expand Up @@ -66,10 +68,12 @@ func populateClusterWithFacets() {
triples += fmt.Sprintf("<31> <friend> <1> %s .\n", friendFacets5)
triples += fmt.Sprintf("<31> <friend> <25> %s .\n", friendFacets6)

nameFacets := "(origin = \"french\")"
nameFacets := "(origin = \"french\", dummy = true)"
triples += fmt.Sprintf("<1> <name> \"Michonne\" %s .\n", nameFacets)
triples += fmt.Sprintf("<23> <name> \"Rick Grimes\" %s .\n", nameFacets)
triples += fmt.Sprintf("<24> <name> \"Glenn Rhee\" %s .\n", nameFacets)
triples += fmt.Sprintf("<1> <alt_name> \"Michelle\" %s .\n", nameFacets)
triples += fmt.Sprintf("<1> <alt_name> \"Michelin\" %s .\n", nameFacets)

addTriplesToCluster(triples)

Expand Down Expand Up @@ -173,8 +177,8 @@ func TestRetrieveFacetsSimple(t *testing.T) {

js := processQueryNoErr(t, query)
require.JSONEq(t,
`{"data":{"me":[{"name|origin":"french","name":"Michonne","gender":"female"}]}}`,
js)
`{"data":{"me":[{"name|origin":"french","name|dummy":true,"name":"Michonne",
"gender":"female"}]}}`, js)
}

func TestOrderFacets(t *testing.T) {
Expand Down Expand Up @@ -275,7 +279,12 @@ func TestRetrieveFacetsUidValues(t *testing.T) {

js := processQueryNoErr(t, query)
require.JSONEq(t,
`{"data":{"me":[{"friend":[{"name|origin":"french","name":"Rick Grimes","friend|since":"2006-01-02T15:04:05Z"},{"name|origin":"french","name":"Glenn Rhee","friend|close":true,"friend|family":true,"friend|since":"2004-05-02T15:04:05Z","friend|tag":"Domain3"},{"name":"Daryl Dixon","friend|close":false,"friend|family":true,"friend|since":"2007-05-02T15:04:05Z","friend|tag":34},{"name":"Andrea","friend|since":"2006-01-02T15:04:05Z"},{"friend|age":33,"friend|close":true,"friend|family":false,"friend|since":"2005-05-02T15:04:05Z"}]}]}}`,
`{"data":{"me":[{"friend":[
{"name|origin":"french","name|dummy":true,"name":"Rick Grimes","friend|since":"2006-01-02T15:04:05Z"},
{"name|origin":"french","name|dummy":true,"name":"Glenn Rhee","friend|close":true,"friend|family":true,"friend|since":"2004-05-02T15:04:05Z","friend|tag":"Domain3"},
{"name":"Daryl Dixon","friend|close":false,"friend|family":true,"friend|since":"2007-05-02T15:04:05Z","friend|tag":34},
{"name":"Andrea","friend|since":"2006-01-02T15:04:05Z"},
{"friend|age":33,"friend|close":true,"friend|family":false,"friend|since":"2005-05-02T15:04:05Z"}]}]}}`,
js)
}

Expand All @@ -296,7 +305,14 @@ func TestRetrieveFacetsAll(t *testing.T) {

js := processQueryNoErr(t, query)
require.JSONEq(t,
`{"data":{"me":[{"name|origin":"french","name":"Michonne","friend":[{"name|origin":"french","name":"Rick Grimes","gender":"male","friend|since":"2006-01-02T15:04:05Z"},{"name|origin":"french","name":"Glenn Rhee","friend|close":true,"friend|family":true,"friend|since":"2004-05-02T15:04:05Z","friend|tag":"Domain3"},{"name":"Daryl Dixon","friend|close":false,"friend|family":true,"friend|since":"2007-05-02T15:04:05Z","friend|tag":34},{"name":"Andrea","friend|since":"2006-01-02T15:04:05Z"},{"friend|age":33,"friend|close":true,"friend|family":false,"friend|since":"2005-05-02T15:04:05Z"}],"gender":"female"}]}}`,
`{"data":{"me":[
{"name|origin":"french","name|dummy":true,"name":"Michonne","friend":[
{"name|origin":"french","name|dummy":true,"name":"Rick Grimes","gender":"male","friend|since":"2006-01-02T15:04:05Z"},
{"name|origin":"french","name|dummy":true,"name":"Glenn Rhee","friend|close":true,"friend|family":true,"friend|since":"2004-05-02T15:04:05Z","friend|tag":"Domain3"},
{"name":"Daryl Dixon","friend|close":false,"friend|family":true,"friend|since":"2007-05-02T15:04:05Z","friend|tag":34},
{"name":"Andrea","friend|since":"2006-01-02T15:04:05Z"},
{"friend|age":33,"friend|close":true,"friend|family":false,"friend|since":"2005-05-02T15:04:05Z"}],
"gender":"female"}]}}`,
js)
}

Expand Down Expand Up @@ -846,21 +862,134 @@ func TestFacetsFilterAllofAndanyofterms(t *testing.T) {
js)
}

func TestFacetsFilterAtValueFail(t *testing.T) {
func TestFacetsFilterAtValueBasic(t *testing.T) {
populateClusterWithFacets()
// facet filtering is not supported at value level.
query := `
{
me(func: uid(1)) {
friend {
name @facets(eq(origin, "french"))
}
me(func: has(name)) {
name @facets(eq(origin, "french"))
}
}
`
}`

js := processQueryNoErr(t, query)
require.JSONEq(t,
`{"data": {"me":[{"name": "Michonne"}, {"name":"Rick Grimes"}, {"name": "Glenn Rhee"}]}}`,
js)
}

func TestFacetsFilterAtValueListType(t *testing.T) {
populateClusterWithFacets()
query := `
{
me(func: has(name)) {
alt_name @facets(eq(origin, "french"))
}
}`

js := processQueryNoErr(t, query)
require.JSONEq(t,
`{"data": {"me":[{"alt_name": ["Michelle", "Michelin"]}]}}`, js)
}

func TestFacetsFilterAtValueComplex1(t *testing.T) {
populateClusterWithFacets()
query := `
{
me(func: has(name)) {
name @facets(eq(origin, "french") AND eq(dummy, true))
}
}`

js := processQueryNoErr(t, query)
require.JSONEq(t,
`{"data": {"me":[{"name": "Michonne"}, {"name":"Rick Grimes"}, {"name": "Glenn Rhee"}]}}`,
js)
}

func TestFacetsFilterAtValueComplex2(t *testing.T) {
populateClusterWithFacets()
query := `
{
me(func: has(name)) {
name @facets(eq(origin, "french") AND eq(dummy, false))
}
}`

js := processQueryNoErr(t, query)
require.JSONEq(t, `{"data": {"me":[]}}`, js)
}

func TestFacetsFilterAtValueWithLangs(t *testing.T) {
populateClusterWithFacets()
query := `
{
me(func: has(name)) {
name@en @facets(eq(origin, "french"))
}
}`

js := processQueryNoErr(t, query)
require.JSONEq(t,
`{"data": {"me":[{"name@en": "Michelle"}]}}`, js)
}

func TestFacetsFilterAtValueWithBadLang(t *testing.T) {
populateClusterWithFacets()
query := `
{
me(func: has(name)) {
name@hi @facets(eq(origin, "french"))
}
}`

js := processQueryNoErr(t, query)
require.JSONEq(t, `{"data": {"me":[]}}`, js)
}

_, err := processQuery(context.Background(), t, query)
require.Error(t, err)
func TestFacetsFilterAtValueWithFacet(t *testing.T) {
populateClusterWithFacets()
query := `
{
me(func: has(name)) {
name @facets(eq(origin, "french")) @facets(origin)
}
}`

js := processQueryNoErr(t, query)
require.JSONEq(t,
`{"data": {"me":[{"name": "Michonne", "name|origin": "french"},
{"name": "Rick Grimes", "name|origin": "french"},
{"name": "Glenn Rhee", "name|origin": "french"}]}}`, js)
}

func TestFacetsFilterAtValueWithFacetAndLangs(t *testing.T) {
populateClusterWithFacets()
query := `
{
me(func: has(name)) {
name@en @facets(eq(origin, "french")) @facets(origin)
}
}`

js := processQueryNoErr(t, query)
require.JSONEq(t,
`{"data": {"me":[{"name@en": "Michelle", "name@en|origin": "french"}]}}`, js)
}

func TestFacetsFilterAtValueWithDifferentFacet(t *testing.T) {
populateClusterWithFacets()
query := `
{
me(func: has(name)) {
name @facets(eq(dummy, "true")) @facets(origin)
}
}`

js := processQueryNoErr(t, query)
require.JSONEq(t,
`{"data": {"me":[{"name": "Michonne", "name|origin": "french"},
{"name": "Rick Grimes", "name|origin": "french"},
{"name": "Glenn Rhee", "name|origin": "french"}]}}`, js)
}

func TestFacetsFilterAndRetrieval(t *testing.T) {
Expand Down
116 changes: 88 additions & 28 deletions worker/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
"github.com/golang/glog"
otrace "go.opencensus.io/trace"

"github.com/golang/protobuf/proto"
cindex "github.com/google/codesearch/index"
cregexp "github.com/google/codesearch/regexp"
"github.com/pkg/errors"
Expand Down Expand Up @@ -342,7 +343,7 @@ func (qs *queryState) handleValuePostings(ctx context.Context, args funcArgs) er
return nil
}

// This function has small boiletplate as handleUidPostings, around how the code gets
// This function has small boilerplate as handleUidPostings, around how the code gets
// concurrently executed. I didn't see much value in trying to separate it out, because the core
// logic constitutes most of the code volume here.
numGo, width := x.DivideAndRule(srcFn.n)
Expand Down Expand Up @@ -371,24 +372,16 @@ func (qs *queryState) handleValuePostings(ctx context.Context, args funcArgs) er
if err != nil {
return err
}
var vals []types.Val
if q.ExpandAll {
vals, err = pl.AllValues(args.q.ReadTs)
} else if listType && len(q.Langs) == 0 {
vals, err = pl.AllUntaggedValues(args.q.ReadTs)
} else {
var val types.Val
val, err = pl.ValueFor(args.q.ReadTs, q.Langs)
vals = append(vals, val)
}

vals, fcs, err := retrieveValuesAndFacets(args, pl, listType)
if err == posting.ErrNoValue || len(vals) == 0 {
out.UidMatrix = append(out.UidMatrix, &pb.List{})
out.FacetMatrix = append(out.FacetMatrix, &pb.FacetsList{})
if q.DoCount {
out.Counts = append(out.Counts, 0)
} else {
out.ValueMatrix = append(out.ValueMatrix, &pb.ValueList{Values: []*pb.TaskValue{}})
out.ValueMatrix = append(out.ValueMatrix,
&pb.ValueList{Values: []*pb.TaskValue{}})
if q.ExpandAll {
// To keep the cardinality same as that of ValueMatrix.
out.LangMatrix = append(out.LangMatrix, &pb.LangList{})
Expand Down Expand Up @@ -432,22 +425,9 @@ func (qs *queryState) handleValuePostings(ctx context.Context, args funcArgs) er
}
out.ValueMatrix = append(out.ValueMatrix, &vl)

if q.FacetsFilter != nil { // else part means isValueEdge
// This is Value edge and we are asked to do facet filtering. Not supported.
return errors.Errorf("Facet filtering is not supported on values.")
}

// add facets to result.
if q.FacetParam != nil {
fs, err := pl.Facets(args.q.ReadTs, q.FacetParam, q.Langs)
if err != nil {
fs = []*api.Facet{}
}
out.FacetMatrix = append(out.FacetMatrix,
&pb.FacetsList{FacetsList: []*pb.Facets{{Facets: fs}}})
} else {
out.FacetMatrix = append(out.FacetMatrix, &pb.FacetsList{})
}
// Add facets to result.
out.FacetMatrix = append(out.FacetMatrix,
&pb.FacetsList{FacetsList: []*pb.Facets{{Facets: fcs}}})

switch {
case q.DoCount:
Expand Down Expand Up @@ -513,6 +493,86 @@ func (qs *queryState) handleValuePostings(ctx context.Context, args funcArgs) er
return nil
}

func retrieveValuesAndFacets(args funcArgs, pl *posting.List, listType bool) (
[]types.Val, []*api.Facet, error) {
q := args.q
var err error
var vals []types.Val
var fcs []*api.Facet

// Retrieve values when facet filtering is not being requested.
if q.FacetsFilter == nil {
// Retrieve values.
if q.ExpandAll {
vals, err = pl.AllValues(args.q.ReadTs)
} else if listType && len(q.Langs) == 0 {
vals, err = pl.AllUntaggedValues(args.q.ReadTs)
} else {
var val types.Val
val, err = pl.ValueFor(args.q.ReadTs, q.Langs)
vals = append(vals, val)
}
if err != nil {
return nil, nil, err
}

// Retrieve facets.
if q.FacetParam != nil {
fcs, err = pl.Facets(args.q.ReadTs, q.FacetParam, q.Langs)
}
if err != nil {
return nil, nil, err
}

return vals, fcs, nil
}

// Retrieve values when facet filtering is being requested.
facetsTree, err := preprocessFilter(q.FacetsFilter)
if err != nil {
return nil, nil, err
}

// Retrieve the posting that matches the language preferences.
langMatch, err := pl.PostingFor(q.ReadTs, q.Langs)
if err != nil && err != posting.ErrNoValue {
return nil, nil, err
}
err = pl.Iterate(q.ReadTs, 0, func(p *pb.Posting) error {
if listType && len(q.Langs) == 0 {
// Don't retrieve tagged values unless explicitly asked.
if len(p.LangTag) > 0 {
return nil
}
} else {
// Only consider the posting that matches our language preferences.
if !proto.Equal(p, langMatch) {
return nil
}
}

picked, err := applyFacetsTree(p.Facets, facetsTree)
if err != nil {
return err
}
if picked {
vals = append(vals, types.Val{
Tid: types.TypeID(p.ValType),
Value: p.Value,
})
if q.FacetParam != nil {
fcs = append(fcs, facets.CopyFacets(p.Facets, q.FacetParam)...)
}
}
return nil // continue iteration.
})
if err != nil {
return nil, nil, err
}

return vals, fcs, nil
}

// This function handles operations on uid posting lists. Index keys, reverse keys and some data
// keys store uid posting lists.
func (qs *queryState) handleUidPostings(
Expand Down

0 comments on commit 07ed842

Please sign in to comment.