Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport master <-> experimental search features #5201

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions services/search/pkg/content/cs3.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ func newCS3Retriever(client gateway.GatewayAPIClient, logger log.Logger, insecur

// Retrieve downloads the file from a cs3 service
// The caller MUST make sure to close the returned ReadCloser
func (s cs3) Retrieve(ctx context.Context, rid *provider.ResourceId) (io.ReadCloser, error) {
func (s cs3) Retrieve(ctx context.Context, rID *provider.ResourceId) (io.ReadCloser, error) {
at, ok := contextGet(ctx, revactx.TokenHeader)
if !ok {
return nil, fmt.Errorf("context without %s", revactx.TokenHeader)
}

res, err := s.gwClient.InitiateFileDownload(ctx, &provider.InitiateFileDownloadRequest{Ref: &provider.Reference{ResourceId: rid, Path: "."}})
res, err := s.gwClient.InitiateFileDownload(ctx, &provider.InitiateFileDownloadRequest{Ref: &provider.Reference{ResourceId: rID, Path: "."}})
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion services/search/pkg/content/mocks/Extractor.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions services/search/pkg/content/mocks/Retriever.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion services/search/pkg/content/retriever.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
//
//go:generate mockery --name=Retriever
type Retriever interface {
Retrieve(ctx context.Context, rid *provider.ResourceId) (io.ReadCloser, error)
Retrieve(ctx context.Context, rID *provider.ResourceId) (io.ReadCloser, error)
}

func contextGet(ctx context.Context, k string) (string, bool) {
Expand Down
111 changes: 33 additions & 78 deletions services/search/pkg/engine/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@ package engine
import (
"context"
"errors"
"fmt"
"math"
"path"
"path/filepath"
"strings"
"time"

"github.com/blevesearch/bleve/v2/analysis/token/porter"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"

"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/porter"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search/query"
storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
Expand All @@ -25,9 +25,6 @@ import (
searchMessage "github.com/owncloud/ocis/v2/protogen/gen/ocis/messages/search/v0"
searchService "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0"
"github.com/owncloud/ocis/v2/services/search/pkg/content"
sq "github.com/owncloud/ocis/v2/services/search/pkg/query"
"golang.org/x/text/cases"
"golang.org/x/text/language"
"google.golang.org/protobuf/types/known/timestamppb"
)

Expand Down Expand Up @@ -119,26 +116,31 @@ func (b *Bleve) Search(_ context.Context, sir *searchService.SearchIndexRequest)
Bool: false,
FieldVal: "Deleted",
},
&query.TermQuery{
FieldVal: "RootID",
Term: storagespace.FormatResourceID(
storageProvider.ResourceId{
StorageId: sir.Ref.GetResourceId().GetStorageId(),
SpaceId: sir.Ref.GetResourceId().GetSpaceId(),
OpaqueId: sir.Ref.GetResourceId().GetOpaqueId(),
},
),
},
// TODO: investigate what is wrong with the prefix query below and why it is slow; as a workaround, hits are filtered by path prefix in the result loop
//&query.PrefixQuery{
// Prefix: escapeQuery(utils.MakeRelativePath(path.Join(sir.Ref.Path, "/"))),
// FieldVal: "Path",
//},
&query.QueryStringQuery{
Query: b.buildQuery(sir.Query),
Query: formatQuery(sir.Query),
},
)

if sir.Ref != nil {
q.Conjuncts = append(
q.Conjuncts,
&query.TermQuery{
FieldVal: "RootID",
Term: storagespace.FormatResourceID(
storageProvider.ResourceId{
StorageId: sir.Ref.GetResourceId().GetStorageId(),
SpaceId: sir.Ref.GetResourceId().GetSpaceId(),
OpaqueId: sir.Ref.GetResourceId().GetOpaqueId(),
},
),
},
&query.PrefixQuery{
Prefix: utils.MakeRelativePath(path.Join(sir.Ref.Path, "/")),
FieldVal: "Path",
},
)
}

bleveReq := bleve.NewSearchRequest(q)

switch {
Expand All @@ -158,14 +160,6 @@ func (b *Bleve) Search(_ context.Context, sir *searchService.SearchIndexRequest)

matches := []*searchMessage.Match{}
for _, hit := range res.Hits {
// Limit search to this directory in the space
if !strings.HasPrefix(
getValue[string](hit.Fields, "Path"),
utils.MakeRelativePath(path.Join(sir.Ref.Path, "/")),
) {
continue
}

rootID, err := storagespace.ParseID(getValue[string](hit.Fields, "RootID"))
if err != nil {
return nil, err
Expand Down Expand Up @@ -356,56 +350,17 @@ func (b *Bleve) setDeleted(id string, deleted bool) error {
return nil
}

func (b *Bleve) buildQuery(si string) string {
var queries [][]string
var so []string
lexer := sq.NewLexer(strings.NewReader(si))
allowedFields := []string{"content", "title", "tags"}

for {
tok, lit := lexer.Scan()
if tok == sq.TField {
for _, field := range allowedFields {
if strings.EqualFold(field, lit) {
queries = append(queries, []string{cases.Title(language.Und, cases.NoLower).String(lit)})
}
}
}

if tok == sq.TValue {
if len(queries) == 0 {
queries = append(queries, []string{"*"})
}

queries[len(queries)-1] = append(queries[len(queries)-1], lit)
}

if tok == sq.TEof {
break
}
func formatQuery(q string) string {
cq := q
fields := []string{"RootID", "Path", "ID", "Name", "Size", "Mtime", "MimeType", "Type"}
for _, field := range fields {
cq = strings.ReplaceAll(cq, strings.ToLower(field)+":", field+":")
}

for _, q := range queries {
if len(q) <= 1 {
continue
}

fields := []string{q[0]}

if fields[0] == "*" {
fields = []string{"Content", "Name", "Tags"}
}

for _, field := range fields {
ss := strings.ToLower(strings.Join(q[1:], `\ `))

if !strings.Contains(ss, "*") && field != "Content" {
ss = "*" + ss + "*"
}

so = append(so, fmt.Sprintf("%s:%s", field, ss))
}
if strings.Contains(cq, ":") {
return cq // Sophisticated field based search
}

return strings.Join(so, " ")
// this is a basic filename search
return "Name:*" + strings.ReplaceAll(strings.ToLower(cq), " ", `\ `) + "*"
}
Loading