-
Notifications
You must be signed in to change notification settings - Fork 196
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[full-ci] experimental search backport (#5221)
* experimental search backport fix basic extractor resource name move escapeQuery regex into global variable minor pr review changes rename DebounceDuration env variable add document title and content when rebuilding bleve resource Co-authored-by: David Christofas <[email protected]>
- Loading branch information
Showing
48 changed files
with
2,777 additions
and
2,245 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
Bugfix: Enhancement search | ||
|
||
Provides multiple enhancements to the current search implementation. | ||
* Content extraction: search now supports Apache Tika to extract resource contents. | ||
* Search engine: the underlying search engine is now swappable. | ||
* Event consumers: the number of event consumers can now be set, which improves the speed of the individual tasks. | ||
|
||
https://github.com/owncloud/ocis/pull/5221 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
package config | ||
|
||
// Extractor defines which extractor to use | ||
type Extractor struct { | ||
Type string `yaml:"type" env:"SEARCH_EXTRACTOR_TYPE" desc:"Defines the content extraction engine."` | ||
CS3AllowInsecure bool `yaml:"cs3_allow_insecure" env:"OCIS_INSECURE;SEARCH_EXTRACTOR_CS3SOURCE_INSECURE" desc:"Ignore untrusted SSL certificates when connecting to the CS3 source."` | ||
Tika ExtractorTika `yaml:"tika"` | ||
} | ||
|
||
// ExtractorTika bundles the options of the Tika based content extractor.
type ExtractorTika struct {
	// TikaURL is the URL under which the Tika server can be reached.
	TikaURL string `yaml:"tika_url" env:"SEARCH_EXTRACTOR_TIKA_TIKA_URL" desc:"URL of the tika server."`
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
package config | ||
|
||
// Engine defines which search engine to use | ||
type Engine struct { | ||
Type string `yaml:"type" env:"SEARCH_ENGINE_TYPE" desc:"Defines which search engine to use."` | ||
Bleve EngineBleve `yaml:"bleve"` | ||
} | ||
|
||
// EngineBleve bundles the options of the bleve search engine.
type EngineBleve struct {
	// Datapath is the directory in which the search data is persisted.
	Datapath string `yaml:"data_path" env:"SEARCH_ENGINE_BLEVE_DATA_PATH" desc:"Path for the search persistence directory."`
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
package config | ||
|
||
// Reva defines all available REVA configuration.
type Reva struct {
	// Address is the CS3 gateway endpoint. It is tagged with the `yaml` key,
	// the same key the other search config structs use, and keeps the
	// "address" name.
	Address string `yaml:"address" env:"REVA_GATEWAY" desc:"The CS3 gateway endpoint."`
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
package config | ||
|
||
// Events combines the configuration options for the event bus.
type Events struct {
	// Endpoint is the address of the event system.
	Endpoint string `yaml:"endpoint" env:"SEARCH_EVENTS_ENDPOINT" desc:"The address of the event system. The event system is the message queuing service. It is used as message broker for the microservice architecture."`
	// Cluster is the cluster ID of the event system.
	Cluster string `yaml:"cluster" env:"SEARCH_EVENTS_CLUSTER" desc:"The clusterID of the event system. The event system is the message queuing service. It is used as message broker for the microservice architecture. Mandatory when using NATS as event system."`
	// AsyncUploads enables asynchronous file uploads.
	AsyncUploads bool `yaml:"async_uploads" env:"STORAGE_USERS_OCIS_ASYNC_UPLOADS;SEARCH_EVENTS_ASYNC_UPLOADS" desc:"Enable asynchronous file uploads."`
	// NumConsumers is the number of event consumers per service instance.
	NumConsumers int `yaml:"num_consumers" env:"SEARCH_EVENTS_NUM_CONSUMERS" desc:"number of event consumers per service instance"`
	// DebounceDuration is given in milliseconds, not as a time.Duration.
	DebounceDuration int `yaml:"debounce_duration" env:"SEARCH_EVENTS_REINDEX_DEBOUNCE_DURATION" desc:"The duration in milliseconds the reindex debouncer waits before triggering a reindex of a space that was modified."`

	// TLSInsecure is an "insecure" switch; its description states that
	// enabling it skips certificate verification.
	TLSInsecure bool `yaml:"tls_insecure" env:"OCIS_INSECURE;SEARCH_EVENTS_TLS_INSECURE" desc:"Skip verification of the server TLS certificates."`
	// TLSRootCACertificate is the root CA certificate used to validate the server's TLS certificate.
	TLSRootCACertificate string `yaml:"tls_root_ca_certificate" env:"SEARCH_EVENTS_TLS_ROOT_CA_CERTIFICATE" desc:"The root CA certificate used to validate the server's TLS certificate. If provided SEARCH_EVENTS_TLS_INSECURE will be seen as false."`
	// EnableTLS turns on TLS for the connection to the events broker.
	EnableTLS bool `yaml:"enable_tls" env:"OCIS_EVENTS_ENABLE_TLS;SEARCH_EVENTS_ENABLE_TLS" desc:"Enable TLS for the connection to the events broker. The events broker is the ocis service which receives and delivers events between the services."`
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package content | ||
|
||
import ( | ||
"context" | ||
"time" | ||
|
||
storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" | ||
//"github.com/cs3org/reva/v2/pkg/tags" | ||
"github.com/owncloud/ocis/v2/ocis-pkg/log" | ||
) | ||
|
||
// Basic is the simplest Extractor implementation. | ||
type Basic struct { | ||
logger log.Logger | ||
} | ||
|
||
// NewBasicExtractor creates a new Basic instance. | ||
func NewBasicExtractor(logger log.Logger) (*Basic, error) { | ||
return &Basic{logger: logger}, nil | ||
} | ||
|
||
// Extract literally just rearranges the inputs and processes them into a Document. | ||
func (b Basic) Extract(_ context.Context, ri *storageProvider.ResourceInfo) (Document, error) { | ||
doc := Document{ | ||
Name: ri.Name, | ||
Size: ri.Size, | ||
MimeType: ri.MimeType, | ||
} | ||
|
||
//if m := ri.ArbitraryMetadata.GetMetadata(); m != nil { | ||
//if t, ok := m["tags"]; ok { | ||
//doc.Tags = tags.FromList(t).AsSlice() | ||
//} | ||
//} | ||
|
||
if ri.Mtime != nil { | ||
doc.Mtime = time.Unix(int64(ri.Mtime.Seconds), int64(ri.Mtime.Nanos)).UTC().Format(time.RFC3339) | ||
} | ||
|
||
return doc, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
package content_test | ||
|
||
import ( | ||
"context" | ||
|
||
storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" | ||
cs3Types "github.com/cs3org/go-cs3apis/cs3/types/v1beta1" | ||
. "github.com/onsi/ginkgo/v2" | ||
. "github.com/onsi/gomega" | ||
"github.com/owncloud/ocis/v2/ocis-pkg/log" | ||
"github.com/owncloud/ocis/v2/services/search/pkg/content" | ||
) | ||
|
||
var _ = Describe("Basic", func() { | ||
var ( | ||
basic content.Extractor | ||
logger = log.NewLogger() | ||
ctx = context.TODO() | ||
) | ||
|
||
BeforeEach(func() { | ||
basic, _ = content.NewBasicExtractor(logger) | ||
}) | ||
|
||
Describe("extract", func() { | ||
It("basic fields", func() { | ||
ri := &storageProvider.ResourceInfo{ | ||
Name: "bar.pdf", | ||
Path: "./foo/bar.pdf", | ||
Size: 1024, | ||
MimeType: "application/pdf", | ||
} | ||
|
||
doc, err := basic.Extract(ctx, ri) | ||
|
||
Expect(err).To(BeNil()) | ||
Expect(doc).ToNot(BeNil()) | ||
Expect(doc.Name).To(Equal(ri.Name)) | ||
Expect(doc.Size).To(Equal(ri.Size)) | ||
Expect(doc.MimeType).To(Equal(ri.MimeType)) | ||
}) | ||
|
||
/*It("adds tags", func() { | ||
for _, data := range []struct { | ||
tags string | ||
expect []string | ||
}{ | ||
{tags: "", expect: []string{}}, | ||
{tags: ",,,", expect: []string{}}, | ||
{tags: ",foo,,", expect: []string{"foo"}}, | ||
{tags: ",foo,,bar,", expect: []string{"foo", "bar"}}, | ||
} { | ||
ri := &storageProvider.ResourceInfo{ | ||
ArbitraryMetadata: &storageProvider.ArbitraryMetadata{ | ||
Metadata: map[string]string{ | ||
"tags": data.tags, | ||
}, | ||
}, | ||
} | ||
doc, err := basic.Extract(ctx, ri) | ||
Expect(err).To(BeNil()) | ||
Expect(doc).ToNot(BeNil()) | ||
Expect(doc.Tags).To(Equal(data.expect)) | ||
} | ||
})*/ | ||
|
||
It("RFC3339 mtime", func() { | ||
for _, data := range []struct { | ||
second uint64 | ||
expect string | ||
}{ | ||
{second: 4000, expect: "1970-01-01T01:06:40Z"}, | ||
{second: 3000, expect: "1970-01-01T00:50:00Z"}, | ||
{expect: ""}, | ||
} { | ||
ri := &storageProvider.ResourceInfo{} | ||
|
||
if data.second != 0 { | ||
ri.Mtime = &cs3Types.Timestamp{Seconds: data.second} | ||
} | ||
|
||
doc, err := basic.Extract(ctx, ri) | ||
Expect(err).To(BeNil()) | ||
Expect(doc).ToNot(BeNil()) | ||
Expect(doc.Mtime).To(Equal(data.expect)) | ||
} | ||
}) | ||
}) | ||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
package content | ||
|
||
// Document is the result of a content extraction and bundles the meta
// fields of a single resource.
type Document struct {
	Title, Name, Content string
	Size                 uint64
	// Mtime is a string; the basic extractor stores an RFC 3339 timestamp here.
	Mtime, MimeType string
	//Tags []string
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.