From f9b69afa9e224975b807f84f078a8552755db25d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 15 Aug 2023 06:44:05 +0000 Subject: [PATCH] Bump github.com/blevesearch/bleve/v2 from 2.3.7 to 2.3.9 Bumps [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) from 2.3.7 to 2.3.9. - [Release notes](https://github.com/blevesearch/bleve/releases) - [Commits](https://github.com/blevesearch/bleve/compare/v2.3.7...v2.3.9) --- updated-dependencies: - dependency-name: github.com/blevesearch/bleve/v2 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- go.mod | 18 +- go.sum | 36 +-- .../RoaringBitmap/roaring/.travis.yml | 32 -- .../RoaringBitmap/roaring/README.md | 28 +- .../RoaringBitmap/roaring/arraycontainer.go | 34 +- .../RoaringBitmap/roaring/bitmapcontainer.go | 43 ++- .../github.com/RoaringBitmap/roaring/clz.go | 2 + .../RoaringBitmap/roaring/clz_compat.go | 1 + .../github.com/RoaringBitmap/roaring/ctz.go | 2 + .../RoaringBitmap/roaring/ctz_compat.go | 1 + .../RoaringBitmap/roaring/fastaggregation.go | 4 + .../RoaringBitmap/roaring/popcnt.go | 2 + .../RoaringBitmap/roaring/popcnt_asm.go | 1 + .../RoaringBitmap/roaring/popcnt_compat.go | 1 + .../RoaringBitmap/roaring/popcnt_generic.go | 1 + .../RoaringBitmap/roaring/roaring.go | 261 +++++++++++---- .../RoaringBitmap/roaring/roaringarray.go | 13 +- .../RoaringBitmap/roaring/runcontainer.go | 37 ++- .../roaring/serialization_generic.go | 12 + .../roaring/serialization_littleendian.go | 301 ++++++++++++++++-- .../roaring/serializationfuzz.go | 1 + .../RoaringBitmap/roaring/setutil_arm64.go | 1 + .../RoaringBitmap/roaring/setutil_generic.go | 1 + .../github.com/RoaringBitmap/roaring/smat.go | 3 +- .../github.com/blevesearch/bleve/v2/README.md | 12 +- .../v2/analysis/lang/en/plural_stemmer.go | 174 ++++++++++ .../bleve/v2/index/scorch/scorch.go | 4 + .../v2/index/scorch/snapshot_index_tfr.go | 8 +- .../bleve/v2/index/upsidedown/index_reader.go | 8 +- .../bleve/v2/index/upsidedown/upsidedown.go | 67 ++-- .../blevesearch/bleve/v2/index_impl.go | 16 +- .../blevesearch/bleve/v2/mapping/document.go | 35 +- .../blevesearch/bleve/v2/mapping/index.go | 18 +- .../github.com/blevesearch/bleve/v2/query.go | 25 ++ .../github.com/blevesearch/bleve/v2/search.go | 30 +- .../bleve/v2/search/collector/topn.go | 19 +- .../bleve/v2/search/query/geo_boundingbox.go | 2 + .../v2/search/query/geo_boundingpolygon.go | 2 + .../bleve/v2/search/query/geo_distance.go | 2 + .../bleve/v2/search/query/geo_shape.go | 2 + .../bleve/v2/search/query/numeric_range.go | 1 + .../blevesearch/bleve/v2/search/search.go | 4 - .../bleve/v2/search/searcher/search_fuzzy.go | 13 +- .../search/searcher/search_geoboundingbox.go | 11 +- .../searcher/search_geopointdistance.go | 9 +- .../v2/search/searcher/search_geopolygon.go | 9 +- .../v2/search/searcher/search_geoshape.go | 9 +- .../search/searcher/search_numeric_range.go | 6 +- .../bleve/v2/search/searcher/search_regexp.go | 3 +- .../bleve/v2/search/searcher/search_term.go | 14 + .../v2/search/searcher/search_term_prefix.go | 3 +- .../v2/search/searcher/search_term_range.go | 3 +- .../blevesearch/bleve/v2/search/util.go | 51 +++ .../github.com/blevesearch/vellum/builder.go | 5 - .../github.com/blevesearch/zapx/v15/merge.go | 4 + .../blevesearch/zapx/v15/posting.go | 9 +- .../blevesearch/zapx/v15/segment.go | 6 +- vendor/modules.txt | 30 +- 58 files changed, 1129 insertions(+), 321 deletions(-) delete mode 100644 vendor/github.com/RoaringBitmap/roaring/.travis.yml create mode 100644 vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/plural_stemmer.go diff --git a/go.mod b/go.mod index cee8df820bb..3dadb9a05cf 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/MicahParks/keyfunc v1.5.1 github.com/Nerzal/gocloak/v13 v13.1.0 github.com/bbalet/stopwords v1.0.0 - github.com/blevesearch/bleve/v2 v2.3.7 + github.com/blevesearch/bleve/v2 v2.3.9 github.com/coreos/go-oidc v2.2.1+incompatible github.com/coreos/go-oidc/v3 v3.6.0 github.com/cs3org/go-cs3apis v0.0.0-20230516150832-730ac860c71d @@ -115,7 +115,7 @@ require ( github.com/Microsoft/go-winio v0.6.0 // indirect github.com/OneOfOne/xxhash v1.2.8 // indirect github.com/ProtonMail/go-crypto v0.0.0-20220930113650-c6815a8c17ad // indirect - github.com/RoaringBitmap/roaring v0.9.4 // indirect + github.com/RoaringBitmap/roaring v1.2.3 // indirect github.com/acomagu/bufpipe v1.0.3 // indirect github.com/agnivade/levenshtein v1.1.1 // indirect github.com/ajg/form v1.5.1 // indirect @@ -134,16 +134,16 @@ require ( github.com/blevesearch/go-porterstemmer v1.0.3 // indirect github.com/blevesearch/gtreap v0.1.1 // indirect github.com/blevesearch/mmap-go v1.0.4 // indirect - github.com/blevesearch/scorch_segment_api/v2 v2.1.4 // indirect + github.com/blevesearch/scorch_segment_api/v2 v2.1.5 // indirect github.com/blevesearch/segment v0.9.1 // indirect github.com/blevesearch/snowballstem v0.9.0 // indirect github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect - github.com/blevesearch/vellum v1.0.9 // indirect - github.com/blevesearch/zapx/v11 v11.3.7 // indirect - github.com/blevesearch/zapx/v12 v12.3.7 // indirect - github.com/blevesearch/zapx/v13 v13.3.7 // indirect - github.com/blevesearch/zapx/v14 v14.3.7 // indirect - github.com/blevesearch/zapx/v15 v15.3.9 // indirect + github.com/blevesearch/vellum v1.0.10 // indirect + github.com/blevesearch/zapx/v11 v11.3.9 // indirect + github.com/blevesearch/zapx/v12 v12.3.9 // indirect + github.com/blevesearch/zapx/v13 v13.3.9 // indirect + github.com/blevesearch/zapx/v14 v14.3.9 // indirect + github.com/blevesearch/zapx/v15 v15.3.12 // indirect github.com/bluele/gcache v0.0.2 // indirect github.com/bmizerany/pat v0.0.0-20210406213842-e4b6760bdd6f // indirect github.com/bombsimon/logrusr/v3 v3.1.0 // indirect diff --git a/go.sum b/go.sum index aca23cc0cbd..4ec8d52dfab 100644 --- a/go.sum +++ b/go.sum @@ -669,8 +669,8 @@ github.com/ProtonMail/go-crypto v0.0.0-20220930113650-c6815a8c17ad h1:QeeqI2zxxg github.com/ProtonMail/go-crypto v0.0.0-20220930113650-c6815a8c17ad/go.mod h1:UBYPn8k0D56RtnR8RFQMjmh4KrZzWJ5o7Z9SYjossQ8= github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= -github.com/RoaringBitmap/roaring v0.9.4 h1:ckvZSX5gwCRaJYBNe7syNawCU5oruY9gQmjXlp4riwo= -github.com/RoaringBitmap/roaring v0.9.4/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h+rzPpv4sbomAA= +github.com/RoaringBitmap/roaring v1.2.3 h1:yqreLINqIrX22ErkKI0vY47/ivtJr6n+kMhVOVmhWBY= +github.com/RoaringBitmap/roaring v1.2.3/go.mod h1:plvDsJQpxOC5bw8LRteu/MLWHsHez/3y6cubLI4/1yE= github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= github.com/acomagu/bufpipe v1.0.3 h1:fxAGrHZTgQ9w5QqVItgzwj235/uYZYgbXitB+dLupOk= @@ -746,8 +746,8 @@ github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edY github.com/bits-and-blooms/bitset v1.2.1 h1:M+/hrU9xlMp7t4TyTDQW97d3tRPVuKFC6zBEK16QnXY= github.com/bits-and-blooms/bitset v1.2.1/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84= -github.com/blevesearch/bleve/v2 v2.3.7 h1:nIfIrhv28tvgBpbVF8Dq7/U1zW/YiwSqg/PBgE3x8bo= -github.com/blevesearch/bleve/v2 v2.3.7/go.mod h1:2tToYD6mDeseIA13jcZiEEqYrVLg6xdk0v6+F7dWquU= +github.com/blevesearch/bleve/v2 v2.3.9 h1:pUMvK0mxAexqasZcVj8lazmWnEW5XiV0tASIqANiNTQ= +github.com/blevesearch/bleve/v2 v2.3.9/go.mod h1:1PibElcjlQMQHF9uS9mRv58ODQgj4pCWHA1Wfd+qagU= github.com/blevesearch/bleve_index_api v1.0.5 h1:Lc986kpC4Z0/n1g3gg8ul7H+lxgOQPcXb9SxvQGu+tw= github.com/blevesearch/bleve_index_api v1.0.5/go.mod h1:YXMDwaXFFXwncRS8UobWs7nvo0DmusriM1nztTlj1ms= github.com/blevesearch/geo v0.1.17 h1:AguzI6/5mHXapzB0gE9IKWo+wWPHZmXZoscHcjFgAFA= @@ -758,26 +758,26 @@ github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZG github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk= github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= -github.com/blevesearch/scorch_segment_api/v2 v2.1.4 h1:LmGmo5twU3gV+natJbKmOktS9eMhokPGKWuR+jX84vk= -github.com/blevesearch/scorch_segment_api/v2 v2.1.4/go.mod h1:PgVnbbg/t1UkgezPDu8EHLi1BHQ17xUwsFdU6NnOYS0= +github.com/blevesearch/scorch_segment_api/v2 v2.1.5 h1:1g713kpCQZ8u4a3stRGBfrwVOuGRnmxOVU5MQkUPrHU= +github.com/blevesearch/scorch_segment_api/v2 v2.1.5/go.mod h1:f2nOkKS1HcjgIWZgDAErgBdxmr2eyt0Kn7IY+FU1Xe4= github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU= github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw= github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s= github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs= github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A= github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ= -github.com/blevesearch/vellum v1.0.9 h1:PL+NWVk3dDGPCV0hoDu9XLLJgqU4E5s/dOeEJByQ2uQ= -github.com/blevesearch/vellum v1.0.9/go.mod h1:ul1oT0FhSMDIExNjIxHqJoGpVrBpKCdgDQNxfqgJt7k= -github.com/blevesearch/zapx/v11 v11.3.7 h1:Y6yIAF/DVPiqZUA/jNgSLXmqewfzwHzuwfKyfdG+Xaw= -github.com/blevesearch/zapx/v11 v11.3.7/go.mod h1:Xk9Z69AoAWIOvWudNDMlxJDqSYGf90LS0EfnaAIvXCA= -github.com/blevesearch/zapx/v12 v12.3.7 h1:DfQ6rsmZfEK4PzzJJRXjiM6AObG02+HWvprlXQ1Y7eI= -github.com/blevesearch/zapx/v12 v12.3.7/go.mod h1:SgEtYIBGvM0mgIBn2/tQE/5SdrPXaJUaT/kVqpAPxm0= -github.com/blevesearch/zapx/v13 v13.3.7 h1:igIQg5eKmjw168I7av0Vtwedf7kHnQro/M+ubM4d2l8= -github.com/blevesearch/zapx/v13 v13.3.7/go.mod h1:yyrB4kJ0OT75UPZwT/zS+Ru0/jYKorCOOSY5dBzAy+s= -github.com/blevesearch/zapx/v14 v14.3.7 h1:gfe+fbWslDWP/evHLtp/GOvmNM3sw1BbqD7LhycBX20= -github.com/blevesearch/zapx/v14 v14.3.7/go.mod h1:9J/RbOkqZ1KSjmkOes03AkETX7hrXT0sFMpWH4ewC4w= -github.com/blevesearch/zapx/v15 v15.3.9 h1:/s9zqKxFaZKQTTcMO2b/Tup0ch5MSztlvw+frVDfIBk= -github.com/blevesearch/zapx/v15 v15.3.9/go.mod h1:m7Y6m8soYUvS7MjN9eKlz1xrLCcmqfFadmu7GhWIrLY= +github.com/blevesearch/vellum v1.0.10 h1:HGPJDT2bTva12hrHepVT3rOyIKFFF4t7Gf6yMxyMIPI= +github.com/blevesearch/vellum v1.0.10/go.mod h1:ul1oT0FhSMDIExNjIxHqJoGpVrBpKCdgDQNxfqgJt7k= +github.com/blevesearch/zapx/v11 v11.3.9 h1:y3ijS4h4MJdmQ07MHASxat4owAixreK2xdo76w9ncrw= +github.com/blevesearch/zapx/v11 v11.3.9/go.mod h1:jcAYnQwlr+LqD2vLjDWjWiZDXDXGFqPbpPDRTd3XmS4= +github.com/blevesearch/zapx/v12 v12.3.9 h1:MXGLlZ03oxXH3DMJTZaBaRj2xb6t4wQVZeZK/wu1M6w= +github.com/blevesearch/zapx/v12 v12.3.9/go.mod h1:QXCMwmOkdLnMDgTN1P4CcuX5F851iUOtOwXbw0HMBYs= +github.com/blevesearch/zapx/v13 v13.3.9 h1:+VAz9V0VmllHXlZV4DCvfYj0nqaZHgF3MeEHwOyRBwQ= +github.com/blevesearch/zapx/v13 v13.3.9/go.mod h1:s+WjNp4WSDtrBVBpa37DUOd7S/Gr/jTZ7ST/MbCVj/0= +github.com/blevesearch/zapx/v14 v14.3.9 h1:wuqxATgsTCNHM9xsOFOeFp8H2heZ/gMX/tsl9lRK8U4= +github.com/blevesearch/zapx/v14 v14.3.9/go.mod h1:MWZ4v8AzFBRurhDzkLvokFW8ljcq9Evm27mkWe8OGbM= +github.com/blevesearch/zapx/v15 v15.3.12 h1:w/kU9aHyfMDEdwHGZzCiakC3HZ9z5gYlXaALDC4Dct8= +github.com/blevesearch/zapx/v15 v15.3.12/go.mod h1:tx53gDJS/7Oa3Je820cmVurqCuJ4dqdAy1kiDMV/IUo= github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw= github.com/bluele/gcache v0.0.2/go.mod h1:m15KV+ECjptwSPxKhOhQoAFQVtUFjTVkc3H8o0t/fp0= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= diff --git a/vendor/github.com/RoaringBitmap/roaring/.travis.yml b/vendor/github.com/RoaringBitmap/roaring/.travis.yml deleted file mode 100644 index 0a4c4e918f6..00000000000 --- a/vendor/github.com/RoaringBitmap/roaring/.travis.yml +++ /dev/null @@ -1,32 +0,0 @@ -language: go -sudo: false -install: -- go get -t github.com/RoaringBitmap/roaring -- go get -t golang.org/x/tools/cmd/cover -- go get -t github.com/mattn/goveralls -- go get -t github.com/mschoch/smat -notifications: - email: false -go: -- "1.13.x" -- "1.14.x" -- tip - -# whitelist -branches: - only: - - master -script: -- goveralls -v -service travis-ci -ignore rle16_gen.go,rle_gen.go,rle.go || go test -- go test -race -run TestConcurrent* -- go build -tags appengine -- go test -tags appengine -- GOARCH=arm64 go build -- GOARCH=386 go build -- GOARCH=386 go test -- GOARCH=arm go build -- GOARCH=arm64 go build - -matrix: - allow_failures: - - go: tip diff --git a/vendor/github.com/RoaringBitmap/roaring/README.md b/vendor/github.com/RoaringBitmap/roaring/README.md index 2a7a1290609..753b8068b41 100644 --- a/vendor/github.com/RoaringBitmap/roaring/README.md +++ b/vendor/github.com/RoaringBitmap/roaring/README.md @@ -1,4 +1,4 @@ -roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring/roaring64?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring/roaring64) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring) +roaring [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring/roaring64?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring/roaring64) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring) [![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/roaring/status.svg)](https://cloud.drone.io/RoaringBitmap/roaring) ![Go-CI](https://github.com/RoaringBitmap/roaring/workflows/Go-CI/badge.svg) ![Go-ARM-CI](https://github.com/RoaringBitmap/roaring/workflows/Go-ARM-CI/badge.svg) @@ -7,10 +7,8 @@ roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https This is a go version of the Roaring bitmap data structure. - - Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and -[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing. +[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [anacrolix/torrent][anacrolix/torrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing. [lucene]: https://lucene.apache.org/ [solr]: https://lucene.apache.org/solr/ @@ -18,7 +16,7 @@ Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene [druid]: https://druid.apache.org/ [spark]: https://spark.apache.org/ [opensearchserver]: http://www.opensearchserver.com -[cloudtorrent]: https://github.com/jpillora/cloud-torrent +[anacrolix/torrent]: https://github.com/anacrolix/torrent [whoosh]: https://bitbucket.org/mchaput/whoosh/wiki/Home [pilosa]: https://www.pilosa.com/ [kylin]: http://kylin.apache.org/ @@ -32,7 +30,7 @@ Roaring bitmaps are found to work well in many important applications: The ``roaring`` Go library is used by -* [Cloud Torrent](https://github.com/jpillora/cloud-torrent) +* [anacrolix/torrent] * [runv](https://github.com/hyperhq/runv) * [InfluxDB](https://www.influxdata.com) * [Pilosa](https://www.pilosa.com/) @@ -42,6 +40,7 @@ The ``roaring`` Go library is used by * [SourceGraph](https://github.com/sourcegraph/sourcegraph) * [M3](https://github.com/m3db/m3) * [trident](https://github.com/NetApp/trident) +* [Husky](https://www.datadoghq.com/blog/engineering/introducing-husky/) This library is used in production in several systems, it is part of the [Awesome Go collection](https://awesome-go.com). @@ -148,10 +147,8 @@ formats like WAH, EWAH, Concise... Maybe surprisingly, Roaring also generally of - Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience 48 (4), 2018 [arXiv:1709.07821](https://arxiv.org/abs/1709.07821) - Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin, Better bitmap performance with Roaring bitmaps, -Software: Practice and Experience 46 (5), 2016. -http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/realroaring2014.html -- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience 46 (11), 2016. http://arxiv.org/abs/1603.06549 - +Software: Practice and Experience 46 (5), 2016.[arXiv:1402.6407](http://arxiv.org/abs/1402.6407) This paper used data from http://lemire.me/data/realroaring2014.html +- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience 46 (11), 2016. [arXiv:1603.06549](http://arxiv.org/abs/1603.06549) ### Dependencies @@ -170,6 +167,15 @@ Note that the smat library requires Go 1.6 or better. - go get -t github.com/RoaringBitmap/roaring +### Instructions for contributors + +Using bash or other common shells: +``` +$ git clone git@github.com:RoaringBitmap/roaring.git +$ export GO111MODULE=on +$ go mod tidy +$ go test -v +``` ### Example @@ -325,7 +331,7 @@ Only the 32-bit roaring format is standard and cross-operable between Java, C++, ### Documentation -Current documentation is available at http://godoc.org/github.com/RoaringBitmap/roaring and http://godoc.org/github.com/RoaringBitmap/roaring64 +Current documentation is available at https://pkg.go.dev/github.com/RoaringBitmap/roaring and https://pkg.go.dev/github.com/RoaringBitmap/roaring/roaring64 ### Goroutine safety diff --git a/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go b/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go index 80b7eecf7ef..9541fd53693 100644 --- a/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go +++ b/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go @@ -1007,16 +1007,42 @@ func (ac *arrayContainer) containerType() contype { return arrayContype } -func (ac *arrayContainer) addOffset(x uint16) []container { - low := &arrayContainer{} - high := &arrayContainer{} +func (ac *arrayContainer) addOffset(x uint16) (container, container) { + var low, high *arrayContainer + + if len(ac.content) == 0 { + return nil, nil + } + + if y := uint32(ac.content[0]) + uint32(x); highbits(y) == 0 { + // Some elements will fall into low part, allocate a container. + // Checking the first one is enough because they are ordered. + low = &arrayContainer{} + } + if y := uint32(ac.content[len(ac.content)-1]) + uint32(x); highbits(y) > 0 { + // Some elements will fall into high part, allocate a container. + // Checking the last one is enough because they are ordered. + high = &arrayContainer{} + } + for _, val := range ac.content { y := uint32(val) + uint32(x) if highbits(y) > 0 { + // OK, if high == nil then highbits(y) == 0 for all y. high.content = append(high.content, lowbits(y)) } else { + // OK, if low == nil then highbits(y) > 0 for all y. low.content = append(low.content, lowbits(y)) } } - return []container{low, high} + + // Ensure proper nil interface. + if low == nil { + return nil, high + } + if high == nil { + return low, nil + } + + return low, high } diff --git a/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go b/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go index f8367da0e5f..71029f4ff76 100644 --- a/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go +++ b/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go @@ -350,7 +350,6 @@ func (bc *bitmapContainer) getCardinality() int { return bc.cardinality } - func (bc *bitmapContainer) isEmpty() bool { return bc.cardinality == 0 } @@ -1125,15 +1124,20 @@ func (bc *bitmapContainer) containerType() contype { return bitmapContype } -func (bc *bitmapContainer) addOffset(x uint16) []container { - low := newBitmapContainer() - high := newBitmapContainer() +func (bc *bitmapContainer) addOffset(x uint16) (container, container) { + var low, high *bitmapContainer + + if bc.cardinality == 0 { + return nil, nil + } + b := uint32(x) >> 6 i := uint32(x) % 64 end := uint32(1024) - b + + low = newBitmapContainer() if i == 0 { copy(low.bitmap[b:], bc.bitmap[:end]) - copy(high.bitmap[:b], bc.bitmap[end:]) } else { low.bitmap[b] = bc.bitmap[0] << i for k := uint32(1); k < end; k++ { @@ -1141,6 +1145,26 @@ func (bc *bitmapContainer) addOffset(x uint16) []container { newval |= bc.bitmap[k-1] >> (64 - i) low.bitmap[b+k] = newval } + } + low.computeCardinality() + + if low.cardinality == bc.cardinality { + // All elements from bc ended up in low, meaning high will be empty. + return low, nil + } + + if low.cardinality == 0 { + // low is empty, let's reuse the container for high. + high = low + low = nil + } else { + // None of the containers will be empty, so allocate both. + high = newBitmapContainer() + } + + if i == 0 { + copy(high.bitmap[:b], bc.bitmap[end:]) + } else { for k := end; k < 1024; k++ { newval := bc.bitmap[k] << i newval |= bc.bitmap[k-1] >> (64 - i) @@ -1148,7 +1172,12 @@ func (bc *bitmapContainer) addOffset(x uint16) []container { } high.bitmap[b] = bc.bitmap[1023] >> (64 - i) } - low.computeCardinality() high.computeCardinality() - return []container{low, high} + + // Ensure proper nil interface. + if low == nil { + return nil, high + } + + return low, high } diff --git a/vendor/github.com/RoaringBitmap/roaring/clz.go b/vendor/github.com/RoaringBitmap/roaring/clz.go index bcd80d32f08..ee0ebc6c96d 100644 --- a/vendor/github.com/RoaringBitmap/roaring/clz.go +++ b/vendor/github.com/RoaringBitmap/roaring/clz.go @@ -1,4 +1,6 @@ +//go:build go1.9 // +build go1.9 + // "go1.9", from Go version 1.9 onward // See https://golang.org/pkg/go/build/#hdr-Build_Constraints diff --git a/vendor/github.com/RoaringBitmap/roaring/clz_compat.go b/vendor/github.com/RoaringBitmap/roaring/clz_compat.go index eeef4de35bb..7ee16b4aeb6 100644 --- a/vendor/github.com/RoaringBitmap/roaring/clz_compat.go +++ b/vendor/github.com/RoaringBitmap/roaring/clz_compat.go @@ -1,3 +1,4 @@ +//go:build !go1.9 // +build !go1.9 package roaring diff --git a/vendor/github.com/RoaringBitmap/roaring/ctz.go b/vendor/github.com/RoaringBitmap/roaring/ctz.go index e399dddebd0..fbcfe9128a5 100644 --- a/vendor/github.com/RoaringBitmap/roaring/ctz.go +++ b/vendor/github.com/RoaringBitmap/roaring/ctz.go @@ -1,4 +1,6 @@ +//go:build go1.9 // +build go1.9 + // "go1.9", from Go version 1.9 onward // See https://golang.org/pkg/go/build/#hdr-Build_Constraints diff --git a/vendor/github.com/RoaringBitmap/roaring/ctz_compat.go b/vendor/github.com/RoaringBitmap/roaring/ctz_compat.go index 80220e6bee0..d01df825a0b 100644 --- a/vendor/github.com/RoaringBitmap/roaring/ctz_compat.go +++ b/vendor/github.com/RoaringBitmap/roaring/ctz_compat.go @@ -1,3 +1,4 @@ +//go:build !go1.9 // +build !go1.9 package roaring diff --git a/vendor/github.com/RoaringBitmap/roaring/fastaggregation.go b/vendor/github.com/RoaringBitmap/roaring/fastaggregation.go index 47bda7125df..7d0a92fe0eb 100644 --- a/vendor/github.com/RoaringBitmap/roaring/fastaggregation.go +++ b/vendor/github.com/RoaringBitmap/roaring/fastaggregation.go @@ -121,6 +121,10 @@ func (x1 *Bitmap) repairAfterLazy() { // FastAnd computes the intersection between many bitmaps quickly // Compared to the And function, it can take many bitmaps as input, thus saving the trouble // of manually calling "And" many times. +// +// Performance hints: if you have very large and tiny bitmaps, +// it may be beneficial performance-wise to put a tiny bitmap +// in first position. func FastAnd(bitmaps ...*Bitmap) *Bitmap { if len(bitmaps) == 0 { return NewBitmap() diff --git a/vendor/github.com/RoaringBitmap/roaring/popcnt.go b/vendor/github.com/RoaringBitmap/roaring/popcnt.go index 9d99508ce0f..b4980aadb8c 100644 --- a/vendor/github.com/RoaringBitmap/roaring/popcnt.go +++ b/vendor/github.com/RoaringBitmap/roaring/popcnt.go @@ -1,4 +1,6 @@ +//go:build go1.9 // +build go1.9 + // "go1.9", from Go version 1.9 onward // See https://golang.org/pkg/go/build/#hdr-Build_Constraints diff --git a/vendor/github.com/RoaringBitmap/roaring/popcnt_asm.go b/vendor/github.com/RoaringBitmap/roaring/popcnt_asm.go index 882d7f4ecfa..ba2dac91efb 100644 --- a/vendor/github.com/RoaringBitmap/roaring/popcnt_asm.go +++ b/vendor/github.com/RoaringBitmap/roaring/popcnt_asm.go @@ -1,3 +1,4 @@ +//go:build amd64 && !appengine && !go1.9 // +build amd64,!appengine,!go1.9 package roaring diff --git a/vendor/github.com/RoaringBitmap/roaring/popcnt_compat.go b/vendor/github.com/RoaringBitmap/roaring/popcnt_compat.go index 7ae82d4c830..5933e52fc98 100644 --- a/vendor/github.com/RoaringBitmap/roaring/popcnt_compat.go +++ b/vendor/github.com/RoaringBitmap/roaring/popcnt_compat.go @@ -1,3 +1,4 @@ +//go:build !go1.9 // +build !go1.9 package roaring diff --git a/vendor/github.com/RoaringBitmap/roaring/popcnt_generic.go b/vendor/github.com/RoaringBitmap/roaring/popcnt_generic.go index edf2083f190..4ae6d5afa4e 100644 --- a/vendor/github.com/RoaringBitmap/roaring/popcnt_generic.go +++ b/vendor/github.com/RoaringBitmap/roaring/popcnt_generic.go @@ -1,3 +1,4 @@ +//go:build !amd64 || appengine || go1.9 // +build !amd64 appengine go1.9 package roaring diff --git a/vendor/github.com/RoaringBitmap/roaring/roaring.go b/vendor/github.com/RoaringBitmap/roaring/roaring.go index 53068e4d933..7220da272c0 100644 --- a/vendor/github.com/RoaringBitmap/roaring/roaring.go +++ b/vendor/github.com/RoaringBitmap/roaring/roaring.go @@ -53,6 +53,59 @@ func (rb *Bitmap) ToBytes() ([]byte, error) { return rb.highlowcontainer.toBytes() } +// Checksum computes a hash (currently FNV-1a) for a bitmap that is suitable for +// using bitmaps as elements in hash sets or as keys in hash maps, as well as +// generally quicker comparisons. +// The implementation is biased towards efficiency in little endian machines, so +// expect some extra CPU cycles and memory to be used if your machine is big endian. +// Likewise, don't use this to verify integrity unless you're certain you'll load +// the bitmap on a machine with the same endianess used to create it. +func (rb *Bitmap) Checksum() uint64 { + const ( + offset = 14695981039346656037 + prime = 1099511628211 + ) + + var bytes []byte + + hash := uint64(offset) + + bytes = uint16SliceAsByteSlice(rb.highlowcontainer.keys) + + for _, b := range bytes { + hash ^= uint64(b) + hash *= prime + } + + for _, c := range rb.highlowcontainer.containers { + // 0 separator + hash ^= 0 + hash *= prime + + switch c := c.(type) { + case *bitmapContainer: + bytes = uint64SliceAsByteSlice(c.bitmap) + case *arrayContainer: + bytes = uint16SliceAsByteSlice(c.content) + case *runContainer16: + bytes = interval16SliceAsByteSlice(c.iv) + default: + panic("invalid container type") + } + + if len(bytes) == 0 { + panic("empty containers are not supported") + } + + for _, b := range bytes { + hash ^= uint64(b) + hash *= prime + } + } + + return hash +} + // ReadFrom reads a serialized version of this bitmap from stream. // The format is compatible with other RoaringBitmap // implementations (Java, C) and is documented here: @@ -218,6 +271,14 @@ type intIterator struct { hs uint32 iter shortPeekable highlowcontainer *roaringArray + + // These embedded iterators per container type help reduce load in the GC. + // This way, instead of making up-to 64k allocations per full iteration + // we get a single allocation and simply reinitialize the appropriate + // iterator and point to it in the generic `iter` member on each key bound. + shortIter shortIterator + runIter runIterator16 + bitmapIter bitmapContainerShortIterator } // HasNext returns true if there are more integers to iterate over @@ -227,8 +288,19 @@ func (ii *intIterator) HasNext() bool { func (ii *intIterator) init() { if ii.highlowcontainer.size() > ii.pos { - ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getShortIterator() ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 + c := ii.highlowcontainer.getContainerAtIndex(ii.pos) + switch t := c.(type) { + case *arrayContainer: + ii.shortIter = shortIterator{t.content, 0} + ii.iter = &ii.shortIter + case *runContainer16: + ii.runIter = runIterator16{rc: t, curIndex: 0, curPosInIndex: 0} + ii.iter = &ii.runIter + case *bitmapContainer: + ii.bitmapIter = bitmapContainerShortIterator{t, t.NextSetBit(0)} + ii.iter = &ii.bitmapIter + } } } @@ -249,14 +321,14 @@ func (ii *intIterator) PeekNext() uint32 { // AdvanceIfNeeded advances as long as the next value is smaller than minval func (ii *intIterator) AdvanceIfNeeded(minval uint32) { - to := minval >> 16 + to := minval & 0xffff0000 - for ii.HasNext() && (ii.hs>>16) < to { + for ii.HasNext() && ii.hs < to { ii.pos++ ii.init() } - if ii.HasNext() && (ii.hs>>16) == to { + if ii.HasNext() && ii.hs == to { ii.iter.advanceIfNeeded(lowbits(minval)) if !ii.iter.hasNext() { @@ -266,12 +338,17 @@ func (ii *intIterator) AdvanceIfNeeded(minval uint32) { } } -func newIntIterator(a *Bitmap) *intIterator { - p := new(intIterator) +// IntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) +type IntIterator = intIterator + + +// Initialize configures the existing iterator so that it can iterate through the values of +// the provided bitmap. +// The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). +func (p *intIterator) Initialize(a *Bitmap) { p.pos = 0 p.highlowcontainer = &a.highlowcontainer p.init() - return p } type intReverseIterator struct { @@ -279,6 +356,10 @@ type intReverseIterator struct { hs uint32 iter shortIterable highlowcontainer *roaringArray + + shortIter reverseIterator + runIter runReverseIterator16 + bitmapIter reverseBitmapContainerShortIterator } // HasNext returns true if there are more integers to iterate over @@ -288,8 +369,30 @@ func (ii *intReverseIterator) HasNext() bool { func (ii *intReverseIterator) init() { if ii.pos >= 0 { - ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getReverseIterator() ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 + c := ii.highlowcontainer.getContainerAtIndex(ii.pos) + switch t := c.(type) { + case *arrayContainer: + ii.shortIter = reverseIterator{t.content, len(t.content) - 1} + ii.iter = &ii.shortIter + case *runContainer16: + index := int(len(t.iv)) - 1 + pos := uint16(0) + + if index >= 0 { + pos = t.iv[index].length + } + + ii.runIter = runReverseIterator16{rc: t, curIndex: index, curPosInIndex: pos} + ii.iter = &ii.runIter + case *bitmapContainer: + pos := -1 + if t.cardinality > 0 { + pos = int(t.maximum()) + } + ii.bitmapIter = reverseBitmapContainerShortIterator{t, pos} + ii.iter = &ii.bitmapIter + } } else { ii.iter = nil } @@ -305,12 +408,16 @@ func (ii *intReverseIterator) Next() uint32 { return x } -func newIntReverseIterator(a *Bitmap) *intReverseIterator { - p := new(intReverseIterator) +// IntReverseIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) +type IntReverseIterator = intReverseIterator + +// Initialize configures the existing iterator so that it can iterate through the values of +// the provided bitmap. +// The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). +func (p *intReverseIterator) Initialize(a *Bitmap) { p.highlowcontainer = &a.highlowcontainer p.pos = a.highlowcontainer.size() - 1 p.init() - return p } // ManyIntIterable allows you to iterate over the values in a Bitmap @@ -326,12 +433,27 @@ type manyIntIterator struct { hs uint32 iter manyIterable highlowcontainer *roaringArray + + shortIter shortIterator + runIter runIterator16 + bitmapIter bitmapContainerManyIterator } func (ii *manyIntIterator) init() { if ii.highlowcontainer.size() > ii.pos { - ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getManyIterator() ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 + c := ii.highlowcontainer.getContainerAtIndex(ii.pos) + switch t := c.(type) { + case *arrayContainer: + ii.shortIter = shortIterator{t.content, 0} + ii.iter = &ii.shortIter + case *runContainer16: + ii.runIter = runIterator16{rc: t, curIndex: 0, curPosInIndex: 0} + ii.iter = &ii.runIter + case *bitmapContainer: + ii.bitmapIter = bitmapContainerManyIterator{t, -1, 0} + ii.iter = &ii.bitmapIter + } } else { ii.iter = nil } @@ -373,12 +495,17 @@ func (ii *manyIntIterator) NextMany64(hs64 uint64, buf []uint64) int { return n } -func newManyIntIterator(a *Bitmap) *manyIntIterator { - p := new(manyIntIterator) + +// ManyIntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) +type ManyIntIterator = manyIntIterator + +// Initialize configures the existing iterator so that it can iterate through the values of +// the provided bitmap. +// The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). +func (p *manyIntIterator) Initialize(a *Bitmap) { p.pos = 0 p.highlowcontainer = &a.highlowcontainer p.init() - return p } // String creates a string representation of the Bitmap @@ -410,7 +537,7 @@ func (rb *Bitmap) String() string { // Iterate iterates over the bitmap, calling the given callback with each value in the bitmap. If the callback returns // false, the iteration is halted. // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). -// There is no guarantee as to what order the values will be iterated +// There is no guarantee as to what order the values will be iterated. func (rb *Bitmap) Iterate(cb func(x uint32) bool) { for i := 0; i < rb.highlowcontainer.size(); i++ { hs := uint32(rb.highlowcontainer.getKeyAtIndex(i)) << 16 @@ -442,19 +569,25 @@ func (rb *Bitmap) Iterate(cb func(x uint32) bool) { // Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order; // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). func (rb *Bitmap) Iterator() IntPeekable { - return newIntIterator(rb) + p := new(intIterator) + p.Initialize(rb) + return p } // ReverseIterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order; // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). func (rb *Bitmap) ReverseIterator() IntIterable { - return newIntReverseIterator(rb) + p := new(intReverseIterator) + p.Initialize(rb) + return p } // ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order; // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). func (rb *Bitmap) ManyIterator() ManyIntIterable { - return newManyIntIterator(rb) + p := new(manyIntIterator) + p.Initialize(rb) + return p } // Clone creates a copy of the Bitmap @@ -466,11 +599,17 @@ func (rb *Bitmap) Clone() *Bitmap { // Minimum get the smallest value stored in this roaring bitmap, assumes that it is not empty func (rb *Bitmap) Minimum() uint32 { + if len(rb.highlowcontainer.containers) == 0 { + panic("Empty bitmap") + } return uint32(rb.highlowcontainer.containers[0].minimum()) | (uint32(rb.highlowcontainer.keys[0]) << 16) } // Maximum get the largest value stored in this roaring bitmap, assumes that it is not empty func (rb *Bitmap) Maximum() uint32 { + if len(rb.highlowcontainer.containers) == 0 { + panic("Empty bitmap") + } lastindex := len(rb.highlowcontainer.containers) - 1 return uint32(rb.highlowcontainer.containers[lastindex].maximum()) | (uint32(rb.highlowcontainer.keys[lastindex]) << 16) } @@ -514,34 +653,38 @@ func AddOffset64(x *Bitmap, offset int64) (answer *Bitmap) { containerOffset64 = offset >> 16 } - if containerOffset64 >= (1<<16) || containerOffset64 <= -(1<<16) { - return New() + answer = New() + + if containerOffset64 >= (1<<16) || containerOffset64 < -(1<<16) { + return answer } containerOffset := int32(containerOffset64) inOffset := (uint16)(offset - containerOffset64*(1<<16)) if inOffset == 0 { - answer = x.Clone() - for pos := 0; pos < answer.highlowcontainer.size(); pos++ { - key := int32(answer.highlowcontainer.getKeyAtIndex(pos)) + for pos := 0; pos < x.highlowcontainer.size(); pos++ { + key := int32(x.highlowcontainer.getKeyAtIndex(pos)) key += containerOffset if key >= 0 && key <= MaxUint16 { - answer.highlowcontainer.keys[pos] = uint16(key) + c := x.highlowcontainer.getContainerAtIndex(pos).clone() + answer.highlowcontainer.appendContainer(uint16(key), c, false) } } } else { - answer = New() - for pos := 0; pos < x.highlowcontainer.size(); pos++ { key := int32(x.highlowcontainer.getKeyAtIndex(pos)) key += containerOffset + if key+1 < 0 || key > MaxUint16 { + continue + } + c := x.highlowcontainer.getContainerAtIndex(pos) - offsetted := c.addOffset(inOffset) + lo, hi := c.addOffset(inOffset) - if !offsetted[0].isEmpty() && (key >= 0 && key <= MaxUint16) { + if lo != nil && key >= 0 { curSize := answer.highlowcontainer.size() lastkey := int32(0) @@ -551,15 +694,15 @@ func AddOffset64(x *Bitmap, offset int64) (answer *Bitmap) { if curSize > 0 && lastkey == key { prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1) - orrseult := prev.ior(offsetted[0]) - answer.highlowcontainer.setContainerAtIndex(curSize-1, orrseult) + orresult := prev.ior(lo) + answer.highlowcontainer.setContainerAtIndex(curSize-1, orresult) } else { - answer.highlowcontainer.appendContainer(uint16(key), offsetted[0], false) + answer.highlowcontainer.appendContainer(uint16(key), lo, false) } } - if !offsetted[1].isEmpty() && ((key+1) >= 0 && (key+1) <= MaxUint16) { - answer.highlowcontainer.appendContainer(uint16(key+1), offsetted[1], false) + if hi != nil && key+1 <= MaxUint16 { + answer.highlowcontainer.appendContainer(uint16(key+1), hi, false) } } } @@ -693,10 +836,6 @@ func (rb *Bitmap) Rank(x uint32) uint64 { // the smallest element. Note that this function differs in convention from // the Rank function which returns 1 on the smallest value. func (rb *Bitmap) Select(x uint32) (uint32, error) { - if rb.GetCardinality() <= uint64(x) { - return 0, fmt.Errorf("can't find %dth integer in a bitmap with only %d items", x, rb.GetCardinality()) - } - remaining := x for i := 0; i < rb.highlowcontainer.size(); i++ { c := rb.highlowcontainer.getContainerAtIndex(i) @@ -860,6 +999,28 @@ main: return answer } +// IntersectsWithInterval checks whether a bitmap 'rb' and an open interval '[x,y)' intersect. +func (rb *Bitmap) IntersectsWithInterval(x, y uint64) bool { + if x >= y { + return false + } + if x > MaxUint32 { + return false + } + + it := intIterator{} + it.Initialize(rb) + it.AdvanceIfNeeded(uint32(x)) + if !it.HasNext() { + return false + } + if uint64(it.Next()) >= y { + return false + } + + return true +} + // Intersects checks whether two bitmap intersects, bitmaps are not modified func (rb *Bitmap) Intersects(x2 *Bitmap) bool { pos1 := 0 @@ -1552,27 +1713,3 @@ func (rb *Bitmap) Stats() Statistics { } return stats } - -func (rb *Bitmap) checkValidity() bool { - for _, c := range rb.highlowcontainer.containers { - - switch c.(type) { - case *arrayContainer: - if c.getCardinality() > arrayDefaultMaxSize { - fmt.Println("Array containers are limited to size ", arrayDefaultMaxSize) - return false - } - case *bitmapContainer: - if c.getCardinality() <= arrayDefaultMaxSize { - fmt.Println("Bitmaps would be more concise as an array!") - return false - } - case *runContainer16: - if c.getSizeInBytes() > minOfInt(bitmapContainerSizeInBytes(), arrayContainerSizeInBytes(c.getCardinality())) { - fmt.Println("Inefficient run container!") - return false - } - } - } - return true -} \ No newline at end of file diff --git a/vendor/github.com/RoaringBitmap/roaring/roaringarray.go b/vendor/github.com/RoaringBitmap/roaring/roaringarray.go index f7b7d732bf9..eeb3d313159 100644 --- a/vendor/github.com/RoaringBitmap/roaring/roaringarray.go +++ b/vendor/github.com/RoaringBitmap/roaring/roaringarray.go @@ -4,12 +4,15 @@ import ( "bytes" "encoding/binary" "fmt" - "io" "github.com/RoaringBitmap/roaring/internal" + "io" ) type container interface { - addOffset(uint16) []container + // addOffset returns the (low, high) parts of the shifted container. + // Whenever one of them would be empty, nil will be returned instead to + // avoid unnecessary allocations. + addOffset(uint16) (container, container) clone() container and(container) container @@ -551,9 +554,9 @@ func (ra *roaringArray) toBytes() ([]byte, error) { } func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte) (int64, error) { - var cookie uint32 + var cookie uint32 var err error - if len(cookieHeader) > 0 && len(cookieHeader) != 4 { + if len(cookieHeader) > 0 && len(cookieHeader) != 4 { return int64(len(cookieHeader)), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: incorrect size of cookie header") } if len(cookieHeader) == 4 { @@ -645,7 +648,7 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte } nb := runContainer16{ - iv: byteSliceAsInterval16Slice(buf), + iv: byteSliceAsInterval16Slice(buf), } ra.containers[i] = &nb diff --git a/vendor/github.com/RoaringBitmap/roaring/runcontainer.go b/vendor/github.com/RoaringBitmap/roaring/runcontainer.go index a722760b488..4ce48a294ca 100644 --- a/vendor/github.com/RoaringBitmap/roaring/runcontainer.go +++ b/vendor/github.com/RoaringBitmap/roaring/runcontainer.go @@ -2281,7 +2281,7 @@ func runArrayUnionToRuns(rc *runContainer16, ac *arrayContainer) ([]interval16, pos2++ } } - cardMinusOne += previousInterval.length + 1 + cardMinusOne += previousInterval.length target = append(target, previousInterval) return target, cardMinusOne @@ -2582,9 +2582,27 @@ func (rc *runContainer16) serializedSizeInBytes() int { return 2 + len(rc.iv)*4 } -func (rc *runContainer16) addOffset(x uint16) []container { - low := newRunContainer16() - high := newRunContainer16() +func (rc *runContainer16) addOffset(x uint16) (container, container) { + var low, high *runContainer16 + + if len(rc.iv) == 0 { + return nil, nil + } + + first := uint32(rc.iv[0].start) + uint32(x) + if highbits(first) == 0 { + // Some elements will fall into low part, allocate a container. + // Checking the first one is enough because they are ordered. + low = newRunContainer16() + } + last := uint32(rc.iv[len(rc.iv)-1].start) + last += uint32(rc.iv[len(rc.iv)-1].length) + last += uint32(x) + if highbits(last) > 0 { + // Some elements will fall into high part, allocate a container. + // Checking the last one is enough because they are ordered. + high = newRunContainer16() + } for _, iv := range rc.iv { val := int(iv.start) + int(x) @@ -2600,5 +2618,14 @@ func (rc *runContainer16) addOffset(x uint16) []container { high.iv = append(high.iv, interval16{uint16(val & 0xffff), iv.length}) } } - return []container{low, high} + + // Ensure proper nil interface. + if low == nil { + return nil, high + } + if high == nil { + return low, nil + } + + return low, high } diff --git a/vendor/github.com/RoaringBitmap/roaring/serialization_generic.go b/vendor/github.com/RoaringBitmap/roaring/serialization_generic.go index 90a336cdaec..7e1f1802cb4 100644 --- a/vendor/github.com/RoaringBitmap/roaring/serialization_generic.go +++ b/vendor/github.com/RoaringBitmap/roaring/serialization_generic.go @@ -1,3 +1,4 @@ +//go:build (!amd64 && !386 && !arm && !arm64 && !ppc64le && !mipsle && !mips64le && !mips64p32le && !wasm) || appengine // +build !amd64,!386,!arm,!arm64,!ppc64le,!mipsle,!mips64le,!mips64p32le,!wasm appengine package roaring @@ -84,6 +85,17 @@ func uint16SliceAsByteSlice(slice []uint16) []byte { return by } +func interval16SliceAsByteSlice(slice []interval16) []byte { + by := make([]byte, len(slice)*4) + + for i, v := range slice { + binary.LittleEndian.PutUint16(by[i*2:], v.start) + binary.LittleEndian.PutUint16(by[i*2+2:], v.length) + } + + return by +} + func byteSliceAsUint16Slice(slice []byte) []uint16 { if len(slice)%2 != 0 { panic("Slice size should be divisible by 2") diff --git a/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go b/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go index 221e173fe48..2e4ea595439 100644 --- a/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go +++ b/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go @@ -1,3 +1,4 @@ +//go:build (386 && !appengine) || (amd64 && !appengine) || (arm && !appengine) || (arm64 && !appengine) || (ppc64le && !appengine) || (mipsle && !appengine) || (mips64le && !appengine) || (mips64p32le && !appengine) || (wasm && !appengine) // +build 386,!appengine amd64,!appengine arm,!appengine arm64,!appengine ppc64le,!appengine mipsle,!appengine mips64le,!appengine mips64p32le,!appengine wasm,!appengine package roaring @@ -56,6 +57,22 @@ func uint16SliceAsByteSlice(slice []uint16) []byte { return result } +func interval16SliceAsByteSlice(slice []interval16) []byte { + // make a new slice header + header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) + + // update its capacity and length + header.Len *= 4 + header.Cap *= 4 + + // instantiate result and use KeepAlive so data isn't unmapped. + result := *(*[]byte)(unsafe.Pointer(&header)) + runtime.KeepAlive(&slice) + + // return it + return result +} + func (bc *bitmapContainer) asLittleEndianByteSlice() []byte { return uint64SliceAsByteSlice(bc.bitmap) } @@ -134,7 +151,124 @@ func byteSliceAsInterval16Slice(slice []byte) (result []interval16) { return } -// FromBuffer creates a bitmap from its serialized version stored in buffer. +func byteSliceAsContainerSlice(slice []byte) (result []container) { + var c container + containerSize := int(unsafe.Sizeof(c)) + + if len(slice)%containerSize != 0 { + panic("Slice size should be divisible by unsafe.Sizeof(container)") + } + // reference: https://go101.org/article/unsafe.html + + // make a new slice header + bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) + rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) + + // transfer the data from the given slice to a new variable (our result) + rHeader.Data = bHeader.Data + rHeader.Len = bHeader.Len / containerSize + rHeader.Cap = bHeader.Cap / containerSize + + // instantiate result and use KeepAlive so data isn't unmapped. + runtime.KeepAlive(&slice) // it is still crucial, GC can free it) + + // return result + return +} + +func byteSliceAsBitsetSlice(slice []byte) (result []bitmapContainer) { + bitsetSize := int(unsafe.Sizeof(bitmapContainer{})) + if len(slice)%bitsetSize != 0 { + panic("Slice size should be divisible by unsafe.Sizeof(bitmapContainer)") + } + // reference: https://go101.org/article/unsafe.html + + // make a new slice header + bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) + rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) + + // transfer the data from the given slice to a new variable (our result) + rHeader.Data = bHeader.Data + rHeader.Len = bHeader.Len / bitsetSize + rHeader.Cap = bHeader.Cap / bitsetSize + + // instantiate result and use KeepAlive so data isn't unmapped. + runtime.KeepAlive(&slice) // it is still crucial, GC can free it) + + // return result + return +} + +func byteSliceAsArraySlice(slice []byte) (result []arrayContainer) { + arraySize := int(unsafe.Sizeof(arrayContainer{})) + if len(slice)%arraySize != 0 { + panic("Slice size should be divisible by unsafe.Sizeof(arrayContainer)") + } + // reference: https://go101.org/article/unsafe.html + + // make a new slice header + bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) + rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) + + // transfer the data from the given slice to a new variable (our result) + rHeader.Data = bHeader.Data + rHeader.Len = bHeader.Len / arraySize + rHeader.Cap = bHeader.Cap / arraySize + + // instantiate result and use KeepAlive so data isn't unmapped. + runtime.KeepAlive(&slice) // it is still crucial, GC can free it) + + // return result + return +} + +func byteSliceAsRun16Slice(slice []byte) (result []runContainer16) { + run16Size := int(unsafe.Sizeof(runContainer16{})) + if len(slice)%run16Size != 0 { + panic("Slice size should be divisible by unsafe.Sizeof(runContainer16)") + } + // reference: https://go101.org/article/unsafe.html + + // make a new slice header + bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) + rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) + + // transfer the data from the given slice to a new variable (our result) + rHeader.Data = bHeader.Data + rHeader.Len = bHeader.Len / run16Size + rHeader.Cap = bHeader.Cap / run16Size + + // instantiate result and use KeepAlive so data isn't unmapped. + runtime.KeepAlive(&slice) // it is still crucial, GC can free it) + + // return result + return +} + +func byteSliceAsBoolSlice(slice []byte) (result []bool) { + boolSize := int(unsafe.Sizeof(true)) + if len(slice)%boolSize != 0 { + panic("Slice size should be divisible by unsafe.Sizeof(bool)") + } + // reference: https://go101.org/article/unsafe.html + + // make a new slice header + bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) + rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) + + // transfer the data from the given slice to a new variable (our result) + rHeader.Data = bHeader.Data + rHeader.Len = bHeader.Len / boolSize + rHeader.Cap = bHeader.Cap / boolSize + + // instantiate result and use KeepAlive so data isn't unmapped. + runtime.KeepAlive(&slice) // it is still crucial, GC can free it) + + // return result + return +} + +// FrozenView creates a static view of a serialized bitmap stored in buf. // It uses CRoaring's frozen bitmap format. // // The format specification is available here: @@ -198,13 +332,13 @@ func (rb *Bitmap) FrozenView(buf []byte) error { const FROZEN_COOKIE = 13766 var ( - FrozenBitmapInvalidCookie = errors.New("header does not contain the FROZEN_COOKIE") - FrozenBitmapBigEndian = errors.New("loading big endian frozen bitmaps is not supported") - FrozenBitmapIncomplete = errors.New("input buffer too small to contain a frozen bitmap") - FrozenBitmapOverpopulated = errors.New("too many containers") - FrozenBitmapUnexpectedData = errors.New("spurious data in input") + FrozenBitmapInvalidCookie = errors.New("header does not contain the FROZEN_COOKIE") + FrozenBitmapBigEndian = errors.New("loading big endian frozen bitmaps is not supported") + FrozenBitmapIncomplete = errors.New("input buffer too small to contain a frozen bitmap") + FrozenBitmapOverpopulated = errors.New("too many containers") + FrozenBitmapUnexpectedData = errors.New("spurious data in input") FrozenBitmapInvalidTypecode = errors.New("unrecognized typecode") - FrozenBitmapBufferTooSmall = errors.New("buffer too small") + FrozenBitmapBufferTooSmall = errors.New("buffer too small") ) func (ra *roaringArray) frozenView(buf []byte) error { @@ -213,14 +347,14 @@ func (ra *roaringArray) frozenView(buf []byte) error { } headerBE := binary.BigEndian.Uint32(buf[len(buf)-4:]) - if headerBE & 0x7fff == FROZEN_COOKIE { + if headerBE&0x7fff == FROZEN_COOKIE { return FrozenBitmapBigEndian } header := binary.LittleEndian.Uint32(buf[len(buf)-4:]) buf = buf[:len(buf)-4] - if header & 0x7fff != FROZEN_COOKIE { + if header&0x7fff != FROZEN_COOKIE { return FrozenBitmapInvalidCookie } @@ -243,29 +377,29 @@ func (ra *roaringArray) frozenView(buf []byte) error { keys := byteSliceAsUint16Slice(buf[len(buf)-2*nCont:]) buf = buf[:len(buf)-2*nCont] - nBitmap, nArray, nRun := uint64(0), uint64(0), uint64(0) - nArrayEl, nRunEl := uint64(0), uint64(0) + nBitmap, nArray, nRun := 0, 0, 0 + nArrayEl, nRunEl := 0, 0 for i, t := range types { - switch (t) { + switch t { case 1: nBitmap++ case 2: nArray++ - nArrayEl += uint64(counts[i])+1 + nArrayEl += int(counts[i]) + 1 case 3: nRun++ - nRunEl += uint64(counts[i]) + nRunEl += int(counts[i]) default: return FrozenBitmapInvalidTypecode } } - if uint64(len(buf)) < (1 << 13)*nBitmap + 4*nRunEl + 2*nArrayEl { + if len(buf) < (1<<13)*nBitmap+4*nRunEl+2*nArrayEl { return FrozenBitmapIncomplete } - bitsetsArena := byteSliceAsUint64Slice(buf[:(1 << 13)*nBitmap]) - buf = buf[(1 << 13)*nBitmap:] + bitsetsArena := byteSliceAsUint64Slice(buf[:(1<<13)*nBitmap]) + buf = buf[(1<<13)*nBitmap:] runsArena := byteSliceAsInterval16Slice(buf[:4*nRunEl]) buf = buf[4*nRunEl:] @@ -277,27 +411,44 @@ func (ra *roaringArray) frozenView(buf []byte) error { return FrozenBitmapUnexpectedData } - // TODO: maybe arena_alloc all this. - containers := make([]container, nCont) - bitsets := make([]bitmapContainer, nBitmap) - arrays := make([]arrayContainer, nArray) - runs := make([]runContainer16, nRun) - needCOW := make([]bool, nCont) + var c container + containersSz := int(unsafe.Sizeof(c))*nCont + bitsetsSz := int(unsafe.Sizeof(bitmapContainer{}))*nBitmap + arraysSz := int(unsafe.Sizeof(arrayContainer{}))*nArray + runsSz := int(unsafe.Sizeof(runContainer16{}))*nRun + needCOWSz := int(unsafe.Sizeof(true))*nCont + + bitmapArenaSz := containersSz + bitsetsSz + arraysSz + runsSz + needCOWSz + bitmapArena := make([]byte, bitmapArenaSz) + + containers := byteSliceAsContainerSlice(bitmapArena[:containersSz]) + bitmapArena = bitmapArena[containersSz:] + + bitsets := byteSliceAsBitsetSlice(bitmapArena[:bitsetsSz]) + bitmapArena = bitmapArena[bitsetsSz:] + + arrays := byteSliceAsArraySlice(bitmapArena[:arraysSz]) + bitmapArena = bitmapArena[arraysSz:] - iBitset, iArray, iRun := uint64(0), uint64(0), uint64(0) + runs := byteSliceAsRun16Slice(bitmapArena[:runsSz]) + bitmapArena = bitmapArena[runsSz:] + + needCOW := byteSliceAsBoolSlice(bitmapArena) + + iBitset, iArray, iRun := 0, 0, 0 for i, t := range types { needCOW[i] = true - switch (t) { + switch t { case 1: containers[i] = &bitsets[iBitset] - bitsets[iBitset].cardinality = int(counts[i])+1 + bitsets[iBitset].cardinality = int(counts[i]) + 1 bitsets[iBitset].bitmap = bitsetsArena[:1024] bitsetsArena = bitsetsArena[1024:] iBitset++ case 2: containers[i] = &arrays[iArray] - sz := int(counts[i])+1 + sz := int(counts[i]) + 1 arrays[iArray].content = arraysArena[:sz] arraysArena = arraysArena[sz:] iArray++ @@ -363,13 +514,13 @@ func (bm *Bitmap) FreezeTo(buf []byte) (int, error) { } } - serialSize := 4 + 5*nCont + (1 << 13)*nBits + 4*nRunEl + 2*nArrayEl + serialSize := 4 + 5*nCont + (1<<13)*nBits + 4*nRunEl + 2*nArrayEl if len(buf) < serialSize { return 0, FrozenBitmapBufferTooSmall } - bitsArena := byteSliceAsUint64Slice(buf[:(1 << 13)*nBits]) - buf = buf[(1 << 13)*nBits:] + bitsArena := byteSliceAsUint64Slice(buf[:(1<<13)*nBits]) + buf = buf[(1<<13)*nBits:] runsArena := byteSliceAsInterval16Slice(buf[:4*nRunEl]) buf = buf[4*nRunEl:] @@ -386,7 +537,7 @@ func (bm *Bitmap) FreezeTo(buf []byte) (int, error) { types := buf[:nCont] buf = buf[nCont:] - header := uint32(FROZEN_COOKIE|(nCont << 15)) + header := uint32(FROZEN_COOKIE | (nCont << 15)) binary.LittleEndian.PutUint32(buf[:4], header) copy(keys, bm.highlowcontainer.keys[:]) @@ -396,13 +547,13 @@ func (bm *Bitmap) FreezeTo(buf []byte) (int, error) { case *bitmapContainer: copy(bitsArena, v.bitmap) bitsArena = bitsArena[1024:] - counts[i] = uint16(v.cardinality-1) + counts[i] = uint16(v.cardinality - 1) types[i] = 1 case *arrayContainer: copy(arraysArena, v.content) arraysArena = arraysArena[len(v.content):] elems := len(v.content) - counts[i] = uint16(elems-1) + counts[i] = uint16(elems - 1) types[i] = 2 case *runContainer16: copy(runsArena, v.iv) @@ -415,3 +566,87 @@ func (bm *Bitmap) FreezeTo(buf []byte) (int, error) { return serialSize, nil } + +func (bm *Bitmap) WriteFrozenTo(wr io.Writer) (int, error) { + // FIXME: this is a naive version that iterates 4 times through the + // containers and allocates 3*len(containers) bytes; it's quite likely + // it can be done more efficiently. + containers := bm.highlowcontainer.containers + written := 0 + + for _, c := range containers { + c, ok := c.(*bitmapContainer) + if !ok { + continue + } + n, err := wr.Write(uint64SliceAsByteSlice(c.bitmap)) + written += n + if err != nil { + return written, err + } + } + + for _, c := range containers { + c, ok := c.(*runContainer16) + if !ok { + continue + } + n, err := wr.Write(interval16SliceAsByteSlice(c.iv)) + written += n + if err != nil { + return written, err + } + } + + for _, c := range containers { + c, ok := c.(*arrayContainer) + if !ok { + continue + } + n, err := wr.Write(uint16SliceAsByteSlice(c.content)) + written += n + if err != nil { + return written, err + } + } + + n, err := wr.Write(uint16SliceAsByteSlice(bm.highlowcontainer.keys)) + written += n + if err != nil { + return written, err + } + + countTypeBuf := make([]byte, 3*len(containers)) + counts := byteSliceAsUint16Slice(countTypeBuf[:2*len(containers)]) + types := countTypeBuf[2*len(containers):] + + for i, c := range containers { + switch c := c.(type) { + case *bitmapContainer: + counts[i] = uint16(c.cardinality - 1) + types[i] = 1 + case *arrayContainer: + elems := len(c.content) + counts[i] = uint16(elems - 1) + types[i] = 2 + case *runContainer16: + runs := len(c.iv) + counts[i] = uint16(runs) + types[i] = 3 + } + } + + n, err = wr.Write(countTypeBuf) + written += n + if err != nil { + return written, err + } + + header := uint32(FROZEN_COOKIE | (len(containers) << 15)) + if err := binary.Write(wr, binary.LittleEndian, header); err != nil { + return written, err + } + written += 4 + + return written, nil +} diff --git a/vendor/github.com/RoaringBitmap/roaring/serializationfuzz.go b/vendor/github.com/RoaringBitmap/roaring/serializationfuzz.go index 5eaa22202cf..c7fed02b13a 100644 --- a/vendor/github.com/RoaringBitmap/roaring/serializationfuzz.go +++ b/vendor/github.com/RoaringBitmap/roaring/serializationfuzz.go @@ -1,3 +1,4 @@ +//go:build gofuzz // +build gofuzz package roaring diff --git a/vendor/github.com/RoaringBitmap/roaring/setutil_arm64.go b/vendor/github.com/RoaringBitmap/roaring/setutil_arm64.go index debca813c4a..3e089650edf 100644 --- a/vendor/github.com/RoaringBitmap/roaring/setutil_arm64.go +++ b/vendor/github.com/RoaringBitmap/roaring/setutil_arm64.go @@ -1,3 +1,4 @@ +//go:build arm64 && !gccgo && !appengine // +build arm64,!gccgo,!appengine package roaring diff --git a/vendor/github.com/RoaringBitmap/roaring/setutil_generic.go b/vendor/github.com/RoaringBitmap/roaring/setutil_generic.go index 9edcc9025e7..4755fd5482b 100644 --- a/vendor/github.com/RoaringBitmap/roaring/setutil_generic.go +++ b/vendor/github.com/RoaringBitmap/roaring/setutil_generic.go @@ -1,3 +1,4 @@ +//go:build !arm64 || gccgo || appengine // +build !arm64 gccgo appengine package roaring diff --git a/vendor/github.com/RoaringBitmap/roaring/smat.go b/vendor/github.com/RoaringBitmap/roaring/smat.go index 972cd244da7..c52c5f07cf5 100644 --- a/vendor/github.com/RoaringBitmap/roaring/smat.go +++ b/vendor/github.com/RoaringBitmap/roaring/smat.go @@ -1,3 +1,4 @@ +//go:build gofuzz // +build gofuzz /* @@ -62,8 +63,8 @@ import ( "fmt" "sort" - "github.com/mschoch/smat" "github.com/bits-and-blooms/bitset" + "github.com/mschoch/smat" ) // fuzz test using state machine driven by byte stream. diff --git a/vendor/github.com/blevesearch/bleve/v2/README.md b/vendor/github.com/blevesearch/bleve/v2/README.md index 34f57a4e098..a89be4dd9a9 100644 --- a/vendor/github.com/blevesearch/bleve/v2/README.md +++ b/vendor/github.com/blevesearch/bleve/v2/README.md @@ -9,7 +9,7 @@ [![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/) +A modern text indexing library in go ## Features @@ -24,8 +24,8 @@ modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/) * [Geo Spatial](https://github.com/blevesearch/bleve/blob/master/geo/README.md) * Simple [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry * [tf-idf](https://en.wikipedia.org/wiki/Tf-idf) Scoring -* Boosting -* Search result match highlighting +* Query time boosting +* Search result match highlighting with document fragments * Aggregations/faceting support: * Terms Facet * Numeric Range Facet @@ -97,6 +97,12 @@ Flags: Use "bleve [command] --help" for more information about a command. ``` +## Text Analysis + +Bleve includes general-purpose analyzers (customizable) as well as pre-built text analyzers for the following languages: + +Arabic (ar), Bulgarian (bg), Catalan (ca), Chinese-Japanese-Korean (cjk), Kurdish (ckb), Danish (da), German (de), Greek (el), English (en), Spanish - Castilian (es), Basque (eu), Persian (fa), Finnish (fi), French (fr), Gaelic (ga), Spanish - Galician (gl), Hindi (hi), Croatian (hr), Hungarian (hu), Armenian (hy), Indonesian (id, in), Italian (it), Dutch (nl), Norwegian (no), Portuguese (pt), Romanian (ro), Russian (ru), Swedish (sv), Turkish (tr) + ## Text Analysis Wizard [bleveanalysis.couchbase.com](https://bleveanalysis.couchbase.com) diff --git a/vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/plural_stemmer.go b/vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/plural_stemmer.go new file mode 100644 index 00000000000..0de7c1bbf60 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/plural_stemmer.go @@ -0,0 +1,174 @@ +/* + This code was ported from the Open Search Project + https://github.com/opensearch-project/OpenSearch/blob/main/modules/analysis-common/src/main/java/org/opensearch/analysis/common/EnglishPluralStemFilter.java + The algorithm itself was created by Mark Harwood + https://github.com/markharwood +*/ + +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package en + +import ( + "strings" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" +) + +const PluralStemmerName = "stemmer_en_plural" + +type EnglishPluralStemmerFilter struct { +} + +func NewEnglishPluralStemmerFilter() *EnglishPluralStemmerFilter { + return &EnglishPluralStemmerFilter{} +} + +func (s *EnglishPluralStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + token.Term = []byte(stem(string(token.Term))) + } + + return input +} + +func EnglishPluralStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewEnglishPluralStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(PluralStemmerName, EnglishPluralStemmerFilterConstructor) +} + +// ---------------------------------------------------------------------------- + +// Words ending in oes that retain the e when stemmed +var oesExceptions = []string{"shoes", "canoes", "oboes"} + +// Words ending in ches that retain the e when stemmed +var chesExceptions = []string{ + "cliches", + "avalanches", + "mustaches", + "moustaches", + "quiches", + "headaches", + "heartaches", + "porsches", + "tranches", + "caches", +} + +func stem(word string) string { + runes := []rune(strings.ToLower(word)) + + if len(runes) < 3 || runes[len(runes)-1] != 's' { + return string(runes) + } + + switch runes[len(runes)-2] { + case 'u': + fallthrough + case 's': + return string(runes) + case 'e': + // Modified ies->y logic from original s-stemmer - only work on strings > 4 + // so spies -> spy still but pies->pie. + // The original code also special-cased aies and eies for no good reason as far as I can tell. + // ( no words of consequence - eg http://www.thefreedictionary.com/words-that-end-in-aies ) + if len(runes) > 4 && runes[len(runes)-3] == 'i' { + runes[len(runes)-3] = 'y' + return string(runes[0 : len(runes)-2]) + } + + // Suffix rules to remove any dangling "e" + if len(runes) > 3 { + // xes (but >1 prefix so we can stem "boxes->box" but keep "axes->axe") + if len(runes) > 4 && runes[len(runes)-3] == 'x' { + return string(runes[0 : len(runes)-2]) + } + + // oes + if len(runes) > 3 && runes[len(runes)-3] == 'o' { + if isException(runes, oesExceptions) { + // Only remove the S + return string(runes[0 : len(runes)-1]) + } + // Remove the es + return string(runes[0 : len(runes)-2]) + } + + if len(runes) > 4 { + // shes/sses + if runes[len(runes)-4] == 's' && (runes[len(runes)-3] == 'h' || runes[len(runes)-3] == 's') { + return string(runes[0 : len(runes)-2]) + } + + // ches + if len(runes) > 4 { + if runes[len(runes)-4] == 'c' && runes[len(runes)-3] == 'h' { + if isException(runes, chesExceptions) { + // Only remove the S + return string(runes[0 : len(runes)-1]) + } + // Remove the es + return string(runes[0 : len(runes)-2]) + } + } + } + } + fallthrough + default: + return string(runes[0 : len(runes)-1]) + } +} + +func isException(word []rune, exceptions []string) bool { + for _, exception := range exceptions { + + exceptionRunes := []rune(exception) + + exceptionPos := len(exceptionRunes) - 1 + wordPos := len(word) - 1 + + matched := true + for exceptionPos >= 0 && wordPos >= 0 { + if exceptionRunes[exceptionPos] != word[wordPos] { + matched = false + break + } + exceptionPos-- + wordPos-- + } + if matched { + return true + } + } + return false +} diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/scorch.go b/vendor/github.com/blevesearch/bleve/v2/index/scorch/scorch.go index a4c88b765d7..f30d795e957 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/scorch.go +++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/scorch.go @@ -588,6 +588,10 @@ func (s *Scorch) StatsMap() map[string]interface{} { m := s.stats.ToMap() indexSnapshot := s.currentSnapshot() + if indexSnapshot == nil { + return nil + } + defer func() { _ = indexSnapshot.Close() }() diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_tfr.go b/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_tfr.go index 349620c7186..9f0315fa8f5 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_tfr.go +++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_tfr.go @@ -102,10 +102,10 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in // this is because there are chances of having a series of loadChunk calls, // and they have to be added together before sending the bytesRead at this point // upstream. - if delta := i.iterators[i.segmentOffset].BytesRead() - prevBytesRead; delta > 0 { - i.incrementBytesRead(delta) + bytesRead := i.iterators[i.segmentOffset].BytesRead() + if bytesRead > prevBytesRead { + i.incrementBytesRead(bytesRead - prevBytesRead) } - return rv, nil } i.segmentOffset++ @@ -204,6 +204,8 @@ func (i *IndexSnapshotTermFieldReader) Close() error { // reader's bytesRead value statsCallbackFn.(search.SearchIOStatsCallbackFunc)(i.bytesRead) } + + search.RecordSearchCost(i.ctx, search.AddM, i.bytesRead) } if i.snapshot != nil { diff --git a/vendor/github.com/blevesearch/bleve/v2/index/upsidedown/index_reader.go b/vendor/github.com/blevesearch/bleve/v2/index/upsidedown/index_reader.go index 5c164fc8ca7..44ccf591adc 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index/upsidedown/index_reader.go +++ b/vendor/github.com/blevesearch/bleve/v2/index/upsidedown/index_reader.go @@ -124,16 +124,16 @@ func (i *IndexReader) documentVisitFieldTerms(id index.IndexInternalID, fields [ } keyBuf := GetRowBuffer() - if tempRow.KeySize() > len(keyBuf) { - keyBuf = make([]byte, 2*tempRow.KeySize()) + if tempRow.KeySize() > len(keyBuf.buf) { + keyBuf.buf = make([]byte, 2*tempRow.KeySize()) } defer PutRowBuffer(keyBuf) - keySize, err := tempRow.KeyTo(keyBuf) + keySize, err := tempRow.KeyTo(keyBuf.buf) if err != nil { return err } - value, err := i.kvreader.Get(keyBuf[:keySize]) + value, err := i.kvreader.Get(keyBuf.buf[:keySize]) if err != nil { return err } diff --git a/vendor/github.com/blevesearch/bleve/v2/index/upsidedown/upsidedown.go b/vendor/github.com/blevesearch/bleve/v2/index/upsidedown/upsidedown.go index 34c331942b2..6e14e657676 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index/upsidedown/upsidedown.go +++ b/vendor/github.com/blevesearch/bleve/v2/index/upsidedown/upsidedown.go @@ -134,18 +134,23 @@ func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) { return } +type rowBuffer struct { + buf []byte +} + var rowBufferPool sync.Pool -func GetRowBuffer() []byte { - if rb, ok := rowBufferPool.Get().([]byte); ok { - return rb - } else { - return make([]byte, RowBufferSize) - } +func GetRowBuffer() *rowBuffer { + if rb, ok := rowBufferPool.Get().(*rowBuffer); ok { + return rb + } else { + buf := make([]byte, RowBufferSize) + return &rowBuffer{buf: buf} + } } -func PutRowBuffer(buf []byte) { - rowBufferPool.Put(buf) +func PutRowBuffer(rb *rowBuffer) { + rowBufferPool.Put(rb) } func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]UpsideDownCouchRow, updateRowsAll [][]UpsideDownCouchRow, deleteRowsAll [][]UpsideDownCouchRow) (err error) { @@ -169,14 +174,14 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi for _, row := range addRows { tfr, ok := row.(*TermFrequencyRow) if ok { - if tfr.DictionaryRowKeySize() > len(rowBuf) { - rowBuf = make([]byte, tfr.DictionaryRowKeySize()) + if tfr.DictionaryRowKeySize() > len(rowBuf.buf) { + rowBuf.buf = make([]byte, tfr.DictionaryRowKeySize()) } - dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf) + dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf.buf) if err != nil { return err } - dictionaryDeltas[string(rowBuf[:dictKeySize])] += 1 + dictionaryDeltas[string(rowBuf.buf[:dictKeySize])] += 1 } addKeyBytes += row.KeySize() addValBytes += row.ValueSize() @@ -197,14 +202,14 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi tfr, ok := row.(*TermFrequencyRow) if ok { // need to decrement counter - if tfr.DictionaryRowKeySize() > len(rowBuf) { - rowBuf = make([]byte, tfr.DictionaryRowKeySize()) + if tfr.DictionaryRowKeySize() > len(rowBuf.buf) { + rowBuf.buf = make([]byte, tfr.DictionaryRowKeySize()) } - dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf) + dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf.buf) if err != nil { return err } - dictionaryDeltas[string(rowBuf[:dictKeySize])] -= 1 + dictionaryDeltas[string(rowBuf.buf[:dictKeySize])] -= 1 } deleteKeyBytes += row.KeySize() } @@ -541,26 +546,26 @@ func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []In switch row := row.(type) { case *TermFrequencyRow: if existingTermKeys != nil { - if row.KeySize() > len(keyBuf) { - keyBuf = make([]byte, row.KeySize()) + if row.KeySize() > len(keyBuf.buf) { + keyBuf.buf = make([]byte, row.KeySize()) } - keySize, _ := row.KeyTo(keyBuf) - if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok { + keySize, _ := row.KeyTo(keyBuf.buf) + if _, ok := existingTermKeys[string(keyBuf.buf[:keySize])]; ok { updateRows = append(updateRows, row) - delete(existingTermKeys, string(keyBuf[:keySize])) + delete(existingTermKeys, string(keyBuf.buf[:keySize])) continue } } addRows = append(addRows, row) case *StoredRow: if existingStoredKeys != nil { - if row.KeySize() > len(keyBuf) { - keyBuf = make([]byte, row.KeySize()) + if row.KeySize() > len(keyBuf.buf) { + keyBuf.buf = make([]byte, row.KeySize()) } - keySize, _ := row.KeyTo(keyBuf) - if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok { + keySize, _ := row.KeyTo(keyBuf.buf) + if _, ok := existingStoredKeys[string(keyBuf.buf[:keySize])]; ok { updateRows = append(updateRows, row) - delete(existingStoredKeys, string(keyBuf[:keySize])) + delete(existingStoredKeys, string(keyBuf.buf[:keySize])) continue } } @@ -1047,23 +1052,23 @@ func backIndexRowForDoc(kvreader store.KVReader, docID index.IndexInternalID) (* } keyBuf := GetRowBuffer() - if tempRow.KeySize() > len(keyBuf) { - keyBuf = make([]byte, 2*tempRow.KeySize()) + if tempRow.KeySize() > len(keyBuf.buf) { + keyBuf.buf = make([]byte, 2*tempRow.KeySize()) } defer PutRowBuffer(keyBuf) - keySize, err := tempRow.KeyTo(keyBuf) + keySize, err := tempRow.KeyTo(keyBuf.buf) if err != nil { return nil, err } - value, err := kvreader.Get(keyBuf[:keySize]) + value, err := kvreader.Get(keyBuf.buf[:keySize]) if err != nil { return nil, err } if value == nil { return nil, nil } - backIndexRow, err := NewBackIndexRowKV(keyBuf[:keySize], value) + backIndexRow, err := NewBackIndexRowKV(keyBuf.buf[:keySize], value) if err != nil { return nil, err } diff --git a/vendor/github.com/blevesearch/bleve/v2/index_impl.go b/vendor/github.com/blevesearch/bleve/v2/index_impl.go index c5a0c46f44e..b5f115411ef 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index_impl.go +++ b/vendor/github.com/blevesearch/bleve/v2/index_impl.go @@ -474,9 +474,9 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr // accounted by invoking this callback when the TFR is closed. // 2. the docvalues portion (accounted in collector) and the retrieval // of stored fields bytes (by LoadAndHighlightFields) - var totalBytesRead uint64 + var totalSearchCost uint64 sendBytesRead := func(bytesRead uint64) { - totalBytesRead += bytesRead + totalSearchCost += bytesRead } ctx = context.WithValue(ctx, search.SearchIOStatsCallbackKey, @@ -495,11 +495,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr err = serr } if sr != nil { - sr.BytesRead = totalBytesRead + sr.Cost = totalSearchCost } if sr, ok := indexReader.(*scorch.IndexSnapshot); ok { - sr.UpdateIOStats(totalBytesRead) + sr.UpdateIOStats(totalSearchCost) } + + search.RecordSearchCost(ctx, search.DoneM, 0) }() if req.Facets != nil { @@ -574,6 +576,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr } } + var storedFieldsCost uint64 for _, hit := range hits { if i.name != "" { hit.Index = i.name @@ -582,9 +585,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr if err != nil { return nil, err } - totalBytesRead += storedFieldsBytes + storedFieldsCost += storedFieldsBytes } + totalSearchCost += storedFieldsCost + search.RecordSearchCost(ctx, search.AddM, storedFieldsCost) + atomic.AddUint64(&i.stats.searches, 1) searchDuration := time.Since(searchStart) atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration)) diff --git a/vendor/github.com/blevesearch/bleve/v2/mapping/document.go b/vendor/github.com/blevesearch/bleve/v2/mapping/document.go index 3f3bbfd38df..7215e5b9272 100644 --- a/vendor/github.com/blevesearch/bleve/v2/mapping/document.go +++ b/vendor/github.com/blevesearch/bleve/v2/mapping/document.go @@ -140,11 +140,11 @@ func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping { return nil } -// documentMappingForPath only returns EXACT matches for a sub document -// or for an explicitly mapped field, if you want to find the -// closest document mapping to a field not explicitly mapped -// use closestDocMapping -func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping { +// documentMappingForPath returns the EXACT and closest matches for a sub +// document or for an explicitly mapped field; the closest most specific +// document mapping could be one that matches part of the provided path. +func (dm *DocumentMapping) documentMappingForPath(path string) ( + *DocumentMapping, *DocumentMapping) { pathElements := decodePath(path) current := dm OUTER: @@ -165,27 +165,9 @@ OUTER: } } - return nil + return nil, current } - return current -} - -// closestDocMapping findest the most specific document mapping that matches -// part of the provided path -func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping { - pathElements := decodePath(path) - current := dm -OUTER: - for _, pathElement := range pathElements { - for name, subDocMapping := range current.Properties { - if name == pathElement { - current = subDocMapping - continue OUTER - } - } - break - } - return current + return current, current } // NewDocumentMapping returns a new document mapping @@ -408,8 +390,7 @@ func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) { pathString := encodePath(path) // look to see if there is a mapping for this field - subDocMapping := dm.documentMappingForPath(pathString) - closestDocMapping := dm.closestDocMapping(pathString) + subDocMapping, closestDocMapping := dm.documentMappingForPath(pathString) // check to see if we even need to do further processing if subDocMapping != nil && !subDocMapping.Enabled { diff --git a/vendor/github.com/blevesearch/bleve/v2/mapping/index.go b/vendor/github.com/blevesearch/bleve/v2/mapping/index.go index 1d982dd414f..e2ac99f399e 100644 --- a/vendor/github.com/blevesearch/bleve/v2/mapping/index.go +++ b/vendor/github.com/blevesearch/bleve/v2/mapping/index.go @@ -326,7 +326,7 @@ func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{} docMapping.walkDocument(data, []string{}, []uint64{}, walkContext) // see if the _all field was disabled - allMapping := docMapping.documentMappingForPath("_all") + allMapping, _ := docMapping.documentMappingForPath("_all") if allMapping == nil || allMapping.Enabled { field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, index.IndexField|index.IncludeTermVectors) doc.AddField(field) @@ -364,8 +364,9 @@ func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string { return analyzerName } } + // now try the default mapping - pathMapping := im.DefaultMapping.documentMappingForPath(path) + pathMapping, _ := im.DefaultMapping.documentMappingForPath(path) if pathMapping != nil { if len(pathMapping.Fields) > 0 { if pathMapping.Fields[0].Analyzer != "" { @@ -377,7 +378,16 @@ func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string { // next we will try default analyzers for the path pathDecoded := decodePath(path) for _, docMapping := range im.TypeMapping { - rv := docMapping.defaultAnalyzerName(pathDecoded) + if docMapping.Enabled { + rv := docMapping.defaultAnalyzerName(pathDecoded) + if rv != "" { + return rv + } + } + } + // now the default analyzer for the default mapping + if im.DefaultMapping.Enabled { + rv := im.DefaultMapping.defaultAnalyzerName(pathDecoded) if rv != "" { return rv } @@ -411,7 +421,7 @@ func (im *IndexMappingImpl) datetimeParserNameForPath(path string) string { // first we look for explicit mapping on the field for _, docMapping := range im.TypeMapping { - pathMapping := docMapping.documentMappingForPath(path) + pathMapping, _ := docMapping.documentMappingForPath(path) if pathMapping != nil { if len(pathMapping.Fields) > 0 { if pathMapping.Fields[0].Analyzer != "" { diff --git a/vendor/github.com/blevesearch/bleve/v2/query.go b/vendor/github.com/blevesearch/bleve/v2/query.go index 4f1f136ce31..3385ed779d0 100644 --- a/vendor/github.com/blevesearch/bleve/v2/query.go +++ b/vendor/github.com/blevesearch/bleve/v2/query.go @@ -225,3 +225,28 @@ func NewGeoDistanceQuery(lon, lat float64, distance string) *query.GeoDistanceQu func NewIPRangeQuery(cidr string) *query.IPRangeQuery { return query.NewIPRangeQuery(cidr) } + +// NewGeoShapeQuery creates a new Query for matching the given geo shape. +// This method can be used for creating geoshape queries for shape types +// like: point, linestring, polygon, multipoint, multilinestring, +// multipolygon and envelope. +func NewGeoShapeQuery(coordinates [][][][]float64, typ, relation string) (*query.GeoShapeQuery, error) { + return query.NewGeoShapeQuery(coordinates, typ, relation) +} + +// NewGeoShapeCircleQuery creates a new query for a geoshape that is a +// circle given center point and the radius. Radius formats supported: +// "5in" "5inch" "7yd" "7yards" "9ft" "9feet" "11km" "11kilometers" +// "3nm" "3nauticalmiles" "13mm" "13millimeters" "15cm" "15centimeters" +// "17mi" "17miles" "19m" "19meters" If the unit cannot be determined, +// the entire string is parsed and the unit of meters is assumed. +func NewGeoShapeCircleQuery(coordinates []float64, radius, relation string) (*query.GeoShapeQuery, error) { + return query.NewGeoShapeCircleQuery(coordinates, radius, relation) +} + +// NewGeometryCollectionQuery creates a new query for the provided +// geometrycollection coordinates and types, which could contain +// multiple geo shapes. +func NewGeometryCollectionQuery(coordinates [][][][][]float64, types []string, relation string) (*query.GeoShapeQuery, error) { + return query.NewGeometryCollectionQuery(coordinates, types, relation) +} diff --git a/vendor/github.com/blevesearch/bleve/v2/search.go b/vendor/github.com/blevesearch/bleve/v2/search.go index acb812adaad..fe426164aae 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search.go +++ b/vendor/github.com/blevesearch/bleve/v2/search.go @@ -485,15 +485,27 @@ func (ss *SearchStatus) Merge(other *SearchStatus) { // A SearchResult describes the results of executing // a SearchRequest. +// +// Status - Whether the search was executed on the underlying indexes successfully +// or failed, and the corresponding errors. +// Request - The SearchRequest that was executed. +// Hits - The list of documents that matched the query and their corresponding +// scores, score explanation, location info and so on. +// Total - The total number of documents that matched the query. +// Cost - indicates how expensive was the query with respect to bytes read +// from the mmaped index files. +// MaxScore - The maximum score seen across all document hits seen for this query. +// Took - The time taken to execute the search. +// Facets - The facet results for the search. type SearchResult struct { - Status *SearchStatus `json:"status"` - Request *SearchRequest `json:"request"` - Hits search.DocumentMatchCollection `json:"hits"` - Total uint64 `json:"total_hits"` - BytesRead uint64 `json:"bytesRead"` - MaxScore float64 `json:"max_score"` - Took time.Duration `json:"took"` - Facets search.FacetResults `json:"facets"` + Status *SearchStatus `json:"status"` + Request *SearchRequest `json:"request"` + Hits search.DocumentMatchCollection `json:"hits"` + Total uint64 `json:"total_hits"` + Cost uint64 `json:"cost"` + MaxScore float64 `json:"max_score"` + Took time.Duration `json:"took"` + Facets search.FacetResults `json:"facets"` } func (sr *SearchResult) Size() int { @@ -566,7 +578,7 @@ func (sr *SearchResult) Merge(other *SearchResult) { sr.Status.Merge(other.Status) sr.Hits = append(sr.Hits, other.Hits...) sr.Total += other.Total - sr.BytesRead += other.BytesRead + sr.Cost += other.Cost if other.MaxScore > sr.MaxScore { sr.MaxScore = other.MaxScore } diff --git a/vendor/github.com/blevesearch/bleve/v2/search/collector/topn.go b/vendor/github.com/blevesearch/bleve/v2/search/collector/topn.go index 4d19cd4559e..270d5f924f4 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/collector/topn.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/collector/topn.go @@ -200,6 +200,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, hc.needDocIds = hc.needDocIds || loadID select { case <-ctx.Done(): + search.RecordSearchCost(ctx, search.AbortM, 0) return ctx.Err() default: next, err = searcher.Next(searchContext) @@ -208,6 +209,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, if hc.total%CheckDoneEvery == 0 { select { case <-ctx.Done(): + search.RecordSearchCost(ctx, search.AbortM, 0) return ctx.Err() default: } @@ -232,6 +234,8 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, // total bytes read as part of docValues being read every hit // which must be accounted by invoking the callback. statsCallbackFn.(search.SearchIOStatsCallbackFunc)(hc.bytesRead) + + search.RecordSearchCost(ctx, search.AddM, hc.bytesRead) } // help finalize/flush the results in case @@ -367,7 +371,20 @@ func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.Doc // SetFacetsBuilder registers a facet builder for this collector func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) { hc.facetsBuilder = facetsBuilder - hc.neededFields = append(hc.neededFields, hc.facetsBuilder.RequiredFields()...) + fieldsRequiredForFaceting := facetsBuilder.RequiredFields() + // for each of these fields, append only if not already there in hc.neededFields. + for _, field := range fieldsRequiredForFaceting { + found := false + for _, neededField := range hc.neededFields { + if field == neededField { + found = true + break + } + } + if !found { + hc.neededFields = append(hc.neededFields, field) + } + } } // finalizeResults starts with the heap containing the final top size+skip diff --git a/vendor/github.com/blevesearch/bleve/v2/search/query/geo_boundingbox.go b/vendor/github.com/blevesearch/bleve/v2/search/query/geo_boundingbox.go index ac9125393b5..1397c77996a 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/query/geo_boundingbox.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/query/geo_boundingbox.go @@ -63,6 +63,8 @@ func (q *GeoBoundingBoxQuery) Searcher(ctx context.Context, i index.IndexReader, field = m.DefaultSearchField() } + ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo) + if q.BottomRight[0] < q.TopLeft[0] { // cross date line, rewrite as two parts diff --git a/vendor/github.com/blevesearch/bleve/v2/search/query/geo_boundingpolygon.go b/vendor/github.com/blevesearch/bleve/v2/search/query/geo_boundingpolygon.go index 467f39b28df..baae514d961 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/query/geo_boundingpolygon.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/query/geo_boundingpolygon.go @@ -61,6 +61,8 @@ func (q *GeoBoundingPolygonQuery) Searcher(ctx context.Context, i index.IndexRea field = m.DefaultSearchField() } + ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo) + return searcher.NewGeoBoundedPolygonSearcher(ctx, i, q.Points, field, q.BoostVal.Value(), options) } diff --git a/vendor/github.com/blevesearch/bleve/v2/search/query/geo_distance.go b/vendor/github.com/blevesearch/bleve/v2/search/query/geo_distance.go index f05bf67234f..7977d1538b3 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/query/geo_distance.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/query/geo_distance.go @@ -64,6 +64,8 @@ func (q *GeoDistanceQuery) Searcher(ctx context.Context, i index.IndexReader, m field = m.DefaultSearchField() } + ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo) + dist, err := geo.ParseDistance(q.Distance) if err != nil { return nil, err diff --git a/vendor/github.com/blevesearch/bleve/v2/search/query/geo_shape.go b/vendor/github.com/blevesearch/bleve/v2/search/query/geo_shape.go index a63ec80f7ee..2229dbe9c6f 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/query/geo_shape.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/query/geo_shape.go @@ -107,6 +107,8 @@ func (q *GeoShapeQuery) Searcher(ctx context.Context, i index.IndexReader, field = m.DefaultSearchField() } + ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo) + return searcher.NewGeoShapeSearcher(ctx, i, q.Geometry.Shape, q.Geometry.Relation, field, q.BoostVal.Value(), options) } diff --git a/vendor/github.com/blevesearch/bleve/v2/search/query/numeric_range.go b/vendor/github.com/blevesearch/bleve/v2/search/query/numeric_range.go index ad24741677f..205ceecf693 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/query/numeric_range.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/query/numeric_range.go @@ -77,6 +77,7 @@ func (q *NumericRangeQuery) Searcher(ctx context.Context, i index.IndexReader, m if q.FieldVal == "" { field = m.DefaultSearchField() } + ctx = context.WithValue(ctx, search.QueryTypeKey, search.Numeric) return searcher.NewNumericRangeSearcher(ctx, i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options) } diff --git a/vendor/github.com/blevesearch/bleve/v2/search/search.go b/vendor/github.com/blevesearch/bleve/v2/search/search.go index 69d8945f9ce..d2dd33712e6 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/search.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/search.go @@ -27,10 +27,6 @@ var reflectStaticSizeDocumentMatch int var reflectStaticSizeSearchContext int var reflectStaticSizeLocation int -const SearchIOStatsCallbackKey = "_search_io_stats_callback_key" - -type SearchIOStatsCallbackFunc func(uint64) - func init() { var dm DocumentMatch reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size()) diff --git a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_fuzzy.go b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_fuzzy.go index 9423b611e60..5345c272bcc 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_fuzzy.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_fuzzy.go @@ -59,7 +59,8 @@ func NewFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term s } if ctx != nil { - reportIOStats(dictBytesRead, ctx) + reportIOStats(ctx, dictBytesRead) + search.RecordSearchCost(ctx, search.AddM, dictBytesRead) } return NewMultiTermSearcher(ctx, indexReader, candidates, field, @@ -71,13 +72,15 @@ type fuzzyCandidates struct { bytesRead uint64 } -func reportIOStats(bytesRead uint64, ctx context.Context) { +func reportIOStats(ctx context.Context, bytesRead uint64) { // The fuzzy, regexp like queries essentially load a dictionary, // which potentially incurs a cost that must be accounted by // using the callback to report the value. - statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey) - if statsCallbackFn != nil { - statsCallbackFn.(search.SearchIOStatsCallbackFunc)(bytesRead) + if ctx != nil { + statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey) + if statsCallbackFn != nil { + statsCallbackFn.(search.SearchIOStatsCallbackFunc)(bytesRead) + } } } diff --git a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geoboundingbox.go b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geoboundingbox.go index 05ca1bf9597..c889ddce06a 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geoboundingbox.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geoboundingbox.go @@ -49,7 +49,7 @@ func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReade return nil, err } - return NewFilteringSearcher(ctx, boxSearcher, buildRectFilter(dvReader, + return NewFilteringSearcher(ctx, boxSearcher, buildRectFilter(ctx, dvReader, field, minLon, minLat, maxLon, maxLat)), nil } } @@ -85,7 +85,7 @@ func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReade } // add filter to check points near the boundary onBoundarySearcher = NewFilteringSearcher(ctx, rawOnBoundarySearcher, - buildRectFilter(dvReader, field, minLon, minLat, maxLon, maxLat)) + buildRectFilter(ctx, dvReader, field, minLon, minLat, maxLon, maxLat)) openedSearchers = append(openedSearchers, onBoundarySearcher) } @@ -201,7 +201,7 @@ func buildIsIndexedFunc(ctx context.Context, indexReader index.IndexReader, fiel return isIndexed, closeF, err } -func buildRectFilter(dvReader index.DocValueReader, field string, +func buildRectFilter(ctx context.Context, dvReader index.DocValueReader, field string, minLon, minLat, maxLon, maxLat float64) FilterFunc { return func(d *search.DocumentMatch) bool { // check geo matches against all numeric type terms indexed @@ -222,6 +222,11 @@ func buildRectFilter(dvReader index.DocValueReader, field string, } }) if err == nil && found { + bytes := dvReader.BytesRead() + if bytes > 0 { + reportIOStats(ctx, bytes) + search.RecordSearchCost(ctx, search.AddM, bytes) + } for i := range lons { if geo.BoundingBoxContains(lons[i], lats[i], minLon, minLat, maxLon, maxLat) { diff --git a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geopointdistance.go b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geopointdistance.go index 01ed2092991..fbe958953a2 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geopointdistance.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geopointdistance.go @@ -66,7 +66,7 @@ func NewGeoPointDistanceSearcher(ctx context.Context, indexReader index.IndexRea // wrap it in a filtering searcher which checks the actual distance return NewFilteringSearcher(ctx, rectSearcher, - buildDistFilter(dvReader, field, centerLon, centerLat, dist)), nil + buildDistFilter(ctx, dvReader, field, centerLon, centerLat, dist)), nil } // boxSearcher builds a searcher for the described bounding box @@ -113,7 +113,7 @@ func boxSearcher(ctx context.Context, indexReader index.IndexReader, return boxSearcher, nil } -func buildDistFilter(dvReader index.DocValueReader, field string, +func buildDistFilter(ctx context.Context, dvReader index.DocValueReader, field string, centerLon, centerLat, maxDist float64) FilterFunc { return func(d *search.DocumentMatch) bool { // check geo matches against all numeric type terms indexed @@ -134,6 +134,11 @@ func buildDistFilter(dvReader index.DocValueReader, field string, } }) if err == nil && found { + bytes := dvReader.BytesRead() + if bytes > 0 { + reportIOStats(ctx, bytes) + search.RecordSearchCost(ctx, search.AddM, bytes) + } for i := range lons { dist := geo.Haversin(lons[i], lats[i], centerLon, centerLat) if dist <= maxDist/1000 { diff --git a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geopolygon.go b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geopolygon.go index 1d6538adf05..a43edafbb5f 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geopolygon.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geopolygon.go @@ -71,7 +71,7 @@ func NewGeoBoundedPolygonSearcher(ctx context.Context, indexReader index.IndexRe // wrap it in a filtering searcher that checks for the polygon inclusivity return NewFilteringSearcher(ctx, rectSearcher, - buildPolygonFilter(dvReader, field, coordinates)), nil + buildPolygonFilter(ctx, dvReader, field, coordinates)), nil } const float64EqualityThreshold = 1e-6 @@ -83,7 +83,7 @@ func almostEqual(a, b float64) bool { // buildPolygonFilter returns true if the point lies inside the // polygon. It is based on the ray-casting technique as referred // here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html -func buildPolygonFilter(dvReader index.DocValueReader, field string, +func buildPolygonFilter(ctx context.Context, dvReader index.DocValueReader, field string, coordinates []geo.Point) FilterFunc { return func(d *search.DocumentMatch) bool { // check geo matches against all numeric type terms indexed @@ -107,6 +107,11 @@ func buildPolygonFilter(dvReader index.DocValueReader, field string, // Note: this approach works for points which are strictly inside // the polygon. ie it might fail for certain points on the polygon boundaries. if err == nil && found { + bytes := dvReader.BytesRead() + if bytes > 0 { + reportIOStats(ctx, bytes) + search.RecordSearchCost(ctx, search.AddM, bytes) + } nVertices := len(coordinates) if len(coordinates) < 3 { return false diff --git a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geoshape.go b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geoshape.go index d2c6b1c55df..1107c94386a 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geoshape.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_geoshape.go @@ -54,7 +54,7 @@ func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, sha } return NewFilteringSearcher(ctx, mSearcher, - buildRelationFilterOnShapes(dvReader, field, relation, shape)), nil + buildRelationFilterOnShapes(ctx, dvReader, field, relation, shape)), nil } @@ -63,7 +63,7 @@ func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, sha // implementation of doc values. var termSeparatorSplitSlice = []byte{0xff} -func buildRelationFilterOnShapes(dvReader index.DocValueReader, field string, +func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueReader, field string, relation string, shape index.GeoJSON) FilterFunc { // this is for accumulating the shape's actual complete value // spread across multiple docvalue visitor callbacks. @@ -116,6 +116,11 @@ func buildRelationFilterOnShapes(dvReader index.DocValueReader, field string, }) if err == nil && found { + bytes := dvReader.BytesRead() + if bytes > 0 { + reportIOStats(ctx, bytes) + search.RecordSearchCost(ctx, search.AddM, bytes) + } return found } diff --git a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_numeric_range.go b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_numeric_range.go index 68728c94c49..f086051c11f 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_numeric_range.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_numeric_range.go @@ -88,7 +88,8 @@ func NewNumericRangeSearcher(ctx context.Context, indexReader index.IndexReader, // reporting back the IO stats with respect to the dictionary // loaded, using the context if ctx != nil { - reportIOStats(dictBytesRead, ctx) + reportIOStats(ctx, dictBytesRead) + search.RecordSearchCost(ctx, search.AddM, dictBytesRead) } // cannot return MatchNoneSearcher because of interaction with @@ -110,7 +111,8 @@ func NewNumericRangeSearcher(ctx context.Context, indexReader index.IndexReader, } if ctx != nil { - reportIOStats(dictBytesRead, ctx) + reportIOStats(ctx, dictBytesRead) + search.RecordSearchCost(ctx, search.AddM, dictBytesRead) } return NewMultiTermSearcherBytes(ctx, indexReader, terms, field, diff --git a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_regexp.go b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_regexp.go index b419d547076..b88133e31a4 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_regexp.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_regexp.go @@ -102,7 +102,8 @@ func NewRegexpSearcher(ctx context.Context, indexReader index.IndexReader, patte } if ctx != nil { - reportIOStats(dictBytesRead, ctx) + reportIOStats(ctx, dictBytesRead) + search.RecordSearchCost(ctx, search.AddM, dictBytesRead) } return NewMultiTermSearcher(ctx, indexReader, candidateTerms, field, boost, diff --git a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_term.go b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_term.go index db18e5376f0..cd794ea3223 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_term.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_term.go @@ -39,6 +39,9 @@ type TermSearcher struct { } func NewTermSearcher(ctx context.Context, indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { + if isTermQuery(ctx) { + ctx = context.WithValue(ctx, search.QueryTypeKey, search.Term) + } return NewTermSearcherBytes(ctx, indexReader, []byte(term), field, boost, options) } @@ -140,3 +143,14 @@ func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) ( return nil, nil } + +func isTermQuery(ctx context.Context) bool { + if ctx != nil { + // if the ctx already has a value set for query type + // it would've been done at a non term searcher level. + _, ok := ctx.Value(search.QueryTypeKey).(string) + return !ok + } + // if the context is nil, then don't set the query type + return false +} diff --git a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_term_prefix.go b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_term_prefix.go index 89f836a50cf..dc16e486487 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_term_prefix.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_term_prefix.go @@ -49,7 +49,8 @@ func NewTermPrefixSearcher(ctx context.Context, indexReader index.IndexReader, p } if ctx != nil { - reportIOStats(fieldDict.BytesRead(), ctx) + reportIOStats(ctx, fieldDict.BytesRead()) + search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead()) } return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true) diff --git a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_term_range.go b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_term_range.go index a2fb4e9939d..990c7386b0d 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_term_range.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_term_range.go @@ -84,7 +84,8 @@ func NewTermRangeSearcher(ctx context.Context, indexReader index.IndexReader, } if ctx != nil { - reportIOStats(fieldDict.BytesRead(), ctx) + reportIOStats(ctx, fieldDict.BytesRead()) + search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead()) } return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true) diff --git a/vendor/github.com/blevesearch/bleve/v2/search/util.go b/vendor/github.com/blevesearch/bleve/v2/search/util.go index 19dd5d68bd9..7a946868e15 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/util.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/util.go @@ -14,6 +14,8 @@ package search +import "context" + func MergeLocations(locations []FieldTermLocationMap) FieldTermLocationMap { rv := locations[0] @@ -67,3 +69,52 @@ func MergeFieldTermLocations(dest []FieldTermLocation, matches []*DocumentMatch) return dest } + +const SearchIOStatsCallbackKey = "_search_io_stats_callback_key" + +type SearchIOStatsCallbackFunc func(uint64) + +// Implementation of SearchIncrementalCostCallbackFn should handle the following messages +// - add: increment the cost of a search operation +// (which can be specific to a query type as well) +// - abort: query was aborted due to a cancel of search's context (for eg), +// which can be handled differently as well +// - done: indicates that a search was complete and the tracked cost can be +// handled safely by the implementation. +type SearchIncrementalCostCallbackFn func(SearchIncrementalCostCallbackMsg, + SearchQueryType, uint64) +type SearchIncrementalCostCallbackMsg uint +type SearchQueryType uint + +const ( + Term = SearchQueryType(1 << iota) + Geo + Numeric + GenericCost +) + +const ( + AddM = SearchIncrementalCostCallbackMsg(1 << iota) + AbortM + DoneM +) + +const SearchIncrementalCostKey = "_search_incremental_cost_key" +const QueryTypeKey = "_query_type_key" + +func RecordSearchCost(ctx context.Context, + msg SearchIncrementalCostCallbackMsg, bytes uint64) { + if ctx != nil { + queryType, ok := ctx.Value(QueryTypeKey).(SearchQueryType) + if !ok { + // for the cost of the non query type specific factors such as + // doc values and stored fields section. + queryType = GenericCost + } + + aggCallbackFn := ctx.Value(SearchIncrementalCostKey) + if aggCallbackFn != nil { + aggCallbackFn.(SearchIncrementalCostCallbackFn)(msg, queryType, bytes) + } + } +} diff --git a/vendor/github.com/blevesearch/vellum/builder.go b/vendor/github.com/blevesearch/vellum/builder.go index f7933295751..7e545cbec15 100644 --- a/vendor/github.com/blevesearch/vellum/builder.go +++ b/vendor/github.com/blevesearch/vellum/builder.go @@ -361,9 +361,6 @@ type builderNode struct { func (n *builderNode) reset() { n.final = false n.finalOutput = 0 - for i := range n.trans { - n.trans[i] = emptyTransition - } n.trans = n.trans[:0] n.next = nil } @@ -393,8 +390,6 @@ func (n *builderNode) equiv(o *builderNode) bool { return true } -var emptyTransition = transition{} - type transition struct { out uint64 addr int diff --git a/vendor/github.com/blevesearch/zapx/v15/merge.go b/vendor/github.com/blevesearch/zapx/v15/merge.go index 887d3447e85..fa406e6dbd1 100644 --- a/vendor/github.com/blevesearch/zapx/v15/merge.go +++ b/vendor/github.com/blevesearch/zapx/v15/merge.go @@ -719,6 +719,10 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, } err := segment.visitStoredFields(vdc, docNum, func(field string, typ byte, value []byte, pos []uint64) bool { fieldID := int(fieldsMap[field]) - 1 + if fieldID < 0 { + // no entry for field in fieldsMap + return false + } vals[fieldID] = append(vals[fieldID], value) typs[fieldID] = append(typs[fieldID], typ) diff --git a/vendor/github.com/blevesearch/zapx/v15/posting.go b/vendor/github.com/blevesearch/zapx/v15/posting.go index c1060f92eb8..ad47df0dd6c 100644 --- a/vendor/github.com/blevesearch/zapx/v15/posting.go +++ b/vendor/github.com/blevesearch/zapx/v15/posting.go @@ -109,7 +109,6 @@ type PostingsList struct { chunkSize uint64 - // atomic access to this variable bytesRead uint64 } @@ -303,12 +302,17 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { return fmt.Errorf("error loading roaring bitmap: %v", err) } - rv.chunkSize, err = getChunkSize(d.sb.chunkMode, + chunkSize, err := getChunkSize(d.sb.chunkMode, rv.postings.GetCardinality(), d.sb.numDocs) if err != nil { return err + } else if chunkSize == 0 { + return fmt.Errorf("chunk size is zero, chunkMode: %v, numDocs: %v", + d.sb.chunkMode, d.sb.numDocs) } + rv.chunkSize = chunkSize + return nil } @@ -344,7 +348,6 @@ type PostingsIterator struct { includeFreqNorm bool includeLocs bool - // atomic access to this variable bytesRead uint64 } diff --git a/vendor/github.com/blevesearch/zapx/v15/segment.go b/vendor/github.com/blevesearch/zapx/v15/segment.go index d5b291e3735..15bc911aa5a 100644 --- a/vendor/github.com/blevesearch/zapx/v15/segment.go +++ b/vendor/github.com/blevesearch/zapx/v15/segment.go @@ -103,7 +103,7 @@ type SegmentBase struct { fieldDvNames []string // field names cached in fieldDvReaders size uint64 - // atomic access to this variable + // atomic access to these variables bytesRead uint64 bytesWritten uint64 @@ -319,6 +319,10 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok { // read the length of the vellum data vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) + if vellumLen == 0 { + sb.m.Unlock() + return nil, fmt.Errorf("empty dictionary for field: %v", field) + } fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] rv.incrementBytesRead(uint64(read) + vellumLen) rv.fst, err = vellum.Load(fstBytes) diff --git a/vendor/modules.txt b/vendor/modules.txt index 2dc82e3365e..47f06220900 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -59,7 +59,7 @@ github.com/ProtonMail/go-crypto/openpgp/internal/ecc github.com/ProtonMail/go-crypto/openpgp/internal/encoding github.com/ProtonMail/go-crypto/openpgp/packet github.com/ProtonMail/go-crypto/openpgp/s2k -# github.com/RoaringBitmap/roaring v0.9.4 +# github.com/RoaringBitmap/roaring v1.2.3 ## explicit; go 1.14 github.com/RoaringBitmap/roaring github.com/RoaringBitmap/roaring/internal @@ -155,7 +155,7 @@ github.com/bitly/go-simplejson # github.com/bits-and-blooms/bitset v1.2.1 ## explicit; go 1.14 github.com/bits-and-blooms/bitset -# github.com/blevesearch/bleve/v2 v2.3.7 +# github.com/blevesearch/bleve/v2 v2.3.9 ## explicit; go 1.19 github.com/blevesearch/bleve/v2 github.com/blevesearch/bleve/v2/analysis @@ -208,8 +208,8 @@ github.com/blevesearch/gtreap # github.com/blevesearch/mmap-go v1.0.4 ## explicit; go 1.13 github.com/blevesearch/mmap-go -# github.com/blevesearch/scorch_segment_api/v2 v2.1.4 -## explicit; go 1.18 +# github.com/blevesearch/scorch_segment_api/v2 v2.1.5 +## explicit; go 1.19 github.com/blevesearch/scorch_segment_api/v2 # github.com/blevesearch/segment v0.9.1 ## explicit; go 1.18 @@ -221,26 +221,26 @@ github.com/blevesearch/snowballstem/english # github.com/blevesearch/upsidedown_store_api v1.0.2 ## explicit; go 1.18 github.com/blevesearch/upsidedown_store_api -# github.com/blevesearch/vellum v1.0.9 +# github.com/blevesearch/vellum v1.0.10 ## explicit; go 1.18 github.com/blevesearch/vellum github.com/blevesearch/vellum/levenshtein github.com/blevesearch/vellum/regexp github.com/blevesearch/vellum/utf8 -# github.com/blevesearch/zapx/v11 v11.3.7 -## explicit; go 1.18 +# github.com/blevesearch/zapx/v11 v11.3.9 +## explicit; go 1.19 github.com/blevesearch/zapx/v11 -# github.com/blevesearch/zapx/v12 v12.3.7 -## explicit; go 1.18 +# github.com/blevesearch/zapx/v12 v12.3.9 +## explicit; go 1.19 github.com/blevesearch/zapx/v12 -# github.com/blevesearch/zapx/v13 v13.3.7 -## explicit; go 1.18 +# github.com/blevesearch/zapx/v13 v13.3.9 +## explicit; go 1.19 github.com/blevesearch/zapx/v13 -# github.com/blevesearch/zapx/v14 v14.3.7 -## explicit; go 1.18 +# github.com/blevesearch/zapx/v14 v14.3.9 +## explicit; go 1.19 github.com/blevesearch/zapx/v14 -# github.com/blevesearch/zapx/v15 v15.3.9 -## explicit; go 1.18 +# github.com/blevesearch/zapx/v15 v15.3.12 +## explicit; go 1.19 github.com/blevesearch/zapx/v15 # github.com/bluele/gcache v0.0.2 ## explicit; go 1.15