diff --git a/docs/painless/painless-api-reference/painless-api-reference-score/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-score/index.asciidoc index ea272a3e392c5..e31ffd4c2456a 100644 --- a/docs/painless/painless-api-reference/painless-api-reference-score/index.asciidoc +++ b/docs/painless/painless-api-reference/painless-api-reference-score/index.asciidoc @@ -11,7 +11,6 @@ The following specialized API is available in the Score context. The following methods are directly callable without a class/instance qualifier. Note parameters denoted by a (*) are treated as read-only values. * double cosineSimilarity(List *, String) -* double cosineSimilaritySparse(Map *, String) * double decayDateExp(String *, String *, String *, double *, JodaCompatibleZonedDateTime) * double decayDateGauss(String *, String *, String *, double *, JodaCompatibleZonedDateTime) * double decayDateLinear(String *, String *, String *, double *, JodaCompatibleZonedDateTime) @@ -22,7 +21,6 @@ The following methods are directly callable without a class/instance qualifier. * double decayNumericGauss(double *, double *, double *, double *, double) * double decayNumericLinear(double *, double *, double *, double *, double) * double dotProduct(List, String) -* double dotProductSparse(Map *, String) * double randomScore(int *) * double randomScore(int *, String *) * double saturation(double, double) @@ -35,9 +33,7 @@ The following classes are available grouped by their respective packages. 
Click ==== org.elasticsearch.index.query <> -* <> -* <> -* <> +* <> include::packages.asciidoc[] diff --git a/docs/painless/painless-api-reference/painless-api-reference-score/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-score/packages.asciidoc index a1beaeb5bc520..c3a25e8e47af3 100644 --- a/docs/painless/painless-api-reference/painless-api-reference-score/packages.asciidoc +++ b/docs/painless/painless-api-reference/painless-api-reference-score/packages.asciidoc @@ -5,8 +5,8 @@ === Score API for package org.elasticsearch.index.query See the <> for a high-level overview of all packages and classes. -[[painless-api-reference-score-VectorScriptDocValues]] -==== VectorScriptDocValues +[[painless-api-reference-score-DenseVectorScriptDocValues]] +==== DenseVectorScriptDocValues * boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) * void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) * boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) @@ -59,117 +59,3 @@ See the <> for a high-level overview of * def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() * def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) * String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-score-VectorScriptDocValues-DenseVectorScriptDocValues]] -==== VectorScriptDocValues.DenseVectorScriptDocValues -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean 
{java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* def {java11-javadoc}/java.base/java/util/List.html#get(int)[get](int) -* Object getByPath(String) -* Object getByPath(String, Object) -* int getLength() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* String join(String) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean 
{java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-score-VectorScriptDocValues-SparseVectorScriptDocValues]] -==== VectorScriptDocValues.SparseVectorScriptDocValues -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* Collection asCollection() -* List 
asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* def {java11-javadoc}/java.base/java/util/List.html#get(int)[get](int) -* Object getByPath(String) -* Object getByPath(String, Object) -* int getLength() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* String join(String) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void 
{java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - diff --git a/docs/reference/mapping/types.asciidoc b/docs/reference/mapping/types.asciidoc index 8cd9e0f44f92c..3ba7ff1d1b16d 100644 --- a/docs/reference/mapping/types.asciidoc +++ b/docs/reference/mapping/types.asciidoc @@ -46,8 +46,6 @@ string:: <> and <> <>:: Record dense vectors of float values. -<>:: Record sparse vectors of float values. - <>:: A text-like field optimized for queries to implement as-you-type completion <>:: Defines an alias to an existing field. 
@@ -119,8 +117,6 @@ include::types/rank-features.asciidoc[] include::types/search-as-you-type.asciidoc[] -include::types/sparse-vector.asciidoc[] - include::types/text.asciidoc[] include::types/token-count.asciidoc[] diff --git a/docs/reference/mapping/types/sparse-vector.asciidoc b/docs/reference/mapping/types/sparse-vector.asciidoc deleted file mode 100644 index 9f7e3963464a3..0000000000000 --- a/docs/reference/mapping/types/sparse-vector.asciidoc +++ /dev/null @@ -1,64 +0,0 @@ -[role="xpack"] -[testenv="basic"] -[[sparse-vector]] -=== Sparse vector datatype -++++ -Sparse vector -++++ - -deprecated[7.6, The `sparse_vector` type is deprecated and will be removed in 8.0.] -experimental[] - -A `sparse_vector` field stores sparse vectors of float values. -The maximum number of dimensions that can be in a vector should -not exceed 1024. The number of dimensions can be -different across documents. A `sparse_vector` field is -a single-valued field. - -These vectors can be used for <>. -For example, a document score can represent a distance between -a given query vector and the indexed document vector. - -You represent a sparse vector as an object, where object fields -are dimensions, and fields values are values for these dimensions. -Dimensions are integer values from `0` to `65535` encoded as strings. -Dimensions don't need to be in order. - -[source,console] --------------------------------------------------- -PUT my_index -{ - "mappings": { - "properties": { - "my_vector": { - "type": "sparse_vector" - }, - "my_text" : { - "type" : "keyword" - } - } - } -} --------------------------------------------------- -// TEST[warning:The [sparse_vector] field type is deprecated and will be removed in 8.0.] 
- -[source,console] --------------------------------------------------- -PUT my_index/_doc/1 -{ - "my_text" : "text1", - "my_vector" : {"1": 0.5, "5": -0.5, "100": 1} -} - -PUT my_index/_doc/2 -{ - "my_text" : "text2", - "my_vector" : {"103": 0.5, "4": -0.5, "5": 1, "11" : 1.2} -} --------------------------------------------------- -// TEST[continued] - -Internally, each document's sparse vector is encoded as a binary -doc value. Its size in bytes is equal to -`6 * NUMBER_OF_DIMENSIONS + 4`, where `NUMBER_OF_DIMENSIONS` - -number of the vector's dimensions. \ No newline at end of file diff --git a/docs/reference/migration/migrate_8_0/search.asciidoc b/docs/reference/migration/migrate_8_0/search.asciidoc index 3964aaa6666a2..c752b1ab8b393 100644 --- a/docs/reference/migration/migrate_8_0/search.asciidoc +++ b/docs/reference/migration/migrate_8_0/search.asciidoc @@ -36,6 +36,14 @@ The `nested_filter` and `nested_path` options, deprecated in 6.x, have been remo `_search` and `_get` requests. Adaptive replica selection (activated by default in this version) will route requests more efficiently using the service time of prior inter-node communications. +[float] +==== Removal of sparse vector fields +The `sparse_vector` field type was deprecated in 7.6 and is now removed in +8.0. We have not seen much interest in this experimental field type, and don't +see a clear use case as it's currently designed. If you have feedback or +suggestions around sparse vector functionality, please let us know through +GitHub or the 'discuss' forums. 
+ [float] ==== Update to vector function signatures The vector functions of the form `function(query, doc['field'])` were diff --git a/docs/reference/vectors/vector-functions.asciidoc b/docs/reference/vectors/vector-functions.asciidoc index 9db4757f03579..14671ece2db0d 100644 --- a/docs/reference/vectors/vector-functions.asciidoc +++ b/docs/reference/vectors/vector-functions.asciidoc @@ -199,173 +199,3 @@ You can check if a document has a value for the field `my_vector` by "source": "doc['my_vector'].size() == 0 ? 0 : cosineSimilarity(params.queryVector, 'my_vector')" -------------------------------------------------- // NOTCONSOLE - -====== `sparse_vector` functions - -deprecated[7.6, The `sparse_vector` type is deprecated and will be removed in 8.0.] - -Let's create an index with a `sparse_vector` mapping and index a couple -of documents into it. - -[source,console] --------------------------------------------------- -PUT my_sparse_index -{ - "mappings": { - "properties": { - "my_sparse_vector": { - "type": "sparse_vector" - }, - "status" : { - "type" : "keyword" - } - } - } -} --------------------------------------------------- -// TEST[warning:The [sparse_vector] field type is deprecated and will be removed in 8.0.] - -[source,console] --------------------------------------------------- -PUT my_sparse_index/_doc/1 -{ - "my_sparse_vector": {"2": 1.5, "15" : 2, "50": -1.1, "4545": 1.1}, - "status" : "published" -} - -PUT my_sparse_index/_doc/2 -{ - "my_sparse_vector": {"2": 2.5, "10" : 1.3, "55": -2.3, "113": 1.6}, - "status" : "published" -} - -POST my_sparse_index/_refresh --------------------------------------------------- -// TEST[continued] - -The `cosineSimilaritySparse` function calculates cosine similarity -between a given query vector and document vectors. 
- -[source,console] --------------------------------------------------- -GET my_sparse_index/_search -{ - "query": { - "script_score": { - "query" : { - "bool" : { - "filter" : { - "term" : { - "status" : "published" - } - } - } - }, - "script": { - "source": "cosineSimilaritySparse(params.query_vector, 'my_sparse_vector') + 1.0", - "params": { - "query_vector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0} - } - } - } - } -} --------------------------------------------------- -// TEST[continued] -// TEST[warning:The [sparse_vector] field type is deprecated and will be removed in 8.0.] - -The `dotProductSparse` function calculates dot product -between a given query vector and document vectors. - -[source,console] --------------------------------------------------- -GET my_sparse_index/_search -{ - "query": { - "script_score": { - "query" : { - "bool" : { - "filter" : { - "term" : { - "status" : "published" - } - } - } - }, - "script": { - "source": """ - double value = dotProductSparse(params.query_vector, 'my_sparse_vector'); - return sigmoid(1, Math.E, -value); - """, - "params": { - "query_vector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0} - } - } - } - } -} --------------------------------------------------- -// TEST[continued] -// TEST[warning:The [sparse_vector] field type is deprecated and will be removed in 8.0.] - -The `l1normSparse` function calculates L^1^ distance -between a given query vector and document vectors. 
- -[source,console] --------------------------------------------------- -GET my_sparse_index/_search -{ - "query": { - "script_score": { - "query" : { - "bool" : { - "filter" : { - "term" : { - "status" : "published" - } - } - } - }, - "script": { - "source": "1 / (1 + l1normSparse(params.queryVector, 'my_sparse_vector'))", - "params": { - "queryVector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0} - } - } - } - } -} --------------------------------------------------- -// TEST[continued] -// TEST[warning:The [sparse_vector] field type is deprecated and will be removed in 8.0.] - -The `l2normSparse` function calculates L^2^ distance -between a given query vector and document vectors. - -[source,console] --------------------------------------------------- -GET my_sparse_index/_search -{ - "query": { - "script_score": { - "query" : { - "bool" : { - "filter" : { - "term" : { - "status" : "published" - } - } - } - }, - "script": { - "source": "1 / (1 + l2normSparse(params.queryVector, 'my_sparse_vector'))", - "params": { - "queryVector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0} - } - } - } - } -} --------------------------------------------------- -// TEST[continued] -// TEST[warning:The [sparse_vector] field type is deprecated and will be removed in 8.0.] 
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/vectors/VectorsFeatureSetUsage.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/vectors/VectorsFeatureSetUsage.java index 955989308a49e..1eb8cbae96134 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/vectors/VectorsFeatureSetUsage.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/vectors/VectorsFeatureSetUsage.java @@ -6,6 +6,7 @@ package org.elasticsearch.xpack.core.vectors; +import org.elasticsearch.Version; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -18,13 +19,15 @@ public class VectorsFeatureSetUsage extends XPackFeatureSet.Usage { private final int numDenseVectorFields; - private final int numSparseVectorFields; private final int avgDenseVectorDims; public VectorsFeatureSetUsage(StreamInput input) throws IOException { super(input); numDenseVectorFields = input.readVInt(); - numSparseVectorFields = input.readVInt(); + // Older versions recorded the number of sparse vector fields. + if (input.getVersion().before(Version.V_8_0_0)) { + input.readVInt(); + } avgDenseVectorDims = input.readVInt(); } @@ -32,15 +35,16 @@ public VectorsFeatureSetUsage(StreamInput input) throws IOException { public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); out.writeVInt(numDenseVectorFields); - out.writeVInt(numSparseVectorFields); + // Older versions recorded the number of sparse vector fields. 
+ if (out.getVersion().before(Version.V_8_0_0)) { + out.writeVInt(0); /* placeholder must be a VInt: pre-8.0 readers consume this slot with readVInt() */ + } out.writeVInt(avgDenseVectorDims); } - public VectorsFeatureSetUsage(boolean available, boolean enabled, int numDenseVectorFields, int numSparseVectorFields, - int avgDenseVectorDims) { + public VectorsFeatureSetUsage(boolean available, boolean enabled, int numDenseVectorFields, int avgDenseVectorDims) { super(XPackField.VECTORS, available, enabled); this.numDenseVectorFields = numDenseVectorFields; - this.numSparseVectorFields = numSparseVectorFields; this.avgDenseVectorDims = avgDenseVectorDims; } @@ -49,23 +53,20 @@ public VectorsFeatureSetUsage(boolean available, boolean enabled, int numDenseVe protected void innerXContent(XContentBuilder builder, Params params) throws IOException { super.innerXContent(builder, params); builder.field("dense_vector_fields_count", numDenseVectorFields); - builder.field("sparse_vector_fields_count", numSparseVectorFields); builder.field("dense_vector_dims_avg_count", avgDenseVectorDims); } public int numDenseVectorFields() { return numDenseVectorFields; } - public int numSparseVectorFields() { - return numSparseVectorFields; - } + public int avgDenseVectorDims() { return avgDenseVectorDims; } @Override public int hashCode() { - return Objects.hash(available, enabled, numDenseVectorFields, numSparseVectorFields, avgDenseVectorDims); + return Objects.hash(available, enabled, numDenseVectorFields, avgDenseVectorDims); } @Override @@ -73,6 +74,6 @@ public boolean equals(Object obj) { if (obj instanceof VectorsFeatureSetUsage == false) return false; VectorsFeatureSetUsage other = (VectorsFeatureSetUsage) obj; return available == other.available && enabled == other.enabled && numDenseVectorFields == other.numDenseVectorFields - && numSparseVectorFields == other.numSparseVectorFields && avgDenseVectorDims == other.avgDenseVectorDims; + && avgDenseVectorDims == other.avgDenseVectorDims; } } diff --git
a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/vectors/VectorsFeatureSetUsageTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/vectors/VectorsFeatureSetUsageTests.java index f0874299f442e..8033f932836c3 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/vectors/VectorsFeatureSetUsageTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/vectors/VectorsFeatureSetUsageTests.java @@ -17,10 +17,9 @@ protected VectorsFeatureSetUsage createTestInstance() { boolean available = randomBoolean(); boolean enabled = randomBoolean(); if (available && enabled) { - return new VectorsFeatureSetUsage(available, enabled, randomIntBetween(0, 100000), randomIntBetween(0, 100000), - randomIntBetween(0, 1024)); + return new VectorsFeatureSetUsage(available, enabled, randomIntBetween(0, 100000), randomIntBetween(0, 1024)); } else { - return new VectorsFeatureSetUsage(available, enabled, 0, 0, 0); + return new VectorsFeatureSetUsage(available, enabled, 0, 0); } } @@ -29,7 +28,6 @@ protected VectorsFeatureSetUsage mutateInstance(VectorsFeatureSetUsage instance) boolean available = instance.available(); boolean enabled = instance.enabled(); int numDenseVectorFields = instance.numDenseVectorFields(); - int numSparseVectorFields = instance.numSparseVectorFields(); int avgDenseVectorDims = instance.avgDenseVectorDims(); if (available == false || enabled == false) { @@ -37,9 +35,8 @@ protected VectorsFeatureSetUsage mutateInstance(VectorsFeatureSetUsage instance) enabled = true; } numDenseVectorFields = randomValueOtherThan(numDenseVectorFields, () -> randomIntBetween(0, 100000)); - numSparseVectorFields = randomValueOtherThan(numSparseVectorFields, () -> randomIntBetween(0, 100000)); avgDenseVectorDims = randomValueOtherThan(avgDenseVectorDims, () -> randomIntBetween(0, 1024)); - return new VectorsFeatureSetUsage(available, enabled, numDenseVectorFields, numSparseVectorFields, avgDenseVectorDims); + 
return new VectorsFeatureSetUsage(available, enabled, numDenseVectorFields, avgDenseVectorDims); } @Override diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml index 891de6adee661..3e2746dc35f24 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml @@ -10,7 +10,6 @@ setup: body: settings: number_of_replicas: 0 - # we need to have 1 shard to get request failure in test "Dense vectors should error with sparse vector functions" number_of_shards: 1 mappings: properties: @@ -184,31 +183,3 @@ setup: - match: {hits.hits.0._id: "1"} - match: {hits.hits.1._id: "2"} - match: {hits.hits.1._score: 0.0} - ---- -"Dense vectors should error with sparse vector functions": - -- do: - index: - index: test-index - id: 1 - body: - my_dense_vector: [10, 2, 0.15] - -- do: - indices.refresh: {} - -- do: - catch: bad_request - headers: - Content-Type: application/json - search: - body: - query: - script_score: - query: {match_all: {} } - script: - source: "dotProductSparse(params.query_vector, 'my_dense_vector')" - params: - query_vector: {"2": 0.5, "10" : 111.3, "3": 44} -- match: { error.root_cause.0.type: "script_exception" } diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/30_sparse_vector_basic.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/30_sparse_vector_basic.yml deleted file mode 100644 index c46866e77fad0..0000000000000 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/30_sparse_vector_basic.yml +++ /dev/null @@ -1,105 +0,0 @@ -setup: - - skip: - features: [headers, warnings] - version: " - 7.2.99" - reason: "sparse_vector functions were introduced in 7.3.0" - - - do: - warnings: - - The [sparse_vector] field type is 
deprecated and will be removed in 8.0. - indices.create: - index: test-index - body: - settings: - number_of_replicas: 0 - mappings: - properties: - my_sparse_vector: - type: sparse_vector - - do: - index: - index: test-index - id: 1 - body: - my_sparse_vector: {"2": 230.0, "10" : 300.33, "50": -34.8988, "113": 15.555, "4545": -200.0} - - - do: - index: - index: test-index - id: 2 - body: - my_sparse_vector: {"2": -0.5, "10" : 100.0, "50": -13, "113": 14.8, "4545": -156.0} - - - do: - index: - index: test-index - id: 3 - body: - my_sparse_vector: {"2": 0.5, "10" : 111.3, "50": -13.0, "113": 14.8, "4545": -156.0} - - - do: - indices.refresh: {} - ---- -"Dot Product": -- do: - headers: - Content-Type: application/json - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. - search: - rest_total_hits_as_int: true - body: - query: - script_score: - query: {match_all: {} } - script: - source: "dotProductSparse(params.query_vector, 'my_sparse_vector')" - params: - query_vector: {"2": 0.5, "10" : 111.3, "50": -13.0, "113": 14.8, "4545": -156.0} - -- match: {hits.total: 3} - -- match: {hits.hits.0._id: "1"} -- gte: {hits.hits.0._score: 65425.62} -- lte: {hits.hits.0._score: 65425.63} - -- match: {hits.hits.1._id: "3"} -- gte: {hits.hits.1._score: 37111.98} -- lte: {hits.hits.1._score: 37111.99} - -- match: {hits.hits.2._id: "2"} -- gte: {hits.hits.2._score: 35853.78} -- lte: {hits.hits.2._score: 35853.79} - ---- -"Cosine Similarity": -- do: - headers: - Content-Type: application/json - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. 
- search: - rest_total_hits_as_int: true - body: - query: - script_score: - query: {match_all: {} } - script: - source: "cosineSimilaritySparse(params.query_vector, 'my_sparse_vector')" - params: - query_vector: {"2": -0.5, "10" : 111.3, "50": -13.0, "113": 14.8, "4545": -156.0} - -- match: {hits.total: 3} - -- match: {hits.hits.0._id: "3"} -- gte: {hits.hits.0._score: 0.999} -- lte: {hits.hits.0._score: 1.001} - -- match: {hits.hits.1._id: "2"} -- gte: {hits.hits.1._score: 0.998} -- lte: {hits.hits.1._score: 1.0} - -- match: {hits.hits.2._id: "1"} -- gte: {hits.hits.2._score: 0.78} -- lte: {hits.hits.2._score: 0.791} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/35_sparse_vector_l1l2.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/35_sparse_vector_l1l2.yml deleted file mode 100644 index b9fb20f8a3c8d..0000000000000 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/35_sparse_vector_l1l2.yml +++ /dev/null @@ -1,106 +0,0 @@ -setup: - - skip: - features: [headers, warnings] - version: " - 7.3.99" - reason: "l1norm and l2norm functions were added from 7.4" - - - do: - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. 
- indices.create: - index: test-index - body: - settings: - number_of_replicas: 0 - mappings: - properties: - my_sparse_vector: - type: sparse_vector - - do: - index: - index: test-index - id: 1 - body: - my_sparse_vector: {"2": 230.0, "10" : 300.33, "50": -34.8988, "113": 15.555, "4545": -200.0} - - - do: - index: - index: test-index - id: 2 - body: - my_sparse_vector: {"2": -0.5, "10" : 100.0, "50": -13, "113": 14.8, "4545": -156.0} - - - do: - index: - index: test-index - id: 3 - body: - my_sparse_vector: {"2": 0.5, "10" : 111.3, "50": -13.0, "113": 14.8, "4545": -156.0} - - - do: - indices.refresh: {} - ---- -"L1 norm": - - do: - headers: - Content-Type: application/json - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. - search: - rest_total_hits_as_int: true - body: - query: - script_score: - query: {match_all: {} } - script: - source: "l1normSparse(params.query_vector, 'my_sparse_vector')" - params: - query_vector: {"2": 0.5, "10" : 111.3, "50": -13.0, "113": 14.8, "4545": -156.0} - - - match: {hits.total: 3} - - - match: {hits.hits.0._id: "1"} - - gte: {hits.hits.0._score: 485.18} - - lte: {hits.hits.0._score: 485.19} - - - match: {hits.hits.1._id: "2"} - - gte: {hits.hits.1._score: 12.29} - - lte: {hits.hits.1._score: 12.31} - - - match: {hits.hits.2._id: "3"} - - gte: {hits.hits.2._score: 0.00} - - lte: {hits.hits.2._score: 0.01} - - ---- -"L2 norm": - - do: - headers: - Content-Type: application/json - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. 
- search: - rest_total_hits_as_int: true - body: - query: - script_score: - query: {match_all: {} } - script: - source: "l2normSparse(params.query_vector, 'my_sparse_vector')" - params: - query_vector: {"2": 0.5, "10" : 111.3, "50": -13.0, "113": 14.8, "4545": -156.0} - - - match: {hits.total: 3} - - - match: {hits.hits.0._id: "1"} - - gte: {hits.hits.0._score: 301.36} - - lte: {hits.hits.0._score: 301.37} - - - match: {hits.hits.1._id: "2"} - - gte: {hits.hits.1._score: 11.34} - - lte: {hits.hits.1._score: 11.35} - - - match: {hits.hits.2._id: "3"} - - gte: {hits.hits.2._score: 0.00} - - lte: {hits.hits.2._score: 0.01} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/40_sparse_vector_special_cases.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/40_sparse_vector_special_cases.yml deleted file mode 100644 index f5c275cc8fa6d..0000000000000 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/40_sparse_vector_special_cases.yml +++ /dev/null @@ -1,379 +0,0 @@ -setup: - - skip: - features: [headers, warnings] - version: " - 7.3.99" - reason: "sparse_vector functions check on empty values was added from 7.4" - - - do: - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. 
- indices.create: - index: test-index - body: - settings: - number_of_replicas: 0 - # we need to have 1 shard to get request failure in test "Sparse vectors should error with dense vector functions" - number_of_shards: 1 - mappings: - properties: - my_sparse_vector: - type: sparse_vector - - ---- -"Vectors of different dimensions and data types": -# document vectors of different dimensions - - do: - index: - index: test-index - id: 1 - body: - my_sparse_vector: {"1": 10} - - - do: - index: - index: test-index - id: 2 - body: - my_sparse_vector: {"1": 10, "10" : 10.5} - - - do: - index: - index: test-index - id: 3 - body: - my_sparse_vector: {"1": 10, "10" : 10.5, "100": 100.5} - - - do: - indices.refresh: {} - -# query vector of type integer - - do: - headers: - Content-Type: application/json - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. - search: - rest_total_hits_as_int: true - body: - query: - script_score: - query: {match_all: {} } - script: - source: "cosineSimilaritySparse(params.query_vector, 'my_sparse_vector')" - params: - query_vector: {"1": 10} - - - match: {hits.total: 3} - - match: {hits.hits.0._id: "1"} - - match: {hits.hits.1._id: "2"} - - match: {hits.hits.2._id: "3"} - -# query vector of type double - - do: - headers: - Content-Type: application/json - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. - search: - rest_total_hits_as_int: true - body: - query: - script_score: - query: {match_all: {} } - script: - source: "cosineSimilaritySparse(params.query_vector, 'my_sparse_vector')" - params: - query_vector: {"1": 10.0} - - - match: {hits.total: 3} - - match: {hits.hits.0._id: "1"} - - match: {hits.hits.1._id: "2"} - - match: {hits.hits.2._id: "3"} - ---- -"Documents missing a vector field": -- do: - index: - index: test-index - id: 1 - body: - my_sparse_vector: {"1": 10} - -- do: - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. 
- index: - index: test-index - id: 2 - body: - some_other_field: "random_value" - -- do: - indices.refresh: {} - -# expect an error when documents miss a vector field -- do: - catch: bad_request - headers: - Content-Type: application/json - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. - search: - rest_total_hits_as_int: true - body: - query: - script_score: - query: {match_all: {} } - script: - source: "cosineSimilaritySparse(params.query_vector, 'my_sparse_vector')" - params: - query_vector: {"1": 10.0} -- match: { error.root_cause.0.type: "script_exception" } - -# guard against missing values by checking size() -- do: - headers: - Content-Type: application/json - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. - search: - rest_total_hits_as_int: true - body: - query: - script_score: - query: {match_all: {} } - script: - source: "doc['my_sparse_vector'].size() == 0 ? 0 : cosineSimilaritySparse(params.query_vector, 'my_sparse_vector')" - params: - query_vector: {"1": 10.0} - -- match: {hits.total: 2} -- match: {hits.hits.0._id: "1"} -- match: {hits.hits.1._id: "2"} -- match: {hits.hits.1._score: 0.0} - - ---- -"Dimensions can be sorted differently": -# All the documents' and query's vectors are the same, and should return cosineSimilarity equal to 1 -- do: - index: - index: test-index - id: 1 - body: - my_sparse_vector: {"2": 230.0, "11" : 300.33, "12": -34.8988, "30": 15.555, "100": -200.0} - -- do: - index: - index: test-index - id: 2 - body: - my_sparse_vector: {"100": -200.0, "12": -34.8988, "11" : 300.33, "113": 15.555, "2": 230.0} - -- do: - index: - index: test-index - id: 3 - body: - my_sparse_vector: {"100": -200.0, "30": 15.555, "12": -34.8988, "11" : 300.33, "2": 230.0} - -- do: - indices.refresh: {} - -- do: - headers: - Content-Type: application/json - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. 
- search: - rest_total_hits_as_int: true - body: - query: - script_score: - query: {match_all: {} } - script: - source: "cosineSimilaritySparse(params.query_vector, 'my_sparse_vector')" - params: - query_vector: {"100": -200.0, "11" : 300.33, "12": -34.8988, "2": 230.0, "30": 15.555} - -- match: {hits.total: 3} - -- gte: {hits.hits.0._score: 0.99} -- lte: {hits.hits.0._score: 1.001} -- gte: {hits.hits.1._score: 0.99} -- lte: {hits.hits.1._score: 1.001} -- gte: {hits.hits.2._score: 0.99} -- lte: {hits.hits.2._score: 1.001} - ---- -"Sparse vectors should error with dense vector functions": - -- do: - index: - index: test-index - id: 1 - body: - my_sparse_vector: {"100": -200.0, "30": 15.555} - -- do: - indices.refresh: {} - -- do: - catch: bad_request - headers: - Content-Type: application/json - search: - body: - query: - script_score: - query: {match_all: {} } - script: - source: "dotProduct(params.query_vector, 'my_sparse_vector')" - params: - query_vector: [0.5, 111] -- match: { error.root_cause.0.type: "script_exception" } - ---- -"Query vector has different dimensions from documents' vectors": -- do: - index: - index: test-index - id: 1 - body: - my_sparse_vector: {"1": 10} - -- do: - index: - index: test-index - id: 2 - body: - my_sparse_vector: {"1": 10, "10" : 10.5} - -- do: - index: - index: test-index - id: 3 - body: - my_sparse_vector: {"1": 10, "10" : 10.5, "100": 100.5} - -- do: - indices.refresh: {} - -- do: - headers: - Content-Type: application/json - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. 
- search: - rest_total_hits_as_int: true - body: - query: - script_score: - query: {match_all: {} } - script: - source: "dotProductSparse(params.query_vector, 'my_sparse_vector')" - params: - query_vector: {"1": 10, "5": 5} - -- match: {hits.total: 3} - -- match: {hits.hits.0._id: "1"} -- gte: {hits.hits.0._score: 99.99} -- lte: {hits.hits.0._score: 100.01} - -- match: {hits.hits.1._id: "2"} -- gte: {hits.hits.0._score: 99.99} -- lte: {hits.hits.0._score: 100.01} - -- match: {hits.hits.2._id: "3"} -- gte: {hits.hits.0._score: 99.99} -- lte: {hits.hits.0._score: 100.01} - - -- do: - headers: - Content-Type: application/json - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. - search: - rest_total_hits_as_int: true - body: - query: - script_score: - query: {match_all: {} } - script: - source: "cosineSimilaritySparse(params.query_vector, 'my_sparse_vector')" - params: - query_vector: {"1": 10, "5" : 5} - -- match: {hits.total: 3} - -- match: {hits.hits.0._id: "1"} -- gte: {hits.hits.0._score: 0.894} -- lte: {hits.hits.0._score: 0.895} - -- match: {hits.hits.1._id: "2"} -- gte: {hits.hits.1._score: 0.61} -- lte: {hits.hits.1._score: 0.62} - -- match: {hits.hits.2._id: "3"} -- gte: {hits.hits.2._score: 0.08} -- lte: {hits.hits.2._score: 0.09} - -- do: - headers: - Content-Type: application/json - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. 
- search: - rest_total_hits_as_int: true - body: - query: - script_score: - query: {match_all: {} } - script: - source: "l1normSparse(params.query_vector, 'my_sparse_vector')" - params: - query_vector: {"1": 10, "5": 5} - -- match: {hits.total: 3} - -- match: {hits.hits.0._id: "3"} -- match: {hits.hits.0._score: 116} - -- match: {hits.hits.1._id: "2"} -- match: {hits.hits.1._score: 15.5} - -- match: {hits.hits.2._id: "1"} -- match: {hits.hits.2._score: 5} - -- do: - headers: - Content-Type: application/json - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. - search: - rest_total_hits_as_int: true - body: - query: - script_score: - query: {match_all: {} } - script: - source: "l2normSparse(params.query_vector, 'my_sparse_vector')" - params: - query_vector: {"1": 10, "5": 5} - -- match: {hits.total: 3} - -- match: {hits.hits.0._id: "3"} -- gte: {hits.hits.0._score: 101.17} -- lte: {hits.hits.0._score: 101.18} - -- match: {hits.hits.1._id: "2"} -- gte: {hits.hits.1._score: 11.62} -- lte: {hits.hits.1._score: 11.63} - -- match: {hits.hits.2._id: "1"} -- gte: {hits.hits.2._score: 5.0} -- lte: {hits.hits.2._score: 5.0} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/50_vector_stats.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/50_vector_stats.yml index b81ee9c5c193d..9ca60475fc887 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/50_vector_stats.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/50_vector_stats.yml @@ -1,6 +1,6 @@ setup: - skip: - features: [headers, warnings] + features: headers version: " - 7.3.99" reason: "vector stats was added from 7.4" @@ -10,7 +10,6 @@ setup: - match: { vectors.available: true } - match: { vectors.enabled: true } - match: { vectors.dense_vector_fields_count: 0 } - - match: { vectors.sparse_vector_fields_count: 0 } - match: { vectors.dense_vector_dims_avg_count: 0 } - do: @@ -27,8 +26,6 @@ setup: dims: 30 - 
do: - warnings: - - The [sparse_vector] field type is deprecated and will be removed in 8.0. indices.create: index: test-index2 body: @@ -37,12 +34,9 @@ setup: my_dense_vector3: type: dense_vector dims: 20 - my_sparse_vector1: - type: sparse_vector - do: {xpack.usage: {}} - match: { vectors.available: true } - match: { vectors.enabled: true } - match: { vectors.dense_vector_fields_count: 3 } - - match: { vectors.sparse_vector_fields_count: 1 } - match: { vectors.dense_vector_dims_avg_count: 20 } diff --git a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/VectorsUsageTransportAction.java b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/VectorsUsageTransportAction.java index f88f012b3b5ed..4de66dcac8c75 100644 --- a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/VectorsUsageTransportAction.java +++ b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/VectorsUsageTransportAction.java @@ -81,7 +81,7 @@ protected void masterOperation(Task task, XPackUsageRequest request, ClusterStat } } VectorsFeatureSetUsage usage = - new VectorsFeatureSetUsage(vectorsAvailable, vectorsEnabled, numDenseVectorFields, numSparseVectorFields, avgDenseVectorDims); + new VectorsFeatureSetUsage(vectorsAvailable, vectorsEnabled, numDenseVectorFields, avgDenseVectorDims); listener.onResponse(new XPackUsageFeatureResponse(usage)); } } diff --git a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java index b1518d3ecd586..4fbd48f489822 100644 --- a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java +++ b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java @@ -147,7 +147,7 @@ public Query existsQuery(QueryShardContext context) { @Override public 
IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) { - return new VectorDVIndexFieldData.Builder(true); + return new VectorDVIndexFieldData.Builder(); } @Override diff --git a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/SparseVectorFieldMapper.java b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/SparseVectorFieldMapper.java index 0b4abff4bb30a..afa178306bbed 100644 --- a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/SparseVectorFieldMapper.java +++ b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/SparseVectorFieldMapper.java @@ -8,17 +8,13 @@ package org.elasticsearch.xpack.vectors.mapper; import org.apache.logging.log4j.LogManager; -import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; +import org.elasticsearch.Version; import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.xcontent.XContentParser.Token; -import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.Mapper; @@ -26,26 +22,26 @@ import org.elasticsearch.index.mapper.ParseContext; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.search.DocValueFormat; -import org.elasticsearch.xpack.vectors.query.VectorDVIndexFieldData; -import java.io.IOException; import java.time.ZoneId; import java.util.List; import java.util.Map; -import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; - /** * A {@link FieldMapper} for 
indexing a sparse vector of floats. + * + * @deprecated The sparse_vector type was deprecated in 7.x and removed in 8.0. This mapper + * definition only exists so that 7.x indices can be read without error. + * + * TODO: remove in 9.0. */ +@Deprecated public class SparseVectorFieldMapper extends FieldMapper { - - public static final String CONTENT_TYPE = "sparse_vector"; - public static short MAX_DIMS_COUNT = 1024; //maximum allowed number of dimensions - public static int MAX_DIMS_NUMBER = 65535; //maximum allowed dimension's number - private static final DeprecationLogger deprecationLogger = new DeprecationLogger(LogManager.getLogger(SparseVectorFieldMapper.class)); - public static final String DEPRECATION_MESSAGE = "The [sparse_vector] field type is deprecated and will be removed in 8.0."; + static final String ERROR_MESSAGE = "The [sparse_vector] field type is no longer supported."; + static final String ERROR_MESSAGE_7X = "The [sparse_vector] field type is no longer supported. Old 7.x indices are allowed to " + + "contain [sparse_vector] fields, but they cannot be indexed or searched."; + public static final String CONTENT_TYPE = "sparse_vector"; public static class Defaults { public static final MappedFieldType FIELD_TYPE = new SparseVectorFieldType(); @@ -66,11 +62,6 @@ public Builder(String name) { builder = this; } - @Override - public SparseVectorFieldType fieldType() { - return (SparseVectorFieldType) super.fieldType(); - } - @Override public SparseVectorFieldMapper build(BuilderContext context) { setupFieldType(context); @@ -82,10 +73,14 @@ public SparseVectorFieldMapper build(BuilderContext context) { public static class TypeParser implements Mapper.TypeParser { @Override - public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { - deprecationLogger.deprecatedAndMaybeLog("sparse_vector", DEPRECATION_MESSAGE); - SparseVectorFieldMapper.Builder builder = new SparseVectorFieldMapper.Builder(name); - 
return builder; + public Mapper.Builder parse(String name, Map node, ParserContext parserContext) + throws MapperParsingException { + if (parserContext.indexVersionCreated().onOrAfter(Version.V_8_0_0)) { + throw new IllegalArgumentException(ERROR_MESSAGE); + } else { + deprecationLogger.deprecatedAndMaybeLog("sparse_vector", ERROR_MESSAGE_7X); + return new Builder(name); + } } } @@ -117,11 +112,6 @@ public Query existsQuery(QueryShardContext context) { return new DocValuesFieldExistsQuery(name()); } - @Override - public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) { - return new VectorDVIndexFieldData.Builder(false); - } - @Override public Query termQuery(Object value, QueryShardContext context) { throw new UnsupportedOperationException( @@ -147,55 +137,14 @@ public SparseVectorFieldType fieldType() { } @Override - public void parse(ParseContext context) throws IOException { - if (context.externalValueSet()) { - throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] can't be used in multi-fields"); - } - ensureExpectedToken(Token.START_OBJECT, context.parser().currentToken(), context.parser()::getTokenLocation); - int[] dims = new int[0]; - float[] values = new float[0]; - int dimCount = 0; - int dim = 0; - float value; - for (Token token = context.parser().nextToken(); token != Token.END_OBJECT; token = context.parser().nextToken()) { - if (token == Token.FIELD_NAME) { - try { - dim = Integer.parseInt(context.parser().currentName()); - if (dim < 0 || dim > MAX_DIMS_NUMBER) { - throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "]'s dimension number " + - "must be a non-negative integer value not exceeding [" + MAX_DIMS_NUMBER + "], got [" + dim + "]"); - } - } catch (NumberFormatException e) { - throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "]'s dimensions should be " + - "integers represented as strings, but got [" + 
context.parser().currentName() + "]", e); - } - } else if (token == Token.VALUE_NUMBER) { - value = context.parser().floatValue(true); - if (dims.length <= dimCount) { // ensure arrays have enough capacity - values = ArrayUtil.grow(values, dimCount + 1); - dims = ArrayUtil.grow(dims, dimCount + 1); - } - dims[dimCount] = dim; - values[dimCount] = value; - if (dimCount++ >= MAX_DIMS_COUNT) { - throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + - "] has exceeded the maximum allowed number of dimensions of [" + MAX_DIMS_COUNT + "]"); - } - } else { - throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + - "] takes an object that maps a dimension number to a float, " + "but got unexpected token [" + token + "]"); - } - } - - BytesRef br = VectorEncoderDecoder.encodeSparseVector(indexCreatedVersion, dims, values, dimCount); - BinaryDocValuesField field = new BinaryDocValuesField(fieldType().name(), br); - context.doc().addWithKey(fieldType().name(), field); + public void parse(ParseContext context) { + throw new UnsupportedOperationException(ERROR_MESSAGE_7X); } @Override protected void parseCreateField(ParseContext context, List fields) { - throw new AssertionError("parse is implemented directly"); + throw new IllegalStateException("parse is implemented directly"); } @Override diff --git a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/VectorEncoderDecoder.java b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/VectorEncoderDecoder.java index 2d591aaccd48f..651c018b8063f 100644 --- a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/VectorEncoderDecoder.java +++ b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/VectorEncoderDecoder.java @@ -8,154 +8,15 @@ package org.elasticsearch.xpack.vectors.mapper; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.InPlaceMergeSorter; import 
org.elasticsearch.Version; import java.nio.ByteBuffer; -// static utility functions for encoding and decoding dense_vector and sparse_vector fields public final class VectorEncoderDecoder { - static final byte INT_BYTES = 4; - static final byte SHORT_BYTES = 2; + public static final byte INT_BYTES = 4; private VectorEncoderDecoder() { } - /** - * Encodes a sparse array represented by values, dims and dimCount into a bytes array - BytesRef - * BytesRef: int[] floats encoded as integers values, 2 bytes for each dimension, length of vector - * @param indexVersion - index version - * @param dims - dims of the sparse array - * @param values - values of the sparse array - * @param dimCount - number of the dimensions, necessary as values and dims are dynamically created arrays, - * and may be over-allocated - * @return BytesRef - */ - public static BytesRef encodeSparseVector(Version indexVersion, int[] dims, float[] values, int dimCount) { - // 1. Sort dims and values - sortSparseDimsValues(dims, values, dimCount); - - // 2. Encode dimensions - // as each dimension is a positive value that doesn't exceed 65535, 2 bytes is enough for encoding it - byte[] bytes = indexVersion.onOrAfter(Version.V_7_5_0) ? new byte[dimCount * (INT_BYTES + SHORT_BYTES) + INT_BYTES] : - new byte[dimCount * (INT_BYTES + SHORT_BYTES)]; - ByteBuffer byteBuffer = ByteBuffer.wrap(bytes); - - for (int dim = 0; dim < dimCount; dim++) { - int dimValue = dims[dim]; - byteBuffer.put((byte) (dimValue >> 8)); - byteBuffer.put((byte) dimValue); - } - - // 3. Encode values - double dotProduct = 0.0f; - for (int dim = 0; dim < dimCount; dim++) { - float value = values[dim]; - byteBuffer.putFloat(value); - dotProduct += value * value; - } - - // 4. 
Encode vector magnitude at the end - if (indexVersion.onOrAfter(Version.V_7_5_0)) { - float vectorMagnitude = (float) Math.sqrt(dotProduct); - byteBuffer.putFloat(vectorMagnitude); - } - - return new BytesRef(bytes); - } - - /** - * Decodes the first part of BytesRef into sparse vector dimensions - * @param indexVersion - index version - * @param vectorBR - sparse vector encoded in BytesRef - */ - public static int[] decodeSparseVectorDims(Version indexVersion, BytesRef vectorBR) { - int dimCount = indexVersion.onOrAfter(Version.V_7_5_0) - ? (vectorBR.length - INT_BYTES) / (INT_BYTES + SHORT_BYTES) - : vectorBR.length / (INT_BYTES + SHORT_BYTES); - ByteBuffer byteBuffer = ByteBuffer.wrap(vectorBR.bytes, vectorBR.offset, dimCount * SHORT_BYTES); - - int[] dims = new int[dimCount]; - for (int dim = 0; dim < dimCount; dim++) { - dims[dim] = ((byteBuffer.get() & 0xFF) << 8) | (byteBuffer.get() & 0xFF); - } - return dims; - } - - /** - * Decodes the second part of the BytesRef into sparse vector values - * @param indexVersion - index version - * @param vectorBR - sparse vector encoded in BytesRef - */ - public static float[] decodeSparseVector(Version indexVersion, BytesRef vectorBR) { - int dimCount = indexVersion.onOrAfter(Version.V_7_5_0) - ? 
(vectorBR.length - INT_BYTES) / (INT_BYTES + SHORT_BYTES) - : vectorBR.length / (INT_BYTES + SHORT_BYTES); - int offset = vectorBR.offset + SHORT_BYTES * dimCount; - float[] vector = new float[dimCount]; - - ByteBuffer byteBuffer = ByteBuffer.wrap(vectorBR.bytes, offset, dimCount * INT_BYTES); - for (int dim = 0; dim < dimCount; dim++) { - vector[dim] = byteBuffer.getFloat(); - } - return vector; - } - - /** - * Sorts dimensions in the ascending order and - * sorts values in the same order as their corresponding dimensions - * - * @param dims - dimensions of the sparse query vector - * @param values - values for the sparse query vector - * @param n - number of dimensions - */ - public static void sortSparseDimsValues(int[] dims, float[] values, int n) { - new InPlaceMergeSorter() { - @Override - public int compare(int i, int j) { - return Integer.compare(dims[i], dims[j]); - } - - @Override - public void swap(int i, int j) { - int tempDim = dims[i]; - dims[i] = dims[j]; - dims[j] = tempDim; - - float tempValue = values[j]; - values[j] = values[i]; - values[i] = tempValue; - } - }.sort(0, n); - } - - /** - * Sorts dimensions in the ascending order and - * sorts values in the same order as their corresponding dimensions - * - * @param dims - dimensions of the sparse query vector - * @param values - values for the sparse query vector - * @param n - number of dimensions - */ - public static void sortSparseDimsFloatValues(int[] dims, float[] values, int n) { - new InPlaceMergeSorter() { - @Override - public int compare(int i, int j) { - return Integer.compare(dims[i], dims[j]); - } - - @Override - public void swap(int i, int j) { - int tempDim = dims[i]; - dims[i] = dims[j]; - dims[j] = tempDim; - - float tempValue = values[j]; - values[j] = values[i]; - values[i] = tempValue; - } - }.sort(0, n); - } - public static int denseVectorLength(Version indexVersion, BytesRef vectorBR) { return indexVersion.onOrAfter(Version.V_7_5_0) ? 
(vectorBR.length - INT_BYTES) / INT_BYTES diff --git a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorScriptDocValues.java b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/DenseVectorScriptDocValues.java similarity index 62% rename from x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorScriptDocValues.java rename to x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/DenseVectorScriptDocValues.java index f22e7ad16ea50..ea5bf69017390 100644 --- a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorScriptDocValues.java +++ b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/DenseVectorScriptDocValues.java @@ -13,15 +13,12 @@ import java.io.IOException; -/** - * VectorScriptDocValues represents docValues for dense and sparse vector fields - */ -public abstract class VectorScriptDocValues extends ScriptDocValues { +public class DenseVectorScriptDocValues extends ScriptDocValues { private final BinaryDocValues in; private BytesRef value; - VectorScriptDocValues(BinaryDocValues in) { + DenseVectorScriptDocValues(BinaryDocValues in) { this.in = in; } @@ -52,19 +49,4 @@ public int size() { return 1; } } - - // not final, as it needs to be extended by Mockito for tests - public static class DenseVectorScriptDocValues extends VectorScriptDocValues { - public DenseVectorScriptDocValues(BinaryDocValues in) { - super(in); - } - } - - // not final, as it needs to be extended by Mockito for tests - public static class SparseVectorScriptDocValues extends VectorScriptDocValues { - public SparseVectorScriptDocValues(BinaryDocValues in) { - super(in); - } - } - } diff --git a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/ScoreScriptUtils.java b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/ScoreScriptUtils.java index ff31c7b00ba1e..70fed21f42948 100644 --- 
a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/ScoreScriptUtils.java +++ b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/ScoreScriptUtils.java @@ -7,36 +7,22 @@ package org.elasticsearch.xpack.vectors.query; -import org.apache.logging.log4j.LogManager; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.Version; -import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.script.ScoreScript; -import org.elasticsearch.xpack.vectors.mapper.SparseVectorFieldMapper; import org.elasticsearch.xpack.vectors.mapper.VectorEncoderDecoder; -import org.elasticsearch.xpack.vectors.query.VectorScriptDocValues.DenseVectorScriptDocValues; -import org.elasticsearch.xpack.vectors.query.VectorScriptDocValues.SparseVectorScriptDocValues; import java.io.IOException; import java.nio.ByteBuffer; import java.util.List; -import java.util.Map; - -import static org.elasticsearch.xpack.vectors.mapper.VectorEncoderDecoder.sortSparseDimsFloatValues; public class ScoreScriptUtils { - private static final DeprecationLogger deprecationLogger = new DeprecationLogger(LogManager.getLogger(ScoreScriptUtils.class)); - - //**************FUNCTIONS FOR DENSE VECTORS - // Functions are implemented as classes to accept a hidden parameter scoreScript that contains some index settings. - // Also, constructors for some functions accept queryVector to calculate and cache queryVectorMagnitude only once - // per script execution for all documents. 
public static class DenseVectorFunction { final ScoreScript scoreScript; final float[] queryVector; - final VectorScriptDocValues.DenseVectorScriptDocValues docValues; + final DenseVectorScriptDocValues docValues; public DenseVectorFunction(ScoreScript scoreScript, List queryVector, @@ -184,206 +170,4 @@ public double cosineSimilarity() { return dotProduct / vectorMagnitude; } } - - //**************FUNCTIONS FOR SPARSE VECTORS - // Functions are implemented as classes to accept a hidden parameter scoreScript that contains some index settings. - // Also, constructors for some functions accept queryVector to calculate and cache queryVectorMagnitude only once - // per script execution for all documents. - - public static class SparseVectorFunction { - final ScoreScript scoreScript; - final float[] queryValues; - final int[] queryDims; - - final VectorScriptDocValues.SparseVectorScriptDocValues docValues; - - // prepare queryVector once per script execution - // queryVector represents a map of dimensions to values - public SparseVectorFunction(ScoreScript scoreScript, - Map queryVector, - String field) { - this.scoreScript = scoreScript; - this.docValues = (SparseVectorScriptDocValues) scoreScript.getDoc().get(field); - - //break vector into two arrays dims and values - int n = queryVector.size(); - queryValues = new float[n]; - queryDims = new int[n]; - int i = 0; - for (Map.Entry dimValue : queryVector.entrySet()) { - try { - queryDims[i] = Integer.parseInt(dimValue.getKey()); - } catch (final NumberFormatException e) { - throw new IllegalArgumentException("Failed to parse a query vector dimension, it must be an integer!", e); - } - queryValues[i] = dimValue.getValue().floatValue(); - i++; - } - // Sort dimensions in the ascending order and sort values in the same order as their corresponding dimensions - sortSparseDimsFloatValues(queryDims, queryValues, n); - deprecationLogger.deprecatedAndMaybeLog("sparse_vector_function", 
SparseVectorFieldMapper.DEPRECATION_MESSAGE); - } - - BytesRef getEncodedVector() { - try { - docValues.setNextDocId(scoreScript._getDocId()); - } catch (IOException e) { - throw ExceptionsHelper.convertToElastic(e); - } - - BytesRef vector = docValues.getEncodedValue(); - if (vector == null) { - throw new IllegalArgumentException("A document doesn't have a value for a vector field!"); - } - return vector; - } - } - - // Calculate l1 norm (Manhattan distance) between a query's sparse vector and documents' sparse vectors - public static final class L1NormSparse extends SparseVectorFunction { - public L1NormSparse(ScoreScript scoreScript,Map queryVector, String field) { - super(scoreScript, queryVector, field); - } - - public double l1normSparse() { - BytesRef vector = getEncodedVector(); - int[] docDims = VectorEncoderDecoder.decodeSparseVectorDims(scoreScript._getIndexVersion(), vector); - float[] docValues = VectorEncoderDecoder.decodeSparseVector(scoreScript._getIndexVersion(), vector); - - int queryIndex = 0; - int docIndex = 0; - double l1norm = 0; - while (queryIndex < queryDims.length && docIndex < docDims.length) { - if (queryDims[queryIndex] == docDims[docIndex]) { - l1norm += Math.abs(queryValues[queryIndex] - docValues[docIndex]); - queryIndex++; - docIndex++; - } else if (queryDims[queryIndex] > docDims[docIndex]) { - l1norm += Math.abs(docValues[docIndex]); // 0 for missing query dim - docIndex++; - } else { - l1norm += Math.abs(queryValues[queryIndex]); // 0 for missing doc dim - queryIndex++; - } - } - while (queryIndex < queryDims.length) { - l1norm += Math.abs(queryValues[queryIndex]); // 0 for missing doc dim - queryIndex++; - } - while (docIndex < docDims.length) { - l1norm += Math.abs(docValues[docIndex]); // 0 for missing query dim - docIndex++; - } - return l1norm; - } - } - - // Calculate l2 norm (Euclidean distance) between a query's sparse vector and documents' sparse vectors - public static final class L2NormSparse extends 
SparseVectorFunction { - public L2NormSparse(ScoreScript scoreScript, Map queryVector, String field) { - super(scoreScript, queryVector, field); - } - - public double l2normSparse() { - BytesRef vector = getEncodedVector(); - int[] docDims = VectorEncoderDecoder.decodeSparseVectorDims(scoreScript._getIndexVersion(), vector); - float[] docValues = VectorEncoderDecoder.decodeSparseVector(scoreScript._getIndexVersion(), vector); - - int queryIndex = 0; - int docIndex = 0; - double l2norm = 0; - while (queryIndex < queryDims.length && docIndex < docDims.length) { - if (queryDims[queryIndex] == docDims[docIndex]) { - double diff = queryValues[queryIndex] - docValues[docIndex]; - l2norm += diff * diff; - queryIndex++; - docIndex++; - } else if (queryDims[queryIndex] > docDims[docIndex]) { - double diff = docValues[docIndex]; // 0 for missing query dim - l2norm += diff * diff; - docIndex++; - } else { - double diff = queryValues[queryIndex]; // 0 for missing doc dim - l2norm += diff * diff; - queryIndex++; - } - } - while (queryIndex < queryDims.length) { - l2norm += queryValues[queryIndex] * queryValues[queryIndex]; // 0 for missing doc dims - queryIndex++; - } - while (docIndex < docDims.length) { - l2norm += docValues[docIndex]* docValues[docIndex]; // 0 for missing query dims - docIndex++; - } - return Math.sqrt(l2norm); - } - } - - // Calculate a dot product between a query's sparse vector and documents' sparse vectors - public static final class DotProductSparse extends SparseVectorFunction { - public DotProductSparse(ScoreScript scoreScript, Map queryVector, String field) { - super(scoreScript, queryVector, field); - } - - public double dotProductSparse() { - BytesRef vector = getEncodedVector(); - int[] docDims = VectorEncoderDecoder.decodeSparseVectorDims(scoreScript._getIndexVersion(), vector); - float[] docValues = VectorEncoderDecoder.decodeSparseVector(scoreScript._getIndexVersion(), vector); - - return intDotProductSparse(queryValues, queryDims, docValues, 
docDims); - } - } - - // Calculate cosine similarity between a query's sparse vector and documents' sparse vectors - public static final class CosineSimilaritySparse extends SparseVectorFunction { - final double queryVectorMagnitude; - - public CosineSimilaritySparse(ScoreScript scoreScript, Map queryVector, String field) { - super(scoreScript, queryVector, field); - double dotProduct = 0; - for (int i = 0; i< queryDims.length; i++) { - dotProduct += queryValues[i] * queryValues[i]; - } - this.queryVectorMagnitude = Math.sqrt(dotProduct); - } - - public double cosineSimilaritySparse() { - BytesRef vector = getEncodedVector(); - int[] docDims = VectorEncoderDecoder.decodeSparseVectorDims(scoreScript._getIndexVersion(), vector); - float[] docValues = VectorEncoderDecoder.decodeSparseVector(scoreScript._getIndexVersion(), vector); - - double docQueryDotProduct = intDotProductSparse(queryValues, queryDims, docValues, docDims); - double docVectorMagnitude = 0.0f; - if (scoreScript._getIndexVersion().onOrAfter(Version.V_7_5_0)) { - docVectorMagnitude = VectorEncoderDecoder.decodeVectorMagnitude(scoreScript._getIndexVersion(), vector); - } else { - for (float docValue : docValues) { - docVectorMagnitude += docValue * docValue; - } - docVectorMagnitude = (float) Math.sqrt(docVectorMagnitude); - } - - return docQueryDotProduct / (docVectorMagnitude * queryVectorMagnitude); - } - } - - private static double intDotProductSparse(float[] v1Values, int[] v1Dims, float[] v2Values, int[] v2Dims) { - double v1v2DotProduct = 0; - int v1Index = 0; - int v2Index = 0; - // find common dimensions among vectors v1 and v2 and calculate dotProduct based on common dimensions - while (v1Index < v1Values.length && v2Index < v2Values.length) { - if (v1Dims[v1Index] == v2Dims[v2Index]) { - v1v2DotProduct += v1Values[v1Index] * v2Values[v2Index]; - v1Index++; - v2Index++; - } else if (v1Dims[v1Index] > v2Dims[v2Index]) { - v2Index++; - } else { - v1Index++; - } - } - return v1v2DotProduct; - } } 
diff --git a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorDVAtomicFieldData.java b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorDVAtomicFieldData.java index 201bf00de1ea6..5f092afe17795 100644 --- a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorDVAtomicFieldData.java +++ b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorDVAtomicFieldData.java @@ -24,12 +24,10 @@ final class VectorDVAtomicFieldData implements AtomicFieldData { private final LeafReader reader; private final String field; - private final boolean isDense; - VectorDVAtomicFieldData(LeafReader reader, String field, boolean isDense) { + VectorDVAtomicFieldData(LeafReader reader, String field) { this.reader = reader; this.field = field; - this.isDense = isDense; } @Override @@ -51,11 +49,7 @@ public SortedBinaryDocValues getBytesValues() { public ScriptDocValues getScriptValues() { try { final BinaryDocValues values = DocValues.getBinary(reader, field); - if (isDense) { - return new VectorScriptDocValues.DenseVectorScriptDocValues(values); - } else { - return new VectorScriptDocValues.SparseVectorScriptDocValues(values); - } + return new DenseVectorScriptDocValues(values); } catch (IOException e) { throw new IllegalStateException("Cannot load doc values for vector field!", e); } diff --git a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorDVIndexFieldData.java b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorDVIndexFieldData.java index 04922c4131426..7d2aa5b25bd52 100644 --- a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorDVIndexFieldData.java +++ b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorDVIndexFieldData.java @@ -23,11 +23,9 @@ public class VectorDVIndexFieldData extends DocValuesIndexFieldData implements IndexFieldData { - private 
final boolean isDense; - public VectorDVIndexFieldData(Index index, String fieldName, boolean isDense) { + public VectorDVIndexFieldData(Index index, String fieldName) { super(index, fieldName); - this.isDense = isDense; } @Override @@ -37,25 +35,21 @@ public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMod @Override public VectorDVAtomicFieldData load(LeafReaderContext context) { - return new VectorDVAtomicFieldData(context.reader(), fieldName, isDense); + return new VectorDVAtomicFieldData(context.reader(), fieldName); } @Override - public VectorDVAtomicFieldData loadDirect(LeafReaderContext context) throws Exception { + public VectorDVAtomicFieldData loadDirect(LeafReaderContext context) { return load(context); } public static class Builder implements IndexFieldData.Builder { - private final boolean isDense; - public Builder(boolean isDense) { - this.isDense = isDense; - } @Override public IndexFieldData build(IndexSettings indexSettings, MappedFieldType fieldType, IndexFieldDataCache cache, CircuitBreakerService breakerService, MapperService mapperService) { final String fieldName = fieldType.name(); - return new VectorDVIndexFieldData(indexSettings.getIndex(), fieldName, isDense); + return new VectorDVIndexFieldData(indexSettings.getIndex(), fieldName); } } diff --git a/x-pack/plugin/vectors/src/main/resources/org/elasticsearch/xpack/vectors/query/whitelist.txt b/x-pack/plugin/vectors/src/main/resources/org/elasticsearch/xpack/vectors/query/whitelist.txt index 33abe41fcce0f..6eb6dd08fbdb4 100644 --- a/x-pack/plugin/vectors/src/main/resources/org/elasticsearch/xpack/vectors/query/whitelist.txt +++ b/x-pack/plugin/vectors/src/main/resources/org/elasticsearch/xpack/vectors/query/whitelist.txt @@ -3,11 +3,7 @@ # or more contributor license agreements. Licensed under the Elastic License; # you may not use this file except in compliance with the Elastic License. 
# -class org.elasticsearch.xpack.vectors.query.VectorScriptDocValues { -} -class org.elasticsearch.xpack.vectors.query.VectorScriptDocValues$DenseVectorScriptDocValues { -} -class org.elasticsearch.xpack.vectors.query.VectorScriptDocValues$SparseVectorScriptDocValues { +class org.elasticsearch.xpack.vectors.query.DenseVectorScriptDocValues { } class org.elasticsearch.script.ScoreScript @no_import { } @@ -17,8 +13,4 @@ static_import { double l2norm(org.elasticsearch.script.ScoreScript, List, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$L2Norm double cosineSimilarity(org.elasticsearch.script.ScoreScript, List, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$CosineSimilarity double dotProduct(org.elasticsearch.script.ScoreScript, List, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$DotProduct - double l1normSparse(org.elasticsearch.script.ScoreScript, Map, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$L1NormSparse - double l2normSparse(org.elasticsearch.script.ScoreScript, Map, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$L2NormSparse - double dotProductSparse(org.elasticsearch.script.ScoreScript, Map, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$DotProductSparse - double cosineSimilaritySparse(org.elasticsearch.script.ScoreScript, Map, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$CosineSimilaritySparse } \ No newline at end of file diff --git a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/SparseVectorFieldMapperTests.java b/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/SparseVectorFieldMapperTests.java index 15a68eb0aab7f..8213b237c82a7 100644 --- a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/SparseVectorFieldMapperTests.java +++ 
b/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/SparseVectorFieldMapperTests.java @@ -7,12 +7,8 @@ package org.elasticsearch.xpack.vectors.mapper; -import org.apache.lucene.document.BinaryDocValuesField; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; -import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.settings.Settings; @@ -20,45 +16,20 @@ import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.mapper.DocumentMapper; -import org.elasticsearch.index.mapper.DocumentMapperParser; import org.elasticsearch.index.mapper.MapperParsingException; -import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.SourceToParse; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.test.VersionUtils; import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin; import org.elasticsearch.xpack.vectors.Vectors; -import org.hamcrest.Matchers; -import org.junit.Before; -import java.io.IOException; import java.util.Collection; -import java.util.Map; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.IntStream; import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.core.IsInstanceOf.instanceOf; public class SparseVectorFieldMapperTests extends ESSingleNodeTestCase { - private DocumentMapper mapper; - - @Before - public void setUpMapper() throws Exception { - IndexService indexService = createIndex("test-index"); - DocumentMapperParser parser = indexService.mapperService().documentMapperParser(); 
- String mapping = Strings.toString(XContentFactory.jsonBuilder() - .startObject() - .startObject("_doc") - .startObject("properties") - .startObject("my-sparse-vector").field("type", "sparse_vector") - .endObject() - .endObject() - .endObject() - .endObject()); - mapper = parser.parse("_doc", new CompressedXContent(mapping)); - } @Override protected Collection> getPlugins() { @@ -71,197 +42,66 @@ protected boolean forbidPrivateIndexSettings() { return false; } - public void testDefaults() throws Exception { - Version indexVersion = Version.CURRENT; - int[] indexedDims = {65535, 50, 2}; - float[] indexedValues = {0.5f, 1800f, -34567.11f}; - ParsedDocument doc1 = mapper.parse(new SourceToParse("test-index", "1", BytesReference - .bytes(XContentFactory.jsonBuilder() - .startObject() - .startObject("my-sparse-vector") - .field(Integer.toString(indexedDims[0]), indexedValues[0]) - .field(Integer.toString(indexedDims[1]), indexedValues[1]) - .field(Integer.toString(indexedDims[2]), indexedValues[2]) - .endObject() - .endObject()), - XContentType.JSON)); - IndexableField[] fields = doc1.rootDoc().getFields("my-sparse-vector"); - assertEquals(1, fields.length); - assertThat(fields[0], Matchers.instanceOf(BinaryDocValuesField.class)); - - // assert that after decoding the indexed values are equal to expected - int[] expectedDims = {2, 50, 65535}; //the same as indexed but sorted - float[] expectedValues = {-34567.11f, 1800f, 0.5f}; //the same as indexed but sorted by their dimensions - double dotProduct = 0.0f; - for (float value: expectedValues) { - dotProduct += value * value; - } - float expectedMagnitude = (float) Math.sqrt(dotProduct); + public void testSparseVectorWith8xIndex() throws Exception { + Version version = VersionUtils.randomVersionBetween(random(), Version.V_8_0_0, Version.CURRENT); + Settings settings = Settings.builder() + .put(IndexMetaData.SETTING_INDEX_VERSION_CREATED.getKey(), version) + .build(); - // assert that after decoded magnitude, dims and 
values are equal to expected - BytesRef vectorBR = fields[0].binaryValue(); - int[] decodedDims = VectorEncoderDecoder.decodeSparseVectorDims(indexVersion, vectorBR); - assertArrayEquals( - "Decoded sparse vector dimensions are not equal to the indexed ones.", - expectedDims, - decodedDims - ); - float[] decodedValues = VectorEncoderDecoder.decodeSparseVector(indexVersion, vectorBR); - assertArrayEquals( - "Decoded sparse vector values are not equal to the indexed ones.", - expectedValues, - decodedValues, - 0.001f - ); - float decodedMagnitude = VectorEncoderDecoder.decodeVectorMagnitude(indexVersion, vectorBR); - assertEquals(expectedMagnitude, decodedMagnitude, 0.001f); + IndexService indexService = createIndex("index", settings); + MapperService mapperService = indexService.mapperService(); - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); - } - - public void testAddDocumentsToIndexBefore_V_7_5_0() throws Exception { - Version indexVersion = Version.V_7_4_0; - IndexService indexService = createIndex("test-index7_4", - Settings.builder().put(IndexMetaData.SETTING_INDEX_VERSION_CREATED.getKey(), indexVersion).build()); - DocumentMapperParser parser = indexService.mapperService().documentMapperParser(); - String mapping = Strings.toString(XContentFactory.jsonBuilder() + BytesReference mapping = BytesReference.bytes(XContentFactory.jsonBuilder() .startObject() .startObject("_doc") .startObject("properties") - .startObject("my-sparse-vector").field("type", "sparse_vector") + .startObject("my-vector").field("type", "sparse_vector") .endObject() .endObject() .endObject() .endObject()); - mapper = parser.parse("_doc", new CompressedXContent(mapping)); - - int[] indexedDims = {65535, 50, 2}; - float[] indexedValues = {0.5f, 1800f, -34567.11f}; - ParsedDocument doc1 = mapper.parse(new SourceToParse("test-index7_4", "1", BytesReference - .bytes(XContentFactory.jsonBuilder() - .startObject() - .startObject("my-sparse-vector") - 
.field(Integer.toString(indexedDims[0]), indexedValues[0]) - .field(Integer.toString(indexedDims[1]), indexedValues[1]) - .field(Integer.toString(indexedDims[2]), indexedValues[2]) - .endObject() - .endObject()), - XContentType.JSON)); - IndexableField[] fields = doc1.rootDoc().getFields("my-sparse-vector"); - assertEquals(1, fields.length); - assertThat(fields[0], Matchers.instanceOf(BinaryDocValuesField.class)); - - // assert that after decoding the indexed values are equal to expected - int[] expectedDims = {2, 50, 65535}; //the same as indexed but sorted - float[] expectedValues = {-34567.11f, 1800f, 0.5f}; //the same as indexed but sorted by their dimensions - - // assert that after decoded magnitude, dims and values are equal to expected - BytesRef vectorBR = fields[0].binaryValue(); - int[] decodedDims = VectorEncoderDecoder.decodeSparseVectorDims(indexVersion, vectorBR); - assertArrayEquals( - "Decoded sparse vector dimensions are not equal to the indexed ones.", - expectedDims, - decodedDims - ); - float[] decodedValues = VectorEncoderDecoder.decodeSparseVector(indexVersion, vectorBR); - assertArrayEquals( - "Decoded sparse vector values are not equal to the indexed ones.", - expectedValues, - decodedValues, - 0.001f - ); - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> + mapperService.parse(MapperService.SINGLE_MAPPING_NAME, new CompressedXContent(mapping))); + assertThat(e.getMessage(), containsString(SparseVectorFieldMapper.ERROR_MESSAGE)); } - public void testDimensionNumberValidation() { - // 1. 
test for an error on negative dimension - MapperParsingException e = expectThrows(MapperParsingException.class, () -> { - mapper.parse(new SourceToParse("test-index", "1", BytesReference - .bytes(XContentFactory.jsonBuilder() - .startObject() - .startObject("my-sparse-vector") - .field(Integer.toString(-50), 100f) - .endObject() - .endObject()), - XContentType.JSON)); - }); - assertThat(e.getCause(), instanceOf(IllegalArgumentException.class)); - assertThat(e.getCause().getMessage(), containsString( - "dimension number must be a non-negative integer value not exceeding [65535], got [-50]")); + public void testSparseVectorWith7xIndex() throws Exception { + Version version = VersionUtils.randomPreviousCompatibleVersion(random(), Version.V_8_0_0); + Settings settings = Settings.builder() + .put(IndexMetaData.SETTING_INDEX_VERSION_CREATED.getKey(), version) + .build(); - // 2. test for an error on a dimension greater than MAX_DIMS_NUMBER - e = expectThrows(MapperParsingException.class, () -> { - mapper.parse(new SourceToParse("test-index", "1", BytesReference - .bytes(XContentFactory.jsonBuilder() - .startObject() - .startObject("my-sparse-vector") - .field(Integer.toString(70000), 100f) - .endObject() - .endObject()), - XContentType.JSON)); - }); - assertThat(e.getCause(), instanceOf(IllegalArgumentException.class)); - assertThat(e.getCause().getMessage(), containsString( - "dimension number must be a non-negative integer value not exceeding [65535], got [70000]")); - - // 3. 
test for an error on a wrong formatted dimension - e = expectThrows(MapperParsingException.class, () -> { - mapper.parse(new SourceToParse("test-index", "1", BytesReference - .bytes(XContentFactory.jsonBuilder() - .startObject() - .startObject("my-sparse-vector") - .field("WrongDim123", 100f) - .endObject() - .endObject()), - XContentType.JSON)); - }); - assertThat(e.getCause(), instanceOf(IllegalArgumentException.class)); - assertThat(e.getCause().getMessage(), containsString( - "dimensions should be integers represented as strings, but got [WrongDim123]")); + IndexService indexService = createIndex("index", settings); + MapperService mapperService = indexService.mapperService(); - // 4. test for an error on a wrong format for the map of dims to values - e = expectThrows(MapperParsingException.class, () -> { - mapper.parse(new SourceToParse("test-index", "1", BytesReference - .bytes(XContentFactory.jsonBuilder() - .startObject() - .startObject("my-sparse-vector") - .startArray(Integer.toString(10)).value(10f).value(100f).endArray() + BytesReference mapping = BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("my-vector").field("type", "sparse_vector") + .endObject() .endObject() - .endObject()), - XContentType.JSON)); - }); - assertThat(e.getCause(), instanceOf(IllegalArgumentException.class)); - assertThat(e.getCause().getMessage(), containsString( - "takes an object that maps a dimension number to a float, but got unexpected token [START_ARRAY]")); - - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); - } - - public void testDimensionLimit() throws IOException { - Map validVector = IntStream.range(0, SparseVectorFieldMapper.MAX_DIMS_COUNT) - .boxed() - .collect(Collectors.toMap(String::valueOf, Function.identity())); - - BytesReference validDoc = BytesReference.bytes( - XContentFactory.jsonBuilder().startObject() - .field("my-sparse-vector", validVector) + .endObject() 
.endObject()); - mapper.parse(new SourceToParse("test-index", "1", validDoc, XContentType.JSON)); - Map invalidVector = IntStream.range(0, SparseVectorFieldMapper.MAX_DIMS_COUNT + 1) - .boxed() - .collect(Collectors.toMap(String::valueOf, Function.identity())); + DocumentMapper mapper = mapperService.parse(MapperService.SINGLE_MAPPING_NAME, new CompressedXContent(mapping)); + assertWarnings(SparseVectorFieldMapper.ERROR_MESSAGE_7X); - BytesReference invalidDoc = BytesReference.bytes( - XContentFactory.jsonBuilder().startObject() - .field("my-sparse-vector", invalidVector) - .endObject()); - MapperParsingException e = expectThrows(MapperParsingException.class, () -> mapper.parse( - new SourceToParse("test-index", "1", invalidDoc, XContentType.JSON))); - assertThat(e.getDetailedMessage(), containsString("has exceeded the maximum allowed number of dimensions")); + // Check that new vectors cannot be indexed. + int[] indexedDims = {65535, 50, 2}; + float[] indexedValues = {0.5f, 1800f, -34567.11f}; + BytesReference source = BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject() + .startObject("my-vector") + .field(Integer.toString(indexedDims[0]), indexedValues[0]) + .field(Integer.toString(indexedDims[1]), indexedValues[1]) + .field(Integer.toString(indexedDims[2]), indexedValues[2]) + .endObject() + .endObject()); - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); + MapperParsingException indexException = expectThrows(MapperParsingException.class, () -> + mapper.parse(new SourceToParse("index", "id", source, XContentType.JSON))); + assertThat(indexException.getCause().getMessage(), containsString(SparseVectorFieldMapper.ERROR_MESSAGE)); } - } diff --git a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/SparseVectorFieldTypeTests.java b/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/SparseVectorFieldTypeTests.java index 7f6af470d1825..825dcec2ee493 100644 --- 
a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/SparseVectorFieldTypeTests.java +++ b/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/SparseVectorFieldTypeTests.java @@ -16,4 +16,9 @@ public class SparseVectorFieldTypeTests extends FieldTypeTestCase { protected MappedFieldType createDefaultFieldType() { return new SparseVectorFieldMapper.SparseVectorFieldType(); } + + public void testDocValuesDisabled() { + MappedFieldType fieldType = createDefaultFieldType(); + expectThrows(IllegalArgumentException.class, () -> fieldType.fielddataBuilder("index")); + } } diff --git a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/VectorEncoderDecoderTests.java b/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/VectorEncoderDecoderTests.java deleted file mode 100644 index c81bdfe147ebd..0000000000000 --- a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/VectorEncoderDecoderTests.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
- */ - -package org.elasticsearch.xpack.vectors.mapper; - -import org.apache.lucene.util.BytesRef; -import org.elasticsearch.Version; -import org.elasticsearch.test.ESTestCase; - -import java.nio.ByteBuffer; -import java.util.HashSet; -import java.util.Set; -import java.util.Arrays; - -public class VectorEncoderDecoderTests extends ESTestCase { - - public void testSparseVectorEncodingDecoding() { - Version indexVersion = Version.CURRENT; - int dimCount = randomIntBetween(0, 100); - float[] expectedValues = new float[dimCount]; - int[] expectedDims = randomUniqueDims(dimCount); - double dotProduct = 0.0f; - for (int i = 0; i < dimCount; i++) { - expectedValues[i] = randomFloat(); - dotProduct += expectedValues[i] * expectedValues[i]; - } - float expectedMagnitude = (float) Math.sqrt(dotProduct); - - // test that sorting in the encoding works as expected - int[] sortedDims = Arrays.copyOf(expectedDims, dimCount); - Arrays.sort(sortedDims); - VectorEncoderDecoder.sortSparseDimsValues(expectedDims, expectedValues, dimCount); - assertArrayEquals( - "Sparse vector dims are not properly sorted!", - sortedDims, - expectedDims - ); - - // test that values that went through encoding and decoding are equal to their original - BytesRef encodedSparseVector = VectorEncoderDecoder.encodeSparseVector(indexVersion, expectedDims, expectedValues, dimCount); - int[] decodedDims = VectorEncoderDecoder.decodeSparseVectorDims(indexVersion, encodedSparseVector); - float[] decodedValues = VectorEncoderDecoder.decodeSparseVector(indexVersion, encodedSparseVector); - float decodedMagnitude = VectorEncoderDecoder.decodeVectorMagnitude(indexVersion, encodedSparseVector); - assertEquals(expectedMagnitude, decodedMagnitude, 0.0f); - assertArrayEquals( - "Decoded sparse vector dims are not equal to their original!", - expectedDims, - decodedDims - ); - assertArrayEquals( - "Decoded sparse vector values are not equal to their original.", - expectedValues, - decodedValues, - 0.001f - ); - } - - 
public void testSparseVectorEncodingDecodingBefore_V_7_5_0() { - Version indexVersion = Version.V_7_4_0; - int dimCount = randomIntBetween(0, 100); - float[] expectedValues = new float[dimCount]; - int[] expectedDims = randomUniqueDims(dimCount); - for (int i = 0; i < dimCount; i++) { - expectedValues[i] = randomFloat(); - } - - // test that sorting in the encoding works as expected - int[] sortedDims = Arrays.copyOf(expectedDims, dimCount); - Arrays.sort(sortedDims); - VectorEncoderDecoder.sortSparseDimsValues(expectedDims, expectedValues, dimCount); - assertArrayEquals( - "Sparse vector dims are not properly sorted!", - sortedDims, - expectedDims - ); - - // test that values that went through encoding and decoding are equal to their original - BytesRef encodedSparseVector = VectorEncoderDecoder.encodeSparseVector(indexVersion, expectedDims, expectedValues, dimCount); - int[] decodedDims = VectorEncoderDecoder.decodeSparseVectorDims(indexVersion, encodedSparseVector); - float[] decodedValues = VectorEncoderDecoder.decodeSparseVector(indexVersion, encodedSparseVector); - assertArrayEquals( - "Decoded sparse vector dims are not equal to their original!", - expectedDims, - decodedDims - ); - assertArrayEquals( - "Decoded sparse vector values are not equal to their original.", - expectedValues, - decodedValues, - 0.001f - ); - } - - // imitates the code in DenseVectorFieldMapper::parse - public static BytesRef mockEncodeDenseVector(float[] values, Version indexVersion) { - byte[] bytes = indexVersion.onOrAfter(Version.V_7_5_0) - ? 
new byte[VectorEncoderDecoder.INT_BYTES * values.length + VectorEncoderDecoder.INT_BYTES] - : new byte[VectorEncoderDecoder.INT_BYTES * values.length]; - double dotProduct = 0f; - - ByteBuffer byteBuffer = ByteBuffer.wrap(bytes); - for (float value : values) { - byteBuffer.putFloat(value); - dotProduct += value * value; - } - - if (indexVersion.onOrAfter(Version.V_7_5_0)) { - // encode vector magnitude at the end - float vectorMagnitude = (float) Math.sqrt(dotProduct); - byteBuffer.putFloat(vectorMagnitude); - } - return new BytesRef(bytes); - } - - // generate unique random dims - private static int[] randomUniqueDims(int dimCount) { - int[] values = new int[dimCount]; - Set<Integer> usedValues = new HashSet<>(); - int value; - for (int i = 0; i < dimCount; i++) { - value = randomValueOtherThanMany(usedValues::contains, () -> randomIntBetween(0, SparseVectorFieldMapper.MAX_DIMS_NUMBER)); - usedValues.add(value); - values[i] = value; - } - return values; - } - -} diff --git a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/query/DenseVectorFunctionTests.java b/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/query/DenseVectorFunctionTests.java index 9cdf97e643f4c..82a54bf05af2b 100644 --- a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/query/DenseVectorFunctionTests.java +++ b/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/query/DenseVectorFunctionTests.java @@ -10,18 +10,18 @@ import org.elasticsearch.Version; import org.elasticsearch.script.ScoreScript; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.vectors.mapper.VectorEncoderDecoder; import org.elasticsearch.xpack.vectors.query.ScoreScriptUtils.CosineSimilarity; import org.elasticsearch.xpack.vectors.query.ScoreScriptUtils.DotProduct; import org.elasticsearch.xpack.vectors.query.ScoreScriptUtils.L1Norm; import org.elasticsearch.xpack.vectors.query.ScoreScriptUtils.L2Norm; -import 
org.elasticsearch.xpack.vectors.query.VectorScriptDocValues.DenseVectorScriptDocValues; import org.junit.Before; +import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Collections; import java.util.List; -import static org.elasticsearch.xpack.vectors.mapper.VectorEncoderDecoderTests.mockEncodeDenseVector; import static org.hamcrest.Matchers.containsString; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -40,7 +40,7 @@ public void setUpVectors() { invalidQueryVector = Arrays.asList(0.5, 111.3); } - public void testDenseVectorFunctions() { + public void testVectorFunctions() { for (Version indexVersion : Arrays.asList(Version.V_7_4_0, Version.CURRENT)) { BytesRef encodedDocVector = mockEncodeDenseVector(docVector, indexVersion); DenseVectorScriptDocValues docValues = mock(DenseVectorScriptDocValues.class); @@ -96,4 +96,24 @@ private void testL2Norm(ScoreScript scoreScript) { IllegalArgumentException e = expectThrows(IllegalArgumentException.class, invalidFunction::l2norm); assertThat(e.getMessage(), containsString("query vector has a different number of dimensions [2] than the document vectors [5]")); } + + private static BytesRef mockEncodeDenseVector(float[] values, Version indexVersion) { + byte[] bytes = indexVersion.onOrAfter(Version.V_7_5_0) + ? 
new byte[VectorEncoderDecoder.INT_BYTES * values.length + VectorEncoderDecoder.INT_BYTES] + : new byte[VectorEncoderDecoder.INT_BYTES * values.length]; + double dotProduct = 0f; + + ByteBuffer byteBuffer = ByteBuffer.wrap(bytes); + for (float value : values) { + byteBuffer.putFloat(value); + dotProduct += value * value; + } + + if (indexVersion.onOrAfter(Version.V_7_5_0)) { + // encode vector magnitude at the end + float vectorMagnitude = (float) Math.sqrt(dotProduct); + byteBuffer.putFloat(vectorMagnitude); + } + return new BytesRef(bytes); + } } diff --git a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/query/SparseVectorFunctionTests.java b/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/query/SparseVectorFunctionTests.java deleted file mode 100644 index 381ec48f624c7..0000000000000 --- a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/query/SparseVectorFunctionTests.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
- */ - -package org.elasticsearch.xpack.vectors.query; - -import org.apache.lucene.util.BytesRef; -import org.elasticsearch.Version; -import org.elasticsearch.script.ScoreScript; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.xpack.vectors.mapper.SparseVectorFieldMapper; -import org.elasticsearch.xpack.vectors.mapper.VectorEncoderDecoder; -import org.elasticsearch.xpack.vectors.query.ScoreScriptUtils.CosineSimilaritySparse; -import org.elasticsearch.xpack.vectors.query.ScoreScriptUtils.DotProductSparse; -import org.elasticsearch.xpack.vectors.query.ScoreScriptUtils.L1NormSparse; -import org.elasticsearch.xpack.vectors.query.ScoreScriptUtils.L2NormSparse; -import org.elasticsearch.xpack.vectors.query.VectorScriptDocValues.SparseVectorScriptDocValues; -import org.junit.Before; - -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -public class SparseVectorFunctionTests extends ESTestCase { - private String field; - private int[] docVectorDims; - private float[] docVectorValues; - private Map queryVector; - - @Before - public void setUpVectors() { - field = "vector"; - docVectorDims = new int[] {2, 10, 50, 113, 4545}; - docVectorValues = new float[] {230.0f, 300.33f, -34.8988f, 15.555f, -200.0f}; - queryVector = new HashMap<>() {{ - put("2", 0.5); - put("10", 111.3); - put("50", -13.0); - put("113", 14.8); - put("4545", -156.0); - }}; - } - - public void testSparseVectorFunctions() { - for (Version indexVersion : Arrays.asList(Version.V_7_4_0, Version.CURRENT)) { - BytesRef encodedDocVector = VectorEncoderDecoder.encodeSparseVector(indexVersion, - docVectorDims, docVectorValues, docVectorDims.length); - SparseVectorScriptDocValues docValues = mock(SparseVectorScriptDocValues.class); - when(docValues.getEncodedValue()).thenReturn(encodedDocVector); - - ScoreScript scoreScript = mock(ScoreScript.class); - 
when(scoreScript._getIndexVersion()).thenReturn(indexVersion); - when(scoreScript.getDoc()).thenReturn(Collections.singletonMap(field, docValues)); - - testDotProduct(scoreScript); - testCosineSimilarity(scoreScript); - testL1Norm(scoreScript); - testL2Norm(scoreScript); - } - } - - private void testDotProduct(ScoreScript scoreScript) { - DotProductSparse function = new DotProductSparse(scoreScript, queryVector, field); - double result = function.dotProductSparse(); - assertEquals("dotProductSparse result is not equal to the expected value!", 65425.624, result, 0.001); - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); - } - - private void testCosineSimilarity(ScoreScript scoreScript) { - CosineSimilaritySparse function = new CosineSimilaritySparse(scoreScript, queryVector, field); - double result = function.cosineSimilaritySparse(); - assertEquals("cosineSimilaritySparse result is not equal to the expected value!", 0.790, result, 0.001); - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); - } - - private void testL1Norm(ScoreScript scoreScript) { - L1NormSparse function = new L1NormSparse(scoreScript, queryVector, field); - double result = function.l1normSparse(); - assertEquals("l1norm result is not equal to the expected value!", 485.184, result, 0.001); - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); - } - - private void testL2Norm(ScoreScript scoreScript) { - L2NormSparse function = new L2NormSparse(scoreScript, queryVector, field); - double result = function.l2normSparse(); - assertEquals("L2NormSparse result is not equal to the expected value!", 301.361, result, 0.001); - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); - } - - public void testSparseVectorMissingDimensions1() { - String field = "vector"; - - // Document vector's biggest dimension > query vector's biggest dimension - int[] docVectorDims = {2, 10, 50, 113, 4545, 4546}; - float[] docVectorValues = {230.0f, 300.33f, -34.8988f, 15.555f, -200.0f, 
11.5f}; - BytesRef encodedDocVector = VectorEncoderDecoder.encodeSparseVector( - Version.CURRENT, docVectorDims, docVectorValues, docVectorDims.length); - VectorScriptDocValues.SparseVectorScriptDocValues dvs = mock(VectorScriptDocValues.SparseVectorScriptDocValues.class); - when(dvs.getEncodedValue()).thenReturn(encodedDocVector); - - ScoreScript scoreScript = mock(ScoreScript.class); - when(scoreScript._getIndexVersion()).thenReturn(Version.CURRENT); - when(scoreScript.getDoc()).thenReturn(Collections.singletonMap(field, dvs)); - - Map queryVector = new HashMap<>() {{ - put("2", 0.5); - put("10", 111.3); - put("50", -13.0); - put("113", 14.8); - put("114", -20.5); - put("4545", -156.0); - }}; - - // test dotProductSparse - DotProductSparse docProductSparse = new DotProductSparse(scoreScript, queryVector, field); - double result = docProductSparse.dotProductSparse(); - assertEquals("dotProductSparse result is not equal to the expected value!", 65425.624, result, 0.001); - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); - - // test cosineSimilaritySparse - CosineSimilaritySparse cosineSimilaritySparse = new CosineSimilaritySparse(scoreScript, queryVector, field); - double result2 = cosineSimilaritySparse.cosineSimilaritySparse(); - assertEquals("cosineSimilaritySparse result is not equal to the expected value!", 0.786, result2, 0.001); - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); - - // test l1norm - L1NormSparse l1Norm = new L1NormSparse(scoreScript, queryVector, field); - double result3 = l1Norm.l1normSparse(); - assertEquals("l1normSparse result is not equal to the expected value!", 517.184, result3, 0.001); - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); - - // test L2NormSparse - L2NormSparse L2NormSparse = new L2NormSparse(scoreScript, queryVector, field); - double result4 = L2NormSparse.l2normSparse(); - assertEquals("L2NormSparse result is not equal to the expected value!", 302.277, result4, 0.001); - 
assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); - } - - public void testSparseVectorMissingDimensions2() { - String field = "vector"; - - // Document vector's biggest dimension < query vector's biggest dimension - int[] docVectorDims = {2, 10, 50, 113, 4545, 4546}; - float[] docVectorValues = {230.0f, 300.33f, -34.8988f, 15.555f, -200.0f, 11.5f}; - BytesRef encodedDocVector = VectorEncoderDecoder.encodeSparseVector( - Version.CURRENT, docVectorDims, docVectorValues, docVectorDims.length); - VectorScriptDocValues.SparseVectorScriptDocValues dvs = mock(VectorScriptDocValues.SparseVectorScriptDocValues.class); - when(dvs.getEncodedValue()).thenReturn(encodedDocVector); - - ScoreScript scoreScript = mock(ScoreScript.class); - when(scoreScript._getIndexVersion()).thenReturn(Version.CURRENT); - when(scoreScript.getDoc()).thenReturn(Collections.singletonMap(field, dvs)); - - Map queryVector = new HashMap<>() {{ - put("2", 0.5); - put("10", 111.3); - put("50", -13.0); - put("113", 14.8); - put("4545", -156.0); - put("4548", -20.5); - }}; - - // test dotProductSparse - DotProductSparse docProductSparse = new DotProductSparse(scoreScript, queryVector, field); - double result = docProductSparse.dotProductSparse(); - assertEquals("dotProductSparse result is not equal to the expected value!", 65425.624, result, 0.001); - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); - - // test cosineSimilaritySparse - CosineSimilaritySparse cosineSimilaritySparse = new CosineSimilaritySparse(scoreScript, queryVector, field); - double result2 = cosineSimilaritySparse.cosineSimilaritySparse(); - assertEquals("cosineSimilaritySparse result is not equal to the expected value!", 0.786, result2, 0.001); - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); - - // test l1norm - L1NormSparse l1Norm = new L1NormSparse(scoreScript, queryVector, field); - double result3 = l1Norm.l1normSparse(); - assertEquals("l1normSparse result is not equal to the expected value!", 
517.184, result3, 0.001); - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); - - // test L2NormSparse - L2NormSparse L2NormSparse = new L2NormSparse(scoreScript, queryVector, field); - double result4 = L2NormSparse.l2normSparse(); - assertEquals("L2NormSparse result is not equal to the expected value!", 302.277, result4, 0.001); - assertWarnings(SparseVectorFieldMapper.DEPRECATION_MESSAGE); - } -}