opendistro-for-elasticsearch · VijayanB · Dec 29, 2020 · Dec 11, 2020 · Dec 11, 2020 · Dec 11, 2020
diff --git a/build.gradle b/build.gradle
@@ -87,6 +87,7 @@ esplugin {
     // zip file name and plugin name in ${elasticsearch.plugin.name} read by ES when plugin loading
     description 'Open Distro for Elasticsearch KNN'
     classname 'com.amazon.opendistroforelasticsearch.knn.plugin.KNNPlugin'
+    extendedPlugins = ['lang-painless']
 }
 
 tasks.named("integTest").configure {
@@ -107,6 +108,7 @@ task release(type: Copy, group: 'build') {
 //****************************************************************************/
 dependencies {
     compileOnly "org.elasticsearch:elasticsearch:${es_version}"
+    compileOnly "org.elasticsearch.plugin:elasticsearch-scripting-painless-spi:${es_version}"
     compile group: 'com.google.guava', name: 'failureaccess', version:'1.0.1'
     compile group: 'com.google.guava', name: 'guava', version:'29.0-jre'
     testImplementation "org.elasticsearch.test:framework:${es_version}"

diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/knn/index/KNNVectorDVLeafFieldData.java b/src/main/java/com/amazon/opendistroforelasticsearch/knn/index/KNNVectorDVLeafFieldData.java
@@ -0,0 +1,49 @@
+package com.amazon.opendistroforelasticsearch.knn.index;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.LeafReader;
+import org.elasticsearch.index.fielddata.LeafFieldData;
+import org.elasticsearch.index.fielddata.ScriptDocValues;
+import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
+
+import java.io.IOException;
+
+public class KNNVectorDVLeafFieldData implements LeafFieldData {
+
+    private final LeafReader reader;
+    private final String fieldName;
+
+    public KNNVectorDVLeafFieldData(LeafReader reader, String fieldName) {
+        this.reader = reader;
+        this.fieldName = fieldName;
+    }
+
+    @Override
+    public void close() {
+        // no-op
+    }
+
+    @Override
+    public long ramBytesUsed() {
+        return 0; // unknown
+    }
+
+    @Override
+    public ScriptDocValues<float[]> getScriptValues() {
+        try {
+            final BinaryDocValues values = reader.getBinaryDocValues(fieldName);
+            if (values == null) {
+                throw new IllegalStateException("Binary Doc values not enabled for the field " + fieldName
+                        + " Please ensure the field type is knn_vector in mappings for this field");
+            }
+            return new KNNVectorScriptDocValues(values);
+        } catch (IOException e) {
+            throw new IllegalStateException("Cannot load doc values for vector field!", e);
+        }
+    }
+
+    @Override
+    public SortedBinaryDocValues getBytesValues() {
+        throw new UnsupportedOperationException("knn vector field doesn't support sorting");
+    }
+}
diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/knn/index/KNNVectorFieldMapper.java b/src/main/java/com/amazon/opendistroforelasticsearch/knn/index/KNNVectorFieldMapper.java
@@ -30,6 +30,7 @@
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.Mapper;
@@ -41,13 +42,15 @@
 import org.elasticsearch.index.mapper.ValueFetcher;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.index.query.QueryShardException;
+import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
 import org.elasticsearch.search.lookup.SearchLookup;
 
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
+import java.util.function.Supplier;
 
 import static com.amazon.opendistroforelasticsearch.knn.index.KNNSettings.INDEX_KNN_ALGO_PARAM_EF_CONSTRUCTION_SETTING;
 import static com.amazon.opendistroforelasticsearch.knn.index.KNNSettings.INDEX_KNN_ALGO_PARAM_M_SETTING;
@@ -207,6 +210,12 @@ public Query termQuery(Object value, QueryShardContext context) {
         public int getDimension() {
             return dimension;
         }
+
+        @Override
+        public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier<SearchLookup> searchLookup) {
+            failIfNoDocValues();
+            return new KNNVectorIndexFieldData.Builder(name(), CoreValuesSourceType.BYTES);
+        }
     }
 
     protected Explicit<Boolean> ignoreMalformed;

diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/knn/index/KNNVectorIndexFieldData.java b/src/main/java/com/amazon/opendistroforelasticsearch/knn/index/KNNVectorIndexFieldData.java
@@ -0,0 +1,89 @@
+/*
+ *   Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License").
+ *   You may not use this file except in compliance with the License.
+ *   A copy of the License is located at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   or in the "license" file accompanying this file. This file is distributed
+ *   on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ *   express or implied. See the License for the specific language governing
+ *   permissions and limitations under the License.
+ */
+package com.amazon.opendistroforelasticsearch.knn.index;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.SortField;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.index.fielddata.IndexFieldDataCache;
+import org.elasticsearch.indices.breaker.CircuitBreakerService;
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.MultiValueMode;
+import org.elasticsearch.search.aggregations.support.ValuesSourceType;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+public class KNNVectorIndexFieldData implements IndexFieldData<KNNVectorDVLeafFieldData> {
+
+    private final String fieldName;
+    private final ValuesSourceType valuesSourceType;
+
+    public KNNVectorIndexFieldData(String fieldName, ValuesSourceType valuesSourceType) {
+        this.fieldName = fieldName;
+        this.valuesSourceType = valuesSourceType;
+    }
+
+    @Override
+    public String getFieldName() {
+        return fieldName;
+    }
+
+    @Override
+    public ValuesSourceType getValuesSourceType() {
+        return valuesSourceType;
+    }
+
+    @Override
+    public KNNVectorDVLeafFieldData load(LeafReaderContext context) {
+        return new KNNVectorDVLeafFieldData(context.reader(), fieldName);
+    }
+
+    @Override
+    public KNNVectorDVLeafFieldData loadDirect(LeafReaderContext context) {
+        return load(context);
+    }
+
+    @Override
+    public SortField sortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) {
+        throw new UnsupportedOperationException("knn vector field doesn't support this operation");
+    }
+
+    @Override
+    public BucketedSort newBucketedSort(
+            BigArrays bigArrays, Object missingValue,
+            MultiValueMode sortMode, XFieldComparatorSource.Nested nested,
+            SortOrder sortOrder, DocValueFormat format, int bucketSize, BucketedSort.ExtraData extra) {
+        throw new UnsupportedOperationException("knn vector field doesn't support this operation");
+    }
+
+
+    public static class Builder implements IndexFieldData.Builder {
+
+        private final String name;
+        private final ValuesSourceType valuesSourceType;
+
+
+        public Builder(String name, ValuesSourceType valuesSourceType) {
+            this.name = name;
+            this.valuesSourceType = valuesSourceType;
+        }
+
+        @Override
+        public IndexFieldData<?> build(IndexFieldDataCache cache, CircuitBreakerService breakerService) {
+            return new KNNVectorIndexFieldData(name, valuesSourceType);
+        }
+    }
+}
diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/knn/index/KNNVectorScriptDocValues.java b/src/main/java/com/amazon/opendistroforelasticsearch/knn/index/KNNVectorScriptDocValues.java
@@ -0,0 +1,72 @@
+/*
+ *   Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License").
+ *   You may not use this file except in compliance with the License.
+ *   A copy of the License is located at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   or in the "license" file accompanying this file. This file is distributed
+ *   on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ *   express or implied. See the License for the specific language governing
+ *   permissions and limitations under the License.
+ */
+
+package com.amazon.opendistroforelasticsearch.knn.index;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.index.fielddata.ScriptDocValues;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+
+// This class is thread safe, since docExists is synchronized at an instance level
+public final class KNNVectorScriptDocValues extends ScriptDocValues<float[]> {
+
+    private final BinaryDocValues binaryDocValues;
+    private boolean docExists;
+
+    public KNNVectorScriptDocValues(BinaryDocValues binaryDocValues) {
+        this.binaryDocValues = binaryDocValues;
+    }
+
+    @Override
+    public void setNextDocId(int docId) throws IOException {
+        synchronized (this) {
+            if (binaryDocValues.advanceExact(docId)) {
+                docExists = true;
+                return;
+            }
+            docExists = false;
+        }
+    }
+
+    public synchronized float[] getValue() throws IOException {
+        if (!docExists) {
+            throw new IllegalArgumentException("no value found for the corresponding doc ID");
+        }
+        BytesRef value = binaryDocValues.binaryValue();
+        ByteArrayInputStream byteStream = new ByteArrayInputStream(value.bytes, value.offset, value.length);
+        ObjectInputStream objectStream = new ObjectInputStream(byteStream);
+        try {
+            return (float[]) objectStream.readObject();
+        } catch (ClassNotFoundException e) {
+            throw new RuntimeException((e));
+        }
+    }
+
+    @Override
+    public int size() {
+        synchronized (this) {
+            return docExists ? 1 : 0;
+        }
+    }
+
+    @Override
+    public float[] get(int i) {
+        throw new UnsupportedOperationException("knn vector does not support this operation");
+    }
+}