Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Support k-NN similarity functions in painless scripting #281

Merged
Merged
2 changes: 2 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ esplugin {
// zip file name and plugin name in ${elasticsearch.plugin.name} read by ES when plugin loading
description 'Open Distro for Elasticsearch KNN'
classname 'com.amazon.opendistroforelasticsearch.knn.plugin.KNNPlugin'
extendedPlugins = ['lang-painless']
}

tasks.named("integTest").configure {
Expand All @@ -107,6 +108,7 @@ task release(type: Copy, group: 'build') {
//****************************************************************************/
dependencies {
compileOnly "org.elasticsearch:elasticsearch:${es_version}"
compileOnly "org.elasticsearch.plugin:elasticsearch-scripting-painless-spi:${es_version}"
compile group: 'com.google.guava', name: 'failureaccess', version:'1.0.1'
compile group: 'com.google.guava', name: 'guava', version:'29.0-jre'
testImplementation "org.elasticsearch.test:framework:${es_version}"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package com.amazon.opendistroforelasticsearch.knn.index;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.elasticsearch.index.fielddata.LeafFieldData;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;

import java.io.IOException;

public class KNNVectorDVLeafFieldData implements LeafFieldData {
jmazanec15 marked this conversation as resolved.
Show resolved Hide resolved

private final LeafReader reader;
private final String fieldName;

public KNNVectorDVLeafFieldData(LeafReader reader, String fieldName) {
this.reader = reader;
this.fieldName = fieldName;
}

@Override
public void close() {
// no-op
}

@Override
public long ramBytesUsed() {
return 0; // unknown
}

@Override
public ScriptDocValues<float[]> getScriptValues() {
try {
BinaryDocValues values = DocValues.getBinary(reader, fieldName);
return new KNNVectorScriptDocValues(values, fieldName);
} catch (IOException e) {
throw new IllegalStateException("Cannot load doc values for knn vector field: "+fieldName, e);
}
}

@Override
public SortedBinaryDocValues getBytesValues() {
throw new UnsupportedOperationException("knn vector field '"+ fieldName + "' doesn't support sorting");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
Expand All @@ -41,13 +42,15 @@
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;

import static com.amazon.opendistroforelasticsearch.knn.index.KNNSettings.INDEX_KNN_ALGO_PARAM_EF_CONSTRUCTION_SETTING;
import static com.amazon.opendistroforelasticsearch.knn.index.KNNSettings.INDEX_KNN_ALGO_PARAM_M_SETTING;
Expand Down Expand Up @@ -207,6 +210,12 @@ public Query termQuery(Object value, QueryShardContext context) {
public int getDimension() {
return dimension;
}

@Override
public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier<SearchLookup> searchLookup) {
jmazanec15 marked this conversation as resolved.
Show resolved Hide resolved
failIfNoDocValues();
return new KNNVectorIndexFieldData.Builder(name(), CoreValuesSourceType.BYTES);
}
}

protected Explicit<Boolean> ignoreMalformed;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.amazon.opendistroforelasticsearch.knn.index;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.SortField;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
import org.elasticsearch.search.sort.BucketedSort;
import org.elasticsearch.search.sort.SortOrder;

public class KNNVectorIndexFieldData implements IndexFieldData<KNNVectorDVLeafFieldData> {

private final String fieldName;
private final ValuesSourceType valuesSourceType;

public KNNVectorIndexFieldData(String fieldName, ValuesSourceType valuesSourceType) {
this.fieldName = fieldName;
this.valuesSourceType = valuesSourceType;
}

@Override
public String getFieldName() {
return fieldName;
}

@Override
public ValuesSourceType getValuesSourceType() {
return valuesSourceType;
}

@Override
public KNNVectorDVLeafFieldData load(LeafReaderContext context) {
return new KNNVectorDVLeafFieldData(context.reader(), fieldName);
}

@Override
public KNNVectorDVLeafFieldData loadDirect(LeafReaderContext context) {
return load(context);
}

@Override
public SortField sortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) {
throw new UnsupportedOperationException("knn vector field doesn't support this operation");
}

@Override
public BucketedSort newBucketedSort(
BigArrays bigArrays, Object missingValue,
MultiValueMode sortMode, XFieldComparatorSource.Nested nested,
SortOrder sortOrder, DocValueFormat format, int bucketSize, BucketedSort.ExtraData extra) {
throw new UnsupportedOperationException("knn vector field doesn't support this operation");
}


public static class Builder implements IndexFieldData.Builder {

private final String name;
private final ValuesSourceType valuesSourceType;


public Builder(String name, ValuesSourceType valuesSourceType) {
this.name = name;
this.valuesSourceType = valuesSourceType;
}

@Override
public IndexFieldData<?> build(IndexFieldDataCache cache, CircuitBreakerService breakerService) {
return new KNNVectorIndexFieldData(name, valuesSourceType);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.knn.index;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.index.fielddata.ScriptDocValues;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;

public final class KNNVectorScriptDocValues extends ScriptDocValues<float[]> {
jmazanec15 marked this conversation as resolved.
Show resolved Hide resolved

private final BinaryDocValues binaryDocValues;
private final String fieldName;
private boolean docExists;

public KNNVectorScriptDocValues(BinaryDocValues binaryDocValues, String fieldName) {
this.binaryDocValues = binaryDocValues;
this.fieldName = fieldName;
}

@Override
public void setNextDocId(int docId) throws IOException {
if (binaryDocValues.advanceExact(docId)) {
docExists = true;
jmazanec15 marked this conversation as resolved.
Show resolved Hide resolved
return;
}
docExists = false;
}

public float[] getValue() {
if (!docExists) {
String errorMessage = String.format(
"One of the document doesn't have a value for field '%s'. " +
"This can be avoided by checking if a document has a value for the field or not " +
"by doc['%s'].size() == 0 ? 0 : {your script}",fieldName,fieldName);
throw new IllegalStateException(errorMessage);
}
try {
BytesRef value = binaryDocValues.binaryValue();
ByteArrayInputStream byteStream = new ByteArrayInputStream(value.bytes, value.offset, value.length);
ObjectInputStream objectStream = new ObjectInputStream(byteStream);
return (float[]) objectStream.readObject();
} catch (IOException e) {
throw ExceptionsHelper.convertToElastic(e);
} catch (ClassNotFoundException e) {
throw new RuntimeException((e));
}
}

@Override
public int size() {
return docExists ? 1 : 0;
}

@Override
public float[] get(int i) {
throw new UnsupportedOperationException("knn vector does not support this operation");
}
}
Loading