Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

k-NN plugin support for Elasticsearch version 7.10.0 #271

Merged
merged 5 commits into from
Nov 25, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions build-tools/knnplugin-coverage.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ task dummyIntegTest(type: Test) {
}
}

integTest.runner {
integTest {
systemProperty 'jacoco.dir', "${jacocoDir}"
}

Expand All @@ -71,7 +71,7 @@ jacocoTestReport {

allprojects{
afterEvaluate {
jacocoTestReport.dependsOn integTest.runner
jacocoTestReport.dependsOn integTest

testClusters.integTest {
jvmArgs " ${dummyIntegTest.jacoco.getAsJvmArg()}".replace('javaagent:','javaagent:/')
Expand Down
52 changes: 29 additions & 23 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

buildscript {
ext {
es_version = System.getProperty("es.version", "7.9.1")
es_version = System.getProperty("es.version", "7.10.0")
es_group = "org.elasticsearch"
distribution = 'oss-zip'
}
Expand Down Expand Up @@ -44,6 +44,7 @@ plugins {
}

apply plugin: 'elasticsearch.esplugin'
apply plugin: 'elasticsearch.rest-test'

def usingRemoteCluster = System.properties.containsKey('tests.rest.cluster') || System.properties.containsKey('tests.cluster')
def usingMultiNode = project.properties.containsKey('numNodes')
Expand Down Expand Up @@ -88,6 +89,10 @@ esplugin {
classname 'com.amazon.opendistroforelasticsearch.knn.plugin.KNNPlugin'
}

tasks.named("integTest").configure {
it.dependsOn(project.tasks.named("bundlePlugin"))
}

task release(type: Copy, group: 'build') {
dependsOn allprojects*.tasks.build
from(zipTree(project.tasks.bundlePlugin.outputs.files.getSingleFile()))
Expand All @@ -110,6 +115,7 @@ dependencies {
licenseHeaders.enabled = false
dependencyLicenses.enabled = false
thirdPartyAudit.enabled = false
loggerUsageCheck.enabled = false

def es_tmp_dir = rootProject.file('build/private/es_tmp').absoluteFile
es_tmp_dir.mkdirs()
Expand All @@ -136,36 +142,35 @@ integTest {
if (integTestDependOnJniLib) {
dependsOn buildJniLib
}
runner {
systemProperty 'tests.security.manager', 'false'
systemProperty 'java.io.tmpdir', es_tmp_dir.absolutePath
systemProperty "java.library.path", "$rootDir/jni/release"
// allows integration test classes to access test resource from project root path
systemProperty('project.root', project.rootDir.absolutePath)

doFirst {
// Tell the test JVM if the cluster JVM is running under a debugger so that tests can
// use longer timeouts for requests.
def isDebuggingCluster = getDebug() || System.getProperty("test.debug") != null
systemProperty 'cluster.debug', isDebuggingCluster
// Set number of nodes system property to be used in tests
systemProperty 'cluster.number_of_nodes', "${_numNodes}"
// There seems to be an issue when running multi node run or integ tasks with unicast_hosts
// not being written, the waitForAllConditions ensures it's written
getClusters().forEach { cluster ->
systemProperty 'tests.security.manager', 'false'
systemProperty 'java.io.tmpdir', es_tmp_dir.absolutePath
systemProperty "java.library.path", "$rootDir/jni/release"
// allows integration test classes to access test resource from project root path
systemProperty('project.root', project.rootDir.absolutePath)

doFirst {
// Tell the test JVM if the cluster JVM is running under a debugger so that tests can
// use longer timeouts for requests.
def isDebuggingCluster = getDebug() || System.getProperty("test.debug") != null
systemProperty 'cluster.debug', isDebuggingCluster
// Set number of nodes system property to be used in tests
systemProperty 'cluster.number_of_nodes', "${_numNodes}"
// There seems to be an issue when running multi node run or integ tasks with unicast_hosts
// not being written, the waitForAllConditions ensures it's written
getClusters().forEach { cluster ->
cluster.waitForAllConditions()
}
}
}

// The -Ddebug.es option makes the cluster debuggable; this makes the tests debuggable
if (System.getProperty("test.debug") != null) {
jvmArgs '-agentlib:jdwp=transport=dt_socket,server=n,suspend=y,address=8000'
}
// The -Ddebug.es option makes the cluster debuggable; this makes the tests debuggable
if (System.getProperty("test.debug") != null) {
jvmArgs '-agentlib:jdwp=transport=dt_socket,server=n,suspend=y,address=8000'
}
}

testClusters.integTest {
testDistribution = "OSS"
plugin(project.tasks.bundlePlugin.archiveFile)
// Cluster shrink exception thrown if we try to set numberOfNodes to 1, so only apply if > 1
if (_numNodes > 1) numberOfNodes = _numNodes
// When running integration tests it doesn't forward the --debug-jvm to the cluster anymore
Expand All @@ -182,6 +187,7 @@ testClusters.integTest {
}

run {
useCluster project.testClusters.integTest
dependsOn buildJniLib
doFirst {
// There seems to be an issue when running multi node run or integ tasks with unicast_hosts
Expand Down
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
# permissions and limitations under the License.
#

version=1.6
version=1.11.0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this for?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually this variable is not in use. It was pointing to older version. So i updated to latest version. This variable could be used in build.gradle file.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see makes sense.

2 changes: 1 addition & 1 deletion gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@

distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-6.5-all.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-6.6.1-all.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,18 @@
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParametrizedFieldMapper;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.mapper.TypeParsers;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -78,9 +79,9 @@ public static class Builder extends ParametrizedFieldMapper.Builder {
m -> toType(m).stored, false);
private final Parameter<Boolean> hasDocValues = Parameter.boolParam("doc_values", false,
m -> toType(m).hasDocValues, true);
private final Parameter<Integer> dimension = new Parameter<>(KNNConstants.DIMENSION, false, -1,
(n, o) -> {
int value = XContentMapValues.nodeIntegerValue(o);
private final Parameter<Integer> dimension = new Parameter<>(KNNConstants.DIMENSION, false, () -> -1,
(n, c, o) -> {
int value = (o == null ? null : XContentMapValues.nodeIntegerValue(o));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if o is null, then i don't think we can assign null to value since it is primitive.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. Fixed.

if (value > MAX_DIMENSION) {
throw new IllegalArgumentException("Dimension value cannot be greater than " +
MAX_DIMENSION + " for vector: " + name);
Expand All @@ -92,9 +93,7 @@ public static class Builder extends ParametrizedFieldMapper.Builder {
}
return value;
}, m -> toType(m).dimension);

private final Parameter<Map<String, String>> meta = new Parameter<>("meta", true,
Collections.emptyMap(), TypeParsers::parseMeta, m -> m.fieldType().meta());
private final Parameter<Map<String, String>> meta = Parameter.metaParam();

public Builder(String name) {
super(name);
Expand Down Expand Up @@ -177,10 +176,15 @@ public static class KNNVectorFieldType extends MappedFieldType {
int dimension;

public KNNVectorFieldType(String name, Map<String, String> meta, int dimension) {
super(name, false, true, TextSearchInfo.NONE, meta);
super(name, false, false, true, TextSearchInfo.NONE, meta);
this.dimension = dimension;
}

@Override
public ValueFetcher valueFetcher(MapperService mapperService, SearchLookup searchLookup, String format) {
throw new UnsupportedOperationException("KNN Vector do not support fields search");
}

@Override
public String typeName() {
return CONTENT_TYPE;
Expand Down Expand Up @@ -245,7 +249,6 @@ public static class Defaults {
}
}


@Override
protected String contentType() {
return CONTENT_TYPE;
Expand Down Expand Up @@ -346,6 +349,5 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults,
if (includeDefaults || ignoreMalformed.explicit()) {
builder.field(Names.IGNORE_MALFORMED, ignoreMalformed.value());
}
builder.field(KNNConstants.DIMENSION, fieldType().dimension);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copyright 2020

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. Thanks

*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.knn.index.codec.KNN87Codec;

import com.amazon.opendistroforelasticsearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat;
import com.amazon.opendistroforelasticsearch.knn.index.codec.KNN80Codec.KNN80DocValuesFormat;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;

/**
* Extends the Codec to support a new file format for KNN index
* based on the mappings.
*
*/
public final class KNN87Codec extends Codec {

private static final Logger logger = LogManager.getLogger(KNN87Codec.class);
private final DocValuesFormat docValuesFormat;
private final DocValuesFormat perFieldDocValuesFormat;
private final CompoundFormat compoundFormat;
private Codec lucene87Codec;
private PostingsFormat postingsFormat = null;

public static final String KNN_87 = "KNN87Codec";
public static final String LUCENE_87 = "Lucene87"; // Lucene Codec to be used

public KNN87Codec() {
super(KNN_87);
// Note that DocValuesFormat can use old Codec's DocValuesFormat. For instance Lucene84 uses Lucene80
// DocValuesFormat. Refer to defaultDVFormat in LuceneXXCodec.java to find out which version it uses
this.docValuesFormat = new KNN80DocValuesFormat();
this.perFieldDocValuesFormat = new PerFieldDocValuesFormat() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
return docValuesFormat;
}
};
this.compoundFormat = new KNN80CompoundFormat();
}

/*
* This function returns the Codec.
*/
public Codec getDelegatee() {
if (lucene87Codec == null)
lucene87Codec = Codec.forName(LUCENE_87);
return lucene87Codec;
}

@Override
public DocValuesFormat docValuesFormat() {
return this.perFieldDocValuesFormat;
}

/*
* For all the below functions, we could have extended FilterCodec, but this brings
* SPI related issues while loading Codec in the tests. So fall back to traditional
* approach of manually overriding.
*/


public void setPostingsFormat(PostingsFormat postingsFormat) {
this.postingsFormat = postingsFormat;
}

@Override
public PostingsFormat postingsFormat() {
if (this.postingsFormat == null) {
return getDelegatee().postingsFormat();
}
return this.postingsFormat;
}

@Override
public StoredFieldsFormat storedFieldsFormat() {
return getDelegatee().storedFieldsFormat();
}

@Override
public TermVectorsFormat termVectorsFormat() {
return getDelegatee().termVectorsFormat();
}

@Override
public FieldInfosFormat fieldInfosFormat() {
return getDelegatee().fieldInfosFormat();
}

@Override
public SegmentInfoFormat segmentInfoFormat() {
return getDelegatee().segmentInfoFormat();
}

@Override
public NormsFormat normsFormat() {
return getDelegatee().normsFormat();
}

@Override
public LiveDocsFormat liveDocsFormat() {
return getDelegatee().liveDocsFormat();
}

@Override
public CompoundFormat compoundFormat() {
return this.compoundFormat;
}

@Override
public PointsFormat pointsFormat() {
return getDelegatee().pointsFormat();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@

package com.amazon.opendistroforelasticsearch.knn.plugin;

import com.amazon.opendistroforelasticsearch.knn.index.codec.KNN86Codec.KNN86Codec;
import com.amazon.opendistroforelasticsearch.knn.index.codec.KNN87Codec.KNN87Codec;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.elasticsearch.index.codec.CodecService;

import static com.amazon.opendistroforelasticsearch.knn.index.codec.KNN86Codec.KNN86Codec.KNN_86;
import static com.amazon.opendistroforelasticsearch.knn.index.codec.KNN87Codec.KNN87Codec.KNN_87;

/**
* KNNCodecService to inject the right KNNCodec version
Expand All @@ -37,18 +37,18 @@ class KNNCodecService extends CodecService {
* return the KNN Codec
*
* @param name dummy name
* @return KNN86Codec
* @return Latest KNN Codec
*/
@Override
public Codec codec(String name) {
Codec codec = Codec.forName(KNN_86);
Codec codec = Codec.forName(KNN_87);
if (codec == null) {
throw new IllegalArgumentException("failed to find codec [" + name + "]");
}
return codec;
}

public void setPostingsFormat(PostingsFormat postingsFormat) {
((KNN86Codec)codec("")).setPostingsFormat(postingsFormat);
((KNN87Codec)codec("")).setPostingsFormat(postingsFormat);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class KNNEngineFactory implements EngineFactory {
@Override
public Engine newReadWriteEngine(EngineConfig config) {
codecService.setPostingsFormat(config.getCodec().postingsFormat());
EngineConfig engineConfig = new EngineConfig(config.getShardId(), config.getAllocationId(),
EngineConfig engineConfig = new EngineConfig(config.getShardId(),
config.getThreadPool(), config.getIndexSettings(), config.getWarmer(), config.getStore(),
config.getMergePolicy(), config.getAnalyzer(), config.getSimilarity(), codecService,
config.getEventListener(), config.getQueryCache(), config.getQueryCachingPolicy(),
Expand Down
Loading