k-NN plugin support for Elasticsearch version 7.10.0 (#271)
* ES 7.10 support
vamshin authored Nov 25, 2020
1 parent a174fc7 · commit c517644
Showing 13 changed files with 208 additions and 54 deletions.
4 changes: 2 additions & 2 deletions build-tools/knnplugin-coverage.gradle
@@ -51,7 +51,7 @@ task dummyIntegTest(type: Test) {
}
}

- integTest.runner {
+ integTest {
systemProperty 'jacoco.dir', "${jacocoDir}"
}

@@ -71,7 +71,7 @@ jacocoTestReport {

allprojects{
afterEvaluate {
- jacocoTestReport.dependsOn integTest.runner
+ jacocoTestReport.dependsOn integTest

testClusters.integTest {
jvmArgs " ${dummyIntegTest.jacoco.getAsJvmArg()}".replace('javaagent:','javaagent:/')
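Context for the change above (not stated in the commit itself): with the Elasticsearch 7.10 build tooling, integTest is a plain Test task rather than a task with a nested runner, so settings previously applied through integTest.runner { ... } now go on the task directly. A minimal before/after sketch, assuming the standard build-tools setup used here:

    // 7.9.x build-tools: test settings lived on a nested runner
    integTest.runner {
        systemProperty 'jacoco.dir', "${jacocoDir}"
    }

    // 7.10 build-tools: the same settings apply to the task itself
    integTest {
        systemProperty 'jacoco.dir', "${jacocoDir}"
    }
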
52 changes: 29 additions & 23 deletions build.gradle
@@ -15,7 +15,7 @@

buildscript {
ext {
- es_version = System.getProperty("es.version", "7.9.1")
+ es_version = System.getProperty("es.version", "7.10.0")
es_group = "org.elasticsearch"
distribution = 'oss-zip'
}
@@ -44,6 +44,7 @@ plugins {
}

apply plugin: 'elasticsearch.esplugin'
+ apply plugin: 'elasticsearch.rest-test'

def usingRemoteCluster = System.properties.containsKey('tests.rest.cluster') || System.properties.containsKey('tests.cluster')
def usingMultiNode = project.properties.containsKey('numNodes')
@@ -88,6 +89,10 @@ esplugin {
classname 'com.amazon.opendistroforelasticsearch.knn.plugin.KNNPlugin'
}

tasks.named("integTest").configure {
it.dependsOn(project.tasks.named("bundlePlugin"))
}

task release(type: Copy, group: 'build') {
dependsOn allprojects*.tasks.build
from(zipTree(project.tasks.bundlePlugin.outputs.files.getSingleFile()))
@@ -110,6 +115,7 @@ dependencies {
licenseHeaders.enabled = false
dependencyLicenses.enabled = false
thirdPartyAudit.enabled = false
+ loggerUsageCheck.enabled = false

def es_tmp_dir = rootProject.file('build/private/es_tmp').absoluteFile
es_tmp_dir.mkdirs()
@@ -136,36 +142,35 @@ integTest {
if (integTestDependOnJniLib) {
dependsOn buildJniLib
}
- runner {
-     systemProperty 'tests.security.manager', 'false'
-     systemProperty 'java.io.tmpdir', es_tmp_dir.absolutePath
-     systemProperty "java.library.path", "$rootDir/jni/release"
-     // allows integration test classes to access test resource from project root path
-     systemProperty('project.root', project.rootDir.absolutePath)
-
-     doFirst {
-         // Tell the test JVM if the cluster JVM is running under a debugger so that tests can
-         // use longer timeouts for requests.
-         def isDebuggingCluster = getDebug() || System.getProperty("test.debug") != null
-         systemProperty 'cluster.debug', isDebuggingCluster
-         // Set number of nodes system property to be used in tests
-         systemProperty 'cluster.number_of_nodes', "${_numNodes}"
-         // There seems to be an issue when running multi node run or integ tasks with unicast_hosts
-         // not being written, the waitForAllConditions ensures it's written
-         getClusters().forEach { cluster ->
+ systemProperty 'tests.security.manager', 'false'
+ systemProperty 'java.io.tmpdir', es_tmp_dir.absolutePath
+ systemProperty "java.library.path", "$rootDir/jni/release"
+ // allows integration test classes to access test resource from project root path
+ systemProperty('project.root', project.rootDir.absolutePath)
+
+ doFirst {
+     // Tell the test JVM if the cluster JVM is running under a debugger so that tests can
+     // use longer timeouts for requests.
+     def isDebuggingCluster = getDebug() || System.getProperty("test.debug") != null
+     systemProperty 'cluster.debug', isDebuggingCluster
+     // Set number of nodes system property to be used in tests
+     systemProperty 'cluster.number_of_nodes', "${_numNodes}"
+     // There seems to be an issue when running multi node run or integ tasks with unicast_hosts
+     // not being written, the waitForAllConditions ensures it's written
+     getClusters().forEach { cluster ->
cluster.waitForAllConditions()
}
}
}

-     // The -Ddebug.es option makes the cluster debuggable; this makes the tests debuggable
-     if (System.getProperty("test.debug") != null) {
-         jvmArgs '-agentlib:jdwp=transport=dt_socket,server=n,suspend=y,address=8000'
-     }
+ // The -Ddebug.es option makes the cluster debuggable; this makes the tests debuggable
+ if (System.getProperty("test.debug") != null) {
+     jvmArgs '-agentlib:jdwp=transport=dt_socket,server=n,suspend=y,address=8000'
+ }
}

testClusters.integTest {
testDistribution = "OSS"
plugin(project.tasks.bundlePlugin.archiveFile)
// Cluster shrink exception thrown if we try to set numberOfNodes to 1, so only apply if > 1
if (_numNodes > 1) numberOfNodes = _numNodes
// When running integration tests it doesn't forward the --debug-jvm to the cluster anymore
@@ -182,6 +187,7 @@
}

run {
+ useCluster project.testClusters.integTest
dependsOn buildJniLib
doFirst {
// There seems to be an issue when running multi node run or integ tasks with unicast_hosts
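Taken together, the build.gradle changes above migrate the build to the 7.10 build-tools: elasticsearch.rest-test is what now supplies the integTest task, and because that task no longer depends on the plugin ZIP implicitly, the dependency on bundlePlugin has to be wired by hand. A condensed sketch of the resulting wiring (mine, assuming no other plugins interfere):

    apply plugin: 'elasticsearch.esplugin'
    apply plugin: 'elasticsearch.rest-test'   // supplies integTest on 7.10 build-tools

    // integTest needs the assembled plugin ZIP, so declare the dependency explicitly
    tasks.named("integTest").configure {
        it.dependsOn(project.tasks.named("bundlePlugin"))
    }
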
2 changes: 1 addition & 1 deletion gradle.properties
@@ -13,4 +13,4 @@
# permissions and limitations under the License.
#

- version=1.6
+ version=1.11.0
2 changes: 1 addition & 1 deletion gradle/wrapper/gradle-wrapper.properties
@@ -15,6 +15,6 @@

distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
- distributionUrl=https\://services.gradle.org/distributions/gradle-6.5-all.zip
+ distributionUrl=https\://services.gradle.org/distributions/gradle-6.6.1-all.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
KNNVectorFieldMapper.java
@@ -34,17 +34,18 @@
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
+ import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParametrizedFieldMapper;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.TextSearchInfo;
- import org.elasticsearch.index.mapper.TypeParsers;
+ import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.QueryShardException;
+ import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
- import java.util.Collections;
import java.util.List;
import java.util.Map;

@@ -78,8 +79,11 @@ public static class Builder extends ParametrizedFieldMapper.Builder {
m -> toType(m).stored, false);
private final Parameter<Boolean> hasDocValues = Parameter.boolParam("doc_values", false,
m -> toType(m).hasDocValues, true);
- private final Parameter<Integer> dimension = new Parameter<>(KNNConstants.DIMENSION, false, -1,
-     (n, o) -> {
+ private final Parameter<Integer> dimension = new Parameter<>(KNNConstants.DIMENSION, false, () -> -1,
+     (n, c, o) -> {
+         if (o == null) {
+             throw new IllegalArgumentException("Dimension cannot be null");
+         }
int value = XContentMapValues.nodeIntegerValue(o);
if (value > MAX_DIMENSION) {
throw new IllegalArgumentException("Dimension value cannot be greater than " +
@@ -92,9 +96,7 @@ public static class Builder extends ParametrizedFieldMapper.Builder {
}
return value;
}, m -> toType(m).dimension);

- private final Parameter<Map<String, String>> meta = new Parameter<>("meta", true,
-     Collections.emptyMap(), TypeParsers::parseMeta, m -> m.fieldType().meta());
+ private final Parameter<Map<String, String>> meta = Parameter.metaParam();

public Builder(String name) {
super(name);
@@ -177,10 +179,15 @@ public static class KNNVectorFieldType extends MappedFieldType {
int dimension;

public KNNVectorFieldType(String name, Map<String, String> meta, int dimension) {
- super(name, false, true, TextSearchInfo.NONE, meta);
+ super(name, false, false, true, TextSearchInfo.NONE, meta);
this.dimension = dimension;
}

+ @Override
+ public ValueFetcher valueFetcher(MapperService mapperService, SearchLookup searchLookup, String format) {
+     throw new UnsupportedOperationException("KNN Vector do not support fields search");
+ }

@Override
public String typeName() {
return CONTENT_TYPE;
@@ -249,7 +256,6 @@ public static class Defaults {
}
}


@Override
protected String contentType() {
return CONTENT_TYPE;
@@ -350,6 +356,5 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults,
if (includeDefaults || ignoreMalformed.explicit()) {
builder.field(Names.IGNORE_MALFORMED, ignoreMalformed.value());
}
- builder.field(KNNConstants.DIMENSION, fieldType().dimension);
}
}
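The mapper changes above track several 7.10 mapper-API revisions: Parameter now takes its default as a Supplier and hands the parser a context argument, Parameter.metaParam() replaces the hand-rolled meta parameter, the MappedFieldType constructor gains an extra isStored flag, and valueFetcher becomes a mandatory override. A compilable sketch of the new Parameter shape, using a hypothetical my_dimension parameter (illustrative names, not the plugin's):

    import org.elasticsearch.common.xcontent.support.XContentMapValues;
    import org.elasticsearch.index.mapper.ParametrizedFieldMapper.Parameter;

    import java.util.Map;

    class ParameterSketch {
        // 7.9:  new Parameter<>(name, updateable, -1,       (n, o)    -> parse(o), initializer)
        // 7.10: new Parameter<>(name, updateable, () -> -1, (n, c, o) -> parse(o), initializer)
        private final Parameter<Integer> myDimension = new Parameter<>(
                "my_dimension", false,
                () -> -1,                                           // default now supplied lazily
                (n, c, o) -> XContentMapValues.nodeIntegerValue(o), // parser now receives the parser context c
                m -> -1);                                           // initializer reads the value off an existing mapper

        // "meta" no longer needs a hand-rolled Parameter:
        private final Parameter<Map<String, String>> meta = Parameter.metaParam();
    }
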
KNN87Codec.java (new file)
@@ -0,0 +1,138 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.knn.index.codec.KNN87Codec;

import com.amazon.opendistroforelasticsearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat;
import com.amazon.opendistroforelasticsearch.knn.index.codec.KNN80Codec.KNN80DocValuesFormat;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;

/**
* Extends the Codec to support a new file format for KNN index
* based on the mappings.
*
*/
public final class KNN87Codec extends Codec {

private static final Logger logger = LogManager.getLogger(KNN87Codec.class);
private final DocValuesFormat docValuesFormat;
private final DocValuesFormat perFieldDocValuesFormat;
private final CompoundFormat compoundFormat;
private Codec lucene87Codec;
private PostingsFormat postingsFormat = null;

public static final String KNN_87 = "KNN87Codec";
public static final String LUCENE_87 = "Lucene87"; // Lucene Codec to be used

public KNN87Codec() {
super(KNN_87);
// Note that DocValuesFormat can use old Codec's DocValuesFormat. For instance Lucene84 uses Lucene80
// DocValuesFormat. Refer to defaultDVFormat in LuceneXXCodec.java to find out which version it uses
this.docValuesFormat = new KNN80DocValuesFormat();
this.perFieldDocValuesFormat = new PerFieldDocValuesFormat() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
return docValuesFormat;
}
};
this.compoundFormat = new KNN80CompoundFormat();
}

/*
* This function returns the Codec.
*/
public Codec getDelegatee() {
if (lucene87Codec == null)
lucene87Codec = Codec.forName(LUCENE_87);
return lucene87Codec;
}

@Override
public DocValuesFormat docValuesFormat() {
return this.perFieldDocValuesFormat;
}

/*
* For all the below functions, we could have extended FilterCodec, but this brings
* SPI related issues while loading Codec in the tests. So fall back to traditional
* approach of manually overriding.
*/


public void setPostingsFormat(PostingsFormat postingsFormat) {
this.postingsFormat = postingsFormat;
}

@Override
public PostingsFormat postingsFormat() {
if (this.postingsFormat == null) {
return getDelegatee().postingsFormat();
}
return this.postingsFormat;
}

@Override
public StoredFieldsFormat storedFieldsFormat() {
return getDelegatee().storedFieldsFormat();
}

@Override
public TermVectorsFormat termVectorsFormat() {
return getDelegatee().termVectorsFormat();
}

@Override
public FieldInfosFormat fieldInfosFormat() {
return getDelegatee().fieldInfosFormat();
}

@Override
public SegmentInfoFormat segmentInfoFormat() {
return getDelegatee().segmentInfoFormat();
}

@Override
public NormsFormat normsFormat() {
return getDelegatee().normsFormat();
}

@Override
public LiveDocsFormat liveDocsFormat() {
return getDelegatee().liveDocsFormat();
}

@Override
public CompoundFormat compoundFormat() {
return this.compoundFormat;
}

@Override
public PointsFormat pointsFormat() {
return getDelegatee().pointsFormat();
}
}
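A registration detail worth noting (an assumption about the wider change set, not visible in this hunk): Codec.forName resolves codecs through Lucene's SPI, so the new class must also be listed in META-INF/services/org.apache.lucene.codecs.Codec for the name-based lookup to succeed. A minimal sketch of that lookup:

    import org.apache.lucene.codecs.Codec;

    public class CodecLookupSketch {
        public static void main(String[] args) {
            // Resolved via Java SPI; the key is the name this codec passed to
            // super() in its constructor, i.e. "KNN87Codec".
            Codec codec = Codec.forName("KNN87Codec");
            System.out.println(codec.getName());
        }
    }
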
KNNCodecService.java
@@ -16,12 +16,12 @@

package com.amazon.opendistroforelasticsearch.knn.plugin;

- import com.amazon.opendistroforelasticsearch.knn.index.codec.KNN86Codec.KNN86Codec;
+ import com.amazon.opendistroforelasticsearch.knn.index.codec.KNN87Codec.KNN87Codec;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.elasticsearch.index.codec.CodecService;

- import static com.amazon.opendistroforelasticsearch.knn.index.codec.KNN86Codec.KNN86Codec.KNN_86;
+ import static com.amazon.opendistroforelasticsearch.knn.index.codec.KNN87Codec.KNN87Codec.KNN_87;

/**
* KNNCodecService to inject the right KNNCodec version
@@ -37,18 +37,18 @@ class KNNCodecService extends CodecService {
* return the KNN Codec
*
* @param name dummy name
-  * @return KNN86Codec
+  * @return Latest KNN Codec
*/
@Override
public Codec codec(String name) {
- Codec codec = Codec.forName(KNN_86);
+ Codec codec = Codec.forName(KNN_87);
if (codec == null) {
throw new IllegalArgumentException("failed to find codec [" + name + "]");
}
return codec;
}

public void setPostingsFormat(PostingsFormat postingsFormat) {
- ((KNN86Codec)codec("")).setPostingsFormat(postingsFormat);
+ ((KNN87Codec)codec("")).setPostingsFormat(postingsFormat);
}
}
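For orientation (my gloss on the pattern, not text from the commit): CodecService.codec(String) is what the engine consults when it opens an index writer, so unconditionally returning the KNN codec forces every new segment of a k-NN index through KNN87Codec. A stripped-down sketch of the same pattern with a hypothetical service class:

    import org.apache.lucene.codecs.Codec;
    import org.elasticsearch.index.codec.CodecService;

    // Always hand back one fixed codec, whatever name the engine asks for.
    class FixedCodecService extends CodecService {
        FixedCodecService() {
            super(null, null); // (MapperService, Logger); a null MapperService selects the default codecs
        }

        @Override
        public Codec codec(String name) {
            return Codec.forName("KNN87Codec");
        }
    }
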
KNNEngineFactory.java
@@ -31,7 +31,7 @@ class KNNEngineFactory implements EngineFactory {
@Override
public Engine newReadWriteEngine(EngineConfig config) {
codecService.setPostingsFormat(config.getCodec().postingsFormat());
- EngineConfig engineConfig = new EngineConfig(config.getShardId(), config.getAllocationId(),
+ EngineConfig engineConfig = new EngineConfig(config.getShardId(),
config.getThreadPool(), config.getIndexSettings(), config.getWarmer(), config.getStore(),
config.getMergePolicy(), config.getAnalyzer(), config.getSimilarity(), codecService,
config.getEventListener(), config.getQueryCache(), config.getQueryCachingPolicy(),
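The single change here reflects the 7.10 EngineConfig constructor dropping its allocationId parameter; the factory still rebuilds the config so it can slot in the KNN CodecService before delegating to the stock engine. A bare-bones sketch of that delegation shape, with a hypothetical factory and no config rewriting:

    import org.elasticsearch.index.engine.Engine;
    import org.elasticsearch.index.engine.EngineConfig;
    import org.elasticsearch.index.engine.EngineFactory;
    import org.elasticsearch.index.engine.InternalEngine;

    // Same shape as KNNEngineFactory, minus the config rebuild: accept the
    // engine config and delegate to Elasticsearch's InternalEngine.
    class PassThroughEngineFactory implements EngineFactory {
        @Override
        public Engine newReadWriteEngine(EngineConfig config) {
            return new InternalEngine(config);
        }
    }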
(Diffs for the remaining changed files are not shown.)
