From 8d07865987691e2529b25fba5d273a832c0be3ff Mon Sep 17 00:00:00 2001
From: Martin Gaievski
Date: Tue, 3 Jan 2023 15:57:04 -0800
Subject: [PATCH] Add Lucene 9.5 codec and make it new default

Signed-off-by: Martin Gaievski
---
 .../index/codec/KNN950Codec/KNN950Codec.java  | 58 +++++++++++++++++++
 .../KNN950PerFieldKnnVectorsFormat.java       | 28 +++++++++
 .../knn/index/codec/KNNCodecVersion.java      | 20 ++++++-
 .../services/org.apache.lucene.codecs.Codec   |  3 +-
 .../codec/KNN950Codec/KNN950CodecTests.java   | 41 +++++++++++++
 .../knn/index/codec/KNNCodecFactoryTests.java |  8 +++
 6 files changed, 156 insertions(+), 2 deletions(-)
 create mode 100644 src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950Codec.java
 create mode 100644 src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java
 create mode 100644 src/test/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950CodecTests.java

diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950Codec.java b/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950Codec.java
new file mode 100644
index 0000000000..532a501537
--- /dev/null
+++ b/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950Codec.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.knn.index.codec.KNN950Codec;
+
+import lombok.Builder;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.CompoundFormat;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
+import org.opensearch.knn.index.codec.KNNCodecVersion;
+import org.opensearch.knn.index.codec.KNNFormatFacade;
+
+public class KNN950Codec extends FilterCodec {
+    private static final KNNCodecVersion VERSION = KNNCodecVersion.V_9_5_0;
+    private final KNNFormatFacade knnFormatFacade;
+    private final PerFieldKnnVectorsFormat perFieldKnnVectorsFormat;
+
+    /**
+     * No arg constructor that uses Lucene95 as the delegate
+     */
+    public KNN950Codec() {
+        this(VERSION.getDefaultCodecDelegate(), VERSION.getPerFieldKnnVectorsFormat());
+    }
+
+    /**
+     * Sole constructor. When subclassing this codec, create a no-arg ctor and pass the delegate codec
+     * and the per field KNN vectors format to this ctor.
+     *
+     * @param delegate codec that will perform all operations this codec does not override
+     * @param knnVectorsFormat per field format for KnnVector
+     */
+    @Builder
+    protected KNN950Codec(Codec delegate, PerFieldKnnVectorsFormat knnVectorsFormat) {
+        super(VERSION.getCodecName(), delegate);
+        knnFormatFacade = VERSION.getKnnFormatFacadeSupplier().apply(delegate);
+        perFieldKnnVectorsFormat = knnVectorsFormat;
+    }
+
+    @Override
+    public DocValuesFormat docValuesFormat() {
+        return knnFormatFacade.docValuesFormat();
+    }
+
+    @Override
+    public CompoundFormat compoundFormat() {
+        return knnFormatFacade.compoundFormat();
+    }
+
+    @Override
+    public KnnVectorsFormat knnVectorsFormat() {
+        return perFieldKnnVectorsFormat;
+    }
+}
diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java b/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java
new file mode 100644
index 0000000000..66dfcd46e4
--- /dev/null
+++ b/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.knn.index.codec.KNN950Codec;
+
+import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat;
+import org.opensearch.index.mapper.MapperService;
+import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat;
+
+import java.util.Optional;
+
+/**
+ * Class provides per field format implementation for Lucene Knn vector type
+ */
+public class KNN950PerFieldKnnVectorsFormat extends BasePerFieldKnnVectorsFormat {
+
+    public KNN950PerFieldKnnVectorsFormat(final Optional<MapperService> mapperService) {
+        super(
+            mapperService,
+            Lucene95HnswVectorsFormat.DEFAULT_MAX_CONN,
+            Lucene95HnswVectorsFormat.DEFAULT_BEAM_WIDTH,
+            () -> new Lucene95HnswVectorsFormat(),
+            (maxConn, beamWidth) -> new Lucene95HnswVectorsFormat(maxConn, beamWidth)
+        );
+    }
+}
diff --git a/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java b/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java
index 0e68ec5803..65369ed822 100644
--- a/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java
+++ b/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java
@@ -11,6 +11,7 @@
 import org.apache.lucene.backward_codecs.lucene92.Lucene92Codec;
 import org.apache.lucene.backward_codecs.lucene94.Lucene94Codec;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.lucene95.Lucene95Codec;
 import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
 import org.opensearch.index.mapper.MapperService;
 import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat;
@@ -20,6 +21,8 @@
 import org.opensearch.knn.index.codec.KNN920Codec.KNN920PerFieldKnnVectorsFormat;
 import org.opensearch.knn.index.codec.KNN940Codec.KNN940Codec;
 import org.opensearch.knn.index.codec.KNN940Codec.KNN940PerFieldKnnVectorsFormat;
+import org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec;
+import org.opensearch.knn.index.codec.KNN950Codec.KNN950PerFieldKnnVectorsFormat;
 
 import java.util.Optional;
 import java.util.function.BiFunction;
@@ -74,9 +77,24 @@ public enum KNNCodecVersion {
             .knnVectorsFormat(new KNN940PerFieldKnnVectorsFormat(Optional.ofNullable(mapperService)))
             .build(),
         KNN940Codec::new
+    ),
+
+    V_9_5_0(
+        "KNN950Codec",
+        new Lucene95Codec(),
+        new KNN950PerFieldKnnVectorsFormat(Optional.empty()),
+        (delegate) -> new KNNFormatFacade(
+            new KNN80DocValuesFormat(delegate.docValuesFormat()),
+            new KNN80CompoundFormat(delegate.compoundFormat())
+        ),
+        (userCodec, mapperService) -> KNN950Codec.builder()
+            .delegate(userCodec)
+            .knnVectorsFormat(new KNN950PerFieldKnnVectorsFormat(Optional.ofNullable(mapperService)))
+            .build(),
+        KNN950Codec::new
     );
 
-    private static final KNNCodecVersion CURRENT = V_9_4_0;
+    private static final KNNCodecVersion CURRENT = V_9_5_0;
 
     private final String codecName;
     private final Codec defaultCodecDelegate;
diff --git a/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec
index 8185e7858a..5c44d5756f 100644
--- a/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec
+++ b/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec
@@ -4,4 +4,5 @@ org.opensearch.knn.index.codec.KNN86Codec.KNN86Codec
 org.opensearch.knn.index.codec.KNN87Codec.KNN87Codec
 org.opensearch.knn.index.codec.KNN910Codec.KNN910Codec
 org.opensearch.knn.index.codec.KNN920Codec.KNN920Codec
-org.opensearch.knn.index.codec.KNN940Codec.KNN940Codec
\ No newline at end of file
+org.opensearch.knn.index.codec.KNN940Codec.KNN940Codec
+org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec
\ No newline at end of file
diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950CodecTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950CodecTests.java
new file mode 100644
index 0000000000..8a574fb49d
--- /dev/null
+++ b/src/test/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950CodecTests.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.knn.index.codec.KNN950Codec;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
+import org.opensearch.index.mapper.MapperService;
+import org.opensearch.knn.index.codec.KNNCodecTestCase;
+
+import java.io.IOException;
+import java.util.Optional;
+import java.util.concurrent.ExecutionException;
+import java.util.function.Function;
+
+import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_5_0;
+
+public class KNN950CodecTests extends KNNCodecTestCase {
+
+    public void testMultiFieldsKnnIndex() throws Exception {
+        testMultiFieldsKnnIndex(KNN950Codec.builder().delegate(V_9_5_0.getDefaultCodecDelegate()).build());
+    }
+
+    public void testBuildFromModelTemplate() throws InterruptedException, ExecutionException, IOException {
+        testBuildFromModelTemplate(KNN950Codec.builder().delegate(V_9_5_0.getDefaultCodecDelegate()).build());
+    }
+
+    public void testKnnVectorIndex() throws Exception {
+        Function<MapperService, PerFieldKnnVectorsFormat> perFieldKnnVectorsFormatProvider = (
+            mapperService) -> new KNN950PerFieldKnnVectorsFormat(Optional.of(mapperService));
+
+        Function<PerFieldKnnVectorsFormat, Codec> knnCodecProvider = (knnVectorFormat) -> KNN950Codec.builder()
+            .delegate(V_9_5_0.getDefaultCodecDelegate())
+            .knnVectorsFormat(knnVectorFormat)
+            .build();
+
+        testKnnVectorIndex(knnCodecProvider, perFieldKnnVectorsFormatProvider);
+    }
+}
diff --git a/src/test/java/org/opensearch/knn/index/codec/KNNCodecFactoryTests.java b/src/test/java/org/opensearch/knn/index/codec/KNNCodecFactoryTests.java
index 4a17a1d6f5..2ec953b188 100644
--- a/src/test/java/org/opensearch/knn/index/codec/KNNCodecFactoryTests.java
+++ b/src/test/java/org/opensearch/knn/index/codec/KNNCodecFactoryTests.java
@@ -9,11 +9,13 @@
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.backward_codecs.lucene91.Lucene91Codec;
 import org.apache.lucene.backward_codecs.lucene94.Lucene94Codec;
+import org.apache.lucene.codecs.lucene95.Lucene95Codec;
 import org.opensearch.knn.KNNTestCase;
 
 import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_1_0;
 import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_2_0;
 import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_4_0;
+import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_5_0;
 
 public class KNNCodecFactoryTests extends KNNTestCase {
 
@@ -35,6 +37,12 @@ public void testKNN940Codec() {
         assertNotNull(V_9_4_0.getKnnFormatFacadeSupplier().apply(V_9_4_0.getDefaultCodecDelegate()));
     }
 
+    public void testKNN950Codec() {
+        assertDelegateForVersion(V_9_5_0, Lucene95Codec.class);
+        assertNotNull(V_9_5_0.getPerFieldKnnVectorsFormat());
+        assertNotNull(V_9_5_0.getKnnFormatFacadeSupplier().apply(V_9_5_0.getDefaultCodecDelegate()));
+    }
+
     private void assertDelegateForVersion(final KNNCodecVersion codecVersion, final Class expectedCodecClass) {
         final Codec defaultDelegate = codecVersion.getDefaultCodecDelegate();
         assertNotNull(defaultDelegate);