From 083ea2ba44a79eb9c592ff61253cc1aa65158888 Mon Sep 17 00:00:00 2001 From: Junqiu Lei Date: Wed, 13 Dec 2023 10:13:36 -0800 Subject: [PATCH] Increase Lucene max dimension limit to 16,000 (#1346) * Increase Lucene max dimension limit to 16,000 Signed-off-by: Junqiu Lei --- CHANGELOG.md | 1 + .../index/codec/BasePerFieldKnnVectorsFormat.java | 5 +++++ .../KNN950Codec/KNN950PerFieldKnnVectorsFormat.java | 12 ++++++++++++ .../knn/index/mapper/LuceneFieldMapper.java | 7 ++----- .../org/opensearch/knn/index/util/KNNEngine.java | 3 +-- .../opensearch/knn/index/codec/KNNCodecTestCase.java | 2 ++ .../knn/index/mapper/KNNVectorFieldMapperTests.java | 4 ++-- 7 files changed, 25 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cfd852424..4e53ecc62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Features * Add parent join support for lucene knn [#1182](https://github.com/opensearch-project/k-NN/pull/1182) ### Enhancements +* Increase Lucene max dimension limit to 16,000 [#1346](https://github.com/opensearch-project/k-NN/pull/1346) ### Bug Fixes * Fix use-after-free case on nmslib search path [#1305](https://github.com/opensearch-project/k-NN/pull/1305) * Allow nested knn field mapping when train model [#1318](https://github.com/opensearch-project/k-NN/pull/1318) diff --git a/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java b/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java index d10ad9821..c8eb22a97 100644 --- a/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java +++ b/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java @@ -59,6 +59,11 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) { return formatSupplier.apply(maxConnections, beamWidth); } + @Override + public int getMaxDimensions(String fieldName) { + return 
getKnnVectorsFormatForField(fieldName).getMaxDimensions(fieldName); + } + private boolean isKnnVectorFieldType(final String field) { return mapperService.isPresent() && mapperService.get().fieldType(field) instanceof KNNVectorFieldMapper.KNNVectorFieldType; } diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java b/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java index 66dfcd46e..d9091b2a7 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java @@ -8,6 +8,7 @@ import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat; import org.opensearch.index.mapper.MapperService; import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat; +import org.opensearch.knn.index.util.KNNEngine; import java.util.Optional; @@ -25,4 +26,15 @@ public KNN950PerFieldKnnVectorsFormat(final Optional mapperServic (maxConnm, beamWidth) -> new Lucene95HnswVectorsFormat(maxConnm, beamWidth) ); } + + /** + * Returns the maximum vector dimension that KNNEngine allows for the Lucene engine + * + * @param fieldName Name of the field, ignored + * @return Maximum constant dimension set by KNNEngine + */ + @Override + public int getMaxDimensions(String fieldName) { + return KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE); + } } diff --git a/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java index b28b93028..173f057e6 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java @@ -23,7 +23,6 @@ import java.util.Locale; import java.util.Optional; -import org.apache.lucene.codecs.KnnVectorsFormat; import static 
org.opensearch.knn.common.KNNConstants.VECTOR_DATA_TYPE_FIELD; import static org.opensearch.knn.index.mapper.KNNVectorFieldMapperUtil.addStoredFieldForVectorField; import static org.opensearch.knn.index.mapper.KNNVectorFieldMapperUtil.buildDocValuesFieldType; @@ -33,8 +32,6 @@ */ public class LuceneFieldMapper extends KNNVectorFieldMapper { - private static final int LUCENE_MAX_DIMENSION = KnnVectorsFormat.DEFAULT_MAX_DIMENSIONS; - /** FieldType used for initializing VectorField, which is used for creating binary doc values. **/ private final FieldType vectorFieldType; private final VectorDataType vectorDataType; @@ -55,12 +52,12 @@ public class LuceneFieldMapper extends KNNVectorFieldMapper { final VectorSimilarityFunction vectorSimilarityFunction = this.knnMethod.getSpaceType().getVectorSimilarityFunction(); final int dimension = input.getMappedFieldType().getDimension(); - if (dimension > LUCENE_MAX_DIMENSION) { + if (dimension > KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE)) { throw new IllegalArgumentException( String.format( Locale.ROOT, "Dimension value cannot be greater than [%s] but got [%s] for vector [%s]", - LUCENE_MAX_DIMENSION, + KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE), dimension, input.getName() ) diff --git a/src/main/java/org/opensearch/knn/index/util/KNNEngine.java b/src/main/java/org/opensearch/knn/index/util/KNNEngine.java index 197bb87f3..8d03d9a9e 100644 --- a/src/main/java/org/opensearch/knn/index/util/KNNEngine.java +++ b/src/main/java/org/opensearch/knn/index/util/KNNEngine.java @@ -6,7 +6,6 @@ package org.opensearch.knn.index.util; import com.google.common.collect.ImmutableSet; -import org.apache.lucene.codecs.KnnVectorsFormat; import org.opensearch.common.ValidationException; import org.opensearch.knn.index.KNNMethod; import org.opensearch.knn.index.KNNMethodContext; @@ -40,7 +39,7 @@ public enum KNNEngine implements KNNLibrary { KNNEngine.FAISS, 16_000, KNNEngine.LUCENE, - KnnVectorsFormat.DEFAULT_MAX_DIMENSIONS + 
16_000 ); /** diff --git a/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestCase.java b/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestCase.java index eb9b4fa2d..40309027d 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestCase.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestCase.java @@ -338,6 +338,7 @@ public void testKnnVectorIndex( writer.close(); verify(perFieldKnnVectorsFormatSpy, atLeastOnce()).getKnnVectorsFormatForField(eq(FIELD_NAME_ONE)); + verify(perFieldKnnVectorsFormatSpy, atLeastOnce()).getMaxDimensions(eq(FIELD_NAME_ONE)); IndexSearcher searcher = new IndexSearcher(reader); Query query = KNNQueryFactory.create( @@ -372,6 +373,7 @@ public void testKnnVectorIndex( NativeMemoryLoadStrategy.IndexLoadStrategy.initialize(resourceWatcherService); verify(perFieldKnnVectorsFormatSpy, atLeastOnce()).getKnnVectorsFormatForField(eq(FIELD_NAME_TWO)); + verify(perFieldKnnVectorsFormatSpy, atLeastOnce()).getMaxDimensions(eq(FIELD_NAME_TWO)); IndexSearcher searcher1 = new IndexSearcher(reader1); Query query1 = KNNQueryFactory.create( diff --git a/src/test/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperTests.java b/src/test/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperTests.java index 2de98d803..4ed231063 100644 --- a/src/test/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperTests.java +++ b/src/test/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperTests.java @@ -301,7 +301,7 @@ public void testTypeParser_parse_fromKnnMethodContext_invalidDimension() throws XContentBuilder xContentBuilderOverMaxDimension = XContentFactory.jsonBuilder() .startObject() .field(TYPE_FIELD_NAME, KNN_VECTOR_TYPE) - .field(DIMENSION_FIELD_NAME, 2000) + .field(DIMENSION_FIELD_NAME, 20000) .startObject(KNN_METHOD) .field(NAME, METHOD_HNSW) .field(METHOD_PARAMETER_SPACE_TYPE, SpaceType.L2) @@ -321,7 +321,7 @@ public void testTypeParser_parse_fromKnnMethodContext_invalidDimension() throws 
IllegalArgumentException.class, () -> builderOverMaxDimension.build(new Mapper.BuilderContext(settings, new ContentPath())) ); - assertEquals("Dimension value cannot be greater than 1024 for vector: test-field-name", ex.getMessage()); + assertEquals("Dimension value cannot be greater than 16000 for vector: test-field-name", ex.getMessage()); XContentBuilder xContentBuilderInvalidDimension = XContentFactory.jsonBuilder() .startObject()