From 32dac1b124b6997a3045daeb31dcff01ab95a55c Mon Sep 17 00:00:00 2001
From: Martin Gaievski
Date: Wed, 11 Jan 2023 16:50:57 -0800
Subject: [PATCH] Add Lucene specific file extensions to core HybridFS (#721)

* Add lucene vector specific file extensions for io with mmap

Signed-off-by: Martin Gaievski
(cherry picked from commit 8a2aa04d7b03edef40a7c43f00215e4bc906b082)
---
Review notes (this section is ignored when the patch is applied):
 * Each KNN engine now reports the file extensions it wants opened via mmap;
   KNNPlugin appends them to the core HybridFS default list as a node setting.
 * KNNLibrary's default returns Collections.emptyList() rather than the raw
   Collections.EMPTY_LIST, so no unchecked conversion is needed.
 * Line breaks, indentation and generic type arguments were reconstructed from
   a whitespace-collapsed copy; blank context lines are placed to satisfy the
   hunk line counts. Blob ids on the "index" lines are those of the original
   commit. TODO confirm against the upstream commit before applying.

 .../opensearch-knn.release-notes-2.5.0.0.md   |  1 +
 .../opensearch/knn/index/util/KNNEngine.java  |  6 +++++
 .../opensearch/knn/index/util/KNNLibrary.java | 11 +++++++++
 .../org/opensearch/knn/index/util/Lucene.java |  6 +++++
 .../org/opensearch/knn/plugin/KNNPlugin.java  | 24 +++++++++++++++++++
 .../knn/index/util/KNNEngineTests.java        | 14 +++++++++++
 .../knn/index/util/LuceneTests.java           |  9 +++++++
 7 files changed, 71 insertions(+)

diff --git a/release-notes/opensearch-knn.release-notes-2.5.0.0.md b/release-notes/opensearch-knn.release-notes-2.5.0.0.md
index 397e860c3..25f7d3aab 100644
--- a/release-notes/opensearch-knn.release-notes-2.5.0.0.md
+++ b/release-notes/opensearch-knn.release-notes-2.5.0.0.md
@@ -5,6 +5,7 @@ Compatible with OpenSearch 2.5.0
 ### Enhancements
 
 * Extend SystemIndexPlugin for k-NN model system index ([#630](https://github.com/opensearch-project/k-NN/pull/630))
+* Add Lucene specific file extensions to core HybridFS ([#721](https://github.com/opensearch-project/k-NN/pull/721))
 
 ### Bug Fixes
 
diff --git a/src/main/java/org/opensearch/knn/index/util/KNNEngine.java b/src/main/java/org/opensearch/knn/index/util/KNNEngine.java
index 0751b332a..fe28de43e 100644
--- a/src/main/java/org/opensearch/knn/index/util/KNNEngine.java
+++ b/src/main/java/org/opensearch/knn/index/util/KNNEngine.java
@@ -12,6 +12,7 @@
 import org.opensearch.knn.index.KNNMethodContext;
 import org.opensearch.knn.index.SpaceType;
 
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
@@ -176,4 +177,9 @@ public Boolean isInitialized() {
     public void setInitialized(Boolean isInitialized) {
         knnLibrary.setInitialized(isInitialized);
     }
+
+    @Override
+    public List<String> mmapFileExtensions() {
+        return knnLibrary.mmapFileExtensions();
+    }
 }
diff --git a/src/main/java/org/opensearch/knn/index/util/KNNLibrary.java b/src/main/java/org/opensearch/knn/index/util/KNNLibrary.java
index 7cb14f7f0..b990ce33b 100644
--- a/src/main/java/org/opensearch/knn/index/util/KNNLibrary.java
+++ b/src/main/java/org/opensearch/knn/index/util/KNNLibrary.java
@@ -16,6 +16,8 @@
 import org.opensearch.knn.index.KNNMethodContext;
 import org.opensearch.knn.index.SpaceType;
 
+import java.util.Collections;
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -113,4 +115,13 @@ public interface KNNLibrary {
      * @param isInitialized whether library has been initialized
      */
     void setInitialized(Boolean isInitialized);
+
+    /**
+     * Getter for mmap file extensions
+     *
+     * @return list of file extensions that will be read/written with mmap
+     */
+    default List<String> mmapFileExtensions() {
+        return Collections.emptyList();
+    }
 }
diff --git a/src/main/java/org/opensearch/knn/index/util/Lucene.java b/src/main/java/org/opensearch/knn/index/util/Lucene.java
index 83f123969..bfa6cb040 100644
--- a/src/main/java/org/opensearch/knn/index/util/Lucene.java
+++ b/src/main/java/org/opensearch/knn/index/util/Lucene.java
@@ -13,6 +13,7 @@
 import org.opensearch.knn.index.Parameter;
 import org.opensearch.knn.index.SpaceType;
 
+import java.util.List;
 import java.util.Map;
 
 import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW;
@@ -73,4 +74,9 @@ public float score(float rawScore, SpaceType spaceType) {
         // score provided.
         return rawScore;
     }
+
+    @Override
+    public List<String> mmapFileExtensions() {
+        return List.of("vec", "vex");
+    }
 }
diff --git a/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java b/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java
index 4836c6c47..efa9065de 100644
--- a/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java
+++ b/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java
@@ -20,6 +20,7 @@
 import org.opensearch.knn.index.query.KNNWeight;
 import org.opensearch.knn.index.codec.KNNCodecService;
 import org.opensearch.knn.index.memory.NativeMemoryLoadStrategy;
+import org.opensearch.knn.index.util.KNNEngine;
 import org.opensearch.knn.indices.ModelGraveyard;
 import org.opensearch.knn.indices.ModelCache;
 import org.opensearch.knn.indices.ModelDao;
@@ -104,6 +105,8 @@
 import java.util.Map;
 import java.util.Optional;
 import java.util.function.Supplier;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 import static java.util.Collections.singletonList;
 import static org.opensearch.knn.common.KNNConstants.KNN_THREAD_POOL_PREFIX;
@@ -338,4 +341,25 @@ public List getNamedXContent() {
     public Collection<SystemIndexDescriptor> getSystemIndexDescriptors(Settings settings) {
         return ImmutableList.of(new SystemIndexDescriptor(MODEL_INDEX_NAME, "Index for storing models used for k-NN indices"));
     }
+
+    /**
+     * Plugin can provide additional node settings, which include new settings or overrides for existing ones from core.
+     *
+     * @return settings that are set by plugin
+     */
+    @Override
+    public Settings additionalSettings() {
+        // We add engine specific extensions to the core list for HybridFS store type. We read existing values
+        // and append ours because in core the setting will be replaced by the override.
+        // Values are set as cluster defaults and are used at index creation time. Index specific overrides will take priority over values
+        // that are set here.
+        final List<String> engineSettings = Arrays.stream(KNNEngine.values())
+            .flatMap(engine -> engine.mmapFileExtensions().stream())
+            .collect(Collectors.toList());
+        final List<String> combinedSettings = Stream.concat(
+            IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS.getDefault(Settings.EMPTY).stream(),
+            engineSettings.stream()
+        ).collect(Collectors.toList());
+        return Settings.builder().putList(IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS.getKey(), combinedSettings).build();
+    }
 }
diff --git a/src/test/java/org/opensearch/knn/index/util/KNNEngineTests.java b/src/test/java/org/opensearch/knn/index/util/KNNEngineTests.java
index af9feeb9e..bed0b7908 100644
--- a/src/test/java/org/opensearch/knn/index/util/KNNEngineTests.java
+++ b/src/test/java/org/opensearch/knn/index/util/KNNEngineTests.java
@@ -8,6 +8,10 @@
 import org.opensearch.knn.KNNTestCase;
 import org.opensearch.knn.common.KNNConstants;
 
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
 public class KNNEngineTests extends KNNTestCase {
     /**
      * Check that version from engine and library match
@@ -47,4 +51,14 @@ public void testGetEngineFromPath() {
         String invalidPath = "test.invalid";
         expectThrows(IllegalArgumentException.class, () -> KNNEngine.getEngineNameFromPath(invalidPath));
     }
+
+    public void testMmapFileExtensions() {
+        final List<String> mmapExtensions = Arrays.stream(KNNEngine.values())
+            .flatMap(engine -> engine.mmapFileExtensions().stream())
+            .collect(Collectors.toList());
+        assertNotNull(mmapExtensions);
+        final List<String> expectedSettings = List.of("vex", "vec");
+        assertTrue(expectedSettings.containsAll(mmapExtensions));
+        assertTrue(mmapExtensions.containsAll(expectedSettings));
+    }
 }
diff --git a/src/test/java/org/opensearch/knn/index/util/LuceneTests.java b/src/test/java/org/opensearch/knn/index/util/LuceneTests.java
index bbf43a30a..f5c8e45b9 100644
--- a/src/test/java/org/opensearch/knn/index/util/LuceneTests.java
+++ b/src/test/java/org/opensearch/knn/index/util/LuceneTests.java
@@ -15,6 +15,7 @@
 
 import java.io.IOException;
 import java.util.Collections;
+import java.util.List;
 import java.util.Map;
 
 import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW;
@@ -114,4 +115,12 @@ public void testVersion() {
         Lucene luceneInstance = Lucene.INSTANCE;
         assertEquals(Version.LATEST.toString(), luceneInstance.getVersion());
     }
+
+    public void testMmapFileExtensions() {
+        final List<String> luceneMmapExtensions = Lucene.INSTANCE.mmapFileExtensions();
+        assertNotNull(luceneMmapExtensions);
+        final List<String> expectedSettings = List.of("vex", "vec");
+        assertTrue(expectedSettings.containsAll(luceneMmapExtensions));
+        assertTrue(luceneMmapExtensions.containsAll(expectedSettings));
+    }
 }