From 6d27cec3f7f3108e49a013e6ea849b91b1d19a4a Mon Sep 17 00:00:00 2001 From: Navneet Verma Date: Mon, 28 Aug 2023 09:58:21 -0700 Subject: [PATCH] Added max distance computation logic to enhance the switch to exact search in filtered Nearest Neighbor Search. Signed-off-by: Navneet Verma --- CHANGELOG.md | 1 + .../opensearch/knn/common/KNNConstants.java | 5 +++ .../org/opensearch/knn/index/KNNSettings.java | 30 ++------------ .../opensearch/knn/index/query/KNNWeight.java | 30 ++++++++++---- .../knn/index/KNNSettingsTests.java | 39 ++++--------------- .../knn/index/query/KNNWeightTests.java | 3 -- 6 files changed, 39 insertions(+), 69 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ad388902..b6f37f6d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Enhancements * Enabled the IVF algorithm to work with Filters of K-NN Query. [#1013](https://github.com/opensearch-project/k-NN/pull/1013) * Improved the logic to switch to exact search for restrictive filters search for better recall. [#1059](https://github.com/opensearch-project/k-NN/pull/1059) +* Added max distance computation logic to enhance the switch to exact search in filtered Nearest Neighbor Search. 
[#1066](https://github.com/opensearch-project/k-NN/pull/1066) ### Bug Fixes ### Infrastructure ### Documentation diff --git a/src/main/java/org/opensearch/knn/common/KNNConstants.java b/src/main/java/org/opensearch/knn/common/KNNConstants.java index 63529fccc..d7835a9c2 100644 --- a/src/main/java/org/opensearch/knn/common/KNNConstants.java +++ b/src/main/java/org/opensearch/knn/common/KNNConstants.java @@ -101,4 +101,9 @@ public class KNNConstants { // API Constants public static final String CLEAR_CACHE = "clear_cache"; + + // Filtered Search Constants: upper bound on distance computations for which exact search is preferred. + // Please refer to this GitHub issue for more details on how this value was chosen: + // https://github.com/opensearch-project/k-NN/issues/1049#issuecomment-1694741092 + public static final int MAX_DISTANCE_COMPUTATIONS = 2048000; } diff --git a/src/main/java/org/opensearch/knn/index/KNNSettings.java b/src/main/java/org/opensearch/knn/index/KNNSettings.java index 7c063c367..14a69f317 100644 --- a/src/main/java/org/opensearch/knn/index/KNNSettings.java +++ b/src/main/java/org/opensearch/knn/index/KNNSettings.java @@ -74,7 +74,6 @@ public class KNNSettings { public static final String MODEL_INDEX_NUMBER_OF_REPLICAS = "knn.model.index.number_of_replicas"; public static final String MODEL_CACHE_SIZE_LIMIT = "knn.model.cache.size.limit"; public static final String ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD = "index.knn.advanced.filtered_exact_search_threshold"; - public static final String ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT = "index.knn.advanced.filtered_exact_search_threshold_pct"; /** * Default setting values @@ -89,8 +88,7 @@ public class KNNSettings { public static final Integer KNN_MAX_MODEL_CACHE_SIZE_LIMIT_PERCENTAGE = 25; // Model cache limit cannot exceed 25% of the JVM heap public static final String KNN_DEFAULT_MEMORY_CIRCUIT_BREAKER_LIMIT = "50%"; - public static final Integer ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE = 2000; - public static final Integer
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_DEFAULT_VALUE = 10; + public static final Integer ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE = -1; /** * Settings Definition @@ -162,15 +160,6 @@ public class KNNSettings { public static final Setting ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING = Setting.intSetting( ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE, - 0, - IndexScope, - Setting.Property.Dynamic - ); - - public static final Setting ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_SETTING = Setting.intSetting( - ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT, - ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_DEFAULT_VALUE, - 0, IndexScope, Setting.Property.Dynamic ); @@ -348,10 +337,6 @@ private Setting getSetting(String key) { return ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING; } - if (ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT.equals(key)) { - return ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_SETTING; - } - throw new IllegalArgumentException("Cannot find setting by key [" + key + "]"); } @@ -368,8 +353,7 @@ public List> getSettings() { MODEL_INDEX_NUMBER_OF_SHARDS_SETTING, MODEL_INDEX_NUMBER_OF_REPLICAS_SETTING, MODEL_CACHE_SIZE_LIMIT_SETTING, - ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING, - ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_SETTING + ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING ); return Stream.concat(settings.stream(), dynamicCacheSettings.values().stream()).collect(Collectors.toList()); } @@ -390,7 +374,7 @@ public static double getCircuitBreakerUnsetPercentage() { return KNNSettings.state().getSettingValue(KNNSettings.KNN_CIRCUIT_BREAKER_UNSET_PERCENTAGE); } - public static int getFilteredExactSearchThreshold(final String indexName) { + public static Integer getFilteredExactSearchThreshold(final String indexName) { return KNNSettings.state().clusterService.state() .getMetadata() .index(indexName) @@ -398,14 +382,6 @@ public static int 
getFilteredExactSearchThreshold(final String indexName) { .getAsInt(ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE); } - public static int getFilteredExactSearchThresholdPct(final String indexName) { - return KNNSettings.state().clusterService.state() - .getMetadata() - .index(indexName) - .getSettings() - .getAsInt(ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT, ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_DEFAULT_VALUE); - } - public void initialize(Client client, ClusterService clusterService) { this.client = client; this.clusterService = clusterService; diff --git a/src/main/java/org/opensearch/knn/index/query/KNNWeight.java b/src/main/java/org/opensearch/knn/index/query/KNNWeight.java index 7352dd436..4778ba25d 100644 --- a/src/main/java/org/opensearch/knn/index/query/KNNWeight.java +++ b/src/main/java/org/opensearch/knn/index/query/KNNWeight.java @@ -375,18 +375,32 @@ private SpaceType getSpaceType(final FieldInfo fieldInfo) { private boolean canDoExactSearch(final int filterIdsCount, final int searchableDocs) { log.debug( - "Info for doing exact search Live Docs: {}, filterIdsLength : {}, Threshold value: {} , Threshold %age : {}", + "Info for doing exact search Live Docs: {}, filterIdsLength : {}, Threshold value: {}", searchableDocs, filterIdsCount, - KNNSettings.getFilteredExactSearchThreshold(knnQuery.getIndexName()), - KNNSettings.getFilteredExactSearchThresholdPct(knnQuery.getIndexName()) + KNNSettings.getFilteredExactSearchThreshold(knnQuery.getIndexName()) ); + int filterThresholdValue = KNNSettings.getFilteredExactSearchThreshold(knnQuery.getIndexName()); // Refer this GitHub around more details https://github.com/opensearch-project/k-NN/issues/1049 on the logic - return filterIdsCount <= knnQuery.getK() - || (filterIdsCount <= KNNSettings.getFilteredExactSearchThreshold(knnQuery.getIndexName()) - && (((float) filterIdsCount / (float) searchableDocs) * 100) <= (float) 
KNNSettings.getFilteredExactSearchThresholdPct( - knnQuery.getIndexName() - )); + if (filterIdsCount <= knnQuery.getK()) { + return true; + } + // Check whether the user has defined the exact search filtered threshold; if yes, then use that setting. + if (isExactSearchThresholdSettingSet(filterThresholdValue)) { + return filterThresholdValue >= filterIdsCount; + } + // If no setting is set, do exact search only when the distance computations (filtered docs * dimension) fit within the max; long math avoids int overflow. + return KNNConstants.MAX_DISTANCE_COMPUTATIONS >= (long) filterIdsCount * knnQuery.getQueryVector().length; + } + + /** + * This function validates if {@link KNNSettings#ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD} is set or not. This + * is done by checking that the setting value differs from the default value. + * @param filterThresholdValue value of the Index Setting: {@link KNNSettings#ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING} + * @return boolean true if the setting is set, i.e. its value is not the default value. + */ + private boolean isExactSearchThresholdSettingSet(int filterThresholdValue) { + return filterThresholdValue != KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE; } /** diff --git a/src/test/java/org/opensearch/knn/index/KNNSettingsTests.java b/src/test/java/org/opensearch/knn/index/KNNSettingsTests.java index 9432be33e..07b2aa20a 100644 --- a/src/test/java/org/opensearch/knn/index/KNNSettingsTests.java +++ b/src/test/java/org/opensearch/knn/index/KNNSettingsTests.java @@ -6,7 +6,6 @@ package org.opensearch.knn.index; import lombok.SneakyThrows; -import org.junit.Assert; import org.opensearch.action.admin.cluster.state.ClusterStateRequest; import org.opensearch.action.admin.indices.create.CreateIndexRequest; import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest; @@ -85,19 +84,15 @@ public void testFilteredSearchAdvanceSetting_whenNoValuesProvidedByUsers_thenDef mockNode.client().admin().indices().create(new CreateIndexRequest(INDEX_NAME)).actionGet();
KNNSettings.state().setClusterService(clusterService); - int filteredSearchThresholdPct = KNNSettings.getFilteredExactSearchThresholdPct(INDEX_NAME); - int filteredSearchThreshold = KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME); + Integer filteredSearchThreshold = KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME); mockNode.close(); - assertEquals((int) KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_DEFAULT_VALUE, filteredSearchThresholdPct); - assertEquals((int) KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE, filteredSearchThreshold); + assertEquals(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE, filteredSearchThreshold); assertWarnings(); } @SneakyThrows public void testFilteredSearchAdvanceSetting_whenValuesProvidedByUsers_thenValidateSameValues() { - int userDefinedPctThreshold = 20; int userDefinedThreshold = 1000; - int userDefinedPctThresholdMinValue = 0; int userDefinedThresholdMinValue = 0; Node mockNode = createMockNode(Collections.emptyMap()); mockNode.start(); @@ -108,7 +103,6 @@ public void testFilteredSearchAdvanceSetting_whenValuesProvidedByUsers_thenValid final Settings filteredSearchAdvanceSettings = Settings.builder() .put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, userDefinedThreshold) - .put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT, userDefinedPctThreshold) .build(); mockNode.client() @@ -117,40 +111,23 @@ public void testFilteredSearchAdvanceSetting_whenValuesProvidedByUsers_thenValid .updateSettings(new UpdateSettingsRequest(filteredSearchAdvanceSettings, INDEX_NAME)) .actionGet(); - int filteredSearchThresholdPct = KNNSettings.getFilteredExactSearchThresholdPct(INDEX_NAME); int filteredSearchThreshold = KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME); // validate if we are able to set MinValues for the setting final Settings filteredSearchAdvanceSettingsWithMinValues = Settings.builder() - 
.put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, userDefinedThresholdMinValue) - .put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT, userDefinedPctThresholdMinValue) - .build(); + .put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, userDefinedThresholdMinValue) + .build(); mockNode.client() - .admin() - .indices() - .updateSettings(new UpdateSettingsRequest(filteredSearchAdvanceSettingsWithMinValues, INDEX_NAME)) - .actionGet(); + .admin() + .indices() + .updateSettings(new UpdateSettingsRequest(filteredSearchAdvanceSettingsWithMinValues, INDEX_NAME)) + .actionGet(); - int filteredSearchThresholdPctMinValue = KNNSettings.getFilteredExactSearchThresholdPct(INDEX_NAME); int filteredSearchThresholdMinValue = KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME); - // Validate if less than MinValues are set then Exception Happens - final Settings filteredSearchAdvanceSettingsWithLessThanMinValues = Settings.builder() - .put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, -1) - .put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT, -1) - .build(); - - Assert.assertThrows(IllegalArgumentException.class, () -> mockNode.client() - .admin() - .indices() - .updateSettings(new UpdateSettingsRequest(filteredSearchAdvanceSettingsWithLessThanMinValues, INDEX_NAME)) - .actionGet()); - mockNode.close(); - assertEquals(userDefinedPctThreshold, filteredSearchThresholdPct); assertEquals(userDefinedThreshold, filteredSearchThreshold); - assertEquals(userDefinedPctThresholdMinValue, filteredSearchThresholdPctMinValue); assertEquals(userDefinedThresholdMinValue, filteredSearchThresholdMinValue); assertWarnings(); } diff --git a/src/test/java/org/opensearch/knn/index/query/KNNWeightTests.java b/src/test/java/org/opensearch/knn/index/query/KNNWeightTests.java index 81c1d16e7..dcc836b94 100644 --- a/src/test/java/org/opensearch/knn/index/query/KNNWeightTests.java +++ b/src/test/java/org/opensearch/knn/index/query/KNNWeightTests.java @@ -125,7 
+125,6 @@ public static void setUpClass() throws Exception { @Before public void setupBeforeTest() { knnSettingsMockedStatic.when(() -> KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME)).thenReturn(0); - knnSettingsMockedStatic.when(() -> KNNSettings.getFilteredExactSearchThresholdPct(INDEX_NAME)).thenReturn(0); } @SneakyThrows @@ -471,7 +470,6 @@ public void testANNWithFilterQuery_whenExactSearch_thenSuccess() { @SneakyThrows public void testANNWithFilterQuery_whenExactSearchViaThresholdSetting_thenSuccess() { knnSettingsMockedStatic.when(() -> KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME)).thenReturn(10); - knnSettingsMockedStatic.when(() -> KNNSettings.getFilteredExactSearchThresholdPct(INDEX_NAME)).thenReturn(10); float[] vector = new float[] { 0.1f, 0.3f }; int k = 1; final int[] filterDocIds = new int[] { 0, 1, 2, 3, 4, 5 }; @@ -487,7 +485,6 @@ public void testANNWithFilterQuery_whenExactSearchViaThresholdSetting_thenSucces when(filterScorer.iterator()).thenReturn(DocIdSetIterator.all(filterDocIds.length)); - final KNNQuery query = new KNNQuery(FIELD_NAME, QUERY_VECTOR, k, INDEX_NAME, FILTER_QUERY); final KNNWeight knnWeight = new KNNWeight(query, 0.0f, filterQueryWeight);