Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added max distance computation logic to enhance the switch to exact search in filtered Nearest Neighbor Search. #1066

Merged
merged 1 commit into from
Aug 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Enhancements
* Enabled the IVF algorithm to work with Filters of K-NN Query. [#1013](https://github.com/opensearch-project/k-NN/pull/1013)
* Improved the logic to switch to exact search for restrictive filters search for better recall. [#1059](https://github.com/opensearch-project/k-NN/pull/1059)
* Added max distance computation logic to enhance the switch to exact search in filtered Nearest Neighbor Search. [#1066](https://github.com/opensearch-project/k-NN/pull/1066)
### Bug Fixes
### Infrastructure
### Documentation
Expand Down
5 changes: 5 additions & 0 deletions src/main/java/org/opensearch/knn/common/KNNConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,9 @@ public class KNNConstants {

// API Constants
public static final String CLEAR_CACHE = "clear_cache";

// Filtered Search Constants
// Budget of distance computations (filtered document count * query vector dimension) that
// filtered k-NN search compares against when no explicit exact-search threshold is configured.
// Refer to this GitHub issue comment for details on how this value was chosen:
// https://github.com/opensearch-project/k-NN/issues/1049#issuecomment-1694741092
// Declared final so the shared constant cannot be reassigned at runtime.
public static final int MAX_DISTANCE_COMPUTATIONS = 2048000;
}
30 changes: 3 additions & 27 deletions src/main/java/org/opensearch/knn/index/KNNSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ public class KNNSettings {
public static final String MODEL_INDEX_NUMBER_OF_REPLICAS = "knn.model.index.number_of_replicas";
public static final String MODEL_CACHE_SIZE_LIMIT = "knn.model.cache.size.limit";
public static final String ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD = "index.knn.advanced.filtered_exact_search_threshold";
public static final String ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT = "index.knn.advanced.filtered_exact_search_threshold_pct";

/**
* Default setting values
Expand All @@ -89,8 +88,7 @@ public class KNNSettings {
public static final Integer KNN_MAX_MODEL_CACHE_SIZE_LIMIT_PERCENTAGE = 25; // Model cache limit cannot exceed 25% of the JVM heap
public static final String KNN_DEFAULT_MEMORY_CIRCUIT_BREAKER_LIMIT = "50%";

public static final Integer ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE = 2000;
public static final Integer ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_DEFAULT_VALUE = 10;
navneet1v marked this conversation as resolved.
Show resolved Hide resolved
public static final Integer ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE = -1;

/**
* Settings Definition
Expand Down Expand Up @@ -162,15 +160,6 @@ public class KNNSettings {
public static final Setting<Integer> ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING = Setting.intSetting(
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD,
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE,
0,
IndexScope,
Setting.Property.Dynamic
);

public static final Setting<Integer> ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_SETTING = Setting.intSetting(
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT,
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_DEFAULT_VALUE,
0,
IndexScope,
Setting.Property.Dynamic
);
Expand Down Expand Up @@ -348,10 +337,6 @@ private Setting<?> getSetting(String key) {
return ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING;
}

if (ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT.equals(key)) {
return ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_SETTING;
}

throw new IllegalArgumentException("Cannot find setting by key [" + key + "]");
}

Expand All @@ -368,8 +353,7 @@ public List<Setting<?>> getSettings() {
MODEL_INDEX_NUMBER_OF_SHARDS_SETTING,
MODEL_INDEX_NUMBER_OF_REPLICAS_SETTING,
MODEL_CACHE_SIZE_LIMIT_SETTING,
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING,
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_SETTING
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING
);
return Stream.concat(settings.stream(), dynamicCacheSettings.values().stream()).collect(Collectors.toList());
}
Expand All @@ -390,22 +374,14 @@ public static double getCircuitBreakerUnsetPercentage() {
return KNNSettings.state().getSettingValue(KNNSettings.KNN_CIRCUIT_BREAKER_UNSET_PERCENTAGE);
}

public static int getFilteredExactSearchThreshold(final String indexName) {
public static Integer getFilteredExactSearchThreshold(final String indexName) {
return KNNSettings.state().clusterService.state()
.getMetadata()
.index(indexName)
.getSettings()
.getAsInt(ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE);
}

public static int getFilteredExactSearchThresholdPct(final String indexName) {
return KNNSettings.state().clusterService.state()
.getMetadata()
.index(indexName)
.getSettings()
.getAsInt(ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT, ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_DEFAULT_VALUE);
}

public void initialize(Client client, ClusterService clusterService) {
this.client = client;
this.clusterService = clusterService;
Expand Down
30 changes: 22 additions & 8 deletions src/main/java/org/opensearch/knn/index/query/KNNWeight.java
Original file line number Diff line number Diff line change
Expand Up @@ -375,18 +375,32 @@ private SpaceType getSpaceType(final FieldInfo fieldInfo) {

private boolean canDoExactSearch(final int filterIdsCount, final int searchableDocs) {
log.debug(
"Info for doing exact search Live Docs: {}, filterIdsLength : {}, Threshold value: {} , Threshold %age : {}",
"Info for doing exact search Live Docs: {}, filterIdsLength : {}, Threshold value: {}",
searchableDocs,
filterIdsCount,
KNNSettings.getFilteredExactSearchThreshold(knnQuery.getIndexName()),
KNNSettings.getFilteredExactSearchThresholdPct(knnQuery.getIndexName())
KNNSettings.getFilteredExactSearchThreshold(knnQuery.getIndexName())
);
int filterThresholdValue = KNNSettings.getFilteredExactSearchThreshold(knnQuery.getIndexName());
// Refer this GitHub around more details https://github.com/opensearch-project/k-NN/issues/1049 on the logic
return filterIdsCount <= knnQuery.getK()
|| (filterIdsCount <= KNNSettings.getFilteredExactSearchThreshold(knnQuery.getIndexName())
&& (((float) filterIdsCount / (float) searchableDocs) * 100) <= (float) KNNSettings.getFilteredExactSearchThresholdPct(
knnQuery.getIndexName()
));
if (filterIdsCount <= knnQuery.getK()) {
return true;
}
// See user has defined Exact Search filtered threshold. if yes, then use that setting.
if (isExactSearchThresholdSettingSet(filterThresholdValue)) {
return filterThresholdValue >= filterIdsCount;
}
// if no setting is set, then use the default max distance computation value to see if we can do exact search.
return KNNConstants.MAX_DISTANCE_COMPUTATIONS <= filterIdsCount * knnQuery.getQueryVector().length;
}

/**
 * Reports whether the index setting {@link KNNSettings#ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD} carries a
 * value other than its default. The default value is treated as the marker for "not configured".
 *
 * @param filterThresholdValue value read for the index setting
 *                             {@link KNNSettings#ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING}
 * @return true when the value differs from
 *         {@link KNNSettings#ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE}, false otherwise
 */
private boolean isExactSearchThresholdSettingSet(int filterThresholdValue) {
    final int defaultThreshold = KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE;
    final boolean isDefault = filterThresholdValue == defaultThreshold;
    return !isDefault;
}

/**
Expand Down
39 changes: 8 additions & 31 deletions src/test/java/org/opensearch/knn/index/KNNSettingsTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
package org.opensearch.knn.index;

import lombok.SneakyThrows;
import org.junit.Assert;
import org.opensearch.action.admin.cluster.state.ClusterStateRequest;
import org.opensearch.action.admin.indices.create.CreateIndexRequest;
import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
Expand Down Expand Up @@ -85,19 +84,15 @@ public void testFilteredSearchAdvanceSetting_whenNoValuesProvidedByUsers_thenDef
mockNode.client().admin().indices().create(new CreateIndexRequest(INDEX_NAME)).actionGet();
KNNSettings.state().setClusterService(clusterService);

int filteredSearchThresholdPct = KNNSettings.getFilteredExactSearchThresholdPct(INDEX_NAME);
int filteredSearchThreshold = KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME);
Integer filteredSearchThreshold = KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME);
mockNode.close();
assertEquals((int) KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_DEFAULT_VALUE, filteredSearchThresholdPct);
assertEquals((int) KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE, filteredSearchThreshold);
assertEquals(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE, filteredSearchThreshold);
assertWarnings();
}

@SneakyThrows
public void testFilteredSearchAdvanceSetting_whenValuesProvidedByUsers_thenValidateSameValues() {
int userDefinedPctThreshold = 20;
int userDefinedThreshold = 1000;
int userDefinedPctThresholdMinValue = 0;
int userDefinedThresholdMinValue = 0;
Node mockNode = createMockNode(Collections.emptyMap());
mockNode.start();
Expand All @@ -108,7 +103,6 @@ public void testFilteredSearchAdvanceSetting_whenValuesProvidedByUsers_thenValid

final Settings filteredSearchAdvanceSettings = Settings.builder()
.put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, userDefinedThreshold)
.put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT, userDefinedPctThreshold)
.build();

mockNode.client()
Expand All @@ -117,40 +111,23 @@ public void testFilteredSearchAdvanceSetting_whenValuesProvidedByUsers_thenValid
.updateSettings(new UpdateSettingsRequest(filteredSearchAdvanceSettings, INDEX_NAME))
.actionGet();

int filteredSearchThresholdPct = KNNSettings.getFilteredExactSearchThresholdPct(INDEX_NAME);
int filteredSearchThreshold = KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME);

// validate if we are able to set MinValues for the setting
final Settings filteredSearchAdvanceSettingsWithMinValues = Settings.builder()
.put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, userDefinedThresholdMinValue)
.put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT, userDefinedPctThresholdMinValue)
.build();
.put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, userDefinedThresholdMinValue)
.build();

mockNode.client()
.admin()
.indices()
.updateSettings(new UpdateSettingsRequest(filteredSearchAdvanceSettingsWithMinValues, INDEX_NAME))
.actionGet();
.admin()
.indices()
.updateSettings(new UpdateSettingsRequest(filteredSearchAdvanceSettingsWithMinValues, INDEX_NAME))
.actionGet();

int filteredSearchThresholdPctMinValue = KNNSettings.getFilteredExactSearchThresholdPct(INDEX_NAME);
int filteredSearchThresholdMinValue = KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME);

// Validate if less than MinValues are set then Exception Happens
final Settings filteredSearchAdvanceSettingsWithLessThanMinValues = Settings.builder()
.put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, -1)
.put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT, -1)
.build();

Assert.assertThrows(IllegalArgumentException.class, () -> mockNode.client()
.admin()
.indices()
.updateSettings(new UpdateSettingsRequest(filteredSearchAdvanceSettingsWithLessThanMinValues, INDEX_NAME))
.actionGet());

mockNode.close();
assertEquals(userDefinedPctThreshold, filteredSearchThresholdPct);
assertEquals(userDefinedThreshold, filteredSearchThreshold);
assertEquals(userDefinedPctThresholdMinValue, filteredSearchThresholdPctMinValue);
assertEquals(userDefinedThresholdMinValue, filteredSearchThresholdMinValue);
assertWarnings();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ public static void setUpClass() throws Exception {
@Before
public void setupBeforeTest() {
knnSettingsMockedStatic.when(() -> KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME)).thenReturn(0);
knnSettingsMockedStatic.when(() -> KNNSettings.getFilteredExactSearchThresholdPct(INDEX_NAME)).thenReturn(0);
}

@SneakyThrows
Expand Down Expand Up @@ -471,7 +470,6 @@ public void testANNWithFilterQuery_whenExactSearch_thenSuccess() {
@SneakyThrows
public void testANNWithFilterQuery_whenExactSearchViaThresholdSetting_thenSuccess() {
knnSettingsMockedStatic.when(() -> KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME)).thenReturn(10);
knnSettingsMockedStatic.when(() -> KNNSettings.getFilteredExactSearchThresholdPct(INDEX_NAME)).thenReturn(10);
float[] vector = new float[] { 0.1f, 0.3f };
int k = 1;
final int[] filterDocIds = new int[] { 0, 1, 2, 3, 4, 5 };
Expand All @@ -487,7 +485,6 @@ public void testANNWithFilterQuery_whenExactSearchViaThresholdSetting_thenSucces

when(filterScorer.iterator()).thenReturn(DocIdSetIterator.all(filterDocIds.length));


final KNNQuery query = new KNNQuery(FIELD_NAME, QUERY_VECTOR, k, INDEX_NAME, FILTER_QUERY);

final KNNWeight knnWeight = new KNNWeight(query, 0.0f, filterQueryWeight);
Expand Down