Skip to content

Commit

Permalink
Improved the logic to switch to exact search for restrictive filters …
Browse files Browse the repository at this point in the history
…search.

This change includes:
* Adding two advanced k-NN index settings that let users tune when a filtered search switches to exact search.

Signed-off-by: Navneet Verma <[email protected]>
  • Loading branch information
navneet1v committed Aug 23, 2023
1 parent 3df8308 commit 2a0a21b
Show file tree
Hide file tree
Showing 4 changed files with 261 additions and 8 deletions.
49 changes: 48 additions & 1 deletion src/main/java/org/opensearch/knn/index/KNNSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ public class KNNSettings {
public static final String MODEL_INDEX_NUMBER_OF_SHARDS = "knn.model.index.number_of_shards";
public static final String MODEL_INDEX_NUMBER_OF_REPLICAS = "knn.model.index.number_of_replicas";
public static final String MODEL_CACHE_SIZE_LIMIT = "knn.model.cache.size.limit";
public static final String ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD = "index.knn.advanced.filtered_exact_search_threshold";
public static final String ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT = "index.knn.advanced.filtered_exact_search_threshold_pct";

/**
* Default setting values
Expand All @@ -87,6 +89,9 @@ public class KNNSettings {
public static final Integer KNN_MAX_MODEL_CACHE_SIZE_LIMIT_PERCENTAGE = 25; // Model cache limit cannot exceed 25% of the JVM heap
public static final String KNN_DEFAULT_MEMORY_CIRCUIT_BREAKER_LIMIT = "50%";

public static final Integer ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE = 2000;
public static final Integer ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_DEFAULT_VALUE = 10;

/**
* Settings Definition
*/
Expand Down Expand Up @@ -154,6 +159,22 @@ public class KNNSettings {
Setting.Property.Dynamic
);

/**
 * Integer setting registered under the key {@link #ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD}.
 * Falls back to {@link #ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE} when not set and
 * declares 0 as its minimum value. It is declared with the IndexScope and Dynamic properties.
 */
public static final Setting<Integer> ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING = Setting.intSetting(
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD,
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE,
0, // minimum accepted value
IndexScope,
Setting.Property.Dynamic
);

/**
 * Integer setting registered under the key {@link #ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT}.
 * Falls back to {@link #ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_DEFAULT_VALUE} when not set and
 * declares 0 as its minimum value. It is declared with the IndexScope and Dynamic properties.
 *
 * NOTE(review): no maximum is declared here, so values above 100 appear to be accepted even though the
 * key name suggests a percentage - confirm whether an upper bound is intended.
 */
public static final Setting<Integer> ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_SETTING = Setting.intSetting(
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT,
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_DEFAULT_VALUE,
0, // minimum accepted value
IndexScope,
Setting.Property.Dynamic
);

public static final Setting<ByteSizeValue> MODEL_CACHE_SIZE_LIMIT_SETTING = new Setting<>(
MODEL_CACHE_SIZE_LIMIT,
percentageAsString(KNN_DEFAULT_MODEL_CACHE_SIZE_LIMIT_PERCENTAGE),
Expand Down Expand Up @@ -323,6 +344,14 @@ private Setting<?> getSetting(String key) {
return KNN_ALGO_PARAM_INDEX_THREAD_QTY_SETTING;
}

if (ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD.equals(key)) {
return ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING;
}

if (ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT.equals(key)) {
return ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_SETTING;
}

throw new IllegalArgumentException("Cannot find setting by key [" + key + "]");
}

Expand All @@ -338,7 +367,9 @@ public List<Setting<?>> getSettings() {
IS_KNN_INDEX_SETTING,
MODEL_INDEX_NUMBER_OF_SHARDS_SETTING,
MODEL_INDEX_NUMBER_OF_REPLICAS_SETTING,
MODEL_CACHE_SIZE_LIMIT_SETTING
MODEL_CACHE_SIZE_LIMIT_SETTING,
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING,
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_SETTING
);
return Stream.concat(settings.stream(), dynamicCacheSettings.values().stream()).collect(Collectors.toList());
}
Expand All @@ -359,6 +390,22 @@ public static double getCircuitBreakerUnsetPercentage() {
return KNNSettings.state().getSettingValue(KNNSettings.KNN_CIRCUIT_BREAKER_UNSET_PERCENTAGE);
}

/**
 * Looks up the filtered exact search threshold stored in the settings of the given index.
 *
 * @param indexName name of the index whose metadata settings are read from the cluster state
 * @return the value stored under {@link #ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD}, or
 *         {@link #ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE} when the index does not set it
 */
public static int getFilteredExactSearchThreshold(final String indexName) {
    final int configuredThreshold = KNNSettings.state().clusterService.state()
        .getMetadata()
        .index(indexName)
        .getSettings()
        .getAsInt(ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE);
    return configuredThreshold;
}

/**
 * Looks up the filtered exact search percentage threshold stored in the settings of the given index.
 *
 * @param indexName name of the index whose metadata settings are read from the cluster state
 * @return the value stored under {@link #ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT}, or
 *         {@link #ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_DEFAULT_VALUE} when the index does not set it
 */
public static int getFilteredExactSearchThresholdPct(final String indexName) {
    final int configuredPct = KNNSettings.state().clusterService.state()
        .getMetadata()
        .index(indexName)
        .getSettings()
        .getAsInt(ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT, ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_DEFAULT_VALUE);
    return configuredPct;
}

public void initialize(Client client, ClusterService clusterService) {
this.client = client;
this.clusterService = clusterService;
Expand Down
44 changes: 41 additions & 3 deletions src/main/java/org/opensearch/knn/index/query/KNNWeight.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.opensearch.knn.common.KNNConstants;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.codec.util.KNNVectorSerializer;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;
Expand Down Expand Up @@ -115,13 +116,16 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
* . Hence, if filtered results are less than K and filter query is present we should shift to exact search.
* This improves the recall.
*/
if (filterWeight != null && filterIdsArray.length <= knnQuery.getK()) {
if (filterWeight != null && canDoExactSearch(filterIdsArray.length, getTotalDocsInSegment(context))) {
docIdsToScoreMap.putAll(doExactSearch(context, filterIdsArray));
} else {
final Map<Integer, Float> annResults = doANNSearch(context, filterIdsArray);
Map<Integer, Float> annResults = doANNSearch(context, filterIdsArray);
if (annResults == null) {
return null;
}
if (canDoExactSearchAfterANNSearch(filterIdsArray.length, annResults.size())) {
annResults = doExactSearch(context, filterIdsArray);
}
docIdsToScoreMap.putAll(annResults);
}
if (docIdsToScoreMap.isEmpty()) {
Expand Down Expand Up @@ -170,7 +174,6 @@ private int[] getFilterIdsArray(final LeafReaderContext context) throws IOExcept
if (docId == DocIdSetIterator.NO_MORE_DOCS || docId + 1 == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
log.debug("Docs in filtered docs id set is : {}", docId);
filteredIds[filteredIdsIndex] = docId;
filteredIdsIndex++;
docId++;
Expand Down Expand Up @@ -369,4 +372,39 @@ private SpaceType getSpaceType(final FieldInfo fieldInfo) {
String.format(Locale.ROOT, "Unable to find the Space Type from Field Info attribute for field %s", fieldInfo.getName())
);
}

/**
 * Decides whether a filtered query should go straight to exact search instead of running ANN search.
 * Exact search is chosen when either:
 * <ul>
 *   <li>the filter matched no more than K documents, or</li>
 *   <li>the filter matched no more than the index's filtered exact search threshold AND the matched
 *       documents make up no more than the index's threshold percentage of {@code searchableDocs}.</li>
 * </ul>
 *
 * @param filterIdsCount number of document ids that passed the filter
 * @param searchableDocs number of documents the filtered count is compared against
 * @return true if exact search should be used
 */
private boolean canDoExactSearch(final int filterIdsCount, final int searchableDocs) {
    // Read each index setting once; every lookup goes through the cluster state metadata.
    final String indexName = knnQuery.getIndexName();
    final int exactSearchThreshold = KNNSettings.getFilteredExactSearchThreshold(indexName);
    final int exactSearchThresholdPct = KNNSettings.getFilteredExactSearchThresholdPct(indexName);

    // This runs from scorer() for every leaf context of every filtered query, so keep it at debug level
    // to avoid flooding the logs on the search path.
    log.debug(
        "Info for doing exact search Live Docs: {}, filterIdsLength : {}, Threshold value: {} , Threshold %age : {}",
        searchableDocs,
        filterIdsCount,
        exactSearchThreshold,
        exactSearchThresholdPct
    );

    // See https://github.com/opensearch-project/k-NN/issues/1049 for more details on this logic.
    if (filterIdsCount <= knnQuery.getK()) {
        return true;
    }
    if (filterIdsCount > exactSearchThreshold) {
        return false;
    }
    final float filteredPct = ((float) filterIdsCount / (float) searchableDocs) * 100;
    return filteredPct <= (float) exactSearchThresholdPct;
}

/**
 * Tells whether an exact search has to be run after a filtered ANN search. That is the case when a filter
 * is present and matched at least K documents, yet the ANN search did not yield K nearest neighbors.
 *
 * @param filterIdsCount count of filtered doc ids
 * @param annResultCount count of nearest neighbors returned by the filtered ANN search
 * @return boolean - true if exact search needs to be done after the ANN search
 */
private boolean canDoExactSearchAfterANNSearch(final int filterIdsCount, final int annResultCount) {
    // Without a filter there is nothing to fall back to.
    if (filterWeight == null) {
        return false;
    }
    // The filter matched fewer than K docs.
    if (filterIdsCount < knnQuery.getK()) {
        return false;
    }
    // ANN came back short of K even though enough filtered docs exist.
    return annResultCount < knnQuery.getK();
}

/**
 * Returns the number of live (non-deleted) documents in the segment behind the given context.
 *
 * @param context leaf reader context of the segment being searched
 * @return count of live documents in the segment
 */
private int getTotalDocsInSegment(final LeafReaderContext context) {
    // numDocs() is the count of live documents. The length() of the live-docs bitset is the size of the
    // bitset (one bit per doc id, i.e. maxDoc), not the number of set bits, so it would still include
    // deleted documents. When the segment has no deletions numDocs() equals maxDoc().
    return context.reader().numDocs();
}
}
85 changes: 85 additions & 0 deletions src/test/java/org/opensearch/knn/index/KNNSettingsTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
package org.opensearch.knn.index;

import lombok.SneakyThrows;
import org.junit.Assert;
import org.opensearch.action.admin.cluster.state.ClusterStateRequest;
import org.opensearch.action.admin.indices.create.CreateIndexRequest;
import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
import org.opensearch.cluster.ClusterName;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.network.NetworkModule;
Expand Down Expand Up @@ -33,6 +37,8 @@

public class KNNSettingsTests extends KNNTestCase {

private static final String INDEX_NAME = "myindex";

@SneakyThrows
public void testGetSettingValueFromConfig() {
long expectedKNNCircuitBreakerLimit = 13;
Expand Down Expand Up @@ -70,6 +76,85 @@ public void testGetSettingValueDefault() {
assertWarnings();
}

/**
 * Verifies that both filtered exact search settings resolve to their default values for an index that
 * was created without setting them.
 */
@SneakyThrows
public void testFilteredSearchAdvanceSetting_whenNoValuesProvidedByUsers_thenDefaultSettingsUsed() {
    // try-with-resources closes the node even when a call or assertion below fails, so a failing test
    // does not leave a running node behind.
    try (Node mockNode = createMockNode(Collections.emptyMap())) {
        mockNode.start();
        ClusterService clusterService = mockNode.injector().getInstance(ClusterService.class);
        mockNode.client().admin().cluster().state(new ClusterStateRequest()).actionGet();
        mockNode.client().admin().indices().create(new CreateIndexRequest(INDEX_NAME)).actionGet();
        KNNSettings.state().setClusterService(clusterService);

        int filteredSearchThresholdPct = KNNSettings.getFilteredExactSearchThresholdPct(INDEX_NAME);
        int filteredSearchThreshold = KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME);

        assertEquals((int) KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT_DEFAULT_VALUE, filteredSearchThresholdPct);
        assertEquals((int) KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE, filteredSearchThreshold);
    }
    assertWarnings();
}

/**
 * Verifies that user-provided values for both filtered exact search settings are read back unchanged,
 * that the minimum value (0) is accepted, and that values below the minimum are rejected.
 */
@SneakyThrows
public void testFilteredSearchAdvanceSetting_whenValuesProvidedByUsers_thenValidateSameValues() {
    int userDefinedPctThreshold = 20;
    int userDefinedThreshold = 1000;
    int userDefinedPctThresholdMinValue = 0;
    int userDefinedThresholdMinValue = 0;

    // try-with-resources closes the node even when a call or assertion below fails, so a failing test
    // does not leave a running node behind.
    try (Node mockNode = createMockNode(Collections.emptyMap())) {
        mockNode.start();
        ClusterService clusterService = mockNode.injector().getInstance(ClusterService.class);
        mockNode.client().admin().cluster().state(new ClusterStateRequest()).actionGet();
        mockNode.client().admin().indices().create(new CreateIndexRequest(INDEX_NAME)).actionGet();
        KNNSettings.state().setClusterService(clusterService);

        // Apply user-defined values and read them back.
        final Settings filteredSearchAdvanceSettings = Settings.builder()
            .put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, userDefinedThreshold)
            .put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT, userDefinedPctThreshold)
            .build();

        mockNode.client()
            .admin()
            .indices()
            .updateSettings(new UpdateSettingsRequest(filteredSearchAdvanceSettings, INDEX_NAME))
            .actionGet();

        int filteredSearchThresholdPct = KNNSettings.getFilteredExactSearchThresholdPct(INDEX_NAME);
        int filteredSearchThreshold = KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME);

        // validate if we are able to set MinValues for the setting
        final Settings filteredSearchAdvanceSettingsWithMinValues = Settings.builder()
            .put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, userDefinedThresholdMinValue)
            .put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT, userDefinedPctThresholdMinValue)
            .build();

        mockNode.client()
            .admin()
            .indices()
            .updateSettings(new UpdateSettingsRequest(filteredSearchAdvanceSettingsWithMinValues, INDEX_NAME))
            .actionGet();

        int filteredSearchThresholdPctMinValue = KNNSettings.getFilteredExactSearchThresholdPct(INDEX_NAME);
        int filteredSearchThresholdMinValue = KNNSettings.getFilteredExactSearchThreshold(INDEX_NAME);

        // Validate that setting values below the minimum raises an exception
        final Settings filteredSearchAdvanceSettingsWithLessThanMinValues = Settings.builder()
            .put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, -1)
            .put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_PCT, -1)
            .build();

        Assert.assertThrows(
            IllegalArgumentException.class,
            () -> mockNode.client()
                .admin()
                .indices()
                .updateSettings(new UpdateSettingsRequest(filteredSearchAdvanceSettingsWithLessThanMinValues, INDEX_NAME))
                .actionGet()
        );

        assertEquals(userDefinedPctThreshold, filteredSearchThresholdPct);
        assertEquals(userDefinedThreshold, filteredSearchThreshold);
        assertEquals(userDefinedPctThresholdMinValue, filteredSearchThresholdPctMinValue);
        assertEquals(userDefinedThresholdMinValue, filteredSearchThresholdMinValue);
    }
    assertWarnings();
}

private Node createMockNode(Map<String, Object> configSettings) throws IOException {
Path configDir = createTempDir();
File configFile = configDir.resolve("opensearch.yml").toFile();
Expand Down
Loading

0 comments on commit 2a0a21b

Please sign in to comment.