Skip to content

Commit

Permalink
Tuned default values for ef_search and ef_construction for better ind…
Browse files Browse the repository at this point in the history
…exing and search performance

Signed-off-by: Navneet Verma <[email protected]>
  • Loading branch information
navneet1v committed Dec 20, 2023
1 parent 083ea2b commit af5c833
Show file tree
Hide file tree
Showing 30 changed files with 620 additions and 85 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* Add parent join support for lucene knn [#1182](https://github.com/opensearch-project/k-NN/pull/1182)
### Enhancements
* Increase Lucene max dimension limit to 16,000 [#1346](https://github.com/opensearch-project/k-NN/pull/1346)
* Tuned default values for ef_search and ef_construction for better indexing and search performance for vector search [#1353](https://github.com/opensearch-project/k-NN/pull/1353)
### Bug Fixes
* Fix use-after-free case on nmslib search path [#1305](https://github.com/opensearch-project/k-NN/pull/1305)
* Allow nested knn field mapping when train model [#1318](https://github.com/opensearch-project/k-NN/pull/1318)
* Properly designate model state for actively training models when nodes crash or leave cluster [#1317](https://github.com/opensearch-project/k-NN/pull/1317)

>>>>>>> main
### Infrastructure
* Upgrade gradle to 8.4 [1289](https://github.com/opensearch-project/k-NN/pull/1289)
### Documentation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@

package org.opensearch.knn.bwc;

import org.junit.Assert;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.knn.index.SpaceType;

import java.util.Map;

import static org.opensearch.knn.TestUtils.KNN_ALGO_PARAM_EF_CONSTRUCTION_MIN_VALUE;
import static org.opensearch.knn.TestUtils.KNN_ALGO_PARAM_M_MIN_VALUE;
import static org.opensearch.knn.TestUtils.KNN_VECTOR;
Expand All @@ -16,8 +19,13 @@
import static org.opensearch.knn.TestUtils.VECTOR_TYPE;
import static org.opensearch.knn.common.KNNConstants.DIMENSION;
import static org.opensearch.knn.common.KNNConstants.FAISS_NAME;
import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE;
import static org.opensearch.knn.common.KNNConstants.KNN_METHOD;
import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_SEARCH;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_M;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_SPACE_TYPE;
import static org.opensearch.knn.common.KNNConstants.NAME;
import static org.opensearch.knn.common.KNNConstants.PARAMETERS;

Expand All @@ -28,6 +36,7 @@ public class IndexingIT extends AbstractRestartUpgradeTestCase {
private static final int K = 5;
private static final int M = 50;
private static final int EF_CONSTRUCTION = 1024;
private static final int EF_SEARCH = 200;
private static final int NUM_DOCS = 10;
private static int QUERY_COUNT = 0;

Expand Down Expand Up @@ -78,20 +87,43 @@ public void testKNNIndexDefaultMethodFieldMapping() throws Exception {
}

// Custom Method Field Mapping
// space_type : "inner_product", engine : "faiss", m : 50, ef_construction : 1024
// space_type : "inner_product", engine : "faiss", m : 50, ef_construction : 1024, ef_search : 200
public void testKNNIndexCustomMethodFieldMapping() throws Exception {
if (isRunningAgainstOldCluster()) {
createKnnIndex(
testIndex,
getKNNDefaultIndexSettings(),
createKNNIndexCustomMethodFieldMapping(TEST_FIELD, DIMENSIONS, SpaceType.INNER_PRODUCT, FAISS_NAME, M, EF_CONSTRUCTION)
createKNNIndexCustomMethodFieldMapping(
TEST_FIELD,
DIMENSIONS,
SpaceType.INNER_PRODUCT,
FAISS_NAME,
M,
EF_CONSTRUCTION,
EF_SEARCH
)
);
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
} else {
validateCustomMethodFieldMappingAfterUpgrade();
validateKNNIndexingOnUpgrade();
}
}

private void validateCustomMethodFieldMappingAfterUpgrade() throws Exception {
final Map<String, Object> indexMappings = getIndexMappingAsMap(testIndex);
final Map<String, Object> properties = (Map<String, Object>) indexMappings.get(PROPERTIES);
final Map<String, Object> knnMethod = ((Map<String, Object>) ((Map<String, Object>) properties.get(TEST_FIELD)).get(KNN_METHOD));
final Map<String, Object> methodParameters = (Map<String, Object>) knnMethod.get(PARAMETERS);

Assert.assertEquals(METHOD_HNSW, knnMethod.get(NAME));
Assert.assertEquals(SpaceType.INNER_PRODUCT.getValue(), knnMethod.get(METHOD_PARAMETER_SPACE_TYPE));
Assert.assertEquals(FAISS_NAME, knnMethod.get(KNN_ENGINE));
Assert.assertEquals(EF_CONSTRUCTION, ((Integer) methodParameters.get(METHOD_PARAMETER_EF_CONSTRUCTION)).intValue());
Assert.assertEquals(EF_SEARCH, ((Integer) methodParameters.get(METHOD_PARAMETER_EF_SEARCH)).intValue());
Assert.assertEquals(M, ((Integer) methodParameters.get(METHOD_PARAMETER_M)).intValue());
}

// test null parameters
public void testNullParametersOnUpgrade() throws Exception {
if (isRunningAgainstOldCluster()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ public class IndexingIT extends AbstractRollingUpgradeTestCase {
private static final int K = 5;
private static final int NUM_DOCS = 10;

private static final String ALGO = "hnsw";

private static final String ENGINE = "faiss";

public void testKNNDefaultIndexSettings() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
switch (getClusterType()) {
Expand Down Expand Up @@ -46,6 +50,82 @@ public void testKNNDefaultIndexSettings() throws Exception {
}
}

public void testKNNIndexCreation_withLegacyMapper() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
final String firstMixRoundIndex = testIndex + "first-mix-round";
final String otherMixRoundIndex = testIndex + "other-mix-round";
final String upgradedIndex = testIndex + "upgraded";
switch (getClusterType()) {
case OLD:
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
int docIdOld = 0;
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS);
break;
case MIXED:
if (isFirstMixedRound()) {
docIdOld = 0;
createKnnIndex(firstMixRoundIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(firstMixRoundIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS);
} else {
docIdOld = 0;
createKnnIndex(otherMixRoundIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(otherMixRoundIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS);
}
break;
case UPGRADED:
docIdOld = 0;
createKnnIndex(upgradedIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(upgradedIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS);

deleteKNNIndex(testIndex);
deleteKNNIndex(firstMixRoundIndex);
deleteKNNIndex(otherMixRoundIndex);
deleteKNNIndex(upgradedIndex);
}
}

public void testKNNIndexCreation_withMethodMapper() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
final String firstMixRoundIndex = testIndex + "first-mix-round";
final String otherMixRoundIndex = testIndex + "other-mix-round";
final String upgradedIndex = testIndex + "upgraded";
switch (getClusterType()) {
case OLD:
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS, ALGO, ENGINE));
int docIdOld = 0;
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS);
break;
case MIXED:
if (isFirstMixedRound()) {
docIdOld = 0;
createKnnIndex(
firstMixRoundIndex,
getKNNDefaultIndexSettings(),
createKnnIndexMapping(TEST_FIELD, DIMENSIONS, ALGO, ENGINE)
);
addKNNDocs(firstMixRoundIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS);
} else {
docIdOld = 0;
createKnnIndex(
otherMixRoundIndex,
getKNNDefaultIndexSettings(),
createKnnIndexMapping(TEST_FIELD, DIMENSIONS, ALGO, ENGINE)
);
addKNNDocs(otherMixRoundIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS);
}
break;
case UPGRADED:
docIdOld = 0;
createKnnIndex(upgradedIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS, ALGO, ENGINE));
addKNNDocs(upgradedIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS);

deleteKNNIndex(testIndex);
deleteKNNIndex(firstMixRoundIndex);
deleteKNNIndex(otherMixRoundIndex);
deleteKNNIndex(upgradedIndex);
}
}

// validation steps for indexing after upgrading each node from old version to new version
public void validateKNNIndexingOnUpgrade(int totalDocsCount, int docId) throws Exception {
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, totalDocsCount, K);
Expand Down
8 changes: 4 additions & 4 deletions src/main/java/org/opensearch/knn/index/KNNMethod.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public ValidationException validate(KNNMethodContext knnMethodContext) {
);
}

ValidationException methodValidation = methodComponent.validate(knnMethodContext.getMethodComponent());
ValidationException methodValidation = methodComponent.validate(knnMethodContext.getMethodComponentContext());
if (methodValidation != null) {
errorMessages.addAll(methodValidation.validationErrors());
}
Expand All @@ -84,7 +84,7 @@ public ValidationException validate(KNNMethodContext knnMethodContext) {
* @return true if training is required; false otherwise
*/
public boolean isTrainingRequired(KNNMethodContext knnMethodContext) {
return methodComponent.isTrainingRequired(knnMethodContext.getMethodComponent());
return methodComponent.isTrainingRequired(knnMethodContext.getMethodComponentContext());
}

/**
Expand All @@ -95,7 +95,7 @@ public boolean isTrainingRequired(KNNMethodContext knnMethodContext) {
* @return estimate overhead in KB
*/
public int estimateOverheadInKB(KNNMethodContext knnMethodContext, int dimension) {
return methodComponent.estimateOverheadInKB(knnMethodContext.getMethodComponent(), dimension);
return methodComponent.estimateOverheadInKB(knnMethodContext.getMethodComponentContext(), dimension);
}

/**
Expand All @@ -105,7 +105,7 @@ public int estimateOverheadInKB(KNNMethodContext knnMethodContext, int dimension
* @return KNNMethod as a map
*/
public Map<String, Object> getAsMap(KNNMethodContext knnMethodContext) {
Map<String, Object> parameterMap = new HashMap<>(methodComponent.getAsMap(knnMethodContext.getMethodComponent()));
Map<String, Object> parameterMap = new HashMap<>(methodComponent.getAsMap(knnMethodContext.getMethodComponentContext()));
parameterMap.put(KNNConstants.SPACE_TYPE, knnMethodContext.getSpaceType().getValue());
return parameterMap;
}
Expand Down
12 changes: 6 additions & 6 deletions src/main/java/org/opensearch/knn/index/KNNMethodContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public static synchronized KNNMethodContext getDefault() {
@NonNull
private final SpaceType spaceType;
@NonNull
private final MethodComponentContext methodComponent;
private final MethodComponentContext methodComponentContext;

/**
* Constructor from stream.
Expand All @@ -74,7 +74,7 @@ public static synchronized KNNMethodContext getDefault() {
public KNNMethodContext(StreamInput in) throws IOException {
this.knnEngine = KNNEngine.getEngine(in.readString());
this.spaceType = SpaceType.getSpace(in.readString());
this.methodComponent = new MethodComponentContext(in);
this.methodComponentContext = new MethodComponentContext(in);
}

/**
Expand Down Expand Up @@ -198,7 +198,7 @@ public static KNNMethodContext parse(Object in) {
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.field(KNN_ENGINE, knnEngine.getName());
builder.field(METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue());
builder = methodComponent.toXContent(builder, params);
builder = methodComponentContext.toXContent(builder, params);
return builder;
}

Expand All @@ -211,20 +211,20 @@ public boolean equals(Object obj) {
EqualsBuilder equalsBuilder = new EqualsBuilder();
equalsBuilder.append(knnEngine, other.knnEngine);
equalsBuilder.append(spaceType, other.spaceType);
equalsBuilder.append(methodComponent, other.methodComponent);
equalsBuilder.append(methodComponentContext, other.methodComponentContext);

return equalsBuilder.isEquals();
}

@Override
public int hashCode() {
return new HashCodeBuilder().append(knnEngine).append(spaceType).append(methodComponent).toHashCode();
return new HashCodeBuilder().append(knnEngine).append(spaceType).append(methodComponentContext).toHashCode();
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(knnEngine.getName());
out.writeString(spaceType.getValue());
this.methodComponent.writeTo(out);
this.methodComponentContext.writeTo(out);
}
}
17 changes: 10 additions & 7 deletions src/main/java/org/opensearch/knn/index/KNNSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.opensearch.OpenSearchParseException;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.core.action.ActionListener;
import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest;
import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse;
Expand All @@ -21,6 +22,7 @@
import org.opensearch.index.IndexModule;
import org.opensearch.knn.index.memory.NativeMemoryCacheManager;
import org.opensearch.knn.index.memory.NativeMemoryCacheManagerDto;
import org.opensearch.knn.index.util.IndexHyperParametersUtil;
import org.opensearch.monitor.jvm.JvmInfo;
import org.opensearch.monitor.os.OsProbe;

Expand Down Expand Up @@ -80,8 +82,8 @@ public class KNNSettings {
*/
public static final String INDEX_KNN_DEFAULT_SPACE_TYPE = "l2";
public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_M = 16;
public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH = 512;
public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION = 512;
public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH = 100;
public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION = 100;
public static final Integer KNN_DEFAULT_ALGO_PARAM_INDEX_THREAD_QTY = 1;
public static final Integer KNN_DEFAULT_CIRCUIT_BREAKER_UNSET_PERCENTAGE = 75;
public static final Integer KNN_DEFAULT_MODEL_CACHE_SIZE_LIMIT_PERCENTAGE = 10; // By default, set aside 10% of the JVM for the limit
Expand Down Expand Up @@ -447,11 +449,12 @@ public void onFailure(Exception e) {
* @return efSearch value
*/
public static int getEfSearchParam(String index) {
return KNNSettings.state().clusterService.state()
.getMetadata()
.index(index)
.getSettings()
.getAsInt(KNNSettings.KNN_ALGO_PARAM_EF_SEARCH, 512);
final IndexMetadata indexMetadata = KNNSettings.state().clusterService.state().getMetadata().index(index);
return indexMetadata.getSettings()
.getAsInt(
KNNSettings.KNN_ALGO_PARAM_EF_SEARCH,
IndexHyperParametersUtil.getHNSWEFSearchValue(indexMetadata.getCreationVersion())
);
}

public void setClusterService(ClusterService clusterService) {
Expand Down
17 changes: 16 additions & 1 deletion src/main/java/org/opensearch/knn/index/MethodComponent.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
package org.opensearch.knn.index;

import lombok.Getter;
import org.opensearch.Version;
import org.opensearch.common.TriFunction;
import org.opensearch.common.ValidationException;
import org.opensearch.knn.common.KNNConstants;
import org.opensearch.knn.index.util.IndexHyperParametersUtil;

import java.util.ArrayList;
import java.util.HashMap;
Expand Down Expand Up @@ -296,11 +298,24 @@ public static Map<String, Object> getParameterMapWithDefaultsAdded(
) {
Map<String, Object> parametersWithDefaultsMap = new HashMap<>();
Map<String, Object> userProvidedParametersMap = methodComponentContext.getParameters();
Version indexCreationVersion = methodComponentContext.getIndexVersion();
for (Parameter<?> parameter : methodComponent.getParameters().values()) {
if (methodComponentContext.getParameters().containsKey(parameter.getName())) {
parametersWithDefaultsMap.put(parameter.getName(), userProvidedParametersMap.get(parameter.getName()));
} else {
parametersWithDefaultsMap.put(parameter.getName(), parameter.getDefaultValue());
// Picking the right values for the parameters whose values are different based on different index
// created version.
if (parameter.getName().equals(KNNConstants.METHOD_PARAMETER_EF_SEARCH)) {
parametersWithDefaultsMap.put(parameter.getName(), IndexHyperParametersUtil.getHNSWEFSearchValue(indexCreationVersion));
} else if (parameter.getName().equals(KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION)) {
parametersWithDefaultsMap.put(
parameter.getName(),
IndexHyperParametersUtil.getHNSWEFConstructionValue(indexCreationVersion)
);
} else {
parametersWithDefaultsMap.put(parameter.getName(), parameter.getDefaultValue());
}

}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@

package org.opensearch.knn.index;

import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.Setter;
import org.opensearch.Version;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.common.io.stream.Writeable;
Expand All @@ -36,13 +38,17 @@
*
* Each component is composed of a name and a map of parameters.
*/
@AllArgsConstructor
@RequiredArgsConstructor
public class MethodComponentContext implements ToXContentFragment, Writeable {

@Getter
private final String name;
private final Map<String, Object> parameters;

@Getter
@Setter
private Version indexVersion;

/**
* Constructor from stream.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) {
String.format("Cannot read field type for field [%s] because mapper service is not available", field)
)
).fieldType(field);
var params = type.getKnnMethodContext().getMethodComponent().getParameters();
var params = type.getKnnMethodContext().getMethodComponentContext().getParameters();
int maxConnections = getMaxConnections(params);
int beamWidth = getBeamWidth(params);
log.debug(
Expand Down
Loading

0 comments on commit af5c833

Please sign in to comment.