Skip to content

Commit

Permalink
Refactor Around Mapper and Mapping (#1939)
Browse files Browse the repository at this point in the history
Refactors FieldMapper logic. It removes the LegacyFieldMapper and
replaces it with a FlatFieldMapper. The FlatFieldMapper's role is to
create fields that do not build ANN indices.

Additionally, it puts dimension, model_id, and knn_method_context in a
new KNNMappingConfig class and adds some safety checks around
accessing them. This should make calling logic easier to handle.

Lastly, it cleans up the parsing so that there isnt encoder parsing
directly in the KNNVectorFieldMapper.

Signed-off-by: John Mazanec <[email protected]>
(cherry picked from commit 2cd57e8)
  • Loading branch information
jmazanec15 authored and github-actions[bot] committed Aug 10, 2024
1 parent 20039d0 commit e5b20bb
Show file tree
Hide file tree
Showing 34 changed files with 1,465 additions and 976 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* Refactor method structure and definitions [#1920](https://github.com/opensearch-project/k-NN/pull/1920)
* Refactor KNNVectorFieldType from KNNVectorFieldMapper to a separate class for better readability. [#1931](https://github.com/opensearch-project/k-NN/pull/1931)
* Generalize lib interface to return context objects [#1925](https://github.com/opensearch-project/k-NN/pull/1925)
* Move k search k-NN query to re-write phase of vector search query for Native Engines [#1877](https://github.com/opensearch-project/k-NN/pull/1877)
* Move k search k-NN query to re-write phase of vector search query for Native Engines [#1877](https://github.com/opensearch-project/k-NN/pull/1877)
* Restructure mappers to better handle null cases and avoid branching in parsing [#1939](https://github.com/opensearch-project/k-NN/pull/1939)
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,21 @@ public void testKNNIndexDefaultLegacyFieldMapping() throws Exception {
}
}

// Ensure that when segments created with old mapping are forcemerged in new cluster, they
// succeed
public void testKNNIndexDefaultLegacyFieldMappingForceMerge() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);

if (isRunningAgainstOldCluster()) {
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, 100);
// Flush to ensure that index is not re-indexed when node comes back up
flush(testIndex, true);
} else {
forceMergeKnnIndex(testIndex);
}
}

// Custom Legacy Field Mapping
// space_type : "linf", engine : "nmslib", m : 2, ef_construction : 2
public void testKNNIndexCustomLegacyFieldMapping() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import org.opensearch.knn.index.codec.params.KNNScalarQuantizedVectorsFormatParams;
import org.opensearch.knn.index.codec.params.KNNVectorsFormatParams;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.index.engine.KNNMethodContext;
import org.opensearch.knn.index.mapper.KNNMappingConfig;
import org.opensearch.knn.index.mapper.KNNVectorFieldType;

import java.util.Optional;
Expand Down Expand Up @@ -66,16 +68,19 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) {
);
return defaultFormatSupplier.get();
}
var type = (KNNVectorFieldType) mapperService.orElseThrow(
KNNVectorFieldType mappedFieldType = (KNNVectorFieldType) mapperService.orElseThrow(
() -> new IllegalStateException(
String.format("Cannot read field type for field [%s] because mapper service is not available", field)
)
).fieldType(field);
var params = type.getKnnMethodContext().getMethodComponentContext().getParameters();

if (type.getKnnMethodContext().getKnnEngine() == KNNEngine.LUCENE
&& params != null
&& params.containsKey(METHOD_ENCODER_PARAMETER)) {
KNNMappingConfig knnMappingConfig = mappedFieldType.getKnnMappingConfig();
KNNMethodContext knnMethodContext = knnMappingConfig.getKnnMethodContext()
.orElseThrow(() -> new IllegalArgumentException("KNN method context cannot be empty"));

var params = knnMethodContext.getMethodComponentContext().getParameters();

if (knnMethodContext.getKnnEngine() == KNNEngine.LUCENE && params != null && params.containsKey(METHOD_ENCODER_PARAMETER)) {
KNNScalarQuantizedVectorsFormatParams knnScalarQuantizedVectorsFormatParams = new KNNScalarQuantizedVectorsFormatParams(
params,
defaultMaxConnections,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
import org.opensearch.common.StopWatch;
import org.opensearch.core.xcontent.DeprecationHandler;
import org.opensearch.core.xcontent.NamedXContentRegistry;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.util.IndexUtil;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.codec.transfer.VectorTransfer;
import org.opensearch.knn.index.codec.transfer.VectorTransferByte;
import org.opensearch.knn.index.codec.transfer.VectorTransferFloat;
import org.opensearch.knn.jni.JNIService;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.codec.util.KNNCodecUtil;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.indices.Model;
Expand Down Expand Up @@ -214,8 +214,7 @@ private void createKNNIndexFromScratch(FieldInfo fieldInfo, KNNCodecUtil.Pair pa
throws IOException {
Map<String, Object> parameters = new HashMap<>();
Map<String, String> fieldAttributes = fieldInfo.attributes();
String parametersString = fieldAttributes.get(KNNConstants.PARAMETERS);

String parametersString = fieldAttributes.get(PARAMETERS);
// parametersString will be null when legacy mapper is used
if (parametersString == null) {
parameters.put(KNNConstants.SPACE_TYPE, fieldAttributes.getOrDefault(KNNConstants.SPACE_TYPE, SpaceType.DEFAULT.getValue()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import lombok.Getter;
import lombok.NonNull;
import lombok.Setter;
import org.opensearch.Version;
import org.opensearch.common.ValidationException;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
Expand All @@ -20,7 +19,6 @@
import org.opensearch.index.mapper.MapperParsingException;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;
Expand All @@ -29,7 +27,6 @@
import org.opensearch.knn.training.VectorSpaceInfo;

import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE;
import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_SPACE_TYPE;
import static org.opensearch.knn.common.KNNConstants.NAME;
import static org.opensearch.knn.common.KNNConstants.PARAMETERS;
Expand All @@ -42,21 +39,6 @@
@Getter
public class KNNMethodContext implements ToXContentFragment, Writeable {

private static KNNMethodContext defaultInstance = null;

/**
* This is used only for testing
* @return default KNNMethodContext for testing
*/
public static synchronized KNNMethodContext getDefault() {
if (defaultInstance == null) {
MethodComponentContext methodComponentContext = new MethodComponentContext(METHOD_HNSW, Collections.emptyMap());
methodComponentContext.setIndexVersion(Version.CURRENT);
defaultInstance = new KNNMethodContext(KNNEngine.DEFAULT, SpaceType.DEFAULT, methodComponentContext);
}
return defaultInstance;
}

@NonNull
private final KNNEngine knnEngine;
@NonNull
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.mapper;

import org.apache.lucene.document.FieldType;
import org.opensearch.Version;
import org.opensearch.common.Explicit;
import org.opensearch.knn.index.VectorDataType;

import java.util.Map;

/**
* Mapper used when you dont want to build an underlying KNN struct - you just want to
* store vectors as doc values
*/
public class FlatVectorFieldMapper extends KNNVectorFieldMapper {

private final PerDimensionValidator perDimensionValidator;

public static FlatVectorFieldMapper createFieldMapper(
String fullname,
String simpleName,
Map<String, String> metaValue,
VectorDataType vectorDataType,
Integer dimension,
MultiFields multiFields,
CopyTo copyTo,
Explicit<Boolean> ignoreMalformed,
boolean stored,
boolean hasDocValues,
Version indexCreatedVersion
) {
final KNNVectorFieldType mappedFieldType = new KNNVectorFieldType(fullname, metaValue, vectorDataType, () -> dimension);
return new FlatVectorFieldMapper(
simpleName,
mappedFieldType,
multiFields,
copyTo,
ignoreMalformed,
stored,
hasDocValues,
indexCreatedVersion
);
}

private FlatVectorFieldMapper(
String simpleName,
KNNVectorFieldType mappedFieldType,
MultiFields multiFields,
CopyTo copyTo,
Explicit<Boolean> ignoreMalformed,
boolean stored,
boolean hasDocValues,
Version indexCreatedVersion
) {
super(simpleName, mappedFieldType, multiFields, copyTo, ignoreMalformed, stored, hasDocValues, indexCreatedVersion, null);
this.perDimensionValidator = selectPerDimensionValidator(vectorDataType);
this.fieldType = new FieldType(KNNVectorFieldMapper.Defaults.FIELD_TYPE);
this.fieldType.freeze();
}

private PerDimensionValidator selectPerDimensionValidator(VectorDataType vectorDataType) {
if (VectorDataType.BINARY == vectorDataType) {
return PerDimensionValidator.DEFAULT_BIT_VALIDATOR;
}

if (VectorDataType.BYTE == vectorDataType) {
return PerDimensionValidator.DEFAULT_BYTE_VALIDATOR;
}

return PerDimensionValidator.DEFAULT_FLOAT_VALIDATOR;
}

@Override
protected VectorValidator getVectorValidator() {
return VectorValidator.NOOP_VECTOR_VALIDATOR;
}

@Override
protected PerDimensionValidator getPerDimensionValidator() {
return perDimensionValidator;
}

@Override
protected PerDimensionProcessor getPerDimensionProcessor() {
return PerDimensionProcessor.NOOP_PROCESSOR;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.mapper;

import org.opensearch.knn.index.engine.KNNMethodContext;

import java.util.Optional;

/**
* Class holds information about how the ANN indices are created. The design of this class ensures that we do not
* accidentally configure an index that has multiple ways it can be created. This class is immutable.
*/
public interface KNNMappingConfig {
/**
*
* @return Optional containing the modelId if created from model, otherwise empty
*/
default Optional<String> getModelId() {
return Optional.empty();
}

/**
*
* @return Optional containing the KNNMethodContext if created from method, otherwise empty
*/
default Optional<KNNMethodContext> getKnnMethodContext() {
return Optional.empty();
}

/**
*
* @return the dimension of the index; for model based indices, it will be null
*/
int getDimension();
}
Loading

0 comments on commit e5b20bb

Please sign in to comment.