diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 297fc98764d07..95b4462d4f62e 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -743,7 +743,8 @@ public void apply(Settings value, Settings current, Settings previous) { RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING, RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING, RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS, - RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_METADATA + RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_METADATA, + SearchService.CLUSTER_ALLOW_DERIVED_FIELD_SETTING ) ) ); diff --git a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java index 980c432774f6e..6fe8dec9c21b1 100644 --- a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java @@ -237,7 +237,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { // Settings for concurrent segment search IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_SETTING, - + IndexSettings.ALLOW_DERIVED_FIELDS, // validate that built-in similarities don't get redefined Setting.groupSetting("index.similarity.", (s) -> { Map<String, Settings> groups = s.getAsGroups(); diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index 6c0ab2f6b0153..613e93698d683 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java @@ -151,6 +151,14 @@ public static IndexMergePolicy fromString(String text) { true, 
Property.IndexScope ); + + public static final Setting<Boolean> ALLOW_DERIVED_FIELDS = Setting.boolSetting( + "index.query.derived_field.enabled", + true, + Property.Dynamic, + Property.IndexScope + ); + public static final Setting<TimeValue> INDEX_TRANSLOG_SYNC_INTERVAL_SETTING = Setting.timeSetting( "index.translog.sync_interval", TimeValue.timeValueSeconds(5), @@ -763,6 +771,7 @@ public static IndexMergePolicy fromString(String text) { private final boolean assignedOnRemoteNode; private final RemoteStorePathStrategy remoteStorePathStrategy; private final boolean isTranslogMetadataEnabled; + private volatile boolean allowDerivedField; /** * The maximum age of a retention lease before it is considered expired. @@ -856,6 +865,10 @@ private void setDefaultFields(List<String> defaultFields) { this.defaultFields = defaultFields; } + private void setAllowDerivedField(boolean allowDerivedField) { + this.allowDerivedField = allowDerivedField; + } + /** * Returns <code>true</code> if query string parsing should be lenient. The default is <code>false</code> */ @@ -884,6 +897,13 @@ public boolean isDefaultAllowUnmappedFields() { return defaultAllowUnmappedFields; } + /** + * Returns <code>true</code> if queries are allowed to define and use derived fields. The default is <code>true</code> + */ + public boolean isDerivedFieldAllowed() { + return allowDerivedField; + } + /** * Creates a new {@link IndexSettings} instance. The given node settings will be merged with the settings in the metadata * while index level settings will overwrite node settings. 
@@ -931,6 +951,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti this.queryStringAnalyzeWildcard = QUERY_STRING_ANALYZE_WILDCARD.get(nodeSettings); this.queryStringAllowLeadingWildcard = QUERY_STRING_ALLOW_LEADING_WILDCARD.get(nodeSettings); this.defaultAllowUnmappedFields = scopedSettings.get(ALLOW_UNMAPPED); + this.allowDerivedField = scopedSettings.get(ALLOW_DERIVED_FIELDS); this.durability = scopedSettings.get(INDEX_TRANSLOG_DURABILITY_SETTING); defaultFields = scopedSettings.get(DEFAULT_FIELD_SETTING); syncInterval = INDEX_TRANSLOG_SYNC_INTERVAL_SETTING.get(settings); @@ -1105,6 +1126,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti INDEX_DOC_ID_FUZZY_SET_FALSE_POSITIVE_PROBABILITY_SETTING, this::setDocIdFuzzySetFalsePositiveProbability ); + scopedSettings.addSettingsUpdateConsumer(ALLOW_DERIVED_FIELDS, this::setAllowDerivedField); } private void setSearchIdleAfter(TimeValue searchIdleAfter) { diff --git a/server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java b/server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java new file mode 100644 index 0000000000000..c577a4117247b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java @@ -0,0 +1,229 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.mapper; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.regex.Regex; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.script.Script; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +import static org.opensearch.index.mapper.FieldMapper.IGNORE_MALFORMED_SETTING; + +/** + * Accepts derived field definitions from a search request in both forms: a map parsed from the SearchRequest and {@link DerivedField} objects defined using the client. + * The object is initialized per search request and is responsible for resolving the {@link DerivedFieldType} for a given field name. + * It uses {@link FieldTypeInference} to infer the type of a nested field within a DerivedField of {@link DerivedFieldSupportedTypes#OBJECT} type when that type is not given explicitly in the parent's properties. 
+ */ +public class DefaultDerivedFieldResolver implements DerivedFieldResolver { + private final QueryShardContext queryShardContext; + private final Map<String, DerivedFieldType> derivedFieldTypeMap = new ConcurrentHashMap<>(); + private final FieldTypeInference typeInference; + private static final Logger logger = LogManager.getLogger(DefaultDerivedFieldResolver.class); + + DefaultDerivedFieldResolver( + QueryShardContext queryShardContext, + Map<String, Object> derivedFieldsObject, + List<DerivedField> derivedFields + ) { + this( + queryShardContext, + derivedFieldsObject, + derivedFields, + new FieldTypeInference( + queryShardContext.index().getName(), + queryShardContext.getMapperService(), + queryShardContext.getIndexReader() + ) + ); + } + + DefaultDerivedFieldResolver( + QueryShardContext queryShardContext, + Map<String, Object> derivedFieldsObject, + List<DerivedField> derivedFields, + FieldTypeInference typeInference + ) { + this.queryShardContext = queryShardContext; + initDerivedFieldTypes(derivedFieldsObject, derivedFields); + this.typeInference = typeInference; + } + + @Override + public Set<String> resolvePattern(String pattern) { + Set<String> derivedFields = new HashSet<>(); + if (queryShardContext != null && queryShardContext.getMapperService() != null) { + for (MappedFieldType fieldType : queryShardContext.getMapperService().fieldTypes()) { + if (Regex.simpleMatch(pattern, fieldType.name()) && fieldType instanceof DerivedFieldType) { + derivedFields.add(fieldType.name()); + } + } + } + for (String fieldName : derivedFieldTypeMap.keySet()) { + if (Regex.simpleMatch(pattern, fieldName)) { + derivedFields.add(fieldName); + } + } + return derivedFields; + } + + /** + * Resolves the fieldName. The search request definitions are given precedence over derived fields definitions in the index mapping. + * It caches the response for previously resolved field names + * @param fieldName name of the field. 
It also accepts nested derived field + * @return DerivedFieldType if resolved successfully, a null otherwise. + */ + @Override + public DerivedFieldType resolve(String fieldName) { + return Optional.ofNullable(resolveUsingSearchDefinitions(fieldName)).orElseGet(() -> resolveUsingMappings(fieldName)); + } + + private DerivedFieldType resolveUsingSearchDefinitions(String fieldName) { + return Optional.ofNullable(derivedFieldTypeMap.get(fieldName)) + .orElseGet( + () -> Optional.ofNullable((DerivedFieldType) getParentDerivedField(fieldName)) + .map( + // compute and cache nested derived field + parentDerivedField -> derivedFieldTypeMap.computeIfAbsent( + fieldName, + f -> this.resolveNestedField(f, parentDerivedField) + ) + ) + .orElse(null) + ); + } + + private DerivedFieldType resolveNestedField(String fieldName, DerivedFieldType parentDerivedField) { + Objects.requireNonNull(parentDerivedField); + try { + Script script = parentDerivedField.derivedField.getScript(); + String nestedType = explicitTypeFromParent(parentDerivedField.derivedField, fieldName.substring(fieldName.indexOf(".") + 1)); + if (nestedType == null) { + Mapper inferredFieldMapper = typeInference.infer( + getValueFetcher(fieldName, script, parentDerivedField.derivedField.getIgnoreMalformed()) + ); + if (inferredFieldMapper != null) { + nestedType = inferredFieldMapper.typeName(); + } + } + if (nestedType != null) { + DerivedField derivedField = new DerivedField(fieldName, nestedType, script); + if (parentDerivedField.derivedField.getProperties() != null) { + derivedField.setProperties(parentDerivedField.derivedField.getProperties()); + } + if (parentDerivedField.derivedField.getPrefilterField() != null) { + derivedField.setPrefilterField(parentDerivedField.derivedField.getPrefilterField()); + } + if (parentDerivedField.derivedField.getFormat() != null) { + derivedField.setFormat(parentDerivedField.derivedField.getFormat()); + } + if (parentDerivedField.derivedField.getIgnoreMalformed()) { + 
derivedField.setIgnoreMalformed(parentDerivedField.derivedField.getIgnoreMalformed()); + } + return getDerivedFieldType(derivedField); + } else { + logger.warn( + "Field type cannot be inferred. Ensure the field {} is not rare across entire index or provide explicit mapping using [properties] under parent object [{}] ", + fieldName, + parentDerivedField.derivedField.getName() + ); + } + } catch (IOException e) { + logger.warn(e.getMessage()); + } + return null; + } + + private MappedFieldType getParentDerivedField(String fieldName) { + if (fieldName.contains(".")) { + return resolve(fieldName.split("\\.")[0]); + } + return null; + } + + private static String explicitTypeFromParent(DerivedField parentDerivedField, String subField) { + if (parentDerivedField == null) { + return null; + } + return parentDerivedField.getNestedFieldType(subField); + } + + ValueFetcher getValueFetcher(String fieldName, Script script, boolean ignoreMalformed) { + String subFieldName = fieldName.substring(fieldName.indexOf(".") + 1); + return new ObjectDerivedFieldType.ObjectDerivedFieldValueFetcher( + subFieldName, + DerivedFieldType.getDerivedFieldLeafFactory(script, queryShardContext, queryShardContext.lookup()), + o -> o, // raw object returned will be used to infer the type without modifying it + ignoreMalformed + ); + } + + private void initDerivedFieldTypes(Map<String, Object> derivedFieldsObject, List<DerivedField> derivedFields) { + if (derivedFieldsObject != null && !derivedFieldsObject.isEmpty()) { + Map<String, Object> derivedFieldObject = new HashMap<>(); + derivedFieldObject.put(DerivedFieldMapper.CONTENT_TYPE, derivedFieldsObject); + derivedFieldTypeMap.putAll(getAllDerivedFieldTypeFromObject(derivedFieldObject)); + } + if (derivedFields != null) { + for (DerivedField derivedField : derivedFields) { + derivedFieldTypeMap.put(derivedField.getName(), getDerivedFieldType(derivedField)); + } + } + } + + private Map<String, DerivedFieldType> 
getAllDerivedFieldTypeFromObject(Map<String, Object> derivedFieldObject) { + Map<String, DerivedFieldType> derivedFieldTypes = new HashMap<>(); + DocumentMapper documentMapper = queryShardContext.getMapperService() + .documentMapperParser() + .parse(DerivedFieldMapper.CONTENT_TYPE, derivedFieldObject); + if (documentMapper != null && documentMapper.mappers() != null) { + for (Mapper mapper : documentMapper.mappers()) { + if (mapper instanceof DerivedFieldMapper) { + DerivedFieldType derivedFieldType = ((DerivedFieldMapper) mapper).fieldType(); + derivedFieldTypes.put(derivedFieldType.name(), derivedFieldType); + } + } + } + return derivedFieldTypes; + } + + private DerivedFieldType getDerivedFieldType(DerivedField derivedField) { + Mapper.BuilderContext builderContext = new Mapper.BuilderContext( + queryShardContext.getMapperService().getIndexSettings().getSettings(), + new ContentPath(1) + ); + DerivedFieldMapper.Builder builder = new DerivedFieldMapper.Builder( + derivedField, + queryShardContext.getMapperService().getIndexAnalyzers(), + null, + IGNORE_MALFORMED_SETTING.getDefault(queryShardContext.getIndexSettings().getSettings()) + ); + return builder.build(builderContext).fieldType(); + } + + private DerivedFieldType resolveUsingMappings(String name) { + if (queryShardContext != null && queryShardContext.getMapperService() != null) { + MappedFieldType mappedFieldType = queryShardContext.getMapperService().fieldType(name); + if (mappedFieldType instanceof DerivedFieldType) { + return (DerivedFieldType) mappedFieldType; + } + } + return null; + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedField.java b/server/src/main/java/org/opensearch/index/mapper/DerivedField.java index b502e41cbb97b..249b60a1c4ec5 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedField.java +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedField.java @@ -30,7 +30,7 @@ public class DerivedField implements Writeable, 
ToXContentFragment { private final String name; private final String type; private final Script script; - private String sourceIndexedField; + private String prefilterField; private Map<String, Object> properties; private Boolean ignoreMalformed; private String format; @@ -49,7 +49,7 @@ public DerivedField(StreamInput in) throws IOException { if (in.readBoolean()) { properties = in.readMap(); } - sourceIndexedField = in.readOptionalString(); + prefilterField = in.readOptionalString(); format = in.readOptionalString(); ignoreMalformed = in.readOptionalBoolean(); } @@ -67,7 +67,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeBoolean(true); out.writeMap(properties); } - out.writeOptionalString(sourceIndexedField); + out.writeOptionalString(prefilterField); out.writeOptionalString(format); out.writeOptionalBoolean(ignoreMalformed); } @@ -81,8 +81,8 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par if (properties != null) { builder.field("properties", properties); } - if (sourceIndexedField != null) { - builder.field("source_indexed_field", sourceIndexedField); + if (prefilterField != null) { + builder.field("prefilter_field", prefilterField); } if (format != null) { builder.field("format", format); @@ -110,8 +110,15 @@ public Map<String, Object> getProperties() { return properties; } - public String getSourceIndexedField() { - return sourceIndexedField; + public String getNestedFieldType(String fieldName) { + if (properties == null || properties.isEmpty() || fieldName == null || fieldName.isEmpty()) { + return null; + } + return (String) properties.get(fieldName); + } + + public String getPrefilterField() { + return prefilterField; } public String getFormat() { @@ -126,8 +133,8 @@ public void setProperties(Map<String, Object> properties) { this.properties = properties; } - public void setSourceIndexedField(String sourceIndexedField) { - this.sourceIndexedField = sourceIndexedField; + public void 
setPrefilterField(String prefilterField) { + this.prefilterField = prefilterField; } public void setFormat(String format) { @@ -140,7 +147,7 @@ public void setIgnoreMalformed(boolean ignoreMalformed) { @Override public int hashCode() { - return Objects.hash(name, type, script, sourceIndexedField, properties, ignoreMalformed, format); + return Objects.hash(name, type, script, prefilterField, properties, ignoreMalformed, format); } @Override @@ -155,7 +162,7 @@ public boolean equals(Object obj) { return Objects.equals(name, other.name) && Objects.equals(type, other.type) && Objects.equals(script, other.script) - && Objects.equals(sourceIndexedField, other.sourceIndexedField) + && Objects.equals(prefilterField, other.prefilterField) && Objects.equals(properties, other.properties) && Objects.equals(ignoreMalformed, other.ignoreMalformed) && Objects.equals(format, other.format); diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java index c6ae71320c35c..e08e46e1ea969 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java @@ -9,16 +9,20 @@ package org.opensearch.index.mapper; import org.apache.lucene.index.IndexableField; +import org.opensearch.common.time.DateFormatter; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.script.Script; import java.io.IOException; import java.util.Arrays; -import java.util.HashMap; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.function.Function; +import static org.opensearch.index.mapper.DateFieldMapper.getDefaultDateTimeFormatter; + /** * A field mapper for derived fields * @@ -28,6 +32,8 @@ public class DerivedFieldMapper extends ParametrizedFieldMapper { public static final String CONTENT_TYPE = 
"derived"; + protected final IndexAnalyzers indexAnalyzers; + private static DerivedFieldMapper toType(FieldMapper in) { return (DerivedFieldMapper) in; } @@ -38,62 +44,180 @@ private static DerivedFieldMapper toType(FieldMapper in) { * @opensearch.internal */ public static class Builder extends ParametrizedFieldMapper.Builder { - // TODO: The type of parameter may change here if the actual underlying FieldType object is needed - private final Parameter<String> type = Parameter.stringParam("type", false, m -> toType(m).type, ""); + private final Parameter<String> type = Parameter.stringParam("type", true, m -> toType(m).type, ""); + private final IndexAnalyzers indexAnalyzers; + private final boolean defaultIgnoreMalformed; + private final DateFormatter defaultDateFormatter; private final Parameter<Script> script = new Parameter<>( "script", - false, + true, () -> null, (n, c, o) -> o == null ? null : Script.parse(o), m -> toType(m).script ).setSerializerCheck((id, ic, value) -> value != null); - public Builder(String name) { + private final Parameter<Map<String, Object>> properties = new Parameter<>( + "properties", + true, + Collections::emptyMap, + (n, c, o) -> parseProperties(n, o), + m -> toType(m).properties + ); + private final Parameter<String> prefilterField = Parameter.stringParam("prefilter_field", true, m -> toType(m).prefilterField, ""); + private final Parameter<String> format = Parameter.stringParam( + "format", + true, + m -> toType(m).format, + getDefaultDateTimeFormatter().pattern() + ); + private final Parameter<Boolean> ignoreMalformed; + + private static Map<String, Object> parseProperties(String name, Object propertiesObject) { + if (propertiesObject instanceof Map == false) { + throw new MapperParsingException( + "[properties] must be an object, got " + + propertiesObject.getClass().getSimpleName() + + "[" + + propertiesObject + + "] for field [" + + name + + "]" + ); + } + @SuppressWarnings("unchecked") + Map<String, ?> properties = 
(Map<String, ?>) propertiesObject; + for (Object value : properties.values()) { + if (value == null) { + throw new MapperParsingException("[properties] values can't be null (field [" + name + "])"); + } else if (!(value instanceof String)) { + // In the future, we can accept an Object too if needed + throw new MapperParsingException( + "[properties] values can only be strings, but got " + + value.getClass().getSimpleName() + + "[" + + value + + "] for field [" + + name + + "]" + ); + } + } + return (Map<String, Object>) properties; + } + + public Builder(String name, IndexAnalyzers indexAnalyzers, DateFormatter defaultDateFormatter, boolean defaultIgnoreMalformed) { super(name); + this.indexAnalyzers = indexAnalyzers; + this.defaultDateFormatter = defaultDateFormatter; + this.defaultIgnoreMalformed = defaultIgnoreMalformed; + if (defaultDateFormatter != null) { + this.format.setValue(defaultDateFormatter.pattern()); + } + this.ignoreMalformed = Parameter.boolParam("ignore_malformed", true, m -> toType(m).ignoreMalformed, defaultIgnoreMalformed); } - public Builder(DerivedField derivedField) { - super(derivedField.getName()); + public Builder( + DerivedField derivedField, + IndexAnalyzers indexAnalyzers, + DateFormatter defaultDateFormatter, + boolean defaultIgnoreMalformed + ) { + this(derivedField.getName(), indexAnalyzers, defaultDateFormatter, defaultIgnoreMalformed); this.type.setValue(derivedField.getType()); this.script.setValue(derivedField.getScript()); + if (derivedField.getProperties() != null) { + this.properties.setValue(derivedField.getProperties()); + } + if (derivedField.getPrefilterField() != null) { + this.prefilterField.setValue(derivedField.getPrefilterField()); + } + if (derivedField.getFormat() != null) { + this.format.setValue(derivedField.getFormat()); + } + if (derivedField.getIgnoreMalformed()) { + this.ignoreMalformed.setValue(derivedField.getIgnoreMalformed()); + } } @Override protected List<Parameter<?>> getParameters() { - return 
Arrays.asList(type, script); + return Arrays.asList(type, script, properties, prefilterField, format, ignoreMalformed); } @Override public DerivedFieldMapper build(BuilderContext context) { - FieldMapper fieldMapper = DerivedFieldSupportedTypes.getFieldMapperFromType(type.getValue(), name, context); + DerivedField derivedField = new DerivedField(buildFullName(context), type.getValue(), script.getValue()); + if (properties.isConfigured()) { + derivedField.setProperties(properties.getValue()); + } + if (prefilterField.isConfigured()) { + derivedField.setPrefilterField(prefilterField.getValue()); + } + if (format.isConfigured()) { + derivedField.setFormat(format.getValue()); + } + if (ignoreMalformed.isConfigured()) { + derivedField.setIgnoreMalformed(ignoreMalformed.getValue()); + } + FieldMapper fieldMapper = DerivedFieldSupportedTypes.getFieldMapperFromType(type.getValue(), name, context, indexAnalyzers); Function<Object, IndexableField> fieldFunction = DerivedFieldSupportedTypes.getIndexableFieldGeneratorType( type.getValue(), name ); - DerivedFieldType ft = new DerivedFieldType( - new DerivedField(buildFullName(context), type.getValue(), script.getValue()), - fieldMapper, - fieldFunction + DerivedFieldType ft; + if (name.contains(".")) { + ft = new ObjectDerivedFieldType(derivedField, fieldMapper, fieldFunction, indexAnalyzers); + } else { + ft = new DerivedFieldType(derivedField, fieldMapper, fieldFunction, indexAnalyzers); + } + return new DerivedFieldMapper( + name, + ft, + multiFieldsBuilder.build(this, context), + copyTo.build(), + this, + indexAnalyzers, + defaultDateFormatter, + defaultIgnoreMalformed ); - return new DerivedFieldMapper(name, ft, multiFieldsBuilder.build(this, context), copyTo.build(), this); } } - public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n)); + public static final TypeParser PARSER = new TypeParser((n, c) -> { + boolean ignoreMalformedByDefault = IGNORE_MALFORMED_SETTING.get(c.getSettings()); + return 
new Builder(n, c.getIndexAnalyzers(), c.getDateFormatter(), ignoreMalformedByDefault); + }); + private final String type; private final Script script; + private final String prefilterField; + private final Map<String, Object> properties; + private final boolean ignoreMalformed; + private final boolean defaultIgnoreMalformed; + private final DateFormatter defaultDateFormatter; + private final String format; protected DerivedFieldMapper( String simpleName, MappedFieldType mappedFieldType, MultiFields multiFields, CopyTo copyTo, - Builder builder + Builder builder, + IndexAnalyzers indexAnalyzers, + DateFormatter defaultDateFormatter, + boolean ignoreMalformed ) { super(simpleName, mappedFieldType, multiFields, copyTo); this.type = builder.type.getValue(); this.script = builder.script.getValue(); + this.prefilterField = builder.prefilterField.getValue(); + this.properties = builder.properties.getValue(); + this.ignoreMalformed = builder.ignoreMalformed.getValue(); + this.format = builder.format.getValue(); + this.indexAnalyzers = indexAnalyzers; + this.defaultDateFormatter = defaultDateFormatter; + this.defaultIgnoreMalformed = ignoreMalformed; } @Override @@ -110,7 +234,7 @@ protected void parseCreateField(ParseContext context) throws IOException { @Override public ParametrizedFieldMapper.Builder getMergeBuilder() { - return new Builder(simpleName()).init(this); + return new Builder(simpleName(), this.indexAnalyzers, defaultDateFormatter, defaultIgnoreMalformed).init(this); } @Override @@ -133,26 +257,4 @@ public Script getScript() { return script; } - public static Map<String, DerivedFieldType> getAllDerivedFieldTypeFromObject( - Map<String, Object> derivedFieldObject, - MapperService mapperService - ) { - Map<String, DerivedFieldType> derivedFieldTypes = new HashMap<>(); - DocumentMapper documentMapper = mapperService.documentMapperParser().parse(DerivedFieldMapper.CONTENT_TYPE, derivedFieldObject); - if (documentMapper != null && documentMapper.mappers() != null) 
{ - for (Mapper mapper : documentMapper.mappers()) { - if (mapper instanceof DerivedFieldMapper) { - DerivedFieldType derivedFieldType = ((DerivedFieldMapper) mapper).fieldType(); - derivedFieldTypes.put(derivedFieldType.name(), derivedFieldType); - } - } - } - return derivedFieldTypes; - } - - public static DerivedFieldType getDerivedFieldType(DerivedField derivedField, MapperService mapperService) { - BuilderContext builderContext = new Mapper.BuilderContext(mapperService.getIndexSettings().getSettings(), new ContentPath(1)); - Builder builder = new Builder(derivedField); - return builder.build(builderContext).fieldType(); - } } diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldResolver.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldResolver.java new file mode 100644 index 0000000000000..adcbaf56f854a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldResolver.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.opensearch.common.annotation.PublicApi; + +import java.util.Set; + +/** + * DerivedFieldResolver is used as a lookup to resolve derived fields from their name. + * It is created per search request and needs to be set at {@link org.opensearch.index.query.QueryShardContext#setDerivedFieldResolver(DerivedFieldResolver)} + * for derived fields resolution. + */ +@PublicApi(since = "2.15.0") +public interface DerivedFieldResolver { + /** + * Resolves all derived fields matching a given pattern. It includes derived fields defined both in search requests + * and index mapping. 
+ * @param pattern field name pattern; the default resolver matches it with {@code Regex.simpleMatch} (simple wildcard matching), not as a full regular expression + * @return all derived fields matching the pattern + */ + Set<String> resolvePattern(String pattern); + + /** + * Resolves the MappedFieldType associated with a derived field + * @param fieldName field name to lookup + * @return mapped field type; may be {@code null} when the field name cannot be resolved + */ + MappedFieldType resolve(String fieldName); + +} diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldResolverFactory.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldResolverFactory.java new file mode 100644 index 0000000000000..a66c61806e3e3 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldResolverFactory.java @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ +public class DerivedFieldResolverFactory { + + public static DerivedFieldResolver createResolver( + QueryShardContext queryShardContext, + Map<String, Object> derivedFieldsObject, + List<DerivedField> derivedFields, + boolean derivedFieldAllowed + ) { + boolean derivedFieldsPresent = derivedFieldsPresent(derivedFieldsObject, derivedFields); + if (derivedFieldsPresent && !derivedFieldAllowed) { + throw new OpenSearchException( + "[derived field] queries cannot be executed when '" + + IndexSettings.ALLOW_DERIVED_FIELDS.getKey() + + "' or '" + + SearchService.CLUSTER_ALLOW_DERIVED_FIELD_SETTING.getKey() + + "' is set to false." + ); + } + if (derivedFieldsPresent && queryShardContext.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[derived field] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false." + ); + } + if (derivedFieldAllowed) { + return new DefaultDerivedFieldResolver(queryShardContext, derivedFieldsObject, derivedFields); + } else { + return new NoOpDerivedFieldResolver(); + } + } + + private static boolean derivedFieldsPresent(Map<String, Object> derivedFieldsObject, List<DerivedField> derivedFields) { + return (derivedFieldsObject != null && !derivedFieldsObject.isEmpty()) || (derivedFields != null && !derivedFields.isEmpty()); + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldSupportedTypes.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldSupportedTypes.java index aa6936bf6529a..c2bfaffa206a5 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldSupportedTypes.java +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldSupportedTypes.java @@ -11,24 +11,29 @@ import org.apache.lucene.document.DoubleField; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.FloatField; import org.apache.lucene.document.InetAddressPoint; import 
org.apache.lucene.document.KeywordField; import org.apache.lucene.document.LatLonPoint; import org.apache.lucene.document.LongField; import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.opensearch.OpenSearchException; import org.opensearch.Version; import org.opensearch.common.Booleans; +import org.opensearch.common.TriFunction; import org.opensearch.common.collect.Tuple; import org.opensearch.common.geo.GeoPoint; import org.opensearch.common.lucene.Lucene; import org.opensearch.common.network.InetAddresses; +import org.opensearch.common.time.DateFormatter; +import org.opensearch.index.analysis.IndexAnalyzers; import java.net.InetAddress; import java.util.Arrays; import java.util.Map; -import java.util.function.BiFunction; import java.util.function.Function; import java.util.stream.Collectors; @@ -39,7 +44,7 @@ */ public enum DerivedFieldSupportedTypes { - BOOLEAN("boolean", (name, context) -> { + BOOLEAN("boolean", (name, context, indexAnalyzers) -> { BooleanFieldMapper.Builder builder = new BooleanFieldMapper.Builder(name); return builder.build(context); }, name -> o -> { @@ -52,8 +57,8 @@ public enum DerivedFieldSupportedTypes { value = Booleans.parseBooleanStrict(textValue, false); } return new Field(name, value ? "T" : "F", BooleanFieldMapper.Defaults.FIELD_TYPE); - }, o -> o), - DATE("date", (name, context) -> { + }, formatter -> o -> o), + DATE("date", (name, context, indexAnalyzers) -> { // TODO: should we support mapping settings exposed by a given field type from derived fields too? // for example, support `format` for date type? 
DateFieldMapper.Builder builder = new DateFieldMapper.Builder( @@ -64,8 +69,13 @@ public enum DerivedFieldSupportedTypes { Version.CURRENT ); return builder.build(context); - }, name -> o -> new LongPoint(name, (long) o), o -> DateFieldMapper.getDefaultDateTimeFormatter().formatMillis((long) o)), - GEO_POINT("geo_point", (name, context) -> { + }, + name -> o -> new LongPoint(name, (long) o), + formatter -> o -> formatter == null + ? DateFieldMapper.getDefaultDateTimeFormatter().formatMillis((long) o) + : formatter.formatMillis((long) o) + ), + GEO_POINT("geo_point", (name, context, indexAnalyzers) -> { GeoPointFieldMapper.Builder builder = new GeoPointFieldMapper.Builder(name); return builder.build(context); }, name -> o -> { @@ -74,8 +84,8 @@ public enum DerivedFieldSupportedTypes { throw new ClassCastException("geo_point should be in format emit(double lat, double lon) for derived fields"); } return new LatLonPoint(name, (double) ((Tuple<?, ?>) o).v1(), (double) ((Tuple<?, ?>) o).v2()); - }, o -> new GeoPoint((double) ((Tuple) o).v1(), (double) ((Tuple) o).v2())), - IP("ip", (name, context) -> { + }, formatter -> o -> new GeoPoint((double) ((Tuple) o).v1(), (double) ((Tuple) o).v2())), + IP("ip", (name, context, indexAnalyzers) -> { IpFieldMapper.Builder builder = new IpFieldMapper.Builder(name, false, Version.CURRENT); return builder.build(context); }, name -> o -> { @@ -86,8 +96,8 @@ public enum DerivedFieldSupportedTypes { address = InetAddresses.forString(o.toString()); } return new InetAddressPoint(name, address); - }, o -> o), - KEYWORD("keyword", (name, context) -> { + }, formatter -> o -> o), + KEYWORD("keyword", (name, context, indexAnalyzers) -> { FieldType dummyFieldType = new FieldType(); dummyFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS); KeywordFieldMapper.Builder keywordBuilder = new KeywordFieldMapper.Builder(name); @@ -101,28 +111,54 @@ public enum DerivedFieldSupportedTypes { keywordBuilder.copyTo.build(), keywordBuilder ); - }, name -> 
o -> new KeywordField(name, (String) o, Field.Store.NO), o -> o), - LONG("long", (name, context) -> { + }, name -> o -> new KeywordField(name, (String) o, Field.Store.NO), formatter -> o -> o), + TEXT("text", (name, context, indexAnalyzers) -> { + FieldType dummyFieldType = new FieldType(); + dummyFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + TextFieldMapper.Builder textBuilder = new TextFieldMapper.Builder(name, indexAnalyzers); + return textBuilder.build(context); + }, name -> o -> new TextField(name, (String) o, Field.Store.NO), formatter -> o -> o), + LONG("long", (name, context, indexAnalyzers) -> { NumberFieldMapper.Builder longBuilder = new NumberFieldMapper.Builder(name, NumberFieldMapper.NumberType.LONG, false, false); return longBuilder.build(context); - }, name -> o -> new LongField(name, Long.parseLong(o.toString()), Field.Store.NO), o -> o), - DOUBLE("double", (name, context) -> { + }, name -> o -> new LongField(name, Long.parseLong(o.toString()), Field.Store.NO), formatter -> o -> o), + DOUBLE("double", (name, context, indexAnalyzers) -> { NumberFieldMapper.Builder doubleBuilder = new NumberFieldMapper.Builder(name, NumberFieldMapper.NumberType.DOUBLE, false, false); return doubleBuilder.build(context); - }, name -> o -> new DoubleField(name, Double.parseDouble(o.toString()), Field.Store.NO), o -> o); + }, name -> o -> new DoubleField(name, Double.parseDouble(o.toString()), Field.Store.NO), formatter -> o -> o), + FLOAT("float", (name, context, indexAnalyzers) -> { + NumberFieldMapper.Builder floatBuilder = new NumberFieldMapper.Builder(name, NumberFieldMapper.NumberType.FLOAT, false, false); + return floatBuilder.build(context); + }, name -> o -> new FloatField(name, Float.parseFloat(o.toString()), Field.Store.NO), formatter -> o -> o), + OBJECT("object", (name, context, indexAnalyzers) -> { + // we create a keyword field type with index options set as NONE as we don't support queries directly on object type + KeywordFieldMapper.Builder 
keywordBuilder = new KeywordFieldMapper.Builder(name); + KeywordFieldMapper.KeywordFieldType keywordFieldType = keywordBuilder.buildFieldType(context, new FieldType()); + return new KeywordFieldMapper( + name, + new FieldType(), + keywordFieldType, + keywordBuilder.multiFieldsBuilder.build(keywordBuilder, context), + keywordBuilder.copyTo.build(), + keywordBuilder + ); + }, + name -> o -> { throw new OpenSearchException("Cannot create IndexableField to execute queries on object derived field"); }, + formatter -> o -> o + ); final String name; - private final BiFunction<String, Mapper.BuilderContext, FieldMapper> builder; + private final TriFunction<String, Mapper.BuilderContext, IndexAnalyzers, FieldMapper> builder; private final Function<String, Function<Object, IndexableField>> indexableFieldBuilder; - private final Function<Object, Object> valueForDisplay; + private final Function<DateFormatter, Function<Object, Object>> valueForDisplay; DerivedFieldSupportedTypes( String name, - BiFunction<String, Mapper.BuilderContext, FieldMapper> builder, + TriFunction<String, Mapper.BuilderContext, IndexAnalyzers, FieldMapper> builder, Function<String, Function<Object, IndexableField>> indexableFieldBuilder, - Function<Object, Object> valueForDisplay + Function<DateFormatter, Function<Object, Object>> valueForDisplay ) { this.name = name; this.builder = builder; @@ -134,26 +170,31 @@ public String getName() { return name; } - private FieldMapper getFieldMapper(String name, Mapper.BuilderContext context) { - return builder.apply(name, context); + private FieldMapper getFieldMapper(String name, Mapper.BuilderContext context, IndexAnalyzers indexAnalyzers) { + return builder.apply(name, context, indexAnalyzers); } private Function<Object, IndexableField> getIndexableFieldGenerator(String name) { return indexableFieldBuilder.apply(name); } - private Function<Object, Object> getValueForDisplayGenerator() { - return valueForDisplay; + private Function<Object, Object> 
getValueForDisplayGenerator(DateFormatter formatter) { + return valueForDisplay.apply(formatter); } private static final Map<String, DerivedFieldSupportedTypes> enumMap = Arrays.stream(DerivedFieldSupportedTypes.values()) .collect(Collectors.toMap(DerivedFieldSupportedTypes::getName, enumValue -> enumValue)); - public static FieldMapper getFieldMapperFromType(String type, String name, Mapper.BuilderContext context) { + public static FieldMapper getFieldMapperFromType( + String type, + String name, + Mapper.BuilderContext context, + IndexAnalyzers indexAnalyzers + ) { if (!enumMap.containsKey(type)) { throw new IllegalArgumentException("Type [" + type + "] isn't supported in Derived field context."); } - return enumMap.get(type).getFieldMapper(name, context); + return enumMap.get(type).getFieldMapper(name, context, indexAnalyzers); } public static Function<Object, IndexableField> getIndexableFieldGeneratorType(String type, String name) { @@ -163,10 +204,10 @@ public static Function<Object, IndexableField> getIndexableFieldGeneratorType(St return enumMap.get(type).getIndexableFieldGenerator(name); } - public static Function<Object, Object> getValueForDisplayGenerator(String type) { + public static Function<Object, Object> getValueForDisplayGenerator(String type, DateFormatter formatter) { if (!enumMap.containsKey(type)) { throw new IllegalArgumentException("Type [" + type + "] isn't supported in Derived field context."); } - return enumMap.get(type).getValueForDisplayGenerator(); + return enumMap.get(type).getValueForDisplayGenerator(formatter); } } diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldType.java index 8b480819acd0e..f0200e72c3bc2 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldType.java @@ -12,17 +12,22 @@ import org.apache.lucene.index.IndexableField; 
import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper; import org.apache.lucene.queries.spans.SpanQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.opensearch.common.Nullable; import org.opensearch.common.geo.ShapeRelation; +import org.opensearch.common.time.DateFormatter; import org.opensearch.common.time.DateMathParser; import org.opensearch.common.unit.Fuzziness; import org.opensearch.geometry.Geometry; +import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.index.analysis.NamedAnalyzer; import org.opensearch.index.query.DerivedFieldQuery; import org.opensearch.index.query.QueryShardContext; import org.opensearch.script.DerivedFieldScript; +import org.opensearch.script.Script; import org.opensearch.search.lookup.SearchLookup; import java.io.IOException; @@ -30,6 +35,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.Function; /** @@ -37,12 +43,10 @@ * Contains logic to execute different type of queries on a derived field of given type. 
* @opensearch.internal */ -public final class DerivedFieldType extends MappedFieldType implements GeoShapeQueryable { - - private final DerivedField derivedField; - - FieldMapper typeFieldMapper; +public class DerivedFieldType extends MappedFieldType implements GeoShapeQueryable { + final DerivedField derivedField; + final FieldMapper typeFieldMapper; final Function<Object, IndexableField> indexableFieldGenerator; public DerivedFieldType( @@ -60,10 +64,40 @@ public DerivedFieldType( this.indexableFieldGenerator = fieldFunction; } - public DerivedFieldType(DerivedField derivedField, FieldMapper typeFieldMapper, Function<Object, IndexableField> fieldFunction) { + public DerivedFieldType( + DerivedField derivedField, + FieldMapper typeFieldMapper, + Function<Object, IndexableField> fieldFunction, + IndexAnalyzers indexAnalyzers + ) { this(derivedField, false, false, false, Collections.emptyMap(), typeFieldMapper, fieldFunction); } + @Override + public TextSearchInfo getTextSearchInfo() { + return typeFieldMapper.fieldType().getTextSearchInfo(); + } + + TextFieldMapper.TextFieldType getPrefilterFieldType(QueryShardContext context) { + if (derivedField.getPrefilterField() == null || derivedField.getPrefilterField().isEmpty()) { + return null; + } + MappedFieldType mappedFieldType = context.fieldMapper(derivedField.getPrefilterField()); + if (mappedFieldType == null) { + throw new MapperException("prefilter_field[" + derivedField.getPrefilterField() + "] is not defined in the index mappings"); + } + if (!(mappedFieldType instanceof TextFieldMapper.TextFieldType)) { + throw new MapperException( + "prefilter_field[" + + derivedField.getPrefilterField() + + "] should be of type text. Type found [" + + mappedFieldType.typeName() + + "]." 
+ ); + } + return (TextFieldMapper.TextFieldType) mappedFieldType; + } + @Override public String typeName() { return "derived"; @@ -73,12 +107,16 @@ public String getType() { return derivedField.getType(); } - public MappedFieldType getTypeMappedFieldType() { - return typeFieldMapper.mappedFieldType; + public FieldMapper getFieldMapper() { + return typeFieldMapper; + } + + public Function<Object, IndexableField> getIndexableFieldGenerator() { + return indexableFieldGenerator; } public NamedAnalyzer getIndexAnalyzer() { - return typeFieldMapper.mappedFieldType.indexAnalyzer(); + return getFieldMapper().mappedFieldType.indexAnalyzer(); } @Override @@ -86,11 +124,13 @@ public DerivedFieldValueFetcher valueFetcher(QueryShardContext context, SearchLo if (format != null) { throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); } - Function<Object, Object> valueForDisplay = DerivedFieldSupportedTypes.getValueForDisplayGenerator(getType()); + Function<Object, Object> valueForDisplay = DerivedFieldSupportedTypes.getValueForDisplayGenerator( + getType(), + derivedField.getFormat() != null ? DateFormatter.forPattern(derivedField.getFormat()) : null + ); return new DerivedFieldValueFetcher( - getDerivedFieldLeafFactory(context, searchLookup == null ? context.lookup() : searchLookup), - valueForDisplay, - indexableFieldGenerator + getDerivedFieldLeafFactory(derivedField.getScript(), context, searchLookup == null ? 
context.lookup() : searchLookup), + valueForDisplay ); } @@ -98,21 +138,53 @@ public DerivedFieldValueFetcher valueFetcher(QueryShardContext context, SearchLo public Query termQuery(Object value, QueryShardContext context) { Query query = typeFieldMapper.mappedFieldType.termQuery(value, context); DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), null); - return new DerivedFieldQuery(query, valueFetcher, context.lookup(), getIndexAnalyzer()); + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); + return Optional.ofNullable(getPrefilterFieldType(context)) + .map(prefilterFieldType -> createConjuctionQuery(prefilterFieldType.termQuery(value, context), derivedFieldQuery)) + .orElse(derivedFieldQuery); } @Override public Query termQueryCaseInsensitive(Object value, @Nullable QueryShardContext context) { Query query = typeFieldMapper.mappedFieldType.termQueryCaseInsensitive(value, context); DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), null); - return new DerivedFieldQuery(query, valueFetcher, context.lookup(), getIndexAnalyzer()); + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); + return Optional.ofNullable(getPrefilterFieldType(context)) + .map( + prefilterFieldType -> createConjuctionQuery(prefilterFieldType.termQueryCaseInsensitive(value, context), derivedFieldQuery) + ) + .orElse(derivedFieldQuery); } @Override public Query termsQuery(List<?> values, @Nullable QueryShardContext context) { Query query = typeFieldMapper.mappedFieldType.termsQuery(values, context); DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), null); - return new DerivedFieldQuery(query, valueFetcher, 
context.lookup(), getIndexAnalyzer()); + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); + return Optional.ofNullable(getPrefilterFieldType(context)) + .map(prefilterFieldType -> createConjuctionQuery(prefilterFieldType.termsQuery(values, context), derivedFieldQuery)) + .orElse(derivedFieldQuery); } @Override @@ -137,7 +209,14 @@ public Query rangeQuery( context ); DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), null); - return new DerivedFieldQuery(query, valueFetcher, context.lookup(), getIndexAnalyzer()); + return new DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); } @Override @@ -151,7 +230,22 @@ public Query fuzzyQuery( ) { Query query = typeFieldMapper.mappedFieldType.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context); DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), null); - return new DerivedFieldQuery(query, valueFetcher, context.lookup(), getIndexAnalyzer()); + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); + return Optional.ofNullable(getPrefilterFieldType(context)) + .map( + prefilterFieldType -> createConjuctionQuery( + prefilterFieldType.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context), + derivedFieldQuery + ) + ) + .orElse(derivedFieldQuery); } @Override @@ -174,7 +268,22 @@ public Query fuzzyQuery( context ); DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), null); - return new DerivedFieldQuery(query, valueFetcher, context.lookup(), getIndexAnalyzer()); + DerivedFieldQuery derivedFieldQuery = new 
DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); + return Optional.ofNullable(getPrefilterFieldType(context)) + .map( + prefilterFieldType -> createConjuctionQuery( + prefilterFieldType.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, method, context), + derivedFieldQuery + ) + ) + .orElse(derivedFieldQuery); } @Override @@ -186,7 +295,22 @@ public Query prefixQuery( ) { Query query = typeFieldMapper.mappedFieldType.prefixQuery(value, method, caseInsensitive, context); DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), null); - return new DerivedFieldQuery(query, valueFetcher, context.lookup(), getIndexAnalyzer()); + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); + return Optional.ofNullable(getPrefilterFieldType(context)) + .map( + prefilterFieldType -> createConjuctionQuery( + prefilterFieldType.prefixQuery(value, method, caseInsensitive, context), + derivedFieldQuery + ) + ) + .orElse(derivedFieldQuery); } @Override @@ -198,14 +322,44 @@ public Query wildcardQuery( ) { Query query = typeFieldMapper.mappedFieldType.wildcardQuery(value, method, caseInsensitive, context); DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), null); - return new DerivedFieldQuery(query, valueFetcher, context.lookup(), getIndexAnalyzer()); + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); + return Optional.ofNullable(getPrefilterFieldType(context)) + .map( + prefilterFieldType -> createConjuctionQuery( + prefilterFieldType.wildcardQuery(value, method, caseInsensitive, context), + derivedFieldQuery + ) + ) + 
.orElse(derivedFieldQuery); } @Override public Query normalizedWildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, QueryShardContext context) { Query query = typeFieldMapper.mappedFieldType.normalizedWildcardQuery(value, method, context); DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), null); - return new DerivedFieldQuery(query, valueFetcher, context.lookup(), getIndexAnalyzer()); + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); + return Optional.ofNullable(getPrefilterFieldType(context)) + .map( + prefilterFieldType -> createConjuctionQuery( + prefilterFieldType.normalizedWildcardQuery(value, method, context), + derivedFieldQuery + ) + ) + .orElse(derivedFieldQuery); } @Override @@ -219,14 +373,46 @@ public Query regexpQuery( ) { Query query = typeFieldMapper.mappedFieldType.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context); DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), null); - return new DerivedFieldQuery(query, valueFetcher, context.lookup(), getIndexAnalyzer()); + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); + return Optional.ofNullable(getPrefilterFieldType(context)) + .map( + prefilterFieldType -> createConjuctionQuery( + prefilterFieldType.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context), + derivedFieldQuery + ) + ) + .orElse(derivedFieldQuery); } @Override public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { Query query = typeFieldMapper.mappedFieldType.phraseQuery(stream, slop, enablePositionIncrements, context); 
DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), null); - return new DerivedFieldQuery(query, valueFetcher, context.lookup(), getIndexAnalyzer()); + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); + return Optional.ofNullable(getPrefilterFieldType(context)).map(prefilterFieldType -> { + try { + return createConjuctionQuery( + prefilterFieldType.phraseQuery(stream, slop, enablePositionIncrements, context), + derivedFieldQuery + ); + } catch (IOException e) { + throw new RuntimeException(e); + } + }).orElse(derivedFieldQuery); } @Override @@ -234,14 +420,45 @@ public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositi throws IOException { Query query = typeFieldMapper.mappedFieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context); DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), null); - return new DerivedFieldQuery(query, valueFetcher, context.lookup(), getIndexAnalyzer()); + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); + return Optional.ofNullable(getPrefilterFieldType(context)).map(prefilterFieldType -> { + try { + return createConjuctionQuery( + prefilterFieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context), + derivedFieldQuery + ); + } catch (IOException e) { + throw new RuntimeException(e); + } + }).orElse(derivedFieldQuery); } @Override public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { Query query = typeFieldMapper.mappedFieldType.phrasePrefixQuery(stream, slop, maxExpansions, context); DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), 
null); - return new DerivedFieldQuery(query, valueFetcher, context.lookup(), getIndexAnalyzer()); + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); + return Optional.ofNullable(getPrefilterFieldType(context)).map(prefilterFieldType -> { + try { + return createConjuctionQuery(prefilterFieldType.phrasePrefixQuery(stream, slop, maxExpansions, context), derivedFieldQuery); + } catch (IOException e) { + throw new RuntimeException(e); + } + }).orElse(derivedFieldQuery); } @Override @@ -255,14 +472,28 @@ public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRew public Query distanceFeatureQuery(Object origin, String pivot, float boost, QueryShardContext context) { Query query = typeFieldMapper.mappedFieldType.distanceFeatureQuery(origin, pivot, boost, context); DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), null); - return new DerivedFieldQuery(query, valueFetcher, context.lookup(), getIndexAnalyzer()); + return new DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); } @Override public Query geoShapeQuery(Geometry shape, String fieldName, ShapeRelation relation, QueryShardContext context) { Query query = ((GeoShapeQueryable) (typeFieldMapper.mappedFieldType)).geoShapeQuery(shape, fieldName, relation, context); DerivedFieldValueFetcher valueFetcher = valueFetcher(context, context.lookup(), null); - return new DerivedFieldQuery(query, valueFetcher, context.lookup(), getIndexAnalyzer()); + return new DerivedFieldQuery( + query, + valueFetcher, + context.lookup(), + getIndexAnalyzer(), + indexableFieldGenerator, + derivedField.getIgnoreMalformed() + ); } @Override @@ -275,7 +506,18 @@ public boolean isAggregatable() { return false; } - private DerivedFieldScript.LeafFactory 
getDerivedFieldLeafFactory(QueryShardContext context, SearchLookup searchLookup) { + private Query createConjuctionQuery(Query filterQuery, DerivedFieldQuery derivedFieldQuery) { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(filterQuery, BooleanClause.Occur.FILTER); + builder.add(derivedFieldQuery, BooleanClause.Occur.FILTER); + return builder.build(); + } + + public static DerivedFieldScript.LeafFactory getDerivedFieldLeafFactory( + Script script, + QueryShardContext context, + SearchLookup searchLookup + ) { if (!context.documentMapper("").sourceMapper().enabled()) { throw new IllegalArgumentException( "DerivedFieldQuery error: unable to fetch fields from _source field: _source is disabled in the mappings " @@ -284,7 +526,7 @@ private DerivedFieldScript.LeafFactory getDerivedFieldLeafFactory(QueryShardCont + "]" ); } - DerivedFieldScript.Factory factory = context.compile(derivedField.getScript(), DerivedFieldScript.CONTEXT); - return factory.newFactory(derivedField.getScript().getParams(), searchLookup); + DerivedFieldScript.Factory factory = context.compile(script, DerivedFieldScript.CONTEXT); + return factory.newFactory(script.getParams(), searchLookup); } } diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldValueFetcher.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldValueFetcher.java index 2d9379e04c512..1b4625ab7234b 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldValueFetcher.java +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldValueFetcher.java @@ -25,21 +25,15 @@ * {@link #setNextReader(LeafReaderContext)} whenever a segment is switched. 
*/ @PublicApi(since = "2.14.0") -public final class DerivedFieldValueFetcher implements ValueFetcher { +public class DerivedFieldValueFetcher implements ValueFetcher { private DerivedFieldScript derivedFieldScript; private final DerivedFieldScript.LeafFactory derivedFieldScriptFactory; private final Function<Object, Object> valueForDisplay; - private final Function<Object, IndexableField> indexableFieldFunction; - public DerivedFieldValueFetcher( - DerivedFieldScript.LeafFactory derivedFieldScriptFactory, - Function<Object, Object> valueForDisplay, - Function<Object, IndexableField> indexableFieldFunction - ) { + public DerivedFieldValueFetcher(DerivedFieldScript.LeafFactory derivedFieldScriptFactory, Function<Object, Object> valueForDisplay) { this.derivedFieldScriptFactory = derivedFieldScriptFactory; this.valueForDisplay = valueForDisplay; - this.indexableFieldFunction = indexableFieldFunction; } @Override @@ -55,17 +49,19 @@ public List<Object> fetchValues(SourceLookup lookup) { return result; } - private List<Object> fetchValuesInternal(SourceLookup lookup) { + public List<Object> fetchValuesInternal(SourceLookup lookup) { derivedFieldScript.setDocument(lookup.docId()); derivedFieldScript.execute(); return derivedFieldScript.getEmittedValues(); } - public List<IndexableField> getIndexableField(SourceLookup lookup) { + public List<IndexableField> getIndexableField(SourceLookup lookup, Function<Object, IndexableField> indexableFieldFunction) { List<Object> values = fetchValuesInternal(lookup); List<IndexableField> indexableFields = new ArrayList<>(); for (Object v : values) { - indexableFields.add(indexableFieldFunction.apply(v)); + if (v != null) { + indexableFields.add(indexableFieldFunction.apply(v)); + } } return indexableFields; } diff --git a/server/src/main/java/org/opensearch/index/mapper/NoOpDerivedFieldResolver.java b/server/src/main/java/org/opensearch/index/mapper/NoOpDerivedFieldResolver.java new file mode 100644 index 0000000000000..b3b8c83685407 
--- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/NoOpDerivedFieldResolver.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import java.util.Collections; +import java.util.Set; + +/** + * Used when the derived field feature is disabled + */ +public class NoOpDerivedFieldResolver implements DerivedFieldResolver { + + @Override + public Set<String> resolvePattern(String pattern) { + return Collections.emptySet(); + } + + @Override + public MappedFieldType resolve(String fieldName) { + return null; + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/ObjectDerivedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/ObjectDerivedFieldType.java new file mode 100644 index 0000000000000..7e5c9a3f3da93 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/ObjectDerivedFieldType.java @@ -0,0 +1,168 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.index.IndexableField; +import org.opensearch.OpenSearchParseException; +import org.opensearch.common.time.DateFormatter; +import org.opensearch.common.xcontent.XContentHelper; +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.index.analysis.IndexAnalyzers; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.script.DerivedFieldScript; +import org.opensearch.search.lookup.SearchLookup; +import org.opensearch.search.lookup.SourceLookup; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.function.Function; + +/** + * Represents a derived field in OpenSearch, which behaves similarly to an Object field type within the context of derived fields. + * It is not a primitive field type and does not directly support queries. However, any nested derived fields contained within a DerivedField object + * are also classified as Object derived fields, which support queries depending on their inferred type. + * + * <p> + * For example, consider the following mapping: + * <pre> + * mappings: + * derived: + * regular_field: + * type: keyword + * script: "emit(keyword_string)" + * derived_obj: + * type: object + * script: "emit(json_obj)" + * </pre> + * + * Here, we have a regular keyword derived field and an object type derived field. Any nested field within `derived_obj` does not need to be explicitly defined. + * Their type will be inferred, and the value will be extracted from the `json_obj` emitted by the script associated with the parent object `derived_obj`. + * The {@link ObjectDerivedFieldValueFetcher} is used for this purpose, which accepts `sub_field` and can extract the nested fields from JSON. 
+ * + * <p> + * For instance, if `derived_obj` emits the following document: + * <pre> + * "derived_obj" : { + * "sub_field_1": "value 1", + * "sub_field_2": { + * "sub_field_3": "value_3" + * } + * } + * </pre> + * + * Then nested fields such as `sub_field_1` and `sub_field_3` can be used in the query as `derived_obj.sub_field_1` and `derived_obj.sub_field_2.sub_field_3` respectively. + * Both of these nested derived fields will be an instance of `ObjectDerivedFieldType`; however, their mapped field type will be inferred based on the type of value they hold, to support queries on them. + * + * @see FieldTypeInference for details on the type inference logic used in derived fields. + */ +public class ObjectDerivedFieldType extends DerivedFieldType { + + ObjectDerivedFieldType( + DerivedField derivedField, + FieldMapper typeFieldMapper, + Function<Object, IndexableField> fieldFunction, + IndexAnalyzers indexAnalyzers + ) { + super(derivedField, typeFieldMapper, derivedField.getType().equals(DerivedFieldSupportedTypes.DATE.getName()) ? (o -> { + // this is needed to support date type for nested fields as they are required to be converted to long to create + // IndexableField + if (o instanceof String) { + return fieldFunction.apply(((DateFieldMapper) typeFieldMapper).fieldType().parse((String) o)); + } else { + return fieldFunction.apply(o); + } + }) : fieldFunction, indexAnalyzers); + } + + @Override + public DerivedFieldValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) { + if (format != null) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); + } + Function<Object, Object> valueForDisplay = DerivedFieldSupportedTypes.getValueForDisplayGenerator( + getType(), + derivedField.getFormat() != null ? 
DateFormatter.forPattern(derivedField.getFormat()) : null + ); + + Function<Object, Object> valueForDisplayUpdated = derivedField.getType().equals(DerivedFieldSupportedTypes.DATE.getName()) ? (o -> { + // this is needed to support date type for nested fields as they are required to be converted to long + if (o instanceof String) { + return valueForDisplay.apply(((DateFieldMapper) typeFieldMapper).fieldType().parse((String) o)); + } else { + return valueForDisplay.apply(o); + } + }) : valueForDisplay; + + String subFieldName = name().substring(name().indexOf(".") + 1); + return new ObjectDerivedFieldValueFetcher( + subFieldName, + getDerivedFieldLeafFactory(derivedField.getScript(), context, searchLookup == null ? context.lookup() : searchLookup), + valueForDisplayUpdated, + derivedField.getIgnoreMalformed() + ); + } + + static class ObjectDerivedFieldValueFetcher extends DerivedFieldValueFetcher { + private final String subField; + + // TODO add it as part of index setting? + private final boolean ignoreOnMalFormed; + + ObjectDerivedFieldValueFetcher( + String subField, + DerivedFieldScript.LeafFactory derivedFieldScriptFactory, + Function<Object, Object> valueForDisplay, + boolean ignoreOnMalFormed + ) { + super(derivedFieldScriptFactory, valueForDisplay); + this.subField = subField; + this.ignoreOnMalFormed = ignoreOnMalFormed; + } + + @Override + public List<Object> fetchValuesInternal(SourceLookup lookup) { + List<Object> jsonObjects = super.fetchValuesInternal(lookup); + List<Object> result = new ArrayList<>(); + for (Object o : jsonObjects) { + try { + if (o == null) { + continue; + } + Map<String, Object> s = XContentHelper.convertToMap(JsonXContent.jsonXContent, (String) o, false); + Object nestedFieldObj = getNestedField(s, subField); + if (nestedFieldObj instanceof List) { + result.addAll((List<?>) nestedFieldObj); + } else { + result.add(nestedFieldObj); + } + } catch (OpenSearchParseException e) { + if (!ignoreOnMalFormed) { + throw e; + } + } + } + 
return result; + } + + private static Object getNestedField(Map<String, Object> obj, String key) { + String[] keyParts = key.split("\\."); + Map<String, Object> currentObj = obj; + for (int i = 0; i < keyParts.length - 1; i++) { + Object value = currentObj.get(keyParts[i]); + if (value instanceof Map) { + currentObj = (Map<String, Object>) value; + } else { + return null; + } + } + return currentObj.get(keyParts[keyParts.length - 1]); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/query/DerivedFieldQuery.java b/server/src/main/java/org/opensearch/index/query/DerivedFieldQuery.java index 42ac61bf98f73..db943bdef0a12 100644 --- a/server/src/main/java/org/opensearch/index/query/DerivedFieldQuery.java +++ b/server/src/main/java/org/opensearch/index/query/DerivedFieldQuery.java @@ -29,6 +29,7 @@ import java.io.IOException; import java.util.List; import java.util.Objects; +import java.util.function.Function; /** * DerivedFieldQuery used for querying derived fields. It contains the logic to execute an input lucene query against @@ -39,6 +40,9 @@ public final class DerivedFieldQuery extends Query { private final DerivedFieldValueFetcher valueFetcher; private final SearchLookup searchLookup; private final Analyzer indexAnalyzer; + private final boolean ignoreMalformed; + + private final Function<Object, IndexableField> indexableFieldGenerator; /** * @param query lucene query to be executed against the derived field @@ -46,11 +50,20 @@ public final class DerivedFieldQuery extends Query { * using LeafSearchLookup * @param searchLookup SearchLookup to get the LeafSearchLookup look used by valueFetcher to fetch the _source */ - public DerivedFieldQuery(Query query, DerivedFieldValueFetcher valueFetcher, SearchLookup searchLookup, Analyzer indexAnalyzer) { + public DerivedFieldQuery( + Query query, + DerivedFieldValueFetcher valueFetcher, + SearchLookup searchLookup, + Analyzer indexAnalyzer, + Function<Object, IndexableField> indexableFieldGenerator, + 
boolean ignoreMalformed + ) { this.query = query; this.valueFetcher = valueFetcher; this.searchLookup = searchLookup; this.indexAnalyzer = indexAnalyzer; + this.indexableFieldGenerator = indexableFieldGenerator; + this.ignoreMalformed = ignoreMalformed; } @Override @@ -60,11 +73,11 @@ public void visit(QueryVisitor visitor) { @Override public Query rewrite(IndexSearcher indexSearcher) throws IOException { - Query rewritten = indexSearcher.rewrite(query); + Query rewritten = query.rewrite(indexSearcher); if (rewritten == query) { return this; } - return new DerivedFieldQuery(rewritten, valueFetcher, searchLookup, indexAnalyzer); + return new DerivedFieldQuery(rewritten, valueFetcher, searchLookup, indexAnalyzer, indexableFieldGenerator, ignoreMalformed); } @Override @@ -73,16 +86,23 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) { - DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc()); + DocIdSetIterator approximation; + approximation = DocIdSetIterator.all(context.reader().maxDoc()); valueFetcher.setNextReader(context); LeafSearchLookup leafSearchLookup = searchLookup.getLeafSearchLookup(context); TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) { @Override public boolean matches() { leafSearchLookup.source().setSegmentAndDocument(context, approximation.docID()); - List<IndexableField> indexableFields = valueFetcher.getIndexableField(leafSearchLookup.source()); - // TODO: in case of errors from script, should it be ignored and treated as missing field - // by using a configurable setting? 
+ List<IndexableField> indexableFields; + try { + indexableFields = valueFetcher.getIndexableField(leafSearchLookup.source(), indexableFieldGenerator); + } catch (Exception e) { + if (ignoreMalformed) { + return false; + } + throw e; + } MemoryIndex memoryIndex = new MemoryIndex(); for (IndexableField indexableField : indexableFields) { memoryIndex.addField(indexableField, indexAnalyzer); diff --git a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java index 64643ad6d2c94..91313092d8d28 100644 --- a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java +++ b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java @@ -45,7 +45,6 @@ import org.opensearch.common.TriFunction; import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.lucene.search.Queries; -import org.opensearch.common.regex.Regex; import org.opensearch.common.util.BigArrays; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.ParsingException; @@ -59,6 +58,9 @@ import org.opensearch.index.cache.bitset.BitsetFilterCache; import org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.index.mapper.ContentPath; +import org.opensearch.index.mapper.DerivedFieldResolver; +import org.opensearch.index.mapper.DerivedFieldResolverFactory; +import org.opensearch.index.mapper.DerivedFieldType; import org.opensearch.index.mapper.DocumentMapper; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.Mapper; @@ -88,6 +90,8 @@ import java.util.function.Predicate; import java.util.function.Supplier; +import static java.util.Collections.emptyList; +import static java.util.Collections.emptyMap; import static java.util.Collections.unmodifiableMap; /** @@ -120,8 +124,7 @@ public class QueryShardContext extends QueryRewriteContext { private NestedScope nestedScope; private final ValuesSourceRegistry 
valuesSourceRegistry; private BitSetProducer parentFilter; - - private Map<String, MappedFieldType> derivedFieldTypeMap = new HashMap<>(); + private DerivedFieldResolver derivedFieldResolver; public QueryShardContext( int shardId, @@ -268,6 +271,12 @@ private QueryShardContext( this.fullyQualifiedIndex = fullyQualifiedIndex; this.allowExpensiveQueries = allowExpensiveQueries; this.valuesSourceRegistry = valuesSourceRegistry; + this.derivedFieldResolver = DerivedFieldResolverFactory.createResolver( + this, + emptyMap(), + emptyList(), + indexSettings.isDerivedFieldAllowed() + ); } private void reset() { @@ -333,17 +342,9 @@ public Map<String, Query> copyNamedQueries() { * type then the fields will be returned with a type prefix. */ public Set<String> simpleMatchToIndexNames(String pattern) { - Set<String> matchingFields = mapperService.simpleMatchToFullName(pattern); - if (derivedFieldTypeMap != null && !derivedFieldTypeMap.isEmpty()) { - Set<String> matchingDerivedFields = new HashSet<>(matchingFields); - for (String fieldName : derivedFieldTypeMap.keySet()) { - if (!matchingDerivedFields.contains(fieldName) && Regex.simpleMatch(pattern, fieldName)) { - matchingDerivedFields.add(fieldName); - } - } - return matchingDerivedFields; - } - return matchingFields; + Set<String> allMatchingFields = new HashSet<>(mapperService.simpleMatchToFullName(pattern)); + allMatchingFields.addAll(derivedFieldResolver.resolvePattern(pattern)); + return allMatchingFields; } /** @@ -409,12 +410,8 @@ public ValuesSourceRegistry getValuesSourceRegistry() { return valuesSourceRegistry; } - public void setDerivedFieldTypes(Map<String, MappedFieldType> derivedFieldTypeMap) { - this.derivedFieldTypeMap = derivedFieldTypeMap; - } - - public MappedFieldType getDerivedFieldType(String fieldName) { - return derivedFieldTypeMap == null ? 
null : derivedFieldTypeMap.get(fieldName); + public void setDerivedFieldResolver(DerivedFieldResolver derivedFieldResolver) { + this.derivedFieldResolver = derivedFieldResolver; } public void setAllowUnmappedFields(boolean allowUnmappedFields) { @@ -427,9 +424,14 @@ public void setMapUnmappedFieldAsString(boolean mapUnmappedFieldAsString) { MappedFieldType failIfFieldMappingNotFound(String name, MappedFieldType fieldMapping) { if (fieldMapping != null) { + if (fieldMapping instanceof DerivedFieldType) { + // resolveDerivedFieldType() will give precedence to search time definitions over index mapping, thus + // calling it instead of directly returning. It also ensures the feature flags are honoured. + return resolveDerivedFieldType(name); + } + return fieldMapping; + } else if ((fieldMapping = resolveDerivedFieldType(name)) != null) { return fieldMapping; - } else if (getDerivedFieldType(name) != null) { - return getDerivedFieldType(name); } else if (allowUnmappedFields) { return fieldMapping; } else if (mapUnmappedFieldAsString) { @@ -440,6 +442,10 @@ MappedFieldType failIfFieldMappingNotFound(String name, MappedFieldType fieldMap } } + public MappedFieldType resolveDerivedFieldType(String name) { + return derivedFieldResolver.resolve(name); + } + private SearchLookup lookup = null; /** diff --git a/server/src/main/java/org/opensearch/index/query/VectorGeoPointShapeQueryProcessor.java b/server/src/main/java/org/opensearch/index/query/VectorGeoPointShapeQueryProcessor.java index c55d88439b11f..c4237cd94a834 100644 --- a/server/src/main/java/org/opensearch/index/query/VectorGeoPointShapeQueryProcessor.java +++ b/server/src/main/java/org/opensearch/index/query/VectorGeoPointShapeQueryProcessor.java @@ -81,7 +81,7 @@ private void validateIsGeoPointFieldType(String fieldName, QueryShardContext con MappedFieldType fieldType = context.fieldMapper(fieldName); if (fieldType instanceof GeoPointFieldMapper.GeoPointFieldType == false && !(fieldType instanceof DerivedFieldType 
- && (((DerivedFieldType) fieldType).getTypeMappedFieldType() instanceof GeoPointFieldMapper.GeoPointFieldType))) { + && (((DerivedFieldType) fieldType).getFieldMapper().fieldType() instanceof GeoPointFieldMapper.GeoPointFieldType))) { throw new QueryShardException( context, "Expected " diff --git a/server/src/main/java/org/opensearch/index/search/QueryParserHelper.java b/server/src/main/java/org/opensearch/index/search/QueryParserHelper.java index 5e9839385ce20..bae58c0ce1ebf 100644 --- a/server/src/main/java/org/opensearch/index/search/QueryParserHelper.java +++ b/server/src/main/java/org/opensearch/index/search/QueryParserHelper.java @@ -144,6 +144,9 @@ static Map<String, Float> resolveMappingField( } MappedFieldType fieldType = context.getMapperService().fieldType(fieldName); + if (fieldType == null) { + fieldType = context.resolveDerivedFieldType(fieldName); + } if (fieldType == null) { continue; } diff --git a/server/src/main/java/org/opensearch/script/DerivedFieldScript.java b/server/src/main/java/org/opensearch/script/DerivedFieldScript.java index 0a2b7cf691283..5329e8e6a1f30 100644 --- a/server/src/main/java/org/opensearch/script/DerivedFieldScript.java +++ b/server/src/main/java/org/opensearch/script/DerivedFieldScript.java @@ -124,8 +124,10 @@ private int getObjectByteSize(Object obj) { } else if (obj instanceof Tuple) { // Assuming each element in the tuple is a double for GeoPoint case return Double.BYTES * 2; + } else if (obj == null) { + return 0; } else { - throw new IllegalArgumentException("Unsupported object type passed in emit()"); + throw new IllegalArgumentException("Unsupported object type passed in emit() - " + obj); } } diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index 744d3a19f1593..d371d69a57804 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -77,9 
+77,8 @@ import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; import org.opensearch.index.engine.Engine; -import org.opensearch.index.mapper.DerivedField; -import org.opensearch.index.mapper.DerivedFieldMapper; -import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.DerivedFieldResolver; +import org.opensearch.index.mapper.DerivedFieldResolverFactory; import org.opensearch.index.query.InnerHitContextBuilder; import org.opensearch.index.query.MatchAllQueryBuilder; import org.opensearch.index.query.MatchNoneQueryBuilder; @@ -281,6 +280,13 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv Property.NodeScope ); + public static final Setting<Boolean> CLUSTER_ALLOW_DERIVED_FIELD_SETTING = Setting.boolSetting( + "search.derived_field.enabled", + true, + Property.Dynamic, + Property.NodeScope + ); + public static final int DEFAULT_SIZE = 10; public static final int DEFAULT_FROM = 0; @@ -318,6 +324,8 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv private volatile int maxOpenPitContext; + private volatile boolean allowDerivedField; + private final Cancellable keepAliveReaper; private final AtomicLong idGenerator = new AtomicLong(); @@ -389,6 +397,9 @@ public SearchService( lowLevelCancellation = LOW_LEVEL_CANCELLATION_SETTING.get(settings); clusterService.getClusterSettings().addSettingsUpdateConsumer(LOW_LEVEL_CANCELLATION_SETTING, this::setLowLevelCancellation); + + allowDerivedField = CLUSTER_ALLOW_DERIVED_FIELD_SETTING.get(settings); + clusterService.getClusterSettings().addSettingsUpdateConsumer(CLUSTER_ALLOW_DERIVED_FIELD_SETTING, this::setAllowDerivedField); } private void validateKeepAlives(TimeValue defaultKeepAlive, TimeValue maxKeepAlive) { @@ -457,6 +468,10 @@ private void setMaxOpenScrollContext(int maxOpenScrollContext) { this.maxOpenScrollContext = maxOpenScrollContext; } + private void setAllowDerivedField(boolean 
allowDerivedField) { + this.allowDerivedField = allowDerivedField; + } + private void setMaxOpenPitContext(int maxOpenPitContext) { this.maxOpenPitContext = maxOpenPitContext; } @@ -1078,28 +1093,14 @@ private DefaultSearchContext createSearchContext(ReaderContext reader, ShardSear // might end up with incorrect state since we are using now() or script services // during rewrite and normalized / evaluate templates etc. QueryShardContext context = new QueryShardContext(searchContext.getQueryShardContext()); - if (request.source() != null - && request.source().size() != 0 - && (request.source().getDerivedFieldsObject() != null || request.source().getDerivedFields() != null)) { - Map<String, MappedFieldType> derivedFieldTypeMap = new HashMap<>(); - if (request.source().getDerivedFieldsObject() != null) { - Map<String, Object> derivedFieldObject = new HashMap<>(); - derivedFieldObject.put(DerivedFieldMapper.CONTENT_TYPE, request.source().getDerivedFieldsObject()); - derivedFieldTypeMap.putAll( - DerivedFieldMapper.getAllDerivedFieldTypeFromObject(derivedFieldObject, searchContext.mapperService()) - ); - } - if (request.source().getDerivedFields() != null) { - for (DerivedField derivedField : request.source().getDerivedFields()) { - derivedFieldTypeMap.put( - derivedField.getName(), - DerivedFieldMapper.getDerivedFieldType(derivedField, searchContext.mapperService()) - ); - } - } - context.setDerivedFieldTypes(derivedFieldTypeMap); - searchContext.getQueryShardContext().setDerivedFieldTypes(derivedFieldTypeMap); - } + DerivedFieldResolver derivedFieldResolver = DerivedFieldResolverFactory.createResolver( + searchContext.getQueryShardContext(), + Optional.ofNullable(request.source()).map(SearchSourceBuilder::getDerivedFieldsObject).orElse(Collections.emptyMap()), + Optional.ofNullable(request.source()).map(SearchSourceBuilder::getDerivedFields).orElse(Collections.emptyList()), + context.getIndexSettings().isDerivedFieldAllowed() && allowDerivedField + ); + 
context.setDerivedFieldResolver(derivedFieldResolver); + searchContext.getQueryShardContext().setDerivedFieldResolver(derivedFieldResolver); Rewriteable.rewrite(request.getRewriteable(), context, true); assert searchContext.getQueryShardContext().isCacheable(); success = true; diff --git a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java index 6c22567d8cf0d..8a9704b04566f 100644 --- a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java +++ b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java @@ -1010,7 +1010,7 @@ public SearchSourceBuilder derivedField(String name, String type, Script script) * @param type type of the derived field * @param script script associated with derived field * @param properties map of field name and type of field for nested fields within object derived field - * @param sourceIndexedField source text field which is indexed to filter documents for better performance + * @param prefilterField source text field which is indexed to filter documents for better performance * @param format date format * @param ignoreMalformed ignores malformed fields instead of failing search request */ @@ -1019,7 +1019,7 @@ public SearchSourceBuilder derivedField( String type, Script script, Map<String, Object> properties, - String sourceIndexedField, + String prefilterField, String format, Boolean ignoreMalformed ) { @@ -1028,7 +1028,7 @@ public SearchSourceBuilder derivedField( } DerivedField derivedField = new DerivedField(name, type, script); derivedField.setProperties(properties); - derivedField.setSourceIndexedField(sourceIndexedField); + derivedField.setPrefilterField(prefilterField); derivedField.setFormat(format); derivedField.setIgnoreMalformed(ignoreMalformed); derivedFields.add(derivedField); diff --git a/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/HighlightPhase.java 
b/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/HighlightPhase.java index b16f06e7e3989..41a7e9934fc4d 100644 --- a/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/HighlightPhase.java +++ b/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/HighlightPhase.java @@ -145,8 +145,8 @@ private Map<String, Function<HitContext, FieldHighlightContext>> contextBuilders boolean fieldNameContainsWildcards = field.field().contains("*"); for (String fieldName : fieldNamesToHighlight) { MappedFieldType fieldType = context.mapperService().fieldType(fieldName); - if (fieldType == null && context.getQueryShardContext().getDerivedFieldType(fieldName) != null) { - fieldType = context.getQueryShardContext().getDerivedFieldType(fieldName); + if (fieldType == null) { + fieldType = context.getQueryShardContext().resolveDerivedFieldType(fieldName); } if (fieldType == null) { continue; diff --git a/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/UnifiedHighlighter.java b/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/UnifiedHighlighter.java index c791c8bc05054..510773c8bb2b6 100644 --- a/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/UnifiedHighlighter.java +++ b/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/UnifiedHighlighter.java @@ -160,9 +160,9 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th Integer fieldMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzerOffset(); int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments(); Analyzer analyzer = getAnalyzer(fieldContext.context.mapperService().documentMapper()); - if (fieldContext.context.getQueryShardContext().getDerivedFieldType(fieldContext.fieldName) != null) { - analyzer = ((DerivedFieldType) fieldContext.context.getQueryShardContext().getDerivedFieldType(fieldContext.fieldName)) - .getIndexAnalyzer(); + MappedFieldType 
derivedFieldType = fieldContext.context.getQueryShardContext().resolveDerivedFieldType(fieldContext.fieldName); + if (derivedFieldType != null) { + analyzer = ((DerivedFieldType) derivedFieldType).getIndexAnalyzer(); } if (fieldMaxAnalyzedOffset != null) { analyzer = getLimitedOffsetAnalyzer(analyzer, fieldMaxAnalyzedOffset); diff --git a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperQueryTests.java b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperQueryTests.java index 1307028dd27b0..b9bdfca3509e3 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperQueryTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperQueryTests.java @@ -19,12 +19,14 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; +import org.opensearch.OpenSearchParseException; import org.opensearch.common.collect.Tuple; import org.opensearch.common.lucene.Lucene; import org.opensearch.core.index.Index; import org.opensearch.geometry.Rectangle; import org.opensearch.index.query.QueryBuilders; import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.script.DerivedFieldScript; import java.io.IOException; @@ -35,6 +37,7 @@ import org.mockito.Mockito; import static org.opensearch.index.query.QueryBuilders.geoShapeQuery; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.when; public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { @@ -50,7 +53,17 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "GET", 1.5, 1500L, - new Tuple<>(10.0, 20.0) }, + new Tuple<>(10.0, 20.0), + "{ " + + "\"float_field\": 1.5," + + "\"boolean_field\": true," + + "\"long_field\": 1500," + + "\"date_field\": \"2024-03-20T08:30:45\"," + + "\"keyword_field\": \"GET\"," + + "\"array_field\": [1, 2, 3]," + + 
"\"text_field\": \"document number 1\"," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "}" }, { "232.0.0.0 GET /images/hm_bg.jpg?size=2.3KB HTTP/1.0 400 2024-03-20T09:15:20 2300", false, @@ -59,7 +72,17 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "GET", 2.3, 2300L, - new Tuple<>(20.0, 30.0) }, + new Tuple<>(20.0, 30.0), + "{ " + + "\"float_field\": 2.3," + + "\"boolean_field\": false," + + "\"long_field\": 2300," + + "\"date_field\": \"2024-03-20T09:15:20\"," + + "\"keyword_field\": \"GET\"," + + "\"array_field\": [4, 5, 6]," + + "\"text_field\": \"document number 2\"," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "}" }, { "26.1.0.0 DELETE /images/hm_bg.jpg?size=3.7KB HTTP/1.0 200 2024-03-20T10:05:55 3700", true, @@ -68,7 +91,17 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "DELETE", 3.7, 3700L, - new Tuple<>(30.0, 40.0) }, + new Tuple<>(30.0, 40.0), + "{ " + + "\"float_field\": 3.7," + + "\"boolean_field\": true," + + "\"long_field\": 3700," + + "\"date_field\": \"2024-03-20T10:05:55\"," + + "\"array_field\": [7, 8, 9]," + + "\"keyword_field\": \"DELETE\"," + + "\"text_field\": \"document number 3\"," + + "\"nested_field\": { \"sub_field_1\": \"DELETE\"}" + + "}" }, { "247.37.0.0 GET /french/splash_inet.html?size=4.1KB HTTP/1.0 400 2024-03-20T11:20:10 4100", false, @@ -77,7 +110,17 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "GET", 4.1, 4100L, - new Tuple<>(40.0, 50.0) }, + new Tuple<>(40.0, 50.0), + "{ " + + "\"float_field\": 4.1," + + "\"boolean_field\": false," + + "\"long_field\": 4100," + + "\"date_field\": \"2024-03-20T11:20:10\"," + + "\"keyword_field\": \"GET\"," + + "\"array_field\": [10, 11, 12]," + + "\"text_field\": \"document number 4\"," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "}" }, { "247.37.0.0 DELETE /french/splash_inet.html?size=5.8KB HTTP/1.0 400 2024-03-20T12:45:30 5800", false, @@ -86,7 +129,17 @@ public class 
DerivedFieldMapperQueryTests extends MapperServiceTestCase { "DELETE", 5.8, 5800L, - new Tuple<>(50.0, 60.0) }, + new Tuple<>(50.0, 60.0), + "{ " + + "\"float_field\": 5.8," + + "\"boolean_field\": false," + + "\"long_field\": 5800," + + "\"date_field\": \"2024-03-20T12:45:30\"," + + "\"keyword_field\": \"DELETE\"," + + "\"array_field\": [13, 14, 15]," + + "\"text_field\": \"document number 5\"," + + "\"nested_field\": { \"sub_field_1\": \"DELETE\"}" + + "}" }, { "10.20.30.40 GET /path/to/resource?size=6.3KB HTTP/1.0 200 2024-03-20T13:10:15 6300", true, @@ -95,7 +148,17 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "GET", 6.3, 6300L, - new Tuple<>(60.0, 70.0) }, + new Tuple<>(60.0, 70.0), + "{ " + + "\"float_field\": 6.3," + + "\"boolean_field\": true," + + "\"long_field\": 6300," + + "\"date_field\": \"2024-03-20T13:10:15\"," + + "\"keyword_field\": \"GET\"," + + "\"array_field\": [16, 17, 18]," + + "\"text_field\": \"document number 6\"," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "}" }, { "50.60.70.80 GET /path/to/resource?size=7.2KB HTTP/1.0 404 2024-03-20T14:20:50 7200", false, @@ -104,7 +167,17 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "GET", 7.2, 7200L, - new Tuple<>(70.0, 80.0) }, + new Tuple<>(70.0, 80.0), + "{ " + + "\"float_field\": 7.2," + + "\"boolean_field\": false," + + "\"long_field\": 7200," + + "\"date_field\": \"2024-03-20T14:20:50\"," + + "\"keyword_field\": \"GET\"," + + "\"array_field\": [19, 20, 21]," + + "\"text_field\": \"document number 7\"," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "}" }, { "127.0.0.1 PUT /path/to/resource?size=8.9KB HTTP/1.0 500 2024-03-20T15:30:25 8900", false, @@ -113,7 +186,17 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "PUT", 8.9, 8900L, - new Tuple<>(80.0, 90.0) }, + new Tuple<>(80.0, 90.0), + "{ " + + "\"float_field\": 8.9," + + "\"boolean_field\": false," + + "\"long_field\": 8900," + + 
"\"date_field\": \"2024-03-20T15:30:25\"," + + "\"keyword_field\": \"PUT\"," + + "\"array_field\": [22, 23, 24]," + + "\"text_field\": \"document number 8\"," + + "\"nested_field\": { \"sub_field_1\": \"PUT\"}" + + "}" }, { "127.0.0.1 GET /path/to/resource?size=9.4KB HTTP/1.0 200 2024-03-20T16:40:15 9400", true, @@ -122,7 +205,17 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "GET", 9.4, 9400L, - new Tuple<>(85.0, 90.0) }, + new Tuple<>(85.0, 90.0), + "{ " + + "\"float_field\": 9.4," + + "\"boolean_field\": true," + + "\"long_field\": 9400," + + "\"date_field\": \"2024-03-20T16:40:15\"," + + "\"keyword_field\": \"GET\"," + + "\"array_field\": [25, 26, 27]," + + "\"text_field\": \"document number 9\"," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "}" }, { "192.168.1.1 GET /path/to/resource?size=10.7KB HTTP/1.0 400 2024-03-20T17:50:40 10700", false, @@ -131,7 +224,39 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "GET", 10.7, 10700L, - new Tuple<>(90.0, 90.0) } }; + new Tuple<>(90.0, 90.0), + "{ " + + "\"float_field\": 10.7," + + "\"boolean_field\": false," + + "\"long_field\": 10700," + + "\"date_field\": \"2024-03-20T17:50:40\"," + + "\"keyword_field\": \"GET\"," + + "\"array_field\": [28, 29, 30]," + + "\"text_field\": \"document number 10\"," + + "\"invalid_field\": {}," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "}" }, + // malformed json + { + "192.168.1.1 GET /path/to/resource?size=10.7KB HTTP/1.0 400 2024-03-20T17:50:40 10700", + false, + -1L, + "192.169.1.1", + "JKJ", + -1, + -1, + new Tuple<>(-1.0, -1.0), + "{ " + + "\"float_field\": 10.7," + + "\"boolean_field\": false," + + "\"long_field\": 10700," + + "\"date_field\": \"2024-03-20T17:50:40\"," + + "\"keyword_field\": \"GET\"," + + "\"array_field\": [28, 29, 30]," + + "\"text_field\": \"document number 10\"," + + "\"invalid_field\": {}," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "" } }; public void 
testAllPossibleQueriesOnDerivedFields() throws IOException { MapperService mapperService = createMapperService(topMapping(b -> { @@ -156,6 +281,7 @@ public void testAllPossibleQueriesOnDerivedFields() throws IOException { { b.field("type", "date"); b.field("script", ""); + b.field("format", "yyyy-MM-dd"); } b.endObject(); b.startObject("client_ip"); @@ -188,6 +314,12 @@ public void testAllPossibleQueriesOnDerivedFields() throws IOException { b.field("script", ""); } b.endObject(); + b.startObject("object_field"); + { + b.field("type", "object"); + b.field("script", ""); + } + b.endObject(); } b.endObject(); })); @@ -302,4 +434,514 @@ public void execute() { } } } + + /** Checks querying of sub-fields of an "object"-typed derived field: indexes one raw_message document per raw_requests row, has the stubbed script emit raw_requests[docId][scriptIndex[0]], and asserts hit counts for term, match_phrase, range, prefix, wildcard and regexp queries on object_field sub-fields; searching the object declared without ignore_malformed is expected to throw OpenSearchParseException. */ public void testObjectDerivedFields() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> { + b.startObject("properties"); + { + b.startObject("raw_message"); + { + b.field("type", "text"); + } + b.endObject(); + } + b.endObject(); + b.startObject("derived"); + { + b.startObject("object_field"); + { + b.field("type", "object"); + b.field("script", ""); + b.field("ignore_malformed", "true"); + } + b.endObject(); + b.startObject("object_field_without_ignored_malformed"); + { + b.field("type", "object"); + b.field("script", ""); + } + b.endObject(); + } + b.endObject(); + })); + + List<Document> docs = new ArrayList<>(); + for (Object[] request : raw_requests) { + Document document = new Document(); + document.add(new TextField("raw_message", (String) request[0], Field.Store.YES)); + docs.add(document); + } + + int[] scriptIndex = { 8 }; + + // Mock DerivedFieldScript.Factory + DerivedFieldScript.Factory factory = (params, lookup) -> (DerivedFieldScript.LeafFactory) ctx -> new DerivedFieldScript( + params, + lookup, + ctx + ) { + int docId = 0; + + @Override + public void setDocument(int docId) { + super.setDocument(docId); + this.docId = docId; + } + + @Override + public void execute() { + addEmittedValue(raw_requests[docId][scriptIndex[0]]); + } + }; + + // Index and
Search + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + for (Document d : docs) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.compile(Mockito.any(), Mockito.any())).thenReturn(factory); + when(queryShardContext.sourcePath("raw_message")).thenReturn(Set.of("raw_message")); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + when(queryShardContext.getIndexReader()).thenReturn(reader); + when(queryShardContext.fieldMapper(anyString())).thenAnswer(inv -> { + MappedFieldType res = mapperService.fieldType(inv.getArguments()[0].toString()); + if (res == null) { + DerivedFieldResolver derivedFieldResolver = DerivedFieldResolverFactory.createResolver( + queryShardContext, + null, + null, + true + ); + res = derivedFieldResolver.resolve(inv.getArguments()[0].toString()); + } + return res; + }); + IndexSearcher searcher = new IndexSearcher(reader); + + // NOTE: object_field.keyword_field will be resolved to a text field for nested field as that's the default behaviour in + // dynamic mappings too + TermQueryBuilder termQueryBuilder = new TermQueryBuilder("object_field.keyword_field", "GET"); + termQueryBuilder.caseInsensitive(true); + Query query = termQueryBuilder.toQuery(queryShardContext); + TopDocs topDocs = searcher.search(query, 10); + assertEquals(7, topDocs.totalHits.value); + + // since last doc has a malformed json, if ignore_malformed isn't set or set as false, the query should fail + termQueryBuilder = new TermQueryBuilder("object_field_without_ignored_malformed.keyword_field", "GET"); + termQueryBuilder.caseInsensitive(true); + query = termQueryBuilder.toQuery(queryShardContext); + Query finalQuery = query; + assertThrows(OpenSearchParseException.class, () -> searcher.search(finalQuery, 10)); + +
query = QueryBuilders.matchPhraseQuery("object_field.text_field", "document number 1").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(1, topDocs.totalHits.value); + + query = QueryBuilders.matchPhraseQuery("object_field.text_field", "document number 11").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(0, topDocs.totalHits.value); + + // Range queries of types - date, long and double + query = QueryBuilders.rangeQuery("object_field.date_field").from("2024-03-20T14:20:50").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(4, topDocs.totalHits.value); + + query = QueryBuilders.rangeQuery("object_field.float_field").from("4.1").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(7, topDocs.totalHits.value); + + query = QueryBuilders.rangeQuery("object_field.long_field").from("5800").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(6, topDocs.totalHits.value); + + // Prefix Query + query = QueryBuilders.prefixQuery("object_field.keyword_field", "de").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(2, topDocs.totalHits.value); + + query = QueryBuilders.wildcardQuery("object_field.keyword_field", "g*").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(7, topDocs.totalHits.value); + + // Regexp Query + query = QueryBuilders.regexpQuery("object_field.keyword_field", ".*let.*").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(2, topDocs.totalHits.value); + + // tested deep nested field + query = QueryBuilders.regexpQuery("object_field.nested_field.sub_field_1", ".*let.*").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(2, topDocs.totalHits.value); + + // Test nested array field + query =
QueryBuilders.rangeQuery("object_field.array_field").from("1").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(10, topDocs.totalHits.value); + + query = QueryBuilders.rangeQuery("object_field.array_field").from("3").to("6").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(2, topDocs.totalHits.value); + + query = QueryBuilders.rangeQuery("object_field.array_field").from("9").to("9").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(1, topDocs.totalHits.value); + + query = QueryBuilders.rangeQuery("object_field.array_field").from("10").to("12").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(1, topDocs.totalHits.value); + + query = QueryBuilders.rangeQuery("object_field.array_field").from("31").to("50").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(0, topDocs.totalHits.value); + + // tested missing nested field + query = QueryBuilders.regexpQuery("object_field.invalid_field.sub_field", ".*let.*").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(0, topDocs.totalHits.value); + } + } + } + + /** Checks prefilter_field handling on derived fields: script evaluations are counted through docsEvaluated (incremented in setDocument) to compare fields declared with and without prefilter_field; toQuery is expected to throw MapperException for keyword sub-fields of objects whose prefilter_field is "invalid_field" or the long-mapped "long_field", while range queries on their date_field still return hits. */ public void testObjectDerivedFieldsWithPrefilterField() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> { + b.startObject("properties"); + { + b.startObject("raw_message"); + { + b.field("type", "text"); + } + b.endObject(); + b.startObject("long_field"); + { + b.field("type", "long"); + } + b.endObject(); + } + b.endObject(); + b.startObject("derived"); + { + b.startObject("regular_derived_field"); + { + b.field("type", "text"); + b.field("script", ""); + b.field("ignore_malformed", true); + b.field("prefilter_field", "raw_message"); + } + b.endObject(); + b.startObject("regular_derived_field_without_prefilter_field"); + { + b.field("type", "text"); + b.field("script", ""); + b.field("ignore_malformed", true); + } + b.endObject(); +
b.startObject("object_field"); + { + b.field("type", "object"); + b.field("script", ""); + b.field("ignore_malformed", true); + } + b.endObject(); + b.startObject("object_field_2"); + { + b.field("type", "object"); + b.field("script", ""); + b.startObject("properties"); + { + b.field("keyword_field", "keyword"); + } + b.endObject(); + b.field("prefilter_field", "raw_message"); + b.field("ignore_malformed", true); + } + b.endObject(); + b.startObject("invalid_object"); + { + b.field("type", "object"); + b.field("script", ""); + b.startObject("properties"); + { + b.field("keyword_field", "keyword"); + } + b.endObject(); + b.field("prefilter_field", "invalid_field"); + b.field("ignore_malformed", true); + } + b.endObject(); + b.startObject("long_prefilter_field_object"); + { + b.field("type", "object"); + b.field("script", ""); + b.startObject("properties"); + { + b.field("keyword_field", "keyword"); + } + b.endObject(); + b.field("prefilter_field", "long_field"); + b.field("ignore_malformed", true); + } + b.endObject(); + } + b.endObject(); + })); + + List<Document> docs = new ArrayList<>(); + for (Object[] request : raw_requests) { + Document document = new Document(); + document.add(new TextField("raw_message", (String) request[0], Field.Store.YES)); + docs.add(document); + } + + /* one-element arrays: both are accessed from the anonymous script class below and reassigned later in this test */ int[] scriptIndex = { 8 }; + int[] docsEvaluated = { 0 }; + + // Mock DerivedFieldScript.Factory + DerivedFieldScript.Factory factory = (params, lookup) -> (DerivedFieldScript.LeafFactory) ctx -> new DerivedFieldScript( + params, + lookup, + ctx + ) { + int docId = 0; + + @Override + public void setDocument(int docId) { + docsEvaluated[0]++; + super.setDocument(docId); + this.docId = docId; + } + + @Override + public void execute() { + addEmittedValue(raw_requests[docId][scriptIndex[0]]); + } + }; + + // Index and Search + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + for (Document d : docs) { +
iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.compile(Mockito.any(), Mockito.any())).thenReturn(factory); + when(queryShardContext.sourcePath("raw_message")).thenReturn(Set.of("raw_message")); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + when(queryShardContext.getIndexReader()).thenReturn(reader); + when(queryShardContext.fieldMapper(anyString())).thenAnswer(inv -> { + MappedFieldType res = mapperService.fieldType(inv.getArguments()[0].toString()); + if (res == null) { + DerivedFieldResolver derivedFieldResolver = DerivedFieldResolverFactory.createResolver( + queryShardContext, + null, + null, + true + ); + res = derivedFieldResolver.resolve(inv.getArguments()[0].toString()); + } + return res; + }); + IndexSearcher searcher = new IndexSearcher(reader); + + // NOTE: object_field.keyword_field will be resolved to a text field for nested field as that's the default behaviour in + // dynamic mappings too + TermQueryBuilder termQueryBuilder = new TermQueryBuilder("object_field.keyword_field", "GET"); + termQueryBuilder.caseInsensitive(true); + Query query = termQueryBuilder.toQuery(queryShardContext); + TopDocs topDocs = searcher.search(query, 10); + assertEquals(7, topDocs.totalHits.value); + + // test object_field_2 + docsEvaluated[0] = 0; + termQueryBuilder = new TermQueryBuilder("object_field_2.keyword_field", "GET"); + termQueryBuilder.caseInsensitive(true); + query = termQueryBuilder.toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(7, topDocs.totalHits.value); + + // since we have prefilter_field set to "raw_message", it should not evaluate all documents + assertEquals(8, docsEvaluated[0]); + + termQueryBuilder = new TermQueryBuilder("invalid_object.keyword_field", "GET"); + termQueryBuilder.caseInsensitive(true); + TermQueryBuilder
finalTermQueryBuilder = termQueryBuilder; + assertThrows(MapperException.class, () -> finalTermQueryBuilder.toQuery(queryShardContext)); + + termQueryBuilder = new TermQueryBuilder("long_prefilter_field_object.keyword_field", "GET"); + termQueryBuilder.caseInsensitive(true); + TermQueryBuilder finalTermQueryBuilder2 = termQueryBuilder; + assertThrows(MapperException.class, () -> finalTermQueryBuilder2.toQuery(queryShardContext)); + + // when nested field is of numeric type or date, the range queries should still work irrespective of invalid + // prefilter_field + query = QueryBuilders.rangeQuery("invalid_object.date_field").from("2024-03-20T14:20:50").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(4, topDocs.totalHits.value); + + query = QueryBuilders.rangeQuery("long_prefilter_field_object.date_field") + .from("2024-03-20T14:20:50") + .toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(4, topDocs.totalHits.value); + + // test regular_derived_field + docsEvaluated[0] = 0; + scriptIndex[0] = 4; + query = QueryBuilders.termQuery("regular_derived_field", "delete").caseInsensitive(true).toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(2, topDocs.totalHits.value); + assertEquals(2, docsEvaluated[0]); + + // test regular_derived_field_without_prefilter_field + docsEvaluated[0] = 0; + scriptIndex[0] = 4; + query = QueryBuilders.termQuery("regular_derived_field_without_prefilter_field", "delete") + .caseInsensitive(true) + .toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(2, topDocs.totalHits.value); + assertEquals(11, docsEvaluated[0]); + } + } + } + + /** Checks ignore_malformed on "object" derived fields whose keyword_field sub-field is declared as date: a range query on object_field_3 (ignore_malformed false) is expected to throw IllegalArgumentException during search, while the same query on object_field_2 (ignore_malformed true) is expected to return 0 hits. */ public void testObjectDerivedFieldsWithIgnoreMalformed() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> { + b.startObject("properties"); + { + b.startObject("raw_message"); + { + b.field("type", "text"); + } + b.endObject(); +
b.startObject("long_field"); + { + b.field("type", "long"); + } + b.endObject(); + } + b.endObject(); + b.startObject("derived"); + { + b.startObject("object_field"); + { + b.field("type", "object"); + b.field("script", ""); + b.field("ignore_malformed", true); + } + b.endObject(); + b.startObject("object_field_2"); + { + b.field("type", "object"); + b.field("script", ""); + b.startObject("properties"); + { + b.field("keyword_field", "date"); + } + b.endObject(); + b.field("prefilter_field", "raw_message"); + b.field("ignore_malformed", true); + } + b.endObject(); + b.startObject("object_field_3"); + { + b.field("type", "object"); + b.field("script", ""); + b.startObject("properties"); + { + b.field("keyword_field", "date"); + } + b.endObject(); + b.field("prefilter_field", "raw_message"); + b.field("ignore_malformed", false); + } + b.endObject(); + } + b.endObject(); + })); + + List<Document> docs = new ArrayList<>(); + for (Object[] request : raw_requests) { + Document document = new Document(); + document.add(new TextField("raw_message", (String) request[0], Field.Store.YES)); + docs.add(document); + } + + int[] scriptIndex = { 8 }; + int[] docsEvaluated = { 0 }; + + // Mock DerivedFieldScript.Factory + DerivedFieldScript.Factory factory = (params, lookup) -> (DerivedFieldScript.LeafFactory) ctx -> new DerivedFieldScript( + params, + lookup, + ctx + ) { + int docId = 0; + + @Override + public void setDocument(int docId) { + docsEvaluated[0]++; + super.setDocument(docId); + this.docId = docId; + } + + @Override + public void execute() { + addEmittedValue(raw_requests[docId][scriptIndex[0]]); + } + }; + + // Index and Search + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + for (Document d : docs) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); +
when(queryShardContext.compile(Mockito.any(), Mockito.any())).thenReturn(factory); + when(queryShardContext.sourcePath("raw_message")).thenReturn(Set.of("raw_message")); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + when(queryShardContext.getIndexReader()).thenReturn(reader); + when(queryShardContext.fieldMapper(anyString())).thenAnswer(inv -> { + MappedFieldType res = mapperService.fieldType(inv.getArguments()[0].toString()); + if (res == null) { + DerivedFieldResolver derivedFieldResolver = DerivedFieldResolverFactory.createResolver( + queryShardContext, + null, + null, + true + ); + res = derivedFieldResolver.resolve(inv.getArguments()[0].toString()); + } + return res; + }); + IndexSearcher searcher = new IndexSearcher(reader); + + // NOTE: object_field.keyword_field will be resolved to a text field for nested field as that's the default behaviour in + // dynamic mappings too + TermQueryBuilder termQueryBuilder = new TermQueryBuilder("object_field.keyword_field", "GET"); + termQueryBuilder.caseInsensitive(true); + Query query = termQueryBuilder.toQuery(queryShardContext); + TopDocs topDocs = searcher.search(query, 10); + assertEquals(7, topDocs.totalHits.value); + + // check if ignoreMalformed is set to false, the query fails on malformed values + query = QueryBuilders.rangeQuery("object_field_3.keyword_field").from("2024-03-20T14:20:50").toQuery(queryShardContext); + Query finalQuery = query; + assertThrows(IllegalArgumentException.class, () -> searcher.search(finalQuery, 10)); + + // check if ignoreMalformed is set to true, the query passes with 0 results + query = QueryBuilders.rangeQuery("object_field_2.keyword_field").from("2024-03-20T14:20:50").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(0, topDocs.totalHits.value); + } + } + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldResolverTests.java
b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldResolverTests.java new file mode 100644 index 0000000000000..b2a7cfc4359da --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldResolverTests.java @@ -0,0 +1,537 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.opensearch.OpenSearchException; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.core.index.Index; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.script.Script; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** Tests for derived field resolution through DerivedFieldResolverFactory, DefaultDerivedFieldResolver and NoOpDerivedFieldResolver. */ public class DerivedFieldResolverTests extends MapperServiceTestCase { + /** A derived field declared under "derived" in the index mapping resolves with its declared type. */ public void testResolutionFromIndexMapping() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> { + b.startObject("properties"); + { + b.startObject("indexed_field"); + { + b.field("type", "text"); + } + b.endObject(); + } + b.endObject(); + b.startObject("derived"); + { + b.startObject("derived_text"); + { + b.field("type", "keyword"); + b.field("script", ""); + } + b.endObject(); + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + DefaultDerivedFieldResolver
resolver = (DefaultDerivedFieldResolver) DerivedFieldResolverFactory.createResolver( + queryShardContext, + null, + null, + true + ); + assertEquals("keyword", resolver.resolve("derived_text").getType()); + assertEqualDerivedField(new DerivedField("derived_text", "keyword", new Script("")), resolver.resolve("derived_text").derivedField); + } + + /** With an empty index mapping, derived fields passed as a map (createDerivedFieldsObject) and as a list (createDerivedFields) resolve with their declared types. */ public void testResolutionFromSearchRequest() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + DefaultDerivedFieldResolver resolver = (DefaultDerivedFieldResolver) DerivedFieldResolverFactory.createResolver( + queryShardContext, + createDerivedFieldsObject(), + createDerivedFields(), + true + ); + assertEquals("text", resolver.resolve("derived_text").getType()); + assertEqualDerivedField(new DerivedField("derived_text", "text", new Script("")), resolver.resolve("derived_text").derivedField); + assertEquals("object", resolver.resolve("derived_object").getType()); + assertEqualDerivedField( + new DerivedField("derived_object", "object", new Script("")), + resolver.resolve("derived_object").derivedField + ); + assertEquals("keyword", resolver.resolve("derived_keyword").getType()); + assertEqualDerivedField( + new DerivedField("derived_keyword", "keyword", new Script("")), + resolver.resolve("derived_keyword").derivedField + ); + } + + /** With no derived fields in the mapping and none passed to the factory, resolve returns null. */ public void testEmpty() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + DefaultDerivedFieldResolver resolver = (DefaultDerivedFieldResolver) DerivedFieldResolverFactory.createResolver( + queryShardContext, + null, + null, + true + ); + assertNull(resolver.resolve("derived_keyword")); + } +
+ /** derived_text is defined both in the index mapping (keyword) and in the passed definitions (text); the passed definition is the one expected from resolve, while derived_2, present only in the mapping, keeps its keyword type. */ public void testResolutionPrecedence() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> { + b.startObject("properties"); + { + b.startObject("indexed_field"); + { + b.field("type", "text"); + } + b.endObject(); + } + b.endObject(); + b.startObject("derived"); + { + b.startObject("derived_text"); + { + b.field("type", "keyword"); + b.field("script", ""); + } + b.endObject(); + b.startObject("derived_2"); + { + b.field("type", "keyword"); + b.field("script", ""); + } + b.endObject(); + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + DefaultDerivedFieldResolver resolver = (DefaultDerivedFieldResolver) DerivedFieldResolverFactory.createResolver( + queryShardContext, + createDerivedFieldsObject(), + createDerivedFields(), + true + ); + + // precedence given to search definition; derived_text is present in both - + // search definition uses type text, whereas index definition uses the type keyword + + assertEquals("text", resolver.resolve("derived_text").getType()); + assertEqualDerivedField(new DerivedField("derived_text", "text", new Script("")), resolver.resolve("derived_text").derivedField); + + assertEquals("keyword", resolver.resolve("derived_2").getType()); + assertEqualDerivedField(new DerivedField("derived_2", "keyword", new Script("")), resolver.resolve("derived_2").derivedField); + } + + /** Sub-fields of an object derived field from the index mapping are expected to resolve to "text" with the stubbed value fetcher, or to the type listed under the parent's "properties" (carrying the parent's properties and format) when one is declared. */ public void testNestedWithParentDefinedInIndexMapping() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> { + b.startObject("properties"); + { + b.startObject("indexed_field"); + { + b.field("type", "text"); + } + b.endObject(); + } + b.endObject(); + b.startObject("derived"); + { + b.startObject("derived_obj"); + { + b.field("type", "object"); + b.field("script", ""); + } + b.endObject(); + b.startObject("derived_obj_2"); + { + b.field("type", "object"); +
b.field("script", ""); + b.field("format", "yyyy-MM-dd"); + + b.startObject("properties"); + { + b.field("sub_field1", "long"); + b.field("sub_field2", "date"); + } + b.endObject(); + } + b.endObject(); + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + Document d = new Document(); + for (int i = 0; i < 10; i++) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + FieldTypeInference typeInference = new FieldTypeInference("test_index", queryShardContext.getMapperService(), reader); + TestDerivedFieldResolver resolver = new TestDerivedFieldResolver(queryShardContext, null, null, typeInference); + assertEquals("text", resolver.resolve("derived_obj.sub_field1").getType()); + assertEqualDerivedField( + new DerivedField("derived_obj.sub_field1", "text", new Script("")), + resolver.resolve("derived_obj.sub_field1").derivedField + ); + assertEquals("text", resolver.resolve("derived_obj.sub_field1.sub_field2").getType()); + assertEqualDerivedField( + new DerivedField("derived_obj.sub_field1.sub_field2", "text", new Script("")), + resolver.resolve("derived_obj.sub_field1.sub_field2").derivedField + ); + // when explicit type is set in properties + DerivedField expectedDerivedField1 = new DerivedField("derived_obj_2.sub_field1", "long", new Script("")); + expectedDerivedField1.setProperties(Map.of("sub_field1", "long", "sub_field2", "date")); + expectedDerivedField1.setFormat("yyyy-MM-dd"); + assertEqualDerivedField(expectedDerivedField1, resolver.resolve("derived_obj_2.sub_field1").derivedField); + DerivedField expectedDerivedField2 = new DerivedField("derived_obj_2.sub_field2", "date", new Script("")); + expectedDerivedField2.setProperties(Map.of("sub_field1",
"long", "sub_field2", "date")); + expectedDerivedField2.setFormat("yyyy-MM-dd"); + assertEqualDerivedField(expectedDerivedField2, resolver.resolve("derived_obj_2.sub_field2").derivedField); + } + } + } + + /** Sub-fields of derived_object from the passed definitions are expected to resolve to "text"; resolver.cnt is expected to be 2 after resolving two distinct nested paths. */ public void testNestedWithParentDefinedInSearchRequest() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + Document d = new Document(); + for (int i = 0; i < 10; i++) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + FieldTypeInference typeInference = new FieldTypeInference("test_index", queryShardContext.getMapperService(), reader); + TestDerivedFieldResolver resolver = new TestDerivedFieldResolver( + queryShardContext, + createDerivedFieldsObject(), + createDerivedFields(), + typeInference + ); + assertEquals("text", resolver.resolve("derived_object.sub_field1").getType()); + assertEqualDerivedField( + new DerivedField("derived_object.sub_field1", "text", new Script("")), + resolver.resolve("derived_object.sub_field1").derivedField + ); + assertEquals("text", resolver.resolve("derived_object.sub_field1.sub_field2").getType()); + assertEqualDerivedField( + new DerivedField("derived_object.sub_field1.sub_field2", "text", new Script("")), + resolver.resolve("derived_object.sub_field1.sub_field2").derivedField + ); + assertEquals(2, resolver.cnt); + + } + } + } + + /** A nested path whose parent is defined neither in the mapping nor in passed definitions is expected to resolve to null. */ public void testNestedWithParentUndefined() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + try (Directory dir =
newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + Document d = new Document(); + for (int i = 0; i < 10; i++) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + FieldTypeInference typeInference = new FieldTypeInference("test_index", queryShardContext.getMapperService(), reader); + TestDerivedFieldResolver resolver = new TestDerivedFieldResolver(queryShardContext, null, null, typeInference); + assertNull(resolver.resolve("derived_object.sub_field1")); + } + } + } + + /** With the error flag set, the stubbed value fetcher returns null and the nested field is expected to resolve to null. */ public void testInferredTypeNull() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + Document d = new Document(); + for (int i = 0; i < 10; i++) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + FieldTypeInference typeInference = new FieldTypeInference("test_index", queryShardContext.getMapperService(), reader); + TestDerivedFieldResolver resolver = new TestDerivedFieldResolver( + queryShardContext, + createDerivedFieldsObject(), + createDerivedFields(), + typeInference, + true + ); + assertNull(resolver.resolve("derived_object.field")); + } + } + } + + /** When the mocked FieldTypeInference.infer throws IOException, resolve is expected to return null. */ public void testInferThrowsIOException() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + FieldTypeInference typeInferenceMock = mock(FieldTypeInference.class); + when(typeInferenceMock.infer(any())).thenThrow(new IOException("Simulated IOException")); +
TestDerivedFieldResolver resolver = new TestDerivedFieldResolver( + queryShardContext, + createDerivedFieldsObject(), + createDerivedFields(), + typeInferenceMock, + true + ); + assertNull(resolver.resolve("derived_object.field")); + } + + /** Fields mapped under "properties" are expected to resolve to null through the derived field resolver. */ public void testRegularFieldTypesAreNotResolved() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> { + b.startObject("properties"); + { + b.startObject("indexed_field"); + { + b.field("type", "text"); + } + b.endObject(); + b.startObject("indexed_field_2.sub_field"); + { + b.field("type", "text"); + } + b.endObject(); + } + b.endObject(); + b.startObject("derived"); + { + b.startObject("derived_text"); + { + b.field("type", "keyword"); + b.field("script", ""); + } + b.endObject(); + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + DefaultDerivedFieldResolver resolver = (DefaultDerivedFieldResolver) DerivedFieldResolverFactory.createResolver( + queryShardContext, + null, + null, + true + ); + assertNull(resolver.resolve("indexed_field")); + assertNull(resolver.resolve("indexed_field_2.sub_field")); + } + + /** Resolving the same nested path twice is expected to invoke getValueFetcher only once (cnt == 1). */ public void testResolutionCaching() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + Document d = new Document(); + for (int i = 0; i < 10; i++) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + FieldTypeInference typeInference = new FieldTypeInference("test_index", queryShardContext.getMapperService(), reader); + TestDerivedFieldResolver resolver = new
TestDerivedFieldResolver( + queryShardContext, + createDerivedFieldsObject(), + createDerivedFields(), + typeInference + ); + assertEquals("text", resolver.resolve("derived_object.sub_field1").getType()); + assertEquals("text", resolver.resolve("derived_object.sub_field1").getType()); + assertEquals(1, resolver.cnt); + } + } + } + + /** With the last factory argument false and no passed definitions, a NoOpDerivedFieldResolver is expected that resolves nothing; passing definitions while that argument is false, or while allowExpensiveQueries() is stubbed to false, is expected to throw OpenSearchException. */ public void testResolutionDisabled() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> { + b.startObject("properties"); + { + b.startObject("indexed_field"); + { + b.field("type", "text"); + } + b.endObject(); + b.startObject("indexed_field_2.sub_field"); + { + b.field("type", "text"); + } + b.endObject(); + } + b.endObject(); + b.startObject("derived"); + { + b.startObject("derived_text"); + { + b.field("type", "keyword"); + b.field("script", ""); + } + b.endObject(); + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + DerivedFieldResolver resolver = DerivedFieldResolverFactory.createResolver(queryShardContext, null, null, false); + assertTrue(resolver instanceof NoOpDerivedFieldResolver); + assertNull(resolver.resolve("derived_text")); + assertEquals(0, resolver.resolvePattern("*").size()); + assertNull(resolver.resolve("indexed_field")); + assertNull(resolver.resolve("indexed_field_2.sub_field")); + + assertThrows( + OpenSearchException.class, + () -> DerivedFieldResolverFactory.createResolver(queryShardContext, createDerivedFieldsObject(), createDerivedFields(), false) + ); + + when(queryShardContext.allowExpensiveQueries()).thenReturn(false); + + assertThrows( + OpenSearchException.class, + () -> DerivedFieldResolverFactory.createResolver(queryShardContext, createDerivedFieldsObject(), createDerivedFields(), true) + ); + } + + /** resolvePattern is expected to return 4 derived fields for both "derived_*" and "*" when two are defined in the mapping and three (one overlapping) are passed to the factory. */ public void testResolvePattern() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> { +
b.startObject("properties"); + { + b.startObject("indexed_field"); + { + b.field("type", "text"); + } + b.endObject(); + } + b.endObject(); + b.startObject("derived"); + { + b.startObject("derived_text"); + { + b.field("type", "keyword"); + b.field("script", ""); + } + b.endObject(); + b.startObject("derived_2"); + { + b.field("type", "keyword"); + b.field("script", ""); + } + b.endObject(); + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + DefaultDerivedFieldResolver resolver = (DefaultDerivedFieldResolver) DerivedFieldResolverFactory.createResolver( + queryShardContext, + createDerivedFieldsObject(), + createDerivedFields(), + true + ); + assertEquals(4, resolver.resolvePattern("derived_*").size()); + assertEquals(4, resolver.resolvePattern("*").size()); // should not include regular field indexed_field + } + + /** Asserts that two DerivedField instances are equal by delegating to assertEquals. */ private void assertEqualDerivedField(DerivedField expected, DerivedField actual) { + assertEquals(expected, actual); + } + + /** Builds a map defining derived_text (type text) and derived_object (type object), each with an empty script. */ private Map<String, Object> createDerivedFieldsObject() { + return new HashMap<>() { + { + put("derived_text", new HashMap<String, Object>() { + { + put("type", "text"); + put("script", ""); + } + }); + put("derived_object", new HashMap<String, Object>() { + { + put("type", "object"); + put("script", ""); + } + }); + } + }; + } + + /** Resolver subclass that replaces getValueFetcher with a stub and counts its invocations in cnt; when error is true the stubbed fetcher returns null instead of a one-element list. */ private static class TestDerivedFieldResolver extends DefaultDerivedFieldResolver { + private final boolean error; + private int cnt; + + public TestDerivedFieldResolver( + QueryShardContext queryShardContext, + Map<String, Object> derivedFieldsObject, + List<DerivedField> derivedFields, + FieldTypeInference typeInference + ) { + this(queryShardContext, derivedFieldsObject, derivedFields, typeInference, false); + } + + public TestDerivedFieldResolver( + QueryShardContext queryShardContext, + Map<String, Object> derivedFieldsObject, + List<DerivedField> derivedFields, +
FieldTypeInference typeInference, + boolean error + ) { + super(queryShardContext, derivedFieldsObject, derivedFields, typeInference); + this.error = error; + this.cnt = 0; + } + + @Override + ValueFetcher getValueFetcher(String fieldName, Script script, boolean ignoreMalFormed) { + cnt++; + if (!error) { + return lookup -> List.of("text field content"); + } else { + return lookup -> null; + } + } + } + + /** Returns a single-element list holding derived_keyword (type keyword, empty script). */ private List<DerivedField> createDerivedFields() { + DerivedField derivedField = new DerivedField("derived_keyword", "keyword", new Script("")); + return Collections.singletonList(derivedField); + } + +} diff --git a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldTypeTests.java index 897848008fd5f..f65acd0db0627 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldTypeTests.java @@ -15,11 +15,13 @@ import org.apache.lucene.document.LatLonPoint; import org.apache.lucene.document.LongField; import org.apache.lucene.document.LongPoint; +import org.opensearch.OpenSearchException; import org.opensearch.common.collect.Tuple; import org.opensearch.script.Script; import java.util.List; +import static org.apache.lucene.index.IndexOptions.NONE; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -30,61 +32,69 @@ private DerivedFieldType createDerivedFieldType(String type) { when(context.path()).thenReturn(new ContentPath()); return new DerivedFieldType( new DerivedField(type + " _derived_field", type, new Script("")), - DerivedFieldSupportedTypes.getFieldMapperFromType(type, type + "_derived_field", context), - DerivedFieldSupportedTypes.getIndexableFieldGeneratorType(type, type + "_derived_field") + DerivedFieldSupportedTypes.getFieldMapperFromType(type, type + "_derived_field", context, null), +
DerivedFieldSupportedTypes.getIndexableFieldGeneratorType(type, type + "_derived_field"), + null ); } public void testBooleanType() { DerivedFieldType dft = createDerivedFieldType("boolean"); - assertTrue(dft.typeFieldMapper instanceof BooleanFieldMapper); - assertTrue(dft.indexableFieldGenerator.apply(true) instanceof Field); - assertTrue(dft.indexableFieldGenerator.apply(false) instanceof Field); + assertTrue(dft.getFieldMapper() instanceof BooleanFieldMapper); + assertTrue(dft.getIndexableFieldGenerator().apply(true) instanceof Field); + assertTrue(dft.getIndexableFieldGenerator().apply(false) instanceof Field); } public void testDateType() { DerivedFieldType dft = createDerivedFieldType("date"); - assertTrue(dft.typeFieldMapper instanceof DateFieldMapper); - assertTrue(dft.indexableFieldGenerator.apply(System.currentTimeMillis()) instanceof LongPoint); - expectThrows(Exception.class, () -> dft.indexableFieldGenerator.apply("blah")); + assertTrue(dft.getFieldMapper() instanceof DateFieldMapper); + assertTrue(dft.getIndexableFieldGenerator().apply(System.currentTimeMillis()) instanceof LongPoint); + expectThrows(Exception.class, () -> dft.getIndexableFieldGenerator().apply("blah")); } public void testGeoPointType() { DerivedFieldType dft = createDerivedFieldType("geo_point"); - assertTrue(dft.typeFieldMapper instanceof GeoPointFieldMapper); - assertTrue(dft.indexableFieldGenerator.apply(new Tuple<>(10.0, 20.0)) instanceof LatLonPoint); - expectThrows(ClassCastException.class, () -> dft.indexableFieldGenerator.apply(List.of(10.0))); - expectThrows(ClassCastException.class, () -> dft.indexableFieldGenerator.apply(List.of())); - expectThrows(ClassCastException.class, () -> dft.indexableFieldGenerator.apply(List.of("10"))); - expectThrows(ClassCastException.class, () -> dft.indexableFieldGenerator.apply(List.of(10.0, 20.0, 30.0))); + assertTrue(dft.getFieldMapper() instanceof GeoPointFieldMapper); + assertTrue(dft.getIndexableFieldGenerator().apply(new Tuple<>(10.0, 
20.0)) instanceof LatLonPoint); + expectThrows(ClassCastException.class, () -> dft.getIndexableFieldGenerator().apply(List.of(10.0))); + expectThrows(ClassCastException.class, () -> dft.getIndexableFieldGenerator().apply(List.of())); + expectThrows(ClassCastException.class, () -> dft.getIndexableFieldGenerator().apply(List.of("10"))); + expectThrows(ClassCastException.class, () -> dft.getIndexableFieldGenerator().apply(List.of(10.0, 20.0, 30.0))); } public void testIPType() { DerivedFieldType dft = createDerivedFieldType("ip"); - assertTrue(dft.typeFieldMapper instanceof IpFieldMapper); - assertTrue(dft.indexableFieldGenerator.apply("127.0.0.1") instanceof InetAddressPoint); - expectThrows(Exception.class, () -> dft.indexableFieldGenerator.apply("blah")); + assertTrue(dft.getFieldMapper() instanceof IpFieldMapper); + assertTrue(dft.getIndexableFieldGenerator().apply("127.0.0.1") instanceof InetAddressPoint); + expectThrows(Exception.class, () -> dft.getIndexableFieldGenerator().apply("blah")); } public void testKeywordType() { DerivedFieldType dft = createDerivedFieldType("keyword"); - assertTrue(dft.typeFieldMapper instanceof KeywordFieldMapper); - assertTrue(dft.indexableFieldGenerator.apply("test_keyword") instanceof KeywordField); - expectThrows(Exception.class, () -> dft.indexableFieldGenerator.apply(10)); + assertTrue(dft.getFieldMapper() instanceof KeywordFieldMapper); + assertTrue(dft.getIndexableFieldGenerator().apply("test_keyword") instanceof KeywordField); + expectThrows(Exception.class, () -> dft.getIndexableFieldGenerator().apply(10)); } public void testLongType() { DerivedFieldType dft = createDerivedFieldType("long"); - assertTrue(dft.typeFieldMapper instanceof NumberFieldMapper); - assertTrue(dft.indexableFieldGenerator.apply(10) instanceof LongField); - expectThrows(Exception.class, () -> dft.indexableFieldGenerator.apply(10.0)); + assertTrue(dft.getFieldMapper() instanceof NumberFieldMapper); + 
assertTrue(dft.getIndexableFieldGenerator().apply(10) instanceof LongField); + expectThrows(Exception.class, () -> dft.getIndexableFieldGenerator().apply(10.0)); } public void testDoubleType() { DerivedFieldType dft = createDerivedFieldType("double"); - assertTrue(dft.typeFieldMapper instanceof NumberFieldMapper); - assertTrue(dft.indexableFieldGenerator.apply(10.0) instanceof DoubleField); - expectThrows(Exception.class, () -> dft.indexableFieldGenerator.apply("")); + assertTrue(dft.getFieldMapper() instanceof NumberFieldMapper); + assertTrue(dft.getIndexableFieldGenerator().apply(10.0) instanceof DoubleField); + expectThrows(Exception.class, () -> dft.getIndexableFieldGenerator().apply("")); + } + + public void testObjectType() { + DerivedFieldType dft = createDerivedFieldType("object"); + assertTrue(dft.getFieldMapper() instanceof KeywordFieldMapper); + assertEquals(dft.getFieldMapper().fieldType.indexOptions(), NONE); + assertThrows(OpenSearchException.class, () -> dft.getIndexableFieldGenerator().apply("")); } public void testUnsupportedType() { diff --git a/server/src/test/java/org/opensearch/index/query/DerivedFieldQueryTests.java b/server/src/test/java/org/opensearch/index/query/DerivedFieldQueryTests.java index 5a11ebebb312e..ecad1291bed19 100644 --- a/server/src/test/java/org/opensearch/index/query/DerivedFieldQueryTests.java +++ b/server/src/test/java/org/opensearch/index/query/DerivedFieldQueryTests.java @@ -16,6 +16,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; @@ -34,6 +35,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.function.Function; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -45,7 +47,8 
@@ public class DerivedFieldQueryTests extends OpenSearchTestCase { { "232.0.0.0 GET /images/hm_bg.jpg HTTP/1.0", "400", "232.0.0.0" }, { "26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0", "200", "26.1.0.0" }, { "247.37.0.0 GET /french/splash_inet.html HTTP/1.0", "400", "247.37.0.0" }, - { "247.37.0.0 GET /french/splash_inet.html HTTP/1.0", "400", "247.37.0.0" } }; + { "247.37.0.0 GET /french/splash_inet.html HTTP/1.0", "400", "247.37.0.0" }, + { "247.37.0.0 GET /french/splash_inet.html HTTP/1.0", "200", "247.37.0.0" } }; public void testDerivedField() throws IOException { // Create lucene documents @@ -76,18 +79,20 @@ public void execute() { // Create ValueFetcher from mocked DerivedFieldScript.Factory DerivedFieldScript.LeafFactory leafFactory = factory.newFactory((new Script("")).getParams(), searchLookup); - DerivedFieldValueFetcher valueFetcher = new DerivedFieldValueFetcher( - leafFactory, - null, - DerivedFieldSupportedTypes.getIndexableFieldGeneratorType("keyword", "ip_from_raw_request") + Function<Object, IndexableField> indexableFieldFunction = DerivedFieldSupportedTypes.getIndexableFieldGeneratorType( + "keyword", + "ip_from_raw_request" ); + DerivedFieldValueFetcher valueFetcher = new DerivedFieldValueFetcher(leafFactory, null); // Create DerivedFieldQuery DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( new TermQuery(new Term("ip_from_raw_request", "247.37.0.0")), valueFetcher, searchLookup, - Lucene.STANDARD_ANALYZER + Lucene.STANDARD_ANALYZER, + indexableFieldFunction, + true ); // Index and Search @@ -101,7 +106,78 @@ public void execute() { iw.close(); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(derivedFieldQuery, 10); - assertEquals(2, topDocs.totalHits.value); + assertEquals(3, topDocs.totalHits.value); + } + } + } + + public void testDerivedFieldWithIgnoreMalformed() throws IOException { + // Create lucene documents + List<Document> docs = new ArrayList<>(); + for (String[] request : raw_requests) { 
+ Document document = new Document(); + document.add(new TextField("raw_request", request[0], Field.Store.YES)); + document.add(new KeywordField("status", request[1], Field.Store.YES)); + docs.add(document); + } + + // Mock SearchLookup + SearchLookup searchLookup = mock(SearchLookup.class); + SourceLookup sourceLookup = new SourceLookup(); + LeafSearchLookup leafLookup = mock(LeafSearchLookup.class); + when(leafLookup.source()).thenReturn(sourceLookup); + + // Mock DerivedFieldScript.Factory + DerivedFieldScript.Factory factory = (params, lookup) -> (DerivedFieldScript.LeafFactory) ctx -> { + when(searchLookup.getLeafSearchLookup(ctx)).thenReturn(leafLookup); + return new DerivedFieldScript(params, lookup, ctx) { + @Override + public void execute() { + addEmittedValue(raw_requests[sourceLookup.docId()][2]); + } + }; + }; + + // Create ValueFetcher from mocked DerivedFieldScript.Factory + DerivedFieldScript.LeafFactory leafFactory = factory.newFactory((new Script("")).getParams(), searchLookup); + Function<Object, IndexableField> badIndexableFieldFunction = DerivedFieldSupportedTypes.getIndexableFieldGeneratorType( + "date", + "ip_from_raw_request" + ); + DerivedFieldValueFetcher valueFetcher = new DerivedFieldValueFetcher(leafFactory, null); + // Index and Search + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + for (Document d : docs) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + IndexSearcher searcher = new IndexSearcher(reader); + // Create DerivedFieldQuery + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + new TermQuery(new Term("ip_from_raw_request", "247.37.0.0")), + valueFetcher, + searchLookup, + Lucene.STANDARD_ANALYZER, + badIndexableFieldFunction, + false + ); + DerivedFieldQuery finalDerivedFieldQuery = derivedFieldQuery; + assertThrows(ClassCastException.class, () -> 
searcher.search(finalDerivedFieldQuery, 10)); + + // set ignore_malformed as true, query should pass + derivedFieldQuery = new DerivedFieldQuery( + new TermQuery(new Term("ip_from_raw_request", "247.37.0.0")), + valueFetcher, + searchLookup, + Lucene.STANDARD_ANALYZER, + badIndexableFieldFunction, + true + ); + searcher.search(derivedFieldQuery, 10); + TopDocs topDocs = searcher.search(derivedFieldQuery, 10); + assertEquals(0, topDocs.totalHits.value); } } } diff --git a/server/src/test/java/org/opensearch/index/query/QueryShardContextTests.java b/server/src/test/java/org/opensearch/index/query/QueryShardContextTests.java index 6a7bf10835ddd..12677edc8efa7 100644 --- a/server/src/test/java/org/opensearch/index/query/QueryShardContextTests.java +++ b/server/src/test/java/org/opensearch/index/query/QueryShardContextTests.java @@ -31,7 +31,6 @@ package org.opensearch.index.query; -import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; @@ -64,17 +63,12 @@ import org.opensearch.index.fielddata.LeafFieldData; import org.opensearch.index.fielddata.ScriptDocValues; import org.opensearch.index.fielddata.plain.AbstractLeafOrdinalsFieldData; -import org.opensearch.index.mapper.ContentPath; -import org.opensearch.index.mapper.DerivedField; -import org.opensearch.index.mapper.DerivedFieldMapper; -import org.opensearch.index.mapper.DocumentMapper; +import org.opensearch.index.mapper.DerivedFieldResolver; +import org.opensearch.index.mapper.DerivedFieldType; import org.opensearch.index.mapper.IndexFieldMapper; import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.mapper.Mapper; import org.opensearch.index.mapper.MapperService; -import org.opensearch.index.mapper.MappingLookup; import org.opensearch.index.mapper.TextFieldMapper; -import org.opensearch.script.Script; import 
org.opensearch.search.lookup.LeafDocLookup; import org.opensearch.search.lookup.LeafSearchLookup; import org.opensearch.search.lookup.SearchLookup; @@ -85,7 +79,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.Map; import java.util.function.BiFunction; import java.util.function.Supplier; @@ -129,24 +122,14 @@ public void testFailIfFieldMappingNotFound() { public void testDerivedFieldMapping() { QueryShardContext context = createQueryShardContext(IndexMetadata.INDEX_UUID_NA_VALUE, null); - assertNull(context.failIfFieldMappingNotFound("test_derived", null)); - context.setDerivedFieldTypes(null); - assertNull(context.failIfFieldMappingNotFound("test_derived", null)); - DocumentMapper documentMapper = mock(DocumentMapper.class); - Mapper.BuilderContext builderContext = new Mapper.BuilderContext(Settings.EMPTY, new ContentPath(0)); - DerivedFieldMapper derivedFieldMapper = new DerivedFieldMapper.Builder(new DerivedField("test_derived", "keyword", new Script(""))) - .build(builderContext); - MappingLookup mappingLookup = new MappingLookup( - Collections.singletonList(derivedFieldMapper), - Collections.emptyList(), - Collections.emptyList(), - 0, - new StandardAnalyzer() - ); - when(documentMapper.mappers()).thenReturn(mappingLookup); - context.setDerivedFieldTypes(Map.of("test_derived", derivedFieldMapper.fieldType())); - context.setAllowUnmappedFields(false); - assertEquals(derivedFieldMapper.fieldType(), context.failIfFieldMappingNotFound("test_derived", null)); + assertNull(context.failIfFieldMappingNotFound("derived_field_search_req", null)); + DerivedFieldResolver derivedFieldResolver = mock(DerivedFieldResolver.class); + context.setDerivedFieldResolver(derivedFieldResolver); + DerivedFieldType mockDerivedFieldType = mock(DerivedFieldType.class); + when(derivedFieldResolver.resolve("derived_field_search_req")).thenReturn(mockDerivedFieldType); + assertEquals(mockDerivedFieldType, 
context.failIfFieldMappingNotFound("derived_field_search_req", null)); + when(derivedFieldResolver.resolve("field_missing")).thenReturn(null); + assertNull(context.failIfFieldMappingNotFound("field_missing", null)); } public void testToQueryFails() { diff --git a/server/src/test/java/org/opensearch/search/SearchServiceTests.java b/server/src/test/java/org/opensearch/search/SearchServiceTests.java index 7207df7d4d13d..1caa2c99fc3b8 100644 --- a/server/src/test/java/org/opensearch/search/SearchServiceTests.java +++ b/server/src/test/java/org/opensearch/search/SearchServiceTests.java @@ -37,6 +37,7 @@ import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.Query; import org.apache.lucene.store.AlreadyClosedException; +import org.opensearch.OpenSearchException; import org.opensearch.action.OriginalIndices; import org.opensearch.action.index.IndexResponse; import org.opensearch.action.search.ClearScrollRequest; @@ -566,6 +567,7 @@ public void testDerivedFieldsSearch() throws IOException { new Script(ScriptType.INLINE, MockScriptEngine.NAME, CustomScriptPlugin.DUMMY_SCRIPT, Collections.emptyMap()) ); } + indexService.getIndexSettings().isDerivedFieldAllowed(); final ShardSearchRequest request = new ShardSearchRequest( OriginalIndices.NONE, searchRequest, @@ -582,15 +584,96 @@ public void testDerivedFieldsSearch() throws IOException { try (SearchContext context = service.createContext(reader, request, null, randomBoolean())) { assertNotNull(context); for (int i = 0; i < 5; i++) { - DerivedFieldType derivedFieldType = (DerivedFieldType) context.getQueryShardContext().getDerivedFieldType("field" + i); + DerivedFieldType derivedFieldType = (DerivedFieldType) context.getQueryShardContext() + .resolveDerivedFieldType("field" + i); assertEquals("field" + i, derivedFieldType.name()); assertEquals("date", derivedFieldType.getType()); } - assertNull(context.getQueryShardContext().getDerivedFieldType("field" + 5)); + 
assertNull(context.getQueryShardContext().resolveDerivedFieldType("field" + 5)); } } } + public void testDerivedFieldDisabled() throws IOException { + createIndex("index"); + final SearchService service = getInstanceFromNode(SearchService.class); + final IndicesService indicesService = getInstanceFromNode(IndicesService.class); + final IndexService indexService = indicesService.indexServiceSafe(resolveIndex("index")); + final IndexShard indexShard = indexService.getShard(0); + + SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(true); + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + searchRequest.source(searchSourceBuilder); + + searchSourceBuilder.derivedField( + "field", + "date", + new Script(ScriptType.INLINE, MockScriptEngine.NAME, CustomScriptPlugin.DUMMY_SCRIPT, Collections.emptyMap()) + ); + indexService.getIndexSettings().isDerivedFieldAllowed(); + final ShardSearchRequest request = new ShardSearchRequest( + OriginalIndices.NONE, + searchRequest, + indexShard.shardId(), + 1, + new AliasFilter(null, Strings.EMPTY_ARRAY), + 1.0f, + -1, + null, + null + ); + + try (ReaderContext reader = createReaderContext(indexService, indexShard)) { + SearchContext context = service.createContext(reader, request, null, randomBoolean()); + + // nothing disabled, derived field resolved fine + assertNotNull(context.getQueryShardContext().resolveDerivedFieldType("field")); + + // disabled using cluster setting, assert create context fails + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(SearchService.CLUSTER_ALLOW_DERIVED_FIELD_SETTING.getKey(), false)) + .get(); + assertThrows(OpenSearchException.class, () -> service.createContext(reader, request, null, randomBoolean())); + + // dynamically enabled using cluster setting, assert derived field resolved fine + client().admin() + .cluster() + .prepareUpdateSettings() + 
.setTransientSettings(Settings.builder().put(SearchService.CLUSTER_ALLOW_DERIVED_FIELD_SETTING.getKey(), true)) + .get(); + context = service.createContext(reader, request, null, randomBoolean()); + assertNotNull(context.getQueryShardContext().resolveDerivedFieldType("field")); + + // disabled using index setting, assert create context fails + client().admin() + .indices() + .prepareUpdateSettings("index") + .setSettings(Settings.builder().put(IndexSettings.ALLOW_DERIVED_FIELDS.getKey(), false)) + .get(); + + assertThrows(OpenSearchException.class, () -> service.createContext(reader, request, null, randomBoolean())); + + // dynamically enabled using index setting, assert derived field resolved fine + client().admin() + .indices() + .prepareUpdateSettings("index") + .setSettings(Settings.builder().put(IndexSettings.ALLOW_DERIVED_FIELDS.getKey(), true)) + .get(); + + context = service.createContext(reader, request, null, randomBoolean()); + assertNotNull(context.getQueryShardContext().resolveDerivedFieldType("field")); + + // Cleanup + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().putNull(SearchService.CLUSTER_ALLOW_DERIVED_FIELD_SETTING.getKey())) + .get(); + } + } + /** * test that getting more than the allowed number of script_fields throws an exception */ diff --git a/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java b/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java index fd3dd8c12e84e..9697f4cee0d58 100644 --- a/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java +++ b/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java @@ -373,7 +373,7 @@ public void testDerivedFieldsParsingAndSerializationObjectType() throws IOExcept + " \"type\": \"object\",\n" + " \"script\": \"emit(doc['test'])\",\n" + " \"format\": \"dd-MM-yyyy\",\n" - + " \"source_indexed_field\": \"test\",\n" + + " 
\"prefilter_field\": \"test\",\n" + " \"ignore_malformed\": true,\n" + " \"properties\": {\n" + " \"sub_field\": \"text\"\n" @@ -386,7 +386,7 @@ public void testDerivedFieldsParsingAndSerializationObjectType() throws IOExcept + "}"; String expectedContent = - "{\"query\":{\"match\":{\"content\":{\"query\":\"foo bar\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"derived\":{\"duration\":{\"type\":\"long\",\"script\":\"emit(doc['test'])\"},\"ip_from_message\":{\"type\":\"keyword\",\"script\":\"emit(doc['message'])\"},\"object\":{\"format\":\"dd-MM-yyyy\",\"source_indexed_field\":\"test\",\"ignore_malformed\":true,\"type\":\"object\",\"script\":\"emit(doc['test'])\",\"properties\":{\"sub_field\":\"text\"}},\"derived_field\":{\"type\":\"object\",\"script\":{\"source\":\"emit(doc['message']\",\"lang\":\"painless\"},\"properties\":{\"sub_field_2\":\"keyword\"},\"source_indexed_field\":\"message\",\"format\":\"dd-MM-yyyy\",\"ignore_malformed\":true}}}"; + "{\"query\":{\"match\":{\"content\":{\"query\":\"foo bar\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"derived\":{\"duration\":{\"type\":\"long\",\"script\":\"emit(doc['test'])\"},\"ip_from_message\":{\"type\":\"keyword\",\"script\":\"emit(doc['message'])\"},\"object\":{\"format\":\"dd-MM-yyyy\",\"prefilter_field\":\"test\",\"ignore_malformed\":true,\"type\":\"object\",\"script\":\"emit(doc['test'])\",\"properties\":{\"sub_field\":\"text\"}},\"derived_field\":{\"type\":\"object\",\"script\":{\"source\":\"emit(doc['message']\",\"lang\":\"painless\"},\"properties\":{\"sub_field_2\":\"keyword\"},\"prefilter_field\":\"message\",\"format\":\"dd-MM-yyyy\",\"ignore_malformed\":true}}}"; try (XContentParser parser = 
createParser(JsonXContent.jsonXContent, restContent)) { SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser); @@ -403,7 +403,7 @@ public void testDerivedFieldsParsingAndSerializationObjectType() throws IOExcept assertEquals(3, searchSourceBuilder.getDerivedFieldsObject().size()); assertEquals(1, searchSourceBuilder.getDerivedFields().size()); assertEquals(1, searchSourceBuilder.getDerivedFields().get(0).getProperties().size()); - assertEquals("message", searchSourceBuilder.getDerivedFields().get(0).getSourceIndexedField()); + assertEquals("message", searchSourceBuilder.getDerivedFields().get(0).getPrefilterField()); assertEquals("dd-MM-yyyy", searchSourceBuilder.getDerivedFields().get(0).getFormat()); assertTrue(searchSourceBuilder.getDerivedFields().get(0).getIgnoreMalformed()); diff --git a/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java b/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java index 92127da9654aa..f106aaa13dc48 100644 --- a/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java +++ b/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java @@ -27,7 +27,9 @@ import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; import org.opensearch.index.mapper.ContentPath; +import org.opensearch.index.mapper.DefaultDerivedFieldResolver; import org.opensearch.index.mapper.DerivedField; +import org.opensearch.index.mapper.DerivedFieldResolverFactory; import org.opensearch.index.mapper.DerivedFieldSupportedTypes; import org.opensearch.index.mapper.DerivedFieldType; import org.opensearch.index.mapper.Mapper; @@ -64,9 +66,14 @@ public class DerivedFieldFetchAndHighlightTests extends OpenSearchSingleNodeTestCase { private static String DERIVED_FIELD_SCRIPT_1 = "derived_field_script_1"; private 
static String DERIVED_FIELD_SCRIPT_2 = "derived_field_script_2"; + private static String DERIVED_FIELD_SCRIPT_3 = "derived_field_script_3"; + private static String DERIVED_FIELD_SCRIPT_4 = "derived_field_script_4"; private static String DERIVED_FIELD_1 = "derived_1"; private static String DERIVED_FIELD_2 = "derived_2"; + private static String DERIVED_FIELD_3 = "derived_3"; + private static String DERIVED_FIELD_4 = "derived_4"; + private static String NESTED_FIELD = "field"; public void testDerivedFieldFromIndexMapping() throws IOException { // Create index and mapper service @@ -88,6 +95,21 @@ public void testDerivedFieldFromIndexMapping() throws IOException { .field("lang", "mockscript") .endObject() .endObject() + .startObject(DERIVED_FIELD_3) + .field("type", "date") + .field("format", "yyyy-MM-dd") + .startObject("script") + .field("source", DERIVED_FIELD_SCRIPT_3) + .field("lang", "mockscript") + .endObject() + .endObject() + .startObject(DERIVED_FIELD_4) + .field("type", "object") + .startObject("script") + .field("source", DERIVED_FIELD_SCRIPT_4) + .field("lang", "mockscript") + .endObject() + .endObject() .endObject() .endObject(); @@ -97,6 +119,8 @@ public void testDerivedFieldFromIndexMapping() throws IOException { .startObject() .field("field1", "some_text_1") .field("field2", "some_text_2") + .field("field3", 1710923445000L) + .field("field4", "{ \"field\": \"foo bar baz\"}") .endObject(); int docId = 0; @@ -121,14 +145,21 @@ public void testDerivedFieldFromIndexMapping() throws IOException { // Assert the fetch phase works for both of the derived fields Map<String, DocumentField> fields = fetchFields(mockShardContext, context, "*"); + Map<String, DocumentField> nestedFields = fetchFields(mockShardContext, context, DERIVED_FIELD_4 + "." 
+ NESTED_FIELD); // Validate FetchPhase { - assertEquals(fields.size(), 2); + assertEquals(fields.size(), 4); assertEquals(1, fields.get(DERIVED_FIELD_1).getValues().size()); assertEquals(1, fields.get(DERIVED_FIELD_2).getValues().size()); + assertEquals(1, fields.get(DERIVED_FIELD_3).getValues().size()); + assertEquals(1, fields.get(DERIVED_FIELD_4).getValues().size()); assertEquals("some_text_1", fields.get(DERIVED_FIELD_1).getValue()); assertEquals("some_text_2", fields.get(DERIVED_FIELD_2).getValue()); + assertEquals("2024-03-20", fields.get(DERIVED_FIELD_3).getValue()); + assertEquals("{ \"field\": \"foo bar baz\"}", fields.get(DERIVED_FIELD_4).getValue()); + assertEquals(1, nestedFields.get(DERIVED_FIELD_4 + "." + NESTED_FIELD).getValues().size()); + assertEquals("foo bar baz", nestedFields.get(DERIVED_FIELD_4 + "." + NESTED_FIELD).getValue()); } // Create a HighlightBuilder of type unified, set its fields as derived_1 and derived_2 @@ -136,6 +167,7 @@ public void testDerivedFieldFromIndexMapping() throws IOException { highlightBuilder.highlighterType("unified"); highlightBuilder.field(DERIVED_FIELD_1); highlightBuilder.field(DERIVED_FIELD_2); + highlightBuilder.field(DERIVED_FIELD_4 + "." 
+ NESTED_FIELD); highlightBuilder = Rewriteable.rewrite(highlightBuilder, mockShardContext); SearchHighlightContext searchHighlightContext = highlightBuilder.build(mockShardContext); @@ -158,30 +190,56 @@ public void testDerivedFieldFromIndexMapping() throws IOException { null ) ); - - // The query used by FetchSubPhaseProcessor to highlight is a term query on DERIVED_FIELD_1 - FetchSubPhaseProcessor subPhaseProcessor = highlightPhase.getProcessor( - fetchContext, - searchHighlightContext, - new TermQuery(new Term(DERIVED_FIELD_1, "some_text_1")) - ); - - // Create a search hit using the derived fields fetched above in fetch phase - SearchHit searchHit = new SearchHit(docId, "0", null, fields, null); - - // Create a HitContext of search hit - FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext( - searchHit, - context, - docId, - mockShardContext.lookup().source() - ); - hitContext.sourceLookup().loadSourceIfNeeded(); - // process the HitContext using the highlightPhase subPhaseProcessor - subPhaseProcessor.process(hitContext); - - // Validate that 1 highlight field is present - assertEquals(hitContext.hit().getHighlightFields().size(), 1); + { + // The query used by FetchSubPhaseProcessor to highlight is a term query on DERIVED_FIELD_1 + FetchSubPhaseProcessor subPhaseProcessor = highlightPhase.getProcessor( + fetchContext, + searchHighlightContext, + new TermQuery(new Term(DERIVED_FIELD_1, "some_text_1")) + ); + + // Create a search hit using the derived fields fetched above in fetch phase + SearchHit searchHit = new SearchHit(docId, "0", null, fields, null); + + // Create a HitContext of search hit + FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext( + searchHit, + context, + docId, + mockShardContext.lookup().source() + ); + hitContext.sourceLookup().loadSourceIfNeeded(); + // process the HitContext using the highlightPhase subPhaseProcessor + subPhaseProcessor.process(hitContext); + + // Validate that 1 highlight field is present 
+ assertEquals(hitContext.hit().getHighlightFields().size(), 1); + } + { + // The query used by FetchSubPhaseProcessor to highlight is a term query on DERIVED_FIELD_1 + FetchSubPhaseProcessor subPhaseProcessor = highlightPhase.getProcessor( + fetchContext, + searchHighlightContext, + new TermQuery(new Term(DERIVED_FIELD_4 + "." + NESTED_FIELD, "foo")) + ); + + // Create a search hit using the derived fields fetched above in fetch phase + SearchHit searchHit = new SearchHit(docId, "0", null, nestedFields, null); + + // Create a HitContext of search hit + FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext( + searchHit, + context, + docId, + mockShardContext.lookup().source() + ); + hitContext.sourceLookup().loadSourceIfNeeded(); + // process the HitContext using the highlightPhase subPhaseProcessor + subPhaseProcessor.process(hitContext); + + // Validate that 1 highlight field is present + assertEquals(hitContext.hit().getHighlightFields().size(), 1); + } } } } @@ -193,6 +251,8 @@ public void testDerivedFieldFromSearchMapping() throws IOException { .startObject() .field("field1", "some_text_1") .field("field2", "some_text_2") + .field("field3", 1710923445000L) + .field("field4", "{ \"field\": \"foo bar baz\"}") .endObject(); int docId = 0; @@ -218,26 +278,56 @@ public void testDerivedFieldFromSearchMapping() throws IOException { QueryShardContext mockShardContext = createQueryShardContext(mapperService, searcher); mockShardContext.lookup().source().setSegmentAndDocument(context, docId); + DerivedField derivedField3 = new DerivedField( + DERIVED_FIELD_3, + "date", + new Script(ScriptType.INLINE, "mockscript", DERIVED_FIELD_SCRIPT_3, emptyMap()) + ); + derivedField3.setFormat("dd-MM-yyyy"); // This mock behavior is similar to adding derived fields in search request - mockShardContext.setDerivedFieldTypes( - Map.of( - DERIVED_FIELD_1, - createDerivedFieldType(DERIVED_FIELD_1, "keyword", DERIVED_FIELD_SCRIPT_1), - DERIVED_FIELD_2, - 
createDerivedFieldType(DERIVED_FIELD_2, "keyword", DERIVED_FIELD_SCRIPT_2) + mockShardContext.setDerivedFieldResolver( + (DefaultDerivedFieldResolver) DerivedFieldResolverFactory.createResolver( + mockShardContext, + null, + List.of( + new DerivedField( + DERIVED_FIELD_1, + "keyword", + new Script(ScriptType.INLINE, "mockscript", DERIVED_FIELD_SCRIPT_1, emptyMap()) + ), + new DerivedField( + DERIVED_FIELD_2, + "keyword", + new Script(ScriptType.INLINE, "mockscript", DERIVED_FIELD_SCRIPT_2, emptyMap()) + ), + derivedField3, + new DerivedField( + DERIVED_FIELD_4, + "object", + new Script(ScriptType.INLINE, "mockscript", DERIVED_FIELD_SCRIPT_4, emptyMap()) + ) + ), + true ) ); // Assert the fetch phase works for both of the derived fields Map<String, DocumentField> fields = fetchFields(mockShardContext, context, "derived_*"); + Map<String, DocumentField> nestedFields = fetchFields(mockShardContext, context, DERIVED_FIELD_4 + "." + NESTED_FIELD); // Validate FetchPhase { - assertEquals(fields.size(), 2); + assertEquals(fields.size(), 4); assertEquals(1, fields.get(DERIVED_FIELD_1).getValues().size()); assertEquals(1, fields.get(DERIVED_FIELD_2).getValues().size()); + assertEquals(1, fields.get(DERIVED_FIELD_3).getValues().size()); + assertEquals(1, fields.get(DERIVED_FIELD_4).getValues().size()); assertEquals("some_text_1", fields.get(DERIVED_FIELD_1).getValue()); assertEquals("some_text_2", fields.get(DERIVED_FIELD_2).getValue()); + assertEquals("20-03-2024", fields.get(DERIVED_FIELD_3).getValue()); + assertEquals("{ \"field\": \"foo bar baz\"}", fields.get(DERIVED_FIELD_4).getValue()); + assertEquals(1, nestedFields.get(DERIVED_FIELD_4 + "." + NESTED_FIELD).getValues().size()); + assertEquals("foo bar baz", nestedFields.get(DERIVED_FIELD_4 + "." 
+ NESTED_FIELD).getValue()); } // Create a HighlightBuilder of type unified, set its fields as derived_1 and derived_2 @@ -245,6 +335,7 @@ public void testDerivedFieldFromSearchMapping() throws IOException { highlightBuilder.highlighterType("unified"); highlightBuilder.field(DERIVED_FIELD_1); highlightBuilder.field(DERIVED_FIELD_2); + highlightBuilder.field(DERIVED_FIELD_4 + "." + NESTED_FIELD); highlightBuilder = Rewriteable.rewrite(highlightBuilder, mockShardContext); SearchHighlightContext searchHighlightContext = highlightBuilder.build(mockShardContext); @@ -267,30 +358,56 @@ public void testDerivedFieldFromSearchMapping() throws IOException { null ) ); - - // The query used by FetchSubPhaseProcessor to highlight is a term query on DERIVED_FIELD_1 - FetchSubPhaseProcessor subPhaseProcessor = highlightPhase.getProcessor( - fetchContext, - searchHighlightContext, - new TermQuery(new Term(DERIVED_FIELD_1, "some_text_1")) - ); - - // Create a search hit using the derived fields fetched above in fetch phase - SearchHit searchHit = new SearchHit(docId, "0", null, fields, null); - - // Create a HitContext of search hit - FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext( - searchHit, - context, - docId, - mockShardContext.lookup().source() - ); - hitContext.sourceLookup().loadSourceIfNeeded(); - // process the HitContext using the highlightPhase subPhaseProcessor - subPhaseProcessor.process(hitContext); - - // Validate that 1 highlight field is present - assertEquals(hitContext.hit().getHighlightFields().size(), 1); + { + // The query used by FetchSubPhaseProcessor to highlight is a term query on DERIVED_FIELD_1 + FetchSubPhaseProcessor subPhaseProcessor = highlightPhase.getProcessor( + fetchContext, + searchHighlightContext, + new TermQuery(new Term(DERIVED_FIELD_1, "some_text_1")) + ); + + // Create a search hit using the derived fields fetched above in fetch phase + SearchHit searchHit = new SearchHit(docId, "0", null, fields, null); + + // Create 
a HitContext of search hit + FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext( + searchHit, + context, + docId, + mockShardContext.lookup().source() + ); + hitContext.sourceLookup().loadSourceIfNeeded(); + // process the HitContext using the highlightPhase subPhaseProcessor + subPhaseProcessor.process(hitContext); + + // Validate that 1 highlight field is present + assertEquals(hitContext.hit().getHighlightFields().size(), 1); + } + { + // test highlighting nested field DERIVED_FIELD_4 + "." + NESTED_FIELD + FetchSubPhaseProcessor subPhaseProcessor = highlightPhase.getProcessor( + fetchContext, + searchHighlightContext, + new TermQuery(new Term(DERIVED_FIELD_4 + "." + NESTED_FIELD, "foo")) + ); + + // Create a search hit using the derived fields fetched above in fetch phase + SearchHit searchHit = new SearchHit(docId, "0", null, nestedFields, null); + + // Create a HitContext of search hit + FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext( + searchHit, + context, + docId, + mockShardContext.lookup().source() + ); + hitContext.sourceLookup().loadSourceIfNeeded(); + // process the HitContext using the highlightPhase subPhaseProcessor + subPhaseProcessor.process(hitContext); + + // Validate that 1 highlight field is present + assertEquals(hitContext.hit().getHighlightFields().size(), 1); + } } } } @@ -333,7 +450,7 @@ private static QueryShardContext createQueryShardContext(MapperService mapperSer null, null, null, - null, + () -> true, null ); } @@ -345,7 +462,11 @@ private static ScriptService getScriptService() { DERIVED_FIELD_SCRIPT_1, (script) -> ((String) ((Map<String, Object>) script.get("_source")).get("field1")).replace(" ", "_"), DERIVED_FIELD_SCRIPT_2, - (script) -> ((String) ((Map<String, Object>) script.get("_source")).get("field2")).replace(" ", "_") + (script) -> ((String) ((Map<String, Object>) script.get("_source")).get("field2")).replace(" ", "_"), + DERIVED_FIELD_SCRIPT_3, + (script) -> ((Map<String, Object>) 
script.get("_source")).get("field3"), + DERIVED_FIELD_SCRIPT_4, + (script) -> ((Map<String, Object>) script.get("_source")).get("field4") ), Collections.emptyMap() ); @@ -359,8 +480,9 @@ private DerivedFieldType createDerivedFieldType(String name, String type, String when(context.path()).thenReturn(new ContentPath()); return new DerivedFieldType( new DerivedField(name, type, new Script(ScriptType.INLINE, "mockscript", script, emptyMap())), - DerivedFieldSupportedTypes.getFieldMapperFromType(type, name, context), - DerivedFieldSupportedTypes.getIndexableFieldGeneratorType(type, name) + DerivedFieldSupportedTypes.getFieldMapperFromType(type, name, context, null), + DerivedFieldSupportedTypes.getIndexableFieldGeneratorType(type, name), + null ); } }