Skip to content

Commit

Permalink
Add option to skip using _ignored_source field for synthetic source (#…
Browse files Browse the repository at this point in the history
…112963) (#113065)

(cherry picked from commit 36b3549)

# Conflicts:
#	server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java
#	server/src/main/java/org/elasticsearch/index/IndexSettings.java
#	server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java
#	server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java
#	x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
  • Loading branch information
lkts authored Sep 17, 2024
1 parent b80b018 commit 05173f2
Show file tree
Hide file tree
Showing 15 changed files with 501 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.elasticsearch.index.engine.EngineConfig;
import org.elasticsearch.index.fielddata.IndexFieldDataService;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.index.store.FsDirectoryFactory;
Expand Down Expand Up @@ -181,6 +182,8 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
IndexSettings.TIME_SERIES_ES87TSDB_CODEC_ENABLED_SETTING,
IndexSettings.PREFER_ILM_SETTING,
DataStreamFailureStoreDefinition.FAILURE_STORE_DEFINITION_VERSION_SETTING,
IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_WRITE_SETTING,
IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_READ_SETTING,

// validate that built-in similarities don't get redefined
Setting.groupSetting("index.similarity.", (s) -> {
Expand Down
26 changes: 26 additions & 0 deletions server/src/main/java/org/elasticsearch/index/IndexSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
import org.elasticsearch.index.translog.Translog;
import org.elasticsearch.ingest.IngestService;
import org.elasticsearch.node.Node;
Expand Down Expand Up @@ -776,6 +777,8 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) {
private volatile long mappingDepthLimit;
private volatile long mappingFieldNameLengthLimit;
private volatile long mappingDimensionFieldsLimit;
private volatile boolean skipIgnoredSourceWrite;
private volatile boolean skipIgnoredSourceRead;

/**
* The maximum number of refresh listeners allowed on this shard.
Expand Down Expand Up @@ -923,6 +926,8 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
mappingDimensionFieldsLimit = scopedSettings.get(INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING);
indexRouting = IndexRouting.fromIndexMetadata(indexMetadata);
es87TSDBCodecEnabled = scopedSettings.get(TIME_SERIES_ES87TSDB_CODEC_ENABLED_SETTING);
skipIgnoredSourceWrite = scopedSettings.get(IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_WRITE_SETTING);
skipIgnoredSourceRead = scopedSettings.get(IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_READ_SETTING);

scopedSettings.addSettingsUpdateConsumer(
MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING,
Expand Down Expand Up @@ -1005,6 +1010,11 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
scopedSettings.addSettingsUpdateConsumer(INDEX_MAPPING_DEPTH_LIMIT_SETTING, this::setMappingDepthLimit);
scopedSettings.addSettingsUpdateConsumer(INDEX_MAPPING_FIELD_NAME_LENGTH_LIMIT_SETTING, this::setMappingFieldNameLengthLimit);
scopedSettings.addSettingsUpdateConsumer(INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING, this::setMappingDimensionFieldsLimit);
scopedSettings.addSettingsUpdateConsumer(
IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_WRITE_SETTING,
this::setSkipIgnoredSourceWrite
);
scopedSettings.addSettingsUpdateConsumer(IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_READ_SETTING, this::setSkipIgnoredSourceRead);
}

private void setSearchIdleAfter(TimeValue searchIdleAfter) {
Expand Down Expand Up @@ -1581,6 +1591,22 @@ private void setMappingDimensionFieldsLimit(long value) {
this.mappingDimensionFieldsLimit = value;
}

/**
 * Returns the current value of {@code IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_WRITE_SETTING} for this index.
 *
 * @return {@code true} if writing values to the ignored source field should be skipped
 */
public boolean getSkipIgnoredSourceWrite() {
    return this.skipIgnoredSourceWrite;
}

/**
 * Settings-update consumer for {@code IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_WRITE_SETTING}.
 *
 * @param value the new value of the setting
 */
private void setSkipIgnoredSourceWrite(boolean value) {
    skipIgnoredSourceWrite = value;
}

/**
 * Returns the current value of {@code IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_READ_SETTING} for this index.
 *
 * @return {@code true} if reading values from the ignored source field should be skipped
 */
public boolean getSkipIgnoredSourceRead() {
    return this.skipIgnoredSourceRead;
}

/**
 * Settings-update consumer for {@code IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_READ_SETTING}.
 *
 * @param value the new value of the setting
 */
private void setSkipIgnoredSourceRead(boolean value) {
    skipIgnoredSourceRead = value;
}

/**
* The bounds for {@code @timestamp} on this index or
* {@code null} if there are no bounds.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ final boolean getClonedSource() {
}

/**
 * Reports whether ignored-field values may be recorded for the document being parsed.
 *
 * @return {@code true} only if the mapping uses synthetic source, the {@code clonedSource} flag is not set,
 *         and the index has not enabled skipping of ignored source writes
 */
public final boolean canAddIgnoredField() {
    // The index-level switch is checked last so that a write-path bug can be bypassed via settings.
    return mappingLookup.isSourceSynthetic() && clonedSource == false && indexSettings().getSkipIgnoredSourceWrite() == false;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.util.ByteUtils;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentType;
Expand All @@ -26,6 +29,8 @@
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;

/**
Expand All @@ -39,6 +44,7 @@
* if we can replace it for all use cases to avoid duplication, assuming that the storage tradeoff is favorable.
*/
public class IgnoredSourceFieldMapper extends MetadataFieldMapper {
private final IndexSettings indexSettings;

// This factor is used to combine two offsets within the same integer:
// - the offset of the end of the parent field within the field name (N / PARENT_OFFSET_IN_NAME_OFFSET)
Expand All @@ -48,12 +54,32 @@ public class IgnoredSourceFieldMapper extends MetadataFieldMapper {

public static final String NAME = "_ignored_source";

// A mapper is created per parser context (instead of sharing one singleton) because each mapper
// keeps the IndexSettings of its own index.
public static final TypeParser PARSER = new FixedTypeParser(context -> new IgnoredSourceFieldMapper(context.getIndexSettings()));

static final NodeFeature TRACK_IGNORED_SOURCE = new NodeFeature("mapper.track_ignored_source");

/**
 * Setting to disable encoding and writing values for this field.
 * This is needed to unblock index functionality in case there is a bug on this code path.
 * <p>
 * Boolean, index-scoped and dynamically updatable; defaults to {@code false}.
 */
public static final Setting<Boolean> SKIP_IGNORED_SOURCE_WRITE_SETTING = Setting.boolSetting(
"index.mapping.synthetic_source.skip_ignored_source_write",
false,
Setting.Property.Dynamic,
Setting.Property.IndexScope
);

/**
 * Setting to disable reading and decoding values stored in this field.
 * This is needed to unblock search functionality in case there is a bug on this code path.
 * <p>
 * Boolean, index-scoped and dynamically updatable; defaults to {@code false}.
 */
public static final Setting<Boolean> SKIP_IGNORED_SOURCE_READ_SETTING = Setting.boolSetting(
"index.mapping.synthetic_source.skip_ignored_source_read",
false,
Setting.Property.Dynamic,
Setting.Property.IndexScope
);

/*
* Container for the ignored field data:
* - the full name
Expand Down Expand Up @@ -107,8 +133,9 @@ public ValueFetcher valueFetcher(SearchExecutionContext context, String format)
}
}

/**
 * Creates the mapper for one index.
 *
 * @param indexSettings settings of the owning index; kept so that the synthetic field loader
 *                      can check {@code getSkipIgnoredSourceRead()} when it is asked for stored fields
 */
private IgnoredSourceFieldMapper(IndexSettings indexSettings) {
    super(IgnoredValuesFieldMapperType.INSTANCE);
    this.indexSettings = indexSettings;
}

@Override
Expand Down Expand Up @@ -150,6 +177,18 @@ static NameValue decode(Object field) {
return new NameValue(name, parentOffset, value, null);
}

/**
 * Adds {@link #NAME} to the stored fields that are loaded for synthetic source, unless reading
 * of this field has been disabled for the index.
 * <p>
 * In rare cases decoding values stored in this field can fail leading to entire source
 * not being available. We would like to have an option to lose some values in synthetic source
 * but have search not fail.
 *
 * @param fieldsToLoadForSyntheticSource the set to extend; it is modified in place and therefore must be mutable
 * @param indexSettings settings of the index; {@code null} is treated like the setting's default, i.e. the field is loaded
 * @return the same set instance that was passed in
 */
public static Set<String> ensureLoaded(Set<String> fieldsToLoadForSyntheticSource, IndexSettings indexSettings) {
    // Mappers can be built without IndexSettings (null is passed by test helpers); fall back to the
    // default of "do not skip" instead of failing with a NullPointerException.
    final boolean skipRead = indexSettings != null && indexSettings.getSkipIgnoredSourceRead();
    if (skipRead == false) {
        fieldsToLoadForSyntheticSource.add(NAME);
    }

    return fieldsToLoadForSyntheticSource;
}

public record MappedNameValue(NameValue nameValue, XContentType type, Map<String, Object> map) {}

/**
Expand Down Expand Up @@ -200,11 +239,49 @@ public static byte[] encodeFromMap(MappedNameValue mappedNameValue, Map<String,
return IgnoredSourceFieldMapper.encode(filteredNameValue);
}

/**
 * Returns a loader whose only job is to control whether this field is loaded in scope of
 * synthetic source construction.
 * <p>
 * This mapper doesn't contribute to source directly as it has no access to the object structure. Instead, its contents
 * are loaded by SourceLoader and passed to object mappers that, in turn, write their ignore fields at the appropriate level.
 * <p>
 * In rare cases decoding values stored in this field can fail leading to entire source
 * not being available. We would like to have an option to lose some values in synthetic source
 * but have search not fail, so no stored field is requested when reading is disabled in the index settings.
 */
@Override
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
    return new SourceLoader.SyntheticFieldLoader() {
        @Override
        public Stream<Map.Entry<String, StoredFieldLoader>> storedFieldLoaders() {
            if (indexSettings.getSkipIgnoredSourceRead()) {
                // Reading is disabled: do not request the stored field at all.
                return Stream.empty();
            }

            // Values are handled in `SourceLoader`.
            return Stream.of(Map.entry(NAME, (v) -> {}));
        }

        @Override
        public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
            // This loader does not read doc values.
            return null;
        }

        @Override
        public boolean hasValue() {
            // This loader never writes anything itself.
            return false;
        }

        @Override
        public void write(XContentBuilder b) throws IOException {
            // Intentionally empty: values are handled in `SourceLoader`.
        }

        @Override
        public String fieldName() {
            // Does not really matter.
            return NAME;
        }

        @Override
        public void reset() {
            // Intentionally empty: this loader keeps no per-document state.
        }
    };
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
Expand Down Expand Up @@ -46,11 +47,18 @@ public static class Builder extends ObjectMapper.Builder {
private Explicit<Boolean> includeInParent = Explicit.IMPLICIT_FALSE;
private final IndexVersion indexCreatedVersion;
private final Function<Query, BitSetProducer> bitSetProducer;
private final IndexSettings indexSettings;

/**
 * Creates a builder for a nested object mapper.
 *
 * @param name                name passed to the parent {@code ObjectMapper.Builder}
 * @param indexCreatedVersion index version stored on the builder
 * @param bitSetProducer      function stored on the builder and handed to the built mapper
 * @param indexSettings       index settings stored on the builder and handed to the built mapper
 */
public Builder(
    String name,
    IndexVersion indexCreatedVersion,
    Function<Query, BitSetProducer> bitSetProducer,
    IndexSettings indexSettings
) {
    super(name, Explicit.IMPLICIT_TRUE);
    this.indexCreatedVersion = indexCreatedVersion;
    this.bitSetProducer = bitSetProducer;
    this.indexSettings = indexSettings;
}

Builder includeInRoot(boolean includeInRoot) {
Expand Down Expand Up @@ -111,7 +119,8 @@ public NestedObjectMapper build(MapperBuilderContext context) {
parentTypeFilter,
nestedTypePath,
nestedTypeFilter,
bitSetProducer
bitSetProducer,
indexSettings
);
}
}
Expand All @@ -126,7 +135,8 @@ public Mapper.Builder parse(String name, Map<String, Object> node, MappingParser
NestedObjectMapper.Builder builder = new NestedObjectMapper.Builder(
name,
parserContext.indexVersionCreated(),
parserContext::bitSetProducer
parserContext::bitSetProducer,
parserContext.getIndexSettings()
);
parseNested(name, node, builder);
parseObjectFields(node, parserContext, builder);
Expand Down Expand Up @@ -193,6 +203,7 @@ public MapperBuilderContext createChildContext(String name, Dynamic dynamic) {
private final Query nestedTypeFilter;
// Function to create a bitset for identifying parent documents
private final Function<Query, BitSetProducer> bitsetProducer;
private final IndexSettings indexSettings;

NestedObjectMapper(
String name,
Expand All @@ -206,7 +217,8 @@ public MapperBuilderContext createChildContext(String name, Dynamic dynamic) {
Query parentTypeFilter,
String nestedTypePath,
Query nestedTypeFilter,
Function<Query, BitSetProducer> bitsetProducer
Function<Query, BitSetProducer> bitsetProducer,
IndexSettings indexSettings
) {
super(name, fullPath, enabled, Explicit.IMPLICIT_TRUE, storeArraySource, dynamic, mappers);
this.parentTypeFilter = parentTypeFilter;
Expand All @@ -215,6 +227,7 @@ public MapperBuilderContext createChildContext(String name, Dynamic dynamic) {
this.includeInParent = includeInParent;
this.includeInRoot = includeInRoot;
this.bitsetProducer = bitsetProducer;
this.indexSettings = indexSettings;
}

public Query parentTypeFilter() {
Expand Down Expand Up @@ -252,7 +265,7 @@ public Map<String, Mapper> getChildren() {

@Override
public ObjectMapper.Builder newBuilder(IndexVersion indexVersionCreated) {
NestedObjectMapper.Builder builder = new NestedObjectMapper.Builder(leafName(), indexVersionCreated, bitsetProducer);
NestedObjectMapper.Builder builder = new NestedObjectMapper.Builder(leafName(), indexVersionCreated, bitsetProducer, indexSettings);
builder.enabled = enabled;
builder.dynamic = dynamic;
builder.includeInRoot = includeInRoot;
Expand All @@ -274,7 +287,8 @@ NestedObjectMapper withoutMappers() {
parentTypeFilter,
nestedTypePath,
nestedTypeFilter,
bitsetProducer
bitsetProducer,
indexSettings
);
}

Expand Down Expand Up @@ -349,7 +363,8 @@ public ObjectMapper merge(Mapper mergeWith, MapperMergeContext parentMergeContex
parentTypeFilter,
nestedTypePath,
nestedTypeFilter,
bitsetProducer
bitsetProducer,
indexSettings
);
}

Expand Down Expand Up @@ -382,7 +397,9 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
}

SourceLoader sourceLoader = new SourceLoader.Synthetic(() -> super.syntheticFieldLoader(mappers.values().stream(), true), NOOP);
var storedFieldLoader = org.elasticsearch.index.fieldvisitor.StoredFieldLoader.create(false, sourceLoader.requiredStoredFields());
// Some synthetic source use cases require using _ignored_source field
var requiredStoredFields = IgnoredSourceFieldMapper.ensureLoaded(sourceLoader.requiredStoredFields(), indexSettings);
var storedFieldLoader = org.elasticsearch.index.fieldvisitor.StoredFieldLoader.create(false, requiredStoredFields);
return new NestedSyntheticFieldLoader(
storedFieldLoader,
sourceLoader,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ public Synthetic(Supplier<SyntheticFieldLoader> fieldLoaderSupplier, SourceField
.storedFieldLoaders()
.map(Map.Entry::getKey)
.collect(Collectors.toSet());
this.requiredStoredFields.add(IgnoredSourceFieldMapper.NAME);
this.metrics = metrics;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,8 @@ private static ObjectMapper createObjectMapper(String name) {
}

/**
 * Builds a root-level nested object mapper for tests.
 *
 * @param name name of the nested object
 * @return the built mapper; its bitset producer throws {@link UnsupportedOperationException} when invoked
 */
private static NestedObjectMapper createNestedObjectMapper(String name) {
    // No IndexSettings are supplied here, so null is passed for that argument.
    NestedObjectMapper.Builder builder = new NestedObjectMapper.Builder(
        name,
        IndexVersion.current(),
        query -> { throw new UnsupportedOperationException(); },
        null
    );
    return builder.build(MapperBuilderContext.root(false, false));
}

private static MappingLookup createMappingLookup(
Expand Down
Loading

0 comments on commit 05173f2

Please sign in to comment.