From 4560a0c5cd97965f77dcda10d73739e1d25a928a Mon Sep 17 00:00:00 2001 From: mushaoqiong Date: Wed, 12 Jan 2022 21:34:40 +0800 Subject: [PATCH] Add XContentFieldFilter (#81970) This commit introduces XContentFieldFilter, which applies field includes/excludes to XContent without having to realise the xcontent itself as a java map. SourceFieldMapper and ShardGetService are cut over to use this class --- .../common/xcontent/XContentFieldFilter.java | 76 +++++++++++++++++++ .../common/xcontent/XContentHelper.java | 26 +++++++ .../index/get/ShardGetService.java | 14 +--- .../index/mapper/SourceFieldMapper.java | 29 ++----- 4 files changed, 111 insertions(+), 34 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/common/xcontent/XContentFieldFilter.java diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentFieldFilter.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentFieldFilter.java new file mode 100644 index 0000000000000..af29edd0234b2 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/xcontent/XContentFieldFilter.java @@ -0,0 +1,76 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */
+
+package org.elasticsearch.common.xcontent;
+
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
+import org.elasticsearch.common.util.CollectionUtils;
+import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentFactory;
+import org.elasticsearch.xcontent.XContentParser;
+import org.elasticsearch.xcontent.XContentParserConfiguration;
+import org.elasticsearch.xcontent.XContentType;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+
+/**
+ * A filter that filters fields away from source
+ */
+public interface XContentFieldFilter {
+    /**
+     * Filters the source, given as a {@link BytesReference} in the {@link XContentType} content type.
+     * Note that xContentType may be null in some cases; implementations should then guess it from sourceBytes.
+     */
+    BytesReference apply(BytesReference sourceBytes, @Nullable XContentType xContentType) throws IOException;
+
+    /**
+     * Constructs an {@link XContentFieldFilter} using the given includes and excludes.
+     *
+     * @param includes fields to keep, wildcard supported
+     * @param excludes fields to remove, wildcard supported
+     * @return a filter using {@link XContentMapValues#filter(String[], String[])} if a wildcard is found in excludes,
+     *         otherwise a filter using {@link XContentParser}
+     */
+    static XContentFieldFilter newFieldFilter(String[] includes, String[] excludes) {
+        if ((CollectionUtils.isEmpty(excludes) == false) && Arrays.stream(excludes).anyMatch(field -> field.contains("*"))) {
+            // build the map filter once per XContentFieldFilter rather than on every apply() call
+            final Function<Map<String, ?>, Map<String, Object>> mapFilter = XContentMapValues.filter(includes, excludes);
+            return (originalSource, contentType) -> {
+                Tuple<XContentType, Map<String, Object>> mapTuple = XContentHelper.convertToMap(originalSource, true, contentType);
+                BytesStreamOutput bStream = new BytesStreamOutput();
+                try (XContentBuilder builder = XContentFactory.contentBuilder(mapTuple.v1(), bStream)) {
+                    builder.map(mapFilter.apply(mapTuple.v2()));
+                }
+                return bStream.bytes();
+            };
+        } else {
+            // Set.copyOf accepts duplicate entries, which Set.of rejects; a null array is treated as an empty one
+            final Set<String> includeSet = includes == null ? Set.of() : Set.copyOf(Arrays.asList(includes));
+            final Set<String> excludeSet = excludes == null ? Set.of() : Set.copyOf(Arrays.asList(excludes));
+            final XContentParserConfiguration parserConfig = XContentParserConfiguration.EMPTY.withFiltering(includeSet, excludeSet);
+            return (originalSource, contentType) -> {
+                // NOTE(review): the type may be detected from compressed bytes, but the parser below reads them as-is - confirm
+                final XContentType type = contentType != null ? contentType : XContentHelper.xContentTypeMayCompressed(originalSource);
+                BytesStreamOutput streamOutput = new BytesStreamOutput(Math.min(1024, originalSource.length()));
+                XContentBuilder builder = new XContentBuilder(type.xContent(), streamOutput);
+                try (XContentParser parser = type.xContent().createParser(parserConfig, originalSource.streamInput())) {
+                    builder.copyCurrentStructure(parser);
+                }
+                return BytesReference.bytes(builder);
+            };
+        }
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java
index 1eaac03641c62..b46464f034ba0 100644
--- a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java
+++ b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java
@@ -518,6 +518,32 @@ public static BytesReference toXContent(ToXContent toXContent, XContentType xCon
         }
     }
 
+    /**
+     * Guesses the content type based on the provided bytes which may be compressed.
+     *
+     * @deprecated the content type should not be guessed except for a few cases where we effectively don't know the content type.
+     * The REST layer should move to reading the Content-Type header instead. There are other places where auto-detection may be needed.
+     * This method is deprecated to prevent usages of it from spreading further without specific reasons.
+     */
+    @Deprecated
+    public static XContentType xContentTypeMayCompressed(BytesReference bytes) {
+        Compressor compressor = CompressorFactory.compressor(bytes);
+        if (compressor != null) {
+            // the decompressing stream is opened here, so it is also closed here once the type has been read;
+            // closing it is enough, the BufferedInputStream wrapper below only delegates to it
+            try (InputStream decompressed = compressor.threadLocalInputStream(bytes.streamInput())) {
+                // buffer only when the stream cannot mark/reset on its own
+                InputStream markable = decompressed.markSupported() ? decompressed : new BufferedInputStream(decompressed);
+                return XContentFactory.xContentType(markable);
+            } catch (IOException e) {
+                assert false : "Should not happen, we're just reading bytes from memory";
+                throw new UncheckedIOException(e);
+            }
+        } else {
+            return XContentHelper.xContentType(bytes);
+        }
+    }
+
     /**
      * Guesses the content type based on the provided bytes.
      *
diff --git a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java
index 8153e3e406dfb..992afdeb99881 100644
--- a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java
+++ b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java
@@ -16,10 +16,8 @@
 import org.elasticsearch.common.metrics.CounterMetric;
 import org.elasticsearch.common.metrics.MeanMetric;
 import org.elasticsearch.common.util.set.Sets;
-import org.elasticsearch.common.xcontent.XContentHelper;
-import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.common.xcontent.XContentFieldFilter;
 import org.elasticsearch.core.Nullable;
-import org.elasticsearch.core.Tuple;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.VersionType;
 import org.elasticsearch.index.engine.Engine;
@@ -33,8 +31,6 @@
 import org.elasticsearch.index.shard.AbstractIndexShardComponent;
 import org.elasticsearch.index.shard.IndexShard;
 import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
-import org.elasticsearch.xcontent.XContentFactory;
-import org.elasticsearch.xcontent.XContentType;
 
 import
java.io.IOException; import java.util.HashMap; @@ -253,15 +249,11 @@ private GetResult innerGetLoadFromStoredFields( if (fetchSourceContext.fetchSource() == false) { source = null; } else if (fetchSourceContext.includes().length > 0 || fetchSourceContext.excludes().length > 0) { - Map sourceAsMap; // TODO: The source might be parsed and available in the sourceLookup but that one uses unordered maps so different. // Do we care? - Tuple> typeMapTuple = XContentHelper.convertToMap(source, true); - XContentType sourceContentType = typeMapTuple.v1(); - sourceAsMap = typeMapTuple.v2(); - sourceAsMap = XContentMapValues.filter(sourceAsMap, fetchSourceContext.includes(), fetchSourceContext.excludes()); try { - source = BytesReference.bytes(XContentFactory.contentBuilder(sourceContentType).map(sourceAsMap)); + source = XContentFieldFilter.newFieldFilter(fetchSourceContext.includes(), fetchSourceContext.excludes()) + .apply(source, null); } catch (IOException e) { throw new ElasticsearchException("Failed to get id [" + id + "] with includes/excludes set", e); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java index 75bcd1eab5432..d30c24925a6c3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java @@ -16,32 +16,24 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.util.CollectionUtils; -import org.elasticsearch.common.xcontent.XContentHelper; -import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.common.xcontent.XContentFieldFilter; import org.elasticsearch.core.Nullable; -import org.elasticsearch.core.Tuple; import 
org.elasticsearch.index.query.QueryShardException; import org.elasticsearch.index.query.SearchExecutionContext; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentType; import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.Map; -import java.util.function.Function; public class SourceFieldMapper extends MetadataFieldMapper { - public static final String NAME = "_source"; public static final String RECOVERY_SOURCE_NAME = "_recovery_source"; public static final String CONTENT_TYPE = "_source"; - private final Function, Map> filter; + private final XContentFieldFilter filter; private static final SourceFieldMapper DEFAULT = new SourceFieldMapper(Defaults.ENABLED, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY); @@ -145,7 +137,9 @@ private SourceFieldMapper(boolean enabled, String[] includes, String[] excludes) this.includes = includes; this.excludes = excludes; final boolean filtered = CollectionUtils.isEmpty(includes) == false || CollectionUtils.isEmpty(excludes) == false; - this.filter = enabled && filtered ? XContentMapValues.filter(includes, excludes) : null; + this.filter = enabled && filtered + ? XContentFieldFilter.newFieldFilter(includes, excludes) + : (sourceBytes, contentType) -> sourceBytes; this.complete = enabled && CollectionUtils.isEmpty(includes) && CollectionUtils.isEmpty(excludes); } @@ -180,18 +174,7 @@ public void preParse(DocumentParserContext context) throws IOException { public BytesReference applyFilters(@Nullable BytesReference originalSource, @Nullable XContentType contentType) throws IOException { if (enabled && originalSource != null) { // Percolate and tv APIs may not set the source and that is ok, because these APIs will not index any data - if (filter != null) { - // we don't update the context source if we filter, we want to keep it as is... 
- Tuple> mapTuple = XContentHelper.convertToMap(originalSource, true, contentType); - Map filteredSource = filter.apply(mapTuple.v2()); - BytesStreamOutput bStream = new BytesStreamOutput(); - XContentType actualContentType = mapTuple.v1(); - XContentBuilder builder = XContentFactory.contentBuilder(actualContentType, bStream).map(filteredSource); - builder.close(); - return bStream.bytes(); - } else { - return originalSource; - } + return filter.apply(originalSource, contentType); } else { return null; }