From 21bae1f8d450c5369eba0bd4120cd6b9cc83ad97 Mon Sep 17 00:00:00 2001 From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> Date: Wed, 13 Oct 2021 15:03:09 -0400 Subject: [PATCH 1/3] [ML] optimize source extraction for categorize_text aggregation --- .../org/elasticsearch/xcontent/XContent.java | 4 ++ .../xcontent/cbor/CborXContent.java | 22 ++++++ .../xcontent/json/JsonXContent.java | 20 ++++++ .../xcontent/smile/SmileXContent.java | 20 ++++++ .../xcontent/yaml/YamlXContent.java | 20 ++++++ .../common/xcontent/XContentHelper.java | 67 +++++++++++++++++-- .../search/lookup/SourceLookup.java | 41 ++++++++++++ .../CategorizeTextAggregator.java | 2 +- 8 files changed, 188 insertions(+), 8 deletions(-) diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContent.java index d40bedf38b39f..227518b44c209 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContent.java @@ -82,6 +82,10 @@ XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationH XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, byte[] data, int offset, int length) throws IOException; + XContentParser createParser(NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, byte[] data, int offset, int length, FilterPath[] includes, + FilterPath[] excludes) throws IOException; + /** * Creates a parser over the provided reader. */ diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java index 9dfb6f47f7e86..753b237e5d516 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java @@ -112,6 +112,14 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry, return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current()); } + @Override + public XContentParser createParser(NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, byte[] data, int offset, int length, FilterPath[] includes, + FilterPath[] excludes) throws IOException { + return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current(), includes, + excludes); + } + @Override public XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, Reader reader) throws IOException { @@ -138,4 +146,18 @@ public XContentParser createParserForCompatibility(NamedXContentRegistry xConten ); } + public XContentParser createParserForCompatibility(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, + byte[] data, int offset, int length, RestApiVersion restApiVersion, + FilterPath[] includes, FilterPath[] excludes) + throws IOException { + return new CborXContentParser( + xContentRegistry, + deprecationHandler, + cborFactory.createParser(new ByteArrayInputStream(data, offset, length)), + restApiVersion, + includes, + excludes + ); + } + } diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/json/JsonXContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/json/JsonXContent.java index cf551f5761315..10df2c1c10d8d 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/json/JsonXContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/json/JsonXContent.java @@ -113,6 +113,26 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry, return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current()); } + @Override + public XContentParser createParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + byte[] data, + int offset, + int length, + FilterPath[] includes, + FilterPath[] excludes + ) throws IOException { + return new JsonXContentParser( + xContentRegistry, + deprecationHandler, + jsonFactory.createParser(new ByteArrayInputStream(data, offset, length)), + RestApiVersion.current(), + includes, + excludes + ); + } + @Override public XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, Reader reader) throws IOException { diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/smile/SmileXContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/smile/SmileXContent.java index e02f8ec307af8..696865a242830 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/smile/SmileXContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/smile/SmileXContent.java @@ -114,6 +114,26 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry, return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current()); } + @Override + public XContentParser createParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + byte[] data, + int offset, + int length, + FilterPath[] includes, + FilterPath[] excludes + ) throws IOException { + return new SmileXContentParser( + xContentRegistry, + deprecationHandler, + smileFactory.createParser(new ByteArrayInputStream(data, offset, length)), + RestApiVersion.current(), + includes, + excludes + ); + } + @Override public XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, Reader reader) throws IOException { diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/yaml/YamlXContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/yaml/YamlXContent.java index b3a684d20583d..68f1ac2bbf27b 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/yaml/YamlXContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/yaml/YamlXContent.java @@ -106,6 +106,26 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry, return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current()); } + @Override + public XContentParser createParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + byte[] data, + int offset, + int length, + FilterPath[] includes, + FilterPath[] excludes + ) throws IOException { + return new YamlXContentParser( + xContentRegistry, + deprecationHandler, + yamlFactory.createParser(new ByteArrayInputStream(data, offset, length)), + RestApiVersion.current(), + includes, + excludes + ); + } + @Override public XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, Reader reader) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java index 5c891843c0438..bb52fefd32230 100644 --- a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java +++ b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Tuple; import org.elasticsearch.common.compress.Compressor; import org.elasticsearch.common.compress.CompressorFactory; @@ -27,6 +28,7 @@ import org.elasticsearch.xcontent.XContentParseException; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.support.filtering.FilterPath; import java.io.BufferedInputStream; import java.io.IOException; @@ -101,6 +103,10 @@ public static Tuple> convertToMap(BytesReferen return convertToMap(bytes, ordered, null); } + public static Tuple> convertToMap(BytesReference bytes, boolean ordered, XContentType xContentType) { + return convertToMap(bytes, ordered, xContentType, null, null); + } + /** * Converts the given bytes into a map that is optionally ordered. The provided {@link XContentType} must be non-null. *

@@ -110,8 +116,13 @@ public static Tuple> convertToMap(BytesReferen * frequently when folks write nanosecond precision dates as a decimal * number. */ - public static Tuple> convertToMap(BytesReference bytes, boolean ordered, XContentType xContentType) - throws ElasticsearchParseException { + public static Tuple> convertToMap( + BytesReference bytes, + boolean ordered, + XContentType xContentType, + @Nullable FilterPath[] include, + @Nullable FilterPath[] exclude + ) throws ElasticsearchParseException { try { final XContentType contentType; InputStream input; @@ -129,14 +140,16 @@ public static Tuple> convertToMap(BytesReferen final int length = bytes.length(); contentType = xContentType != null ? xContentType : XContentFactory.xContentType(raw, offset, length); return new Tuple<>(Objects.requireNonNull(contentType), - convertToMap(XContentFactory.xContent(contentType), raw, offset, length, ordered)); + convertToMap(XContentFactory.xContent(contentType), raw, offset, length, ordered, include, exclude)); } else { input = bytes.streamInput(); contentType = xContentType != null ? xContentType : XContentFactory.xContentType(input); } try (InputStream stream = input) { - return new Tuple<>(Objects.requireNonNull(contentType), - convertToMap(XContentFactory.xContent(contentType), stream, ordered)); + return new Tuple<>( + Objects.requireNonNull(contentType), + convertToMap(XContentFactory.xContent(contentType), stream, ordered, include, exclude) + ); } } catch (IOException e) { throw new ElasticsearchParseException("Failed to parse content to map", e); @@ -163,9 +176,24 @@ public static Map convertToMap(XContent xContent, String string, */ public static Map convertToMap(XContent xContent, InputStream input, boolean ordered) throws ElasticsearchParseException { + return convertToMap(xContent, input, ordered, null, null); + } + + public static Map convertToMap( + XContent xContent, + InputStream input, + boolean ordered, + @Nullable FilterPath[] include, + @Nullable FilterPath[] exclude + ) throws ElasticsearchParseException { // It is safe to use EMPTY here because this never uses namedObject - try (XContentParser parser = xContent.createParser(NamedXContentRegistry.EMPTY, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, input)) { + try (XContentParser parser = xContent.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + input, + include, + exclude + )) { return ordered ? parser.mapOrdered() : parser.map(); } catch (IOException e) { throw new ElasticsearchParseException("Failed to parse content to map", e); @@ -187,6 +215,31 @@ public static Map convertToMap(XContent xContent, byte[] bytes, } } + public static Map convertToMap( + XContent xContent, + byte[] bytes, + int offset, + int length, + boolean ordered, + @Nullable FilterPath[] include, + @Nullable FilterPath[] exclude + ) throws ElasticsearchParseException { + // It is safe to use EMPTY here because this never uses namedObject + try (XContentParser parser = xContent.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + bytes, + offset, + length, + include, + exclude) + ) { + return ordered ? parser.mapOrdered() : parser.map(); + } catch (IOException e) { + throw new ElasticsearchParseException("Failed to parse content to map", e); + } + } + @Deprecated public static String convertToJson(BytesReference bytes, boolean reformatJson) throws IOException { return convertToJson(bytes, reformatJson, false); diff --git a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java index 505ea16927e97..f3c3bd0ad600c 100644 --- a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java +++ b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java @@ -20,6 +20,7 @@ import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.fieldvisitor.FieldsVisitor; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; +import org.elasticsearch.xcontent.support.filtering.FilterPath; import java.io.IOException; import java.util.Collection; @@ -103,6 +104,14 @@ public static Map sourceAsMap(BytesReference source) throws Elas return sourceAsMapAndType(source).v2(); } + public static Map sourceAsMap( + BytesReference source, + FilterPath[] include, + FilterPath[] exclude + ) throws ElasticsearchParseException { + return XContentHelper.convertToMap(source, false, null, include, exclude).v2(); + } + public void setSegmentAndDocument( LeafReaderContext context, int docId @@ -157,6 +166,38 @@ public List extractRawValues(String path) { return XContentMapValues.extractRawValues(path, source()); } + /** + * Returns the values associated with the path. Those are "low" level values, and it can + * handle path expression where an array/list is navigated within. + * + * The major difference with {@link SourceLookup#extractRawValues(String)} is that this version will: + * + * - not cache source if it's not already parsed + * - will only extract the desired values from the compressed source instead of deserializing the whole object + * + * This is useful when the caller only wants a single value from source and does not care of source is fully parsed and cached + * for later use. + * @param path The path from which to extract the values from source + * @return The list of found values or an empty list if none are found + */ + public List extractRawValuesOptimized(String path) { + if (source != null) { + return XContentMapValues.extractRawValues(path, source); + } + FilterPath[] filterPaths = FilterPath.compile(Set.of(path)); + if (sourceAsBytes != null) { + return XContentMapValues.extractRawValues(path, sourceAsMap(sourceAsBytes, filterPaths, null)); + } + try { + FieldsVisitor sourceFieldVisitor = new FieldsVisitor(true); + fieldReader.accept(docId, sourceFieldVisitor); + BytesReference source = sourceFieldVisitor.source(); + return XContentMapValues.extractRawValues(path, sourceAsMap(source, filterPaths, null)); + } catch (Exception e) { + throw new ElasticsearchParseException("failed to parse / load source", e); + } + } + /** * For the provided path, return its value in the source. * diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java index 16058fbdae4f2..27eb9e0330b62 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java @@ -181,7 +181,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException { private void collectFromSource(int doc, long owningBucketOrd, CategorizationTokenTree categorizer) throws IOException { sourceLookup.setSegmentAndDocument(ctx, doc); - Iterator itr = sourceLookup.extractRawValues(sourceFieldName).stream().map(obj -> { + Iterator itr = sourceLookup.extractRawValuesOptimized(sourceFieldName).stream().map(obj -> { if (obj == null) { return null; } From 197f5d40101c0f9d64a6138f92ab9a54f000ad19 Mon Sep 17 00:00:00 2001 From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> Date: Thu, 14 Oct 2021 12:55:17 -0400 Subject: [PATCH 2/3] Fixing optimization paths --- .../xcontent/cbor/CborXContent.java | 36 +++++++++---------- .../common/xcontent/XContentHelper.java | 29 ++++++++++----- .../search/lookup/SourceLookup.java | 18 +++++----- 3 files changed, 45 insertions(+), 38 deletions(-) diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java index 753b237e5d516..d43e3b10b225c 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java @@ -113,11 +113,23 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry, } @Override - public XContentParser createParser(NamedXContentRegistry xContentRegistry, - DeprecationHandler deprecationHandler, byte[] data, int offset, int length, FilterPath[] includes, - FilterPath[] excludes) throws IOException { - return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current(), includes, - excludes); + public XContentParser createParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + byte[] data, + int offset, + int length, + FilterPath[] includes, + FilterPath[] excludes + ) throws IOException { + return new CborXContentParser( + xContentRegistry, + deprecationHandler, + cborFactory.createParser(new ByteArrayInputStream(data, offset, length)), + RestApiVersion.current(), + includes, + excludes + ); } @Override @@ -146,18 +158,4 @@ public XContentParser createParserForCompatibility(NamedXContentRegistry xConten ); } - public XContentParser createParserForCompatibility(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, - byte[] data, int offset, int length, RestApiVersion restApiVersion, - FilterPath[] includes, FilterPath[] excludes) - throws IOException { - return new CborXContentParser( - xContentRegistry, - deprecationHandler, - cborFactory.createParser(new ByteArrayInputStream(data, offset, length)), - restApiVersion, - includes, - excludes - ); - } - } diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java index bb52fefd32230..6a5e253ffffe3 100644 --- a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java +++ b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java @@ -103,6 +103,10 @@ public static Tuple> convertToMap(BytesReferen return convertToMap(bytes, ordered, null); } + /** + * Exactly the same as {@link XContentHelper#convertToMap(BytesReference, boolean, XContentType, FilterPath[], FilterPath[])} but + * none of the fields are filtered + */ public static Tuple> convertToMap(BytesReference bytes, boolean ordered, XContentType xContentType) { return convertToMap(bytes, ordered, xContentType, null, null); } @@ -171,14 +175,20 @@ public static Map convertToMap(XContent xContent, String string, } /** - * Convert a string in some {@link XContent} format to a {@link Map}. Throws an {@link ElasticsearchParseException} if there is any - * error. Note that unlike {@link #convertToMap(BytesReference, boolean)}, this doesn't automatically uncompress the input. + * The same as {@link XContentHelper#convertToMap(XContent, byte[], int, int, boolean, FilterPath[], FilterPath[])} but none of the + * fields are filtered. */ public static Map convertToMap(XContent xContent, InputStream input, boolean ordered) throws ElasticsearchParseException { return convertToMap(xContent, input, ordered, null, null); } + /** + * Convert a string in some {@link XContent} format to a {@link Map}. Throws an {@link ElasticsearchParseException} if there is any + * error. Note that unlike {@link #convertToMap(BytesReference, boolean)}, this doesn't automatically uncompress the input. + * + * Additionally, fields may be included or excluded from the parsing. + */ public static Map convertToMap( XContent xContent, InputStream input, @@ -206,15 +216,16 @@ public static Map convertToMap( */ public static Map convertToMap(XContent xContent, byte[] bytes, int offset, int length, boolean ordered) throws ElasticsearchParseException { - // It is safe to use EMPTY here because this never uses namedObject - try (XContentParser parser = xContent.createParser(NamedXContentRegistry.EMPTY, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, bytes, offset, length)) { - return ordered ? parser.mapOrdered() : parser.map(); - } catch (IOException e) { - throw new ElasticsearchParseException("Failed to parse content to map", e); - } + return convertToMap(xContent, bytes, offset, length, ordered, null, null); } + /** + * Convert a byte array in some {@link XContent} format to a {@link Map}. Throws an {@link ElasticsearchParseException} if there is any + * error. Note that unlike {@link #convertToMap(BytesReference, boolean)}, this doesn't automatically uncompress the input. + * + * Unlike {@link XContentHelper#convertToMap(XContent, byte[], int, int, boolean)} this optionally accepts fields to include or exclude + * during XContent parsing. + */ public static Map convertToMap( XContent xContent, byte[] bytes, diff --git a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java index f3c3bd0ad600c..e7b3772874b64 100644 --- a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java +++ b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java @@ -104,14 +104,6 @@ public static Map sourceAsMap(BytesReference source) throws Elas return sourceAsMapAndType(source).v2(); } - public static Map sourceAsMap( - BytesReference source, - FilterPath[] include, - FilterPath[] exclude - ) throws ElasticsearchParseException { - return XContentHelper.convertToMap(source, false, null, include, exclude).v2(); - } - public void setSegmentAndDocument( LeafReaderContext context, int docId @@ -186,13 +178,19 @@ public List extractRawValuesOptimized(String path) { } FilterPath[] filterPaths = FilterPath.compile(Set.of(path)); if (sourceAsBytes != null) { - return XContentMapValues.extractRawValues(path, sourceAsMap(sourceAsBytes, filterPaths, null)); + return XContentMapValues.extractRawValues( + path, + XContentHelper.convertToMap(sourceAsBytes, false, null, filterPaths, null).v2() + ); } try { FieldsVisitor sourceFieldVisitor = new FieldsVisitor(true); fieldReader.accept(docId, sourceFieldVisitor); BytesReference source = sourceFieldVisitor.source(); - return XContentMapValues.extractRawValues(path, sourceAsMap(source, filterPaths, null)); + return XContentMapValues.extractRawValues( + path, + XContentHelper.convertToMap(source, false, null, filterPaths, null).v2() + ); } catch (Exception e) { throw new ElasticsearchParseException("failed to parse / load source", e); } From 368d801188e379d2f523551bf3b1eac951cff3c9 Mon Sep 17 00:00:00 2001 From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> Date: Thu, 21 Oct 2021 12:31:05 -0400 Subject: [PATCH 3/3] renaming function --- .../main/java/org/elasticsearch/search/lookup/SourceLookup.java | 2 +- .../xpack/ml/aggs/categorization/CategorizeTextAggregator.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java index e7b3772874b64..d498c9cffe468 100644 --- a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java +++ b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java @@ -172,7 +172,7 @@ public List extractRawValues(String path) { * @param path The path from which to extract the values from source * @return The list of found values or an empty list if none are found */ - public List extractRawValuesOptimized(String path) { + public List extractRawValuesWithoutCaching(String path) { if (source != null) { return XContentMapValues.extractRawValues(path, source); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java index 6d4be2c4167f5..386c41255747d 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java @@ -183,7 +183,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException { private void collectFromSource(int doc, long owningBucketOrd, CategorizationTokenTree categorizer) throws IOException { sourceLookup.setSegmentAndDocument(ctx, doc); - Iterator itr = sourceLookup.extractRawValuesOptimized(sourceFieldName).stream().map(obj -> { + Iterator itr = sourceLookup.extractRawValuesWithoutCaching(sourceFieldName).stream().map(obj -> { if (obj == null) { return null; }