From 21bae1f8d450c5369eba0bd4120cd6b9cc83ad97 Mon Sep 17 00:00:00 2001
From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com>
Date: Wed, 13 Oct 2021 15:03:09 -0400
Subject: [PATCH 1/3] [ML] optimize source extraction for categorize_text
 aggregation

---
 .../org/elasticsearch/xcontent/XContent.java  |  4 ++
 .../xcontent/cbor/CborXContent.java           | 22 ++++++
 .../xcontent/json/JsonXContent.java           | 20 ++++++
 .../xcontent/smile/SmileXContent.java         | 20 ++++++
 .../xcontent/yaml/YamlXContent.java           | 20 ++++++
 .../common/xcontent/XContentHelper.java       | 67 +++++++++++++++++--
 .../search/lookup/SourceLookup.java           | 41 ++++++++++++
 .../CategorizeTextAggregator.java             |  2 +-
 8 files changed, 188 insertions(+), 8 deletions(-)

diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContent.java
index d40bedf38b39f..227518b44c209 100644
--- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContent.java
+++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContent.java
@@ -82,6 +82,10 @@ XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationH
     XContentParser createParser(NamedXContentRegistry xContentRegistry,
                                 DeprecationHandler deprecationHandler, byte[] data, int offset, int length) throws IOException;
 
+    XContentParser createParser(NamedXContentRegistry xContentRegistry,
+                                DeprecationHandler deprecationHandler, byte[] data, int offset, int length, FilterPath[] includes,
+                                FilterPath[] excludes) throws IOException;
+
     /**
      * Creates a parser over the provided reader.
      */
diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java
index 9dfb6f47f7e86..753b237e5d516 100644
--- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java
+++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java
@@ -112,6 +112,14 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry,
         return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current());
     }
 
+    @Override
+    public XContentParser createParser(NamedXContentRegistry xContentRegistry,
+                                       DeprecationHandler deprecationHandler, byte[] data, int offset, int length, FilterPath[] includes,
+                                       FilterPath[] excludes) throws IOException {
+        return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current(), includes,
+            excludes);
+    }
+
     @Override
     public XContentParser createParser(NamedXContentRegistry xContentRegistry,
                                        DeprecationHandler deprecationHandler, Reader reader) throws IOException {
@@ -138,4 +146,18 @@ public XContentParser createParserForCompatibility(NamedXContentRegistry xConten
         );
     }
 
+    public XContentParser createParserForCompatibility(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler,
+                                                       byte[] data, int offset, int length, RestApiVersion restApiVersion,
+                                                       FilterPath[] includes, FilterPath[] excludes)
+        throws IOException {
+        return new CborXContentParser(
+            xContentRegistry,
+            deprecationHandler,
+            cborFactory.createParser(new ByteArrayInputStream(data, offset, length)),
+            restApiVersion,
+            includes,
+            excludes
+        );
+    }
+
 }
diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/json/JsonXContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/json/JsonXContent.java
index cf551f5761315..10df2c1c10d8d 100644
--- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/json/JsonXContent.java
+++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/json/JsonXContent.java
@@ -113,6 +113,26 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry,
         return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current());
     }
 
+    @Override
+    public XContentParser createParser(
+        NamedXContentRegistry xContentRegistry,
+        DeprecationHandler deprecationHandler,
+        byte[] data,
+        int offset,
+        int length,
+        FilterPath[] includes,
+        FilterPath[] excludes
+    ) throws IOException {
+        return new JsonXContentParser(
+            xContentRegistry,
+            deprecationHandler,
+            jsonFactory.createParser(new ByteArrayInputStream(data, offset, length)),
+            RestApiVersion.current(),
+            includes,
+            excludes
+        );
+    }
+
     @Override
     public XContentParser createParser(NamedXContentRegistry xContentRegistry,
             DeprecationHandler deprecationHandler, Reader reader) throws IOException {
diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/smile/SmileXContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/smile/SmileXContent.java
index e02f8ec307af8..696865a242830 100644
--- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/smile/SmileXContent.java
+++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/smile/SmileXContent.java
@@ -114,6 +114,26 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry,
         return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current());
     }
 
+    @Override
+    public XContentParser createParser(
+        NamedXContentRegistry xContentRegistry,
+        DeprecationHandler deprecationHandler,
+        byte[] data,
+        int offset,
+        int length,
+        FilterPath[] includes,
+        FilterPath[] excludes
+    ) throws IOException {
+        return new SmileXContentParser(
+            xContentRegistry,
+            deprecationHandler,
+            smileFactory.createParser(new ByteArrayInputStream(data, offset, length)),
+            RestApiVersion.current(),
+            includes,
+            excludes
+        );
+    }
+
     @Override
     public XContentParser createParser(NamedXContentRegistry xContentRegistry,
             DeprecationHandler deprecationHandler, Reader reader) throws IOException {
diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/yaml/YamlXContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/yaml/YamlXContent.java
index b3a684d20583d..68f1ac2bbf27b 100644
--- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/yaml/YamlXContent.java
+++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/yaml/YamlXContent.java
@@ -106,6 +106,26 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry,
         return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current());
     }
 
+    @Override
+    public XContentParser createParser(
+        NamedXContentRegistry xContentRegistry,
+        DeprecationHandler deprecationHandler,
+        byte[] data,
+        int offset,
+        int length,
+        FilterPath[] includes,
+        FilterPath[] excludes
+    ) throws IOException {
+        return new YamlXContentParser(
+            xContentRegistry,
+            deprecationHandler,
+            yamlFactory.createParser(new ByteArrayInputStream(data, offset, length)),
+            RestApiVersion.current(),
+            includes,
+            excludes
+        );
+    }
+
     @Override
     public XContentParser createParser(NamedXContentRegistry xContentRegistry,
             DeprecationHandler deprecationHandler, Reader reader) throws IOException {
diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java
index 5c891843c0438..bb52fefd32230 100644
--- a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java
+++ b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java
@@ -13,6 +13,7 @@
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.core.Nullable;
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.common.compress.Compressor;
 import org.elasticsearch.common.compress.CompressorFactory;
@@ -27,6 +28,7 @@
 import org.elasticsearch.xcontent.XContentParseException;
 import org.elasticsearch.xcontent.XContentType;
 import org.elasticsearch.xcontent.XContentParser;
+import org.elasticsearch.xcontent.support.filtering.FilterPath;
 
 import java.io.BufferedInputStream;
 import java.io.IOException;
@@ -101,6 +103,10 @@ public static Tuple<XContentType, Map<String, Object>> convertToMap(BytesReferen
         return convertToMap(bytes, ordered, null);
     }
 
+    public static Tuple<XContentType, Map<String, Object>> convertToMap(BytesReference bytes, boolean ordered, XContentType xContentType) {
+        return convertToMap(bytes, ordered, xContentType, null, null);
+    }
+
     /**
      * Converts the given bytes into a map that is optionally ordered. The provided {@link XContentType} must be non-null.
      * <p>
@@ -110,8 +116,13 @@ public static Tuple<XContentType, Map<String, Object>> convertToMap(BytesReferen
      * frequently when folks write nanosecond precision dates as a decimal
      * number.
      */
-    public static Tuple<XContentType, Map<String, Object>> convertToMap(BytesReference bytes, boolean ordered, XContentType xContentType)
-        throws ElasticsearchParseException {
+    public static Tuple<XContentType, Map<String, Object>> convertToMap(
+            BytesReference bytes,
+            boolean ordered,
+            XContentType xContentType,
+            @Nullable FilterPath[] include,
+            @Nullable FilterPath[] exclude
+    ) throws ElasticsearchParseException {
         try {
             final XContentType contentType;
             InputStream input;
@@ -129,14 +140,16 @@ public static Tuple<XContentType, Map<String, Object>> convertToMap(BytesReferen
                 final int length = bytes.length();
                 contentType = xContentType != null ? xContentType : XContentFactory.xContentType(raw, offset, length);
                 return new Tuple<>(Objects.requireNonNull(contentType),
-                        convertToMap(XContentFactory.xContent(contentType), raw, offset, length, ordered));
+                        convertToMap(XContentFactory.xContent(contentType), raw, offset, length, ordered, include, exclude));
             } else {
                 input = bytes.streamInput();
                 contentType = xContentType != null ? xContentType : XContentFactory.xContentType(input);
             }
             try (InputStream stream = input) {
-                return new Tuple<>(Objects.requireNonNull(contentType),
-                    convertToMap(XContentFactory.xContent(contentType), stream, ordered));
+                return new Tuple<>(
+                        Objects.requireNonNull(contentType),
+                        convertToMap(XContentFactory.xContent(contentType), stream, ordered, include, exclude)
+                );
             }
         } catch (IOException e) {
             throw new ElasticsearchParseException("Failed to parse content to map", e);
@@ -163,9 +176,24 @@ public static Map<String, Object> convertToMap(XContent xContent, String string,
      */
     public static Map<String, Object> convertToMap(XContent xContent, InputStream input, boolean ordered)
             throws ElasticsearchParseException {
+        return convertToMap(xContent, input, ordered, null, null);
+    }
+
+    public static Map<String, Object> convertToMap(
+            XContent xContent,
+            InputStream input,
+            boolean ordered,
+            @Nullable FilterPath[] include,
+            @Nullable FilterPath[] exclude
+    ) throws ElasticsearchParseException {
         // It is safe to use EMPTY here because this never uses namedObject
-        try (XContentParser parser = xContent.createParser(NamedXContentRegistry.EMPTY,
-                DeprecationHandler.THROW_UNSUPPORTED_OPERATION, input)) {
+        try (XContentParser parser = xContent.createParser(
+                NamedXContentRegistry.EMPTY,
+                DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
+                input,
+                include,
+                exclude
+        )) {
             return ordered ? parser.mapOrdered() : parser.map();
         } catch (IOException e) {
             throw new ElasticsearchParseException("Failed to parse content to map", e);
@@ -187,6 +215,31 @@ public static Map<String, Object> convertToMap(XContent xContent, byte[] bytes,
         }
     }
 
+    public static Map<String, Object> convertToMap(
+            XContent xContent,
+            byte[] bytes,
+            int offset,
+            int length,
+            boolean ordered,
+            @Nullable FilterPath[] include,
+            @Nullable FilterPath[] exclude
+    ) throws ElasticsearchParseException {
+        // It is safe to use EMPTY here because this never uses namedObject
+        try (XContentParser parser = xContent.createParser(
+                NamedXContentRegistry.EMPTY,
+                DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
+                bytes,
+                offset,
+                length,
+                include,
+                exclude)
+        ) {
+            return ordered ? parser.mapOrdered() : parser.map();
+        } catch (IOException e) {
+            throw new ElasticsearchParseException("Failed to parse content to map", e);
+        }
+    }
+
     @Deprecated
     public static String convertToJson(BytesReference bytes, boolean reformatJson) throws IOException {
         return convertToJson(bytes, reformatJson, false);
diff --git a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
index 505ea16927e97..f3c3bd0ad600c 100644
--- a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
+++ b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
@@ -20,6 +20,7 @@
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
 import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
 import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
+import org.elasticsearch.xcontent.support.filtering.FilterPath;
 
 import java.io.IOException;
 import java.util.Collection;
@@ -103,6 +104,14 @@ public static Map<String, Object> sourceAsMap(BytesReference source) throws Elas
         return sourceAsMapAndType(source).v2();
     }
 
+    public static Map<String, Object> sourceAsMap(
+        BytesReference source,
+        FilterPath[] include,
+        FilterPath[] exclude
+    ) throws ElasticsearchParseException {
+        return  XContentHelper.convertToMap(source, false, null, include, exclude).v2();
+    }
+
     public void setSegmentAndDocument(
         LeafReaderContext context,
         int docId
@@ -157,6 +166,38 @@ public List<Object> extractRawValues(String path) {
         return XContentMapValues.extractRawValues(path, source());
     }
 
+    /**
+     * Returns the values associated with the path. Those are "low" level values, and it can
+     * handle path expression where an array/list is navigated within.
+     *
+     * The major difference with {@link SourceLookup#extractRawValues(String)} is that this version will:
+     *
+     *  - not cache source if it's not already parsed
+     *  - will only extract the desired values from the compressed source instead of deserializing the whole object
+     *
+     * This is useful when the caller only wants a single value from source and does not care of source is fully parsed and cached
+     * for later use.
+     * @param path The path from which to extract the values from source
+     * @return The list of found values or an empty list if none are found
+     */
+    public List<Object> extractRawValuesOptimized(String path) {
+        if (source != null) {
+            return XContentMapValues.extractRawValues(path, source);
+        }
+        FilterPath[] filterPaths = FilterPath.compile(Set.of(path));
+        if (sourceAsBytes != null) {
+            return XContentMapValues.extractRawValues(path, sourceAsMap(sourceAsBytes, filterPaths, null));
+        }
+        try {
+            FieldsVisitor sourceFieldVisitor = new FieldsVisitor(true);
+            fieldReader.accept(docId, sourceFieldVisitor);
+            BytesReference source = sourceFieldVisitor.source();
+            return XContentMapValues.extractRawValues(path, sourceAsMap(source, filterPaths, null));
+        } catch (Exception e) {
+            throw new ElasticsearchParseException("failed to parse / load source", e);
+        }
+    }
+
     /**
      * For the provided path, return its value in the source.
      *
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java
index 16058fbdae4f2..27eb9e0330b62 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java
@@ -181,7 +181,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
 
             private void collectFromSource(int doc, long owningBucketOrd, CategorizationTokenTree categorizer) throws IOException {
                 sourceLookup.setSegmentAndDocument(ctx, doc);
-                Iterator<String> itr = sourceLookup.extractRawValues(sourceFieldName).stream().map(obj -> {
+                Iterator<String> itr = sourceLookup.extractRawValuesOptimized(sourceFieldName).stream().map(obj -> {
                     if (obj == null) {
                         return null;
                     }

From 197f5d40101c0f9d64a6138f92ab9a54f000ad19 Mon Sep 17 00:00:00 2001
From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com>
Date: Thu, 14 Oct 2021 12:55:17 -0400
Subject: [PATCH 2/3] Fixing optimization paths

---
 .../xcontent/cbor/CborXContent.java           | 36 +++++++++----------
 .../common/xcontent/XContentHelper.java       | 29 ++++++++++-----
 .../search/lookup/SourceLookup.java           | 18 +++++-----
 3 files changed, 45 insertions(+), 38 deletions(-)

diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java
index 753b237e5d516..d43e3b10b225c 100644
--- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java
+++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java
@@ -113,11 +113,23 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry,
     }
 
     @Override
-    public XContentParser createParser(NamedXContentRegistry xContentRegistry,
-                                       DeprecationHandler deprecationHandler, byte[] data, int offset, int length, FilterPath[] includes,
-                                       FilterPath[] excludes) throws IOException {
-        return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current(), includes,
-            excludes);
+    public XContentParser createParser(
+        NamedXContentRegistry xContentRegistry,
+        DeprecationHandler deprecationHandler,
+        byte[] data,
+        int offset,
+        int length,
+        FilterPath[] includes,
+        FilterPath[] excludes
+    ) throws IOException {
+        return new CborXContentParser(
+            xContentRegistry,
+            deprecationHandler,
+            cborFactory.createParser(new ByteArrayInputStream(data, offset, length)),
+            RestApiVersion.current(),
+            includes,
+            excludes
+        );
     }
 
     @Override
@@ -146,18 +158,4 @@ public XContentParser createParserForCompatibility(NamedXContentRegistry xConten
         );
     }
 
-    public XContentParser createParserForCompatibility(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler,
-                                                       byte[] data, int offset, int length, RestApiVersion restApiVersion,
-                                                       FilterPath[] includes, FilterPath[] excludes)
-        throws IOException {
-        return new CborXContentParser(
-            xContentRegistry,
-            deprecationHandler,
-            cborFactory.createParser(new ByteArrayInputStream(data, offset, length)),
-            restApiVersion,
-            includes,
-            excludes
-        );
-    }
-
 }
diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java
index bb52fefd32230..6a5e253ffffe3 100644
--- a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java
+++ b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java
@@ -103,6 +103,10 @@ public static Tuple<XContentType, Map<String, Object>> convertToMap(BytesReferen
         return convertToMap(bytes, ordered, null);
     }
 
+    /**
+     * Exactly the same as {@link XContentHelper#convertToMap(BytesReference, boolean, XContentType, FilterPath[], FilterPath[])} but
+     * none of the fields are filtered
+     */
     public static Tuple<XContentType, Map<String, Object>> convertToMap(BytesReference bytes, boolean ordered, XContentType xContentType) {
         return convertToMap(bytes, ordered, xContentType, null, null);
     }
@@ -171,14 +175,20 @@ public static Map<String, Object> convertToMap(XContent xContent, String string,
     }
 
     /**
-     * Convert a string in some {@link XContent} format to a {@link Map}. Throws an {@link ElasticsearchParseException} if there is any
-     * error. Note that unlike {@link #convertToMap(BytesReference, boolean)}, this doesn't automatically uncompress the input.
+     * The same as {@link XContentHelper#convertToMap(XContent, byte[], int, int, boolean, FilterPath[], FilterPath[])} but none of the
+     * fields are filtered.
      */
     public static Map<String, Object> convertToMap(XContent xContent, InputStream input, boolean ordered)
             throws ElasticsearchParseException {
         return convertToMap(xContent, input, ordered, null, null);
     }
 
+    /**
+     * Convert a string in some {@link XContent} format to a {@link Map}. Throws an {@link ElasticsearchParseException} if there is any
+     * error. Note that unlike {@link #convertToMap(BytesReference, boolean)}, this doesn't automatically uncompress the input.
+     *
+     * Additionally, fields may be included or excluded from the parsing.
+     */
     public static Map<String, Object> convertToMap(
             XContent xContent,
             InputStream input,
@@ -206,15 +216,16 @@ public static Map<String, Object> convertToMap(
      */
     public static Map<String, Object> convertToMap(XContent xContent, byte[] bytes, int offset, int length, boolean ordered)
             throws ElasticsearchParseException {
-        // It is safe to use EMPTY here because this never uses namedObject
-        try (XContentParser parser = xContent.createParser(NamedXContentRegistry.EMPTY,
-                DeprecationHandler.THROW_UNSUPPORTED_OPERATION, bytes, offset, length)) {
-            return ordered ? parser.mapOrdered() : parser.map();
-        } catch (IOException e) {
-            throw new ElasticsearchParseException("Failed to parse content to map", e);
-        }
+        return convertToMap(xContent, bytes, offset, length, ordered, null, null);
     }
 
+    /**
+     * Convert a byte array in some {@link XContent} format to a {@link Map}. Throws an {@link ElasticsearchParseException} if there is any
+     * error. Note that unlike {@link #convertToMap(BytesReference, boolean)}, this doesn't automatically uncompress the input.
+     *
+     * Unlike {@link XContentHelper#convertToMap(XContent, byte[], int, int, boolean)} this optionally accepts fields to include or exclude
+     * during XContent parsing.
+     */
     public static Map<String, Object> convertToMap(
             XContent xContent,
             byte[] bytes,
diff --git a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
index f3c3bd0ad600c..e7b3772874b64 100644
--- a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
+++ b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
@@ -104,14 +104,6 @@ public static Map<String, Object> sourceAsMap(BytesReference source) throws Elas
         return sourceAsMapAndType(source).v2();
     }
 
-    public static Map<String, Object> sourceAsMap(
-        BytesReference source,
-        FilterPath[] include,
-        FilterPath[] exclude
-    ) throws ElasticsearchParseException {
-        return  XContentHelper.convertToMap(source, false, null, include, exclude).v2();
-    }
-
     public void setSegmentAndDocument(
         LeafReaderContext context,
         int docId
@@ -186,13 +178,19 @@ public List<Object> extractRawValuesOptimized(String path) {
         }
         FilterPath[] filterPaths = FilterPath.compile(Set.of(path));
         if (sourceAsBytes != null) {
-            return XContentMapValues.extractRawValues(path, sourceAsMap(sourceAsBytes, filterPaths, null));
+            return XContentMapValues.extractRawValues(
+                path,
+                XContentHelper.convertToMap(sourceAsBytes, false, null, filterPaths, null).v2()
+            );
         }
         try {
             FieldsVisitor sourceFieldVisitor = new FieldsVisitor(true);
             fieldReader.accept(docId, sourceFieldVisitor);
             BytesReference source = sourceFieldVisitor.source();
-            return XContentMapValues.extractRawValues(path, sourceAsMap(source, filterPaths, null));
+            return XContentMapValues.extractRawValues(
+                path,
+                XContentHelper.convertToMap(source, false, null, filterPaths, null).v2()
+            );
         } catch (Exception e) {
             throw new ElasticsearchParseException("failed to parse / load source", e);
         }

From 368d801188e379d2f523551bf3b1eac951cff3c9 Mon Sep 17 00:00:00 2001
From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com>
Date: Thu, 21 Oct 2021 12:31:05 -0400
Subject: [PATCH 3/3] renaming function

---
 .../main/java/org/elasticsearch/search/lookup/SourceLookup.java | 2 +-
 .../xpack/ml/aggs/categorization/CategorizeTextAggregator.java  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
index e7b3772874b64..d498c9cffe468 100644
--- a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
+++ b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
@@ -172,7 +172,7 @@ public List<Object> extractRawValues(String path) {
      * @param path The path from which to extract the values from source
      * @return The list of found values or an empty list if none are found
      */
-    public List<Object> extractRawValuesOptimized(String path) {
+    public List<Object> extractRawValuesWithoutCaching(String path) {
         if (source != null) {
             return XContentMapValues.extractRawValues(path, source);
         }
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java
index 6d4be2c4167f5..386c41255747d 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java
@@ -183,7 +183,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
 
             private void collectFromSource(int doc, long owningBucketOrd, CategorizationTokenTree categorizer) throws IOException {
                 sourceLookup.setSegmentAndDocument(ctx, doc);
-                Iterator<String> itr = sourceLookup.extractRawValuesOptimized(sourceFieldName).stream().map(obj -> {
+                Iterator<String> itr = sourceLookup.extractRawValuesWithoutCaching(sourceFieldName).stream().map(obj -> {
                     if (obj == null) {
                         return null;
                     }