Speed up aggs with sub-aggregations (backport of #69806) (#69940)

This allows many of the optimizations added in #63643 and #68871 to run on aggregations with sub-aggregations. This should:
* Speed up `terms` aggregations on fields with fewer than 1000 values that also have sub-aggregations. Locally I see 2-second searches run in 1.2 seconds.
* Apply that same speedup to `range` and `date_histogram` aggregations, though it feels less impressive because the point range queries are a little slower to get going.
* Massively speed up `filters` aggregations with sub-aggregations that don't have a `parent` aggregation or collect "other" buckets, and also save a ton of memory while collecting them.
elastic · Mar 5, 2021 · b9dc491 · b9dc491
1 parent 785a17c
commit b9dc491
Show file tree

Hide file tree

Showing 17 changed files with 603 additions and 98 deletions.
diff --git a/...in/src/test/java/org/elasticsearch/join/aggregations/ParentToChildrenAggregatorTests.java b/...in/src/test/java/org/elasticsearch/join/aggregations/ParentToChildrenAggregatorTests.java
@@ -26,6 +26,7 @@
 import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.mapper.IdFieldMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.NumberFieldMapper;
 import org.elasticsearch.index.mapper.Uid;
@@ -108,6 +109,7 @@ public void testParentChild() throws IOException {
     }
 
     public void testParentChildAsSubAgg() throws IOException {
+        MappedFieldType kwd = new KeywordFieldMapper.KeywordFieldType("kwd", randomBoolean(), true, null);
         try (Directory directory = newDirectory()) {
             RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
 
@@ -146,7 +148,7 @@ public void testParentChildAsSubAgg() throws IOException {
                     indexSearcher,
                     new MatchAllDocsQuery(),
                     request,
-                    withJoinFields(longField("number"), keywordField("kwd"))
+                    withJoinFields(longField("number"), kwd)
                 );
 
                 StringTerms.Bucket evenBucket = result.getBucketByKey("even");
@@ -190,6 +192,7 @@ private static List<Field> createParentDocument(String id, String kwd) {
         return Arrays.asList(
                 new StringField(IdFieldMapper.NAME, Uid.encodeId(id), Field.Store.NO),
                 new SortedSetDocValuesField("kwd", new BytesRef(kwd)),
+                new Field("kwd", new BytesRef(kwd), KeywordFieldMapper.Defaults.FIELD_TYPE),
                 new StringField("join_field", PARENT_TYPE, Field.Store.NO),
                 createJoinField(PARENT_TYPE, id)
         );

diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml
@@ -820,27 +820,43 @@ setup:
         body: { "size" : 0, "aggs" : { "no_field_terms" : { "terms" : { "size": 1 } } } }
 
 ---
-"string profiler via global ordinals":
+"string profiler via global ordinals filters implementation":
   - skip:
-      version: " - 7.8.99"
-      reason: debug information added in 7.9.0
+      version: " - 7.12.99"
+      reason: filters implementation first supported with sub-aggregators in 7.13.0
+  - do:
+      indices.create:
+          index: test_3
+          body:
+            settings:
+              number_of_shards: 1
+              number_of_replicas: 0
+            mappings:
+              properties:
+                str:
+                   type: keyword
+                boolean:
+                   type: boolean
+                number:
+                  type: long
+
   - do:
       bulk:
-        index: test_1
+        index: test_3
         refresh: true
         body: |
           { "index": {} }
-          { "str": "sheep", "number": 1 }
+          { "boolean": true, "str": "sheep", "number": 1 }
           { "index": {} }
-          { "str": "sheep", "number": 3 }
+          { "boolean": true, "str": "sheep", "number": 3 }
           { "index": {} }
-          { "str": "cow", "number": 1 }
+          { "boolean": true, "str": "cow", "number": 1 }
           { "index": {} }
-          { "str": "pig", "number": 1 }
+          { "boolean": true, "str": "pig", "number": 1 }
 
   - do:
       search:
-        index: test_1
+        index: test_3
         body:
           profile: true
           size: 0
@@ -860,17 +876,73 @@ setup:
   - match: { aggregations.str_terms.buckets.1.max_number.value: 1 }
   - match: { aggregations.str_terms.buckets.2.key: pig }
   - match: { aggregations.str_terms.buckets.2.max_number.value: 1 }
-  - match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator }
+  - match: { profile.shards.0.aggregations.0.type: StringTermsAggregatorFromFilters }
   - match: { profile.shards.0.aggregations.0.description: str_terms }
-  - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 }
-  - match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
-  - match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense }
-  - match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
-  - gt:    { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 }
-  - match: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 }
-  - match: { profile.shards.0.aggregations.0.debug.has_filter: false }
+  - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 0 }
+  - match: { profile.shards.0.aggregations.0.debug.delegate: FiltersAggregator.FilterByFilter }
+  - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.0.query: str:cow }
+  - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.1.query: str:pig }
+  - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.2.query: str:sheep }
   - match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
   - match: { profile.shards.0.aggregations.0.children.0.description: max_number }
+  - match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 4 }
+
+---
+"string profiler via global ordinals native implementation":
+  - skip:
+      version: " - 7.8.99"
+      reason: debug information added in 7.9.0
+  - do:
+      bulk:
+        index: test_1
+        refresh: true
+        body: |
+          { "index": {} }
+          { "boolean": true, "str": "sheep", "number": 1 }
+          { "index": {} }
+          { "boolean": true, "str": "sheep", "number": 3 }
+          { "index": {} }
+          { "boolean": true, "str": "cow", "number": 1 }
+          { "index": {} }
+          { "boolean": true, "str": "pig", "number": 1 }
+
+  - do:
+      search:
+        index: test_1
+        body:
+          profile: true
+          size: 0
+          aggs:
+            bool:    # add a dummy agg "on top" of the child agg just to force it out of filter-by-filter mode
+              terms:
+                field: boolean
+              aggs:
+                str_terms:
+                  terms:
+                    field: str
+                    collect_mode: breadth_first
+                    execution_hint: global_ordinals
+                  aggs:
+                    max_number:
+                      max:
+                        field: number
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.0.key: sheep }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.0.max_number.value: 3 }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.1.key: cow }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.1.max_number.value: 1 }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.2.key: pig }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.2.max_number.value: 1 }
+  - match: { profile.shards.0.aggregations.0.children.0.type: GlobalOrdinalsStringTermsAggregator }
+  - match: { profile.shards.0.aggregations.0.children.0.description: str_terms }
+  - match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 4 }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.deferred_aggregators: [ max_number ] }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.collection_strategy: '/remap( using many bucket ords)?/' } # older versions just say "remap"
+  - match: { profile.shards.0.aggregations.0.children.0.debug.result_strategy: terms }
+  - gt:    { profile.shards.0.aggregations.0.children.0.debug.segments_with_single_valued_ords: 0 }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.segments_with_multi_valued_ords: 0 }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.has_filter: false }
+  - match: { profile.shards.0.aggregations.0.children.0.children.0.type: MaxAggregator }
+  - match: { profile.shards.0.aggregations.0.children.0.children.0.description: max_number }
 
   - do:
       indices.create:
@@ -889,7 +961,7 @@ setup:
         refresh: true
         body: |
           { "index": {} }
-          { "str": ["pig", "sheep"], "number": 100 }
+          { "boolean": true, "str": ["pig", "sheep"], "number": 100 }
 
   - do:
       search:
@@ -898,30 +970,35 @@ setup:
           profile: true
           size: 0
           aggs:
-            str_terms:
+            bool:    # add a dummy agg "on top" of the child agg just to force it out of filter-by-filter mode
               terms:
-                field: str
-                collect_mode: breadth_first
-                execution_hint: global_ordinals
+                field: boolean
               aggs:
-                max_number:
-                  max:
-                    field: number
-  - match: { aggregations.str_terms.buckets.0.key: pig }
-  - match: { aggregations.str_terms.buckets.0.max_number.value: 100 }
-  - match: { aggregations.str_terms.buckets.1.key: sheep }
-  - match: { aggregations.str_terms.buckets.1.max_number.value: 100 }
-  - match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator }
-  - match: { profile.shards.0.aggregations.0.description: str_terms }
-  - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 1 }
-  - match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
-  - match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense }
-  - match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
-  - match: { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 }
-  - gt:    { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 }
-  - match: { profile.shards.0.aggregations.0.debug.has_filter: false }
-  - match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
-  - match: { profile.shards.0.aggregations.0.children.0.description: max_number }
+                str_terms:
+                  terms:
+                    field: str
+                    collect_mode: breadth_first
+                    execution_hint: global_ordinals
+                  aggs:
+                    max_number:
+                      max:
+                        field: number
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.0.key: pig }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.0.max_number.value: 100 }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.1.key: sheep }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.1.max_number.value: 100 }
+  - match: { profile.shards.0.aggregations.0.children.0.type: GlobalOrdinalsStringTermsAggregator }
+  - match: { profile.shards.0.aggregations.0.children.0.description: str_terms }
+  - match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 1 }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.deferred_aggregators: [ max_number ] }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.collection_strategy: '/remap( using many bucket ords)?/' } # older versions just say "remap"
+  - match: { profile.shards.0.aggregations.0.children.0.debug.result_strategy: terms }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.segments_with_single_valued_ords: 0 }
+  - gt:    { profile.shards.0.aggregations.0.children.0.debug.segments_with_multi_valued_ords: 0 }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.has_filter: false }
+  - match: { profile.shards.0.aggregations.0.children.0.children.0.type: MaxAggregator }
+  - match: { profile.shards.0.aggregations.0.children.0.children.0.description: max_number }
+
 
 ---
 "string profiler via map":

diff --git a/...nalClusterTest/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java b/...nalClusterTest/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java
@@ -585,6 +585,12 @@ public void testSingleValuedFieldOrderedByIllegalAgg() throws Exception {
                 } else {
                     throw e;
                 }
+            } else if (e.getCause() instanceof IllegalArgumentException) {
+                // Thrown when the terms agg runs as a filters agg
+                assertThat(
+                    e.getCause().getMessage(),
+                    equalTo("Invalid aggregation order path [inner_terms>avg]. Can't sort by a descendant of a [sterms] aggregation [avg]")
+                );
             } else {
                 throw e;
             }