Skip to content

Commit

Permalink
Speed up aggs with sub-aggregations (backport of #69806) (#69940)
Browse files Browse the repository at this point in the history
This allows many of the optimizations added in #63643 and #68871 to run
on aggregations with sub-aggregations. This should:
* Speed up `terms` aggregations on fields with fewer than 1000 values that
  also have sub-aggregations. Locally I see 2 second searches run in 1.2
  seconds.
* Apply that same speedup to `range` and `date_histogram` aggregations, but
  it feels less impressive because the point range queries are a little
  slower to get up and go.
* Massively speed up `filters` aggregations with sub-aggregations that
  don't have a `parent` aggregation or collect "other" buckets. Also
  save a ton of memory while collecting them.
  • Loading branch information
nik9000 authored Mar 5, 2021
1 parent 785a17c commit b9dc491
Show file tree
Hide file tree
Showing 17 changed files with 603 additions and 98 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.Uid;
Expand Down Expand Up @@ -108,6 +109,7 @@ public void testParentChild() throws IOException {
}

public void testParentChildAsSubAgg() throws IOException {
MappedFieldType kwd = new KeywordFieldMapper.KeywordFieldType("kwd", randomBoolean(), true, null);
try (Directory directory = newDirectory()) {
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);

Expand Down Expand Up @@ -146,7 +148,7 @@ public void testParentChildAsSubAgg() throws IOException {
indexSearcher,
new MatchAllDocsQuery(),
request,
withJoinFields(longField("number"), keywordField("kwd"))
withJoinFields(longField("number"), kwd)
);

StringTerms.Bucket evenBucket = result.getBucketByKey("even");
Expand Down Expand Up @@ -190,6 +192,7 @@ private static List<Field> createParentDocument(String id, String kwd) {
return Arrays.asList(
new StringField(IdFieldMapper.NAME, Uid.encodeId(id), Field.Store.NO),
new SortedSetDocValuesField("kwd", new BytesRef(kwd)),
new Field("kwd", new BytesRef(kwd), KeywordFieldMapper.Defaults.FIELD_TYPE),
new StringField("join_field", PARENT_TYPE, Field.Store.NO),
createJoinField(PARENT_TYPE, id)
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -820,27 +820,43 @@ setup:
body: { "size" : 0, "aggs" : { "no_field_terms" : { "terms" : { "size": 1 } } } }

---
"string profiler via global ordinals":
"string profiler via global ordinals filters implementation":
- skip:
version: " - 7.8.99"
reason: debug information added in 7.9.0
version: " - 7.12.99"
reason: filters implementation first supported with sub-aggregators in 7.13.0
- do:
indices.create:
index: test_3
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
properties:
str:
type: keyword
boolean:
type: boolean
number:
type: long

- do:
bulk:
index: test_1
index: test_3
refresh: true
body: |
{ "index": {} }
{ "str": "sheep", "number": 1 }
{ "boolean": true, "str": "sheep", "number": 1 }
{ "index": {} }
{ "str": "sheep", "number": 3 }
{ "boolean": true, "str": "sheep", "number": 3 }
{ "index": {} }
{ "str": "cow", "number": 1 }
{ "boolean": true, "str": "cow", "number": 1 }
{ "index": {} }
{ "str": "pig", "number": 1 }
{ "boolean": true, "str": "pig", "number": 1 }
- do:
search:
index: test_1
index: test_3
body:
profile: true
size: 0
Expand All @@ -860,17 +876,73 @@ setup:
- match: { aggregations.str_terms.buckets.1.max_number.value: 1 }
- match: { aggregations.str_terms.buckets.2.key: pig }
- match: { aggregations.str_terms.buckets.2.max_number.value: 1 }
- match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator }
- match: { profile.shards.0.aggregations.0.type: StringTermsAggregatorFromFilters }
- match: { profile.shards.0.aggregations.0.description: str_terms }
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 }
- match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
- match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense }
- match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
- gt: { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 }
- match: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 }
- match: { profile.shards.0.aggregations.0.debug.has_filter: false }
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 0 }
- match: { profile.shards.0.aggregations.0.debug.delegate: FiltersAggregator.FilterByFilter }
- match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.0.query: str:cow }
- match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.1.query: str:pig }
- match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.2.query: str:sheep }
- match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
- match: { profile.shards.0.aggregations.0.children.0.description: max_number }
- match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 4 }

---
"string profiler via global ordinals native implementation":
- skip:
version: " - 7.8.99"
reason: debug information added in 7.9.0
- do:
bulk:
index: test_1
refresh: true
body: |
{ "index": {} }
{ "boolean": true, "str": "sheep", "number": 1 }
{ "index": {} }
{ "boolean": true, "str": "sheep", "number": 3 }
{ "index": {} }
{ "boolean": true, "str": "cow", "number": 1 }
{ "index": {} }
{ "boolean": true, "str": "pig", "number": 1 }
- do:
search:
index: test_1
body:
profile: true
size: 0
aggs:
bool: # add a dummy agg "on top" of the child agg just to force it out of filter-by-filter mode
terms:
field: boolean
aggs:
str_terms:
terms:
field: str
collect_mode: breadth_first
execution_hint: global_ordinals
aggs:
max_number:
max:
field: number
- match: { aggregations.bool.buckets.0.str_terms.buckets.0.key: sheep }
- match: { aggregations.bool.buckets.0.str_terms.buckets.0.max_number.value: 3 }
- match: { aggregations.bool.buckets.0.str_terms.buckets.1.key: cow }
- match: { aggregations.bool.buckets.0.str_terms.buckets.1.max_number.value: 1 }
- match: { aggregations.bool.buckets.0.str_terms.buckets.2.key: pig }
- match: { aggregations.bool.buckets.0.str_terms.buckets.2.max_number.value: 1 }
- match: { profile.shards.0.aggregations.0.children.0.type: GlobalOrdinalsStringTermsAggregator }
- match: { profile.shards.0.aggregations.0.children.0.description: str_terms }
- match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 4 }
- match: { profile.shards.0.aggregations.0.children.0.debug.deferred_aggregators: [ max_number ] }
- match: { profile.shards.0.aggregations.0.children.0.debug.collection_strategy: '/remap( using many bucket ords)?/' } # older versions just say "remap"
- match: { profile.shards.0.aggregations.0.children.0.debug.result_strategy: terms }
- gt: { profile.shards.0.aggregations.0.children.0.debug.segments_with_single_valued_ords: 0 }
- match: { profile.shards.0.aggregations.0.children.0.debug.segments_with_multi_valued_ords: 0 }
- match: { profile.shards.0.aggregations.0.children.0.debug.has_filter: false }
- match: { profile.shards.0.aggregations.0.children.0.children.0.type: MaxAggregator }
- match: { profile.shards.0.aggregations.0.children.0.children.0.description: max_number }

- do:
indices.create:
Expand All @@ -889,7 +961,7 @@ setup:
refresh: true
body: |
{ "index": {} }
{ "str": ["pig", "sheep"], "number": 100 }
{ "boolean": true, "str": ["pig", "sheep"], "number": 100 }
- do:
search:
Expand All @@ -898,30 +970,35 @@ setup:
profile: true
size: 0
aggs:
str_terms:
bool: # add a dummy agg "on top" of the child agg just to force it out of filter-by-filter mode
terms:
field: str
collect_mode: breadth_first
execution_hint: global_ordinals
field: boolean
aggs:
max_number:
max:
field: number
- match: { aggregations.str_terms.buckets.0.key: pig }
- match: { aggregations.str_terms.buckets.0.max_number.value: 100 }
- match: { aggregations.str_terms.buckets.1.key: sheep }
- match: { aggregations.str_terms.buckets.1.max_number.value: 100 }
- match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator }
- match: { profile.shards.0.aggregations.0.description: str_terms }
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 1 }
- match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
- match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense }
- match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
- match: { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 }
- gt: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 }
- match: { profile.shards.0.aggregations.0.debug.has_filter: false }
- match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
- match: { profile.shards.0.aggregations.0.children.0.description: max_number }
str_terms:
terms:
field: str
collect_mode: breadth_first
execution_hint: global_ordinals
aggs:
max_number:
max:
field: number
- match: { aggregations.bool.buckets.0.str_terms.buckets.0.key: pig }
- match: { aggregations.bool.buckets.0.str_terms.buckets.0.max_number.value: 100 }
- match: { aggregations.bool.buckets.0.str_terms.buckets.1.key: sheep }
- match: { aggregations.bool.buckets.0.str_terms.buckets.1.max_number.value: 100 }
- match: { profile.shards.0.aggregations.0.children.0.type: GlobalOrdinalsStringTermsAggregator }
- match: { profile.shards.0.aggregations.0.children.0.description: str_terms }
- match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 1 }
- match: { profile.shards.0.aggregations.0.children.0.debug.deferred_aggregators: [ max_number ] }
- match: { profile.shards.0.aggregations.0.children.0.debug.collection_strategy: '/remap( using many bucket ords)?/' } # older versions just say "remap"
- match: { profile.shards.0.aggregations.0.children.0.debug.result_strategy: terms }
- match: { profile.shards.0.aggregations.0.children.0.debug.segments_with_single_valued_ords: 0 }
- gt: { profile.shards.0.aggregations.0.children.0.debug.segments_with_multi_valued_ords: 0 }
- match: { profile.shards.0.aggregations.0.children.0.debug.has_filter: false }
- match: { profile.shards.0.aggregations.0.children.0.children.0.type: MaxAggregator }
- match: { profile.shards.0.aggregations.0.children.0.children.0.description: max_number }


---
"string profiler via map":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,12 @@ public void testSingleValuedFieldOrderedByIllegalAgg() throws Exception {
} else {
throw e;
}
} else if (e.getCause() instanceof IllegalArgumentException) {
// Thrown when the terms agg runs as a filters agg
assertThat(
e.getCause().getMessage(),
equalTo("Invalid aggregation order path [inner_terms>avg]. Can't sort by a descendant of a [sterms] aggregation [avg]")
);
} else {
throw e;
}
Expand Down
Loading

0 comments on commit b9dc491

Please sign in to comment.