From b9dc491513c5e7739fccde04dab3119ff3e0ce14 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 5 Mar 2021 09:13:26 -0500 Subject: [PATCH] Speed up aggs with sub-aggregations (backport of #69806) (#69940) This allows many of the optimizations added in #63643 and #68871 to run on aggregations with sub-aggregations. This should: * Speed up `terms` aggregations on fields with less than 1000 values that also have sub-aggregations. Locally I see 2 second searches run in 1.2 seconds. * Applies that same speedup to `range` and `date_histogram` aggregations but it feels less impressive because the point range queries are a little slower to get up and go. * Massively speed up `filters` aggregations with sub-aggregations that don't have a `parent` aggregation or collect "other" buckets. Also save a ton of memory while collecting them. --- .../ParentToChildrenAggregatorTests.java | 5 +- .../test/search.aggregation/20_terms.yml | 157 ++++++++--- .../bucket/terms/StringTermsIT.java | 6 + .../bucket/filter/FiltersAggregator.java | 118 ++++++++- .../bucket/filter/QueryToFilterAdapter.java | 22 +- .../bucket/range/RangeAggregator.java | 12 +- .../StringTermsAggregatorFromFilters.java | 13 +- .../bucket/filter/FiltersAggregatorTests.java | 247 ++++++++++++++++++ .../geogrid/GeoGridAggregatorTestCase.java | 3 + .../AutoDateHistogramAggregatorTests.java | 4 + .../DateHistogramAggregatorTestCase.java | 11 + .../terms/RareTermsAggregatorTests.java | 4 + .../terms/SignificantTextAggregatorTests.java | 2 + .../bucket/terms/TermsAggregatorTests.java | 91 ++++--- .../RollupResponseTranslationTests.java | 1 + .../aggregations/GeoLineAggregatorTests.java | 2 +- x-pack/qa/runtime-fields/build.gradle | 3 +- 17 files changed, 603 insertions(+), 98 deletions(-) diff --git a/modules/parent-join/src/test/java/org/elasticsearch/join/aggregations/ParentToChildrenAggregatorTests.java b/modules/parent-join/src/test/java/org/elasticsearch/join/aggregations/ParentToChildrenAggregatorTests.java index 3ea226d7f80c5..9b9852bfa0714 100644 --- a/modules/parent-join/src/test/java/org/elasticsearch/join/aggregations/ParentToChildrenAggregatorTests.java +++ b/modules/parent-join/src/test/java/org/elasticsearch/join/aggregations/ParentToChildrenAggregatorTests.java @@ -26,6 +26,7 @@ import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader; import org.elasticsearch.index.Index; import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.index.mapper.Uid; @@ -108,6 +109,7 @@ public void testParentChild() throws IOException { } public void testParentChildAsSubAgg() throws IOException { + MappedFieldType kwd = new KeywordFieldMapper.KeywordFieldType("kwd", randomBoolean(), true, null); try (Directory directory = newDirectory()) { RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); @@ -146,7 +148,7 @@ public void testParentChildAsSubAgg() throws IOException { indexSearcher, new MatchAllDocsQuery(), request, - withJoinFields(longField("number"), keywordField("kwd")) + withJoinFields(longField("number"), kwd) ); StringTerms.Bucket evenBucket = result.getBucketByKey("even"); @@ -190,6 +192,7 @@ private static List createParentDocument(String id, String kwd) { return Arrays.asList( new StringField(IdFieldMapper.NAME, Uid.encodeId(id), Field.Store.NO), new SortedSetDocValuesField("kwd", new BytesRef(kwd)), + new 
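+            // Index the keyword as well as adding doc values so the Lucene index
+            // stays consistent with the randomly-searchable field type above; when
+            // the field is searchable the terms agg can take the new
+            // filter-by-filter path, which rewrites each bucket into a term query.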
Field("kwd", new BytesRef(kwd), KeywordFieldMapper.Defaults.FIELD_TYPE), new StringField("join_field", PARENT_TYPE, Field.Store.NO), createJoinField(PARENT_TYPE, id) ); diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml index 7bbdc40a88373..9db9aa50fd705 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml @@ -820,27 +820,43 @@ setup: body: { "size" : 0, "aggs" : { "no_field_terms" : { "terms" : { "size": 1 } } } } --- -"string profiler via global ordinals": +"string profiler via global ordinals filters implementation": - skip: - version: " - 7.8.99" - reason: debug information added in 7.9.0 + version: " - 7.12.99" + reason: filters implementation first supported with sub-aggregators in 7.13.0 + - do: + indices.create: + index: test_3 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + str: + type: keyword + boolean: + type: boolean + number: + type: long + - do: bulk: - index: test_1 + index: test_3 refresh: true body: | { "index": {} } - { "str": "sheep", "number": 1 } + { "boolean": true, "str": "sheep", "number": 1 } { "index": {} } - { "str": "sheep", "number": 3 } + { "boolean": true, "str": "sheep", "number": 3 } { "index": {} } - { "str": "cow", "number": 1 } + { "boolean": true, "str": "cow", "number": 1 } { "index": {} } - { "str": "pig", "number": 1 } + { "boolean": true, "str": "pig", "number": 1 } - do: search: - index: test_1 + index: test_3 body: profile: true size: 0 @@ -860,17 +876,73 @@ setup: - match: { aggregations.str_terms.buckets.1.max_number.value: 1 } - match: { aggregations.str_terms.buckets.2.key: pig } - match: { aggregations.str_terms.buckets.2.max_number.value: 1 } - - match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator } + - match: { profile.shards.0.aggregations.0.type: StringTermsAggregatorFromFilters } - match: { profile.shards.0.aggregations.0.description: str_terms } - - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 } - - match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] } - - match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense } - - match: { profile.shards.0.aggregations.0.debug.result_strategy: terms } - - gt: { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 } - - match: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 } - - match: { profile.shards.0.aggregations.0.debug.has_filter: false } + - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 0 } + - match: { profile.shards.0.aggregations.0.debug.delegate: FiltersAggregator.FilterByFilter } + - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.0.query: str:cow } + - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.1.query: str:pig } + - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.2.query: str:sheep } - match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator } - match: { profile.shards.0.aggregations.0.children.0.description: max_number } + - match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 4 } + +--- +"string profiler via global ordinals native implementation": + - skip: + version: " - 7.8.99" + reason: debug 
information added in 7.9.0 + - do: + bulk: + index: test_1 + refresh: true + body: | + { "index": {} } + { "boolean": true, "str": "sheep", "number": 1 } + { "index": {} } + { "boolean": true, "str": "sheep", "number": 3 } + { "index": {} } + { "boolean": true, "str": "cow", "number": 1 } + { "index": {} } + { "boolean": true, "str": "pig", "number": 1 } + + - do: + search: + index: test_1 + body: + profile: true + size: 0 + aggs: + bool: # add a dummy agg "on top" of the child agg just to force it out of filter-by-filter mode + terms: + field: boolean + aggs: + str_terms: + terms: + field: str + collect_mode: breadth_first + execution_hint: global_ordinals + aggs: + max_number: + max: + field: number + - match: { aggregations.bool.buckets.0.str_terms.buckets.0.key: sheep } + - match: { aggregations.bool.buckets.0.str_terms.buckets.0.max_number.value: 3 } + - match: { aggregations.bool.buckets.0.str_terms.buckets.1.key: cow } + - match: { aggregations.bool.buckets.0.str_terms.buckets.1.max_number.value: 1 } + - match: { aggregations.bool.buckets.0.str_terms.buckets.2.key: pig } + - match: { aggregations.bool.buckets.0.str_terms.buckets.2.max_number.value: 1 } + - match: { profile.shards.0.aggregations.0.children.0.type: GlobalOrdinalsStringTermsAggregator } + - match: { profile.shards.0.aggregations.0.children.0.description: str_terms } + - match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 4 } + - match: { profile.shards.0.aggregations.0.children.0.debug.deferred_aggregators: [ max_number ] } + - match: { profile.shards.0.aggregations.0.children.0.debug.collection_strategy: '/remap( using many bucket ords)?/' } # older versions just say "remap" + - match: { profile.shards.0.aggregations.0.children.0.debug.result_strategy: terms } + - gt: { profile.shards.0.aggregations.0.children.0.debug.segments_with_single_valued_ords: 0 } + - match: { profile.shards.0.aggregations.0.children.0.debug.segments_with_multi_valued_ords: 0 } + - match: { profile.shards.0.aggregations.0.children.0.debug.has_filter: false } + - match: { profile.shards.0.aggregations.0.children.0.children.0.type: MaxAggregator } + - match: { profile.shards.0.aggregations.0.children.0.children.0.description: max_number } - do: indices.create: @@ -889,7 +961,7 @@ setup: refresh: true body: | { "index": {} } - { "str": ["pig", "sheep"], "number": 100 } + { "boolean": true, "str": ["pig", "sheep"], "number": 100 } - do: search: @@ -898,30 +970,35 @@ setup: profile: true size: 0 aggs: - str_terms: + bool: # add a dummy agg "on top" of the child agg just to force it out of filter-by-filter mode terms: - field: str - collect_mode: breadth_first - execution_hint: global_ordinals + field: boolean aggs: - max_number: - max: - field: number - - match: { aggregations.str_terms.buckets.0.key: pig } - - match: { aggregations.str_terms.buckets.0.max_number.value: 100 } - - match: { aggregations.str_terms.buckets.1.key: sheep } - - match: { aggregations.str_terms.buckets.1.max_number.value: 100 } - - match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator } - - match: { profile.shards.0.aggregations.0.description: str_terms } - - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 1 } - - match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] } - - match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense } - - match: { profile.shards.0.aggregations.0.debug.result_strategy: terms } - - match: { 
profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 } - - gt: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 } - - match: { profile.shards.0.aggregations.0.debug.has_filter: false } - - match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator } - - match: { profile.shards.0.aggregations.0.children.0.description: max_number } + str_terms: + terms: + field: str + collect_mode: breadth_first + execution_hint: global_ordinals + aggs: + max_number: + max: + field: number + - match: { aggregations.bool.buckets.0.str_terms.buckets.0.key: pig } + - match: { aggregations.bool.buckets.0.str_terms.buckets.0.max_number.value: 100 } + - match: { aggregations.bool.buckets.0.str_terms.buckets.1.key: sheep } + - match: { aggregations.bool.buckets.0.str_terms.buckets.1.max_number.value: 100 } + - match: { profile.shards.0.aggregations.0.children.0.type: GlobalOrdinalsStringTermsAggregator } + - match: { profile.shards.0.aggregations.0.children.0.description: str_terms } + - match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 1 } + - match: { profile.shards.0.aggregations.0.children.0.debug.deferred_aggregators: [ max_number ] } + - match: { profile.shards.0.aggregations.0.children.0.debug.collection_strategy: '/remap( using many bucket ords)?/' } # older versions just say "remap" + - match: { profile.shards.0.aggregations.0.children.0.debug.result_strategy: terms } + - match: { profile.shards.0.aggregations.0.children.0.debug.segments_with_single_valued_ords: 0 } + - gt: { profile.shards.0.aggregations.0.children.0.debug.segments_with_multi_valued_ords: 0 } + - match: { profile.shards.0.aggregations.0.children.0.debug.has_filter: false } + - match: { profile.shards.0.aggregations.0.children.0.children.0.type: MaxAggregator } + - match: { profile.shards.0.aggregations.0.children.0.children.0.description: max_number } + --- "string profiler via map": diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java index ae505f19fbcf8..17ea1b4859d9a 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java @@ -585,6 +585,12 @@ public void testSingleValuedFieldOrderedByIllegalAgg() throws Exception { } else { throw e; } + } else if (e.getCause() instanceof IllegalArgumentException) { + // Thrown when the terms agg runs as a filters agg + assertThat( + e.getCause().getMessage(), + equalTo("Invalid aggregation order path [inner_terms>avg]. 
Can't sort by a descendant of a [sterms] aggregation [avg]") + ); } else { throw e; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index a72433d5affb3..1c9e591aa40dc 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -135,8 +135,28 @@ public static FiltersAggregator build( CardinalityUpperBound cardinality, Map metadata ) throws IOException { - if (canUseFilterByFilter(parent, factories, otherBucketKey)) { - return buildFilterByFilter(name, factories, filters, keyed, otherBucketKey, context, parent, cardinality, metadata); + if (canUseFilterByFilter(parent, otherBucketKey)) { + FilterByFilter filterByFilter = buildFilterByFilter( + name, + factories, + filters, + keyed, + otherBucketKey, + context, + parent, + cardinality, + metadata + ); + if (false == filterByFilter.scoreMode().needsScores()) { + /* + * Filter by filter won't produce the correct results if the + * sub-aggregators need scores because we're not careful with how + * we merge filters. Right now we have to build the whole + * aggregation in order to know if it'll need scores or not. + */ + // TODO make filter by filter produce the correct result or skip this in canUseFilterbyFilter + return filterByFilter; + } } return new FiltersAggregator.Compatible( name, @@ -155,8 +175,8 @@ public static FiltersAggregator build( * Can this aggregation be executed using the {@link FilterByFilter}? That * aggregator is much faster than the fallback {@link Compatible} aggregator. */ - public static boolean canUseFilterByFilter(Aggregator parent, AggregatorFactories factories, String otherBucketKey) { - return parent == null && factories.countAggregators() == 0 && otherBucketKey == null; + public static boolean canUseFilterByFilter(Aggregator parent, String otherBucketKey) { + return parent == null && otherBucketKey == null; } /** @@ -165,6 +185,10 @@ public static boolean canUseFilterByFilter(Aggregator parent, AggregatorFactorie * collect filter by filter if there isn't a parent, there aren't children, * and we don't collect "other" buckets. Collecting {@link FilterByFilter} * is generally going to be much faster than the {@link Compatible} aggregator. + *

+     * Important: This doesn't properly handle sub-aggregators
+     * that need scores so callers must check {@code #scoreMode()} and not use
+     * this collector if it needs scores.
      */
     public static FilterByFilter buildFilterByFilter(
         String name,
@@ -177,7 +201,7 @@
         CardinalityUpperBound cardinality,
         Map<String, Object> metadata
     ) throws IOException {
-        if (false == canUseFilterByFilter(parent, factories, otherBucketKey)) {
+        if (false == canUseFilterByFilter(parent, otherBucketKey)) {
             throw new IllegalStateException("Can't execute filter-by-filter");
         }
         List<QueryToFilterAdapter<?>> filtersWithTopLevel = new ArrayList<>(filters.size());
@@ -186,6 +210,7 @@
         }
         return new FiltersAggregator.FilterByFilter(
             name,
+            factories,
             filtersWithTopLevel,
             keyed,
             context,
@@ -274,9 +299,12 @@ public static class FilterByFilter extends FiltersAggregator {
         * field.
         */
        private int segmentsWithDocCountField;
+       private int segmentsCollected;
+       private int segmentsCounted;

        private FilterByFilter(
            String name,
+           AggregatorFactories factories,
            List<QueryToFilterAdapter<?>> filters,
            boolean keyed,
            AggregationContext context,
@@ -284,7 +312,7 @@
            CardinalityUpperBound cardinality,
            Map<String, Object> metadata
        ) throws IOException {
-           super(name, AggregatorFactories.EMPTY, filters, keyed, null, context, parent, cardinality, metadata);
+           super(name, factories, filters, keyed, null, context, parent, cardinality, metadata);
            this.profiling = context.profiling();
        }
@@ -294,6 +322,8 @@
         */
        @SuppressWarnings("resource") // We're not in charge of anything Closeable
        public long estimateCost(long maxCost) throws IOException {
+           assert scoreMode().needsScores() == false;
+           // TODO if we have children we should use a different cost estimate
            this.maxCost = maxCost;
            if (estimatedCost != -1) {
                return estimatedCost;
@@ -303,7 +333,9 @@
            for (LeafReaderContext ctx : searcher().getIndexReader().leaves()) {
                CheckedSupplier<Boolean, IOException> canUseMetadata = canUseMetadata(ctx);
                for (QueryToFilterAdapter<?> filter : filters()) {
-                   estimatedCost += filter.estimateCountCost(ctx, canUseMetadata);
+                   estimatedCost += subAggregators().length > 0
+                       ? filter.estimateCollectCost(ctx)
+                       : filter.estimateCountCost(ctx, canUseMetadata);
                    if (estimatedCost < 0) {
                        // We've overflowed so we cap out and stop counting.
                        estimatedCost = Long.MAX_VALUE;
@@ -344,21 +376,87 @@
         */
        @Override
        protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
+           assert scoreMode().needsScores() == false;
+           if (filters().size() == 0) {
+               return LeafBucketCollector.NO_OP_COLLECTOR;
+           }
            Bits live = ctx.reader().getLiveDocs();
-           Counter counter = new Counter(docCountProvider);
            if (false == docCountProvider.alwaysOne()) {
                segmentsWithDocCountField++;
            }
-           for (int filterOrd = 0; filterOrd < filters().size(); filterOrd++) {
-               incrementBucketDocCount(filterOrd, filters().get(filterOrd).count(ctx, counter, live));
+           if (subAggregators.length == 0) {
+               // TODO we'd be better off if we could do sub.isNoop() or something.
+               /*
+                * Without sub.isNoop we always end up in the `collectXXX` modes even if
+                * the sub-aggregators opt out of traditional collection.
+                */
+               collectCount(ctx, live);
+           } else {
+               collectSubs(ctx, live, sub);
            }
            // Throwing this exception is how we communicate to the collection mechanism that we don't need the segment.
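+           // Lucene's IndexSearcher catches CollectionTerminatedException for each
+           // leaf and simply moves on to the next segment, so throwing here is
+           // safe: we've already counted or replayed every matching doc above.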
throw new CollectionTerminatedException(); } + /** + * Gather a count of the number of documents that match each filter + * without sending any documents to a sub-aggregator. This yields + * the correct response when there aren't any sub-aggregators or they + * all opt out of needing any sort of collection. + */ + private void collectCount(LeafReaderContext ctx, Bits live) throws IOException { + Counter counter = new Counter(docCountProvider); + for (int filterOrd = 0; filterOrd < filters().size(); filterOrd++) { + incrementBucketDocCount(filterOrd, filters().get(filterOrd).count(ctx, counter, live)); + } + } + + /** + * Collect all documents that match all filters and send them to + * the sub-aggregators. This method is only required when there are + * sub-aggregators that haven't opted out of being collected. + *

+     * This collects each filter one at a time, resetting the
+     * sub-aggregators between each filter as though they were hitting
+     * a fresh segment.
+     *

+     * It's very tempting to try and collect the
+     * filters into blocks of matches and then replay the whole block
+     * in ascending order without the resetting. That'd probably
+     * work better if the disk was very, very slow and we didn't have
+     * any kind of disk caching. But with disk caching it's about twice
+     * as fast to collect each filter one by one like this. And it uses
+     * less memory because there isn't a need to buffer a block of matches.
+     * And it's a hell of a lot less code.
+     */
+    private void collectSubs(LeafReaderContext ctx, Bits live, LeafBucketCollector sub) throws IOException {
+        class MatchCollector implements LeafCollector {
+            LeafBucketCollector subCollector = sub;
+            int filterOrd;
+
+            @Override
+            public void collect(int docId) throws IOException {
+                collectBucket(subCollector, docId, filterOrd);
+            }
+
+            @Override
+            public void setScorer(Scorable scorer) throws IOException {
+            }
+        }
+        MatchCollector collector = new MatchCollector();
+        filters().get(0).collect(ctx, collector, live);
+        for (int filterOrd = 1; filterOrd < filters().size(); filterOrd++) {
+            collector.subCollector = collectableSubAggregators.getLeafCollector(ctx);
+            collector.filterOrd = filterOrd;
+            filters().get(filterOrd).collect(ctx, collector, live);
+        }
+    }
+
    @Override
    public void collectDebugInfo(BiConsumer<String, Object> add) {
        super.collectDebugInfo(add);
+       add.accept("segments_counted", segmentsCounted);
+       add.accept("segments_collected", segmentsCollected);
        add.accept("segments_with_deleted_docs", segmentsWithDeletedDocs);
        add.accept("segments_with_doc_count_field", segmentsWithDocCountField);
        if (estimatedCost != -1) {
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java
index f930124469bcd..097572aefe14b 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java
@@ -17,6 +17,7 @@
 import org.apache.lucene.search.IndexOrDocValuesQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
+import org.apache.lucene.search.LeafCollector;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.PointRangeQuery;
@@ -216,12 +217,31 @@ long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live)
     * Estimate the cost of calling {@code #count} in a leaf.
     */
    long estimateCountCost(LeafReaderContext ctx, CheckedSupplier<Boolean, IOException> canUseMetadata) throws IOException {
+       return estimateCollectCost(ctx);
+   }
+
+   /**
+    * Collect all documents that match this filter in this leaf.
+    */
+   void collect(LeafReaderContext ctx, LeafCollector collector, Bits live) throws IOException {
+       BulkScorer scorer = bulkScorer(ctx, () -> {});
+       if (scorer == null) {
+           // No hits in this segment.
+           return;
+       }
+       scorer.score(collector, live);
+   }
+
+   /**
+    * Estimate the cost of calling {@code #collect} in a leaf.
+ */ + long estimateCollectCost(LeafReaderContext ctx) throws IOException { BulkScorer scorer = bulkScorer(ctx, () -> scorersPreparedWhileEstimatingCost++); if (scorer == null) { // There aren't any matches for this filter in this leaf return 0; } - return scorer.cost(); // TODO in another PR (please) change this to ScorerSupplier.cost + return scorer.cost(); // TODO change this to ScorerSupplier.cost } /** diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index dd9536f1dc105..c1b74e9ce3e86 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -350,7 +350,7 @@ public static FromFilters adaptIntoFiltersOrNull( // We don't generate sensible Queries for nanoseconds. return null; } - if (false == FiltersAggregator.canUseFilterByFilter(parent, factories, null)) { + if (false == FiltersAggregator.canUseFilterByFilter(parent, null)) { return null; } boolean wholeNumbersOnly = false == ((ValuesSource.Numeric) valuesSourceConfig.getValuesSource()).isFloatingPoint(); @@ -404,6 +404,16 @@ public static FromFilters adaptIntoFiltersOrNull( rangeFactory, averageDocsPerRange ); + if (fromFilters.scoreMode().needsScores()) { + /* + * Filter by filter won't produce the correct results if the + * sub-aggregators need scores because we're not careful with how + * we merge filters. Right now we have to build the whole + * aggregation in order to know if it'll need scores or not. + */ + // TODO make filter by filter produce the correct result or skip this in canUseFilterbyFilter + return null; + } return fromFilters; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java index 8a705a47c5036..3bf7c2807a703 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java @@ -68,7 +68,7 @@ static StringTermsAggregatorFromFilters adaptIntoFiltersOrNull( if (false == valuesSourceConfig.alignesWithSearchIndex()) { return null; } - if (false == FiltersAggregator.canUseFilterByFilter(parent, factories, null)) { + if (false == FiltersAggregator.canUseFilterByFilter(parent, null)) { return null; } List> filters = new ArrayList<>(); @@ -112,7 +112,16 @@ static StringTermsAggregatorFromFilters adaptIntoFiltersOrNull( bucketCountThresholds, terms ); - return adapted.delegate() == null ? null : adapted; + if (adapted.scoreMode().needsScores()) { /* + * Filter by filter won't produce the correct results if the + * sub-aggregators need scores because we're not careful with how + * we merge filters. Right now we have to build the whole + * aggregation in order to know if it'll need scores or not. 
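+     * (FiltersAggregator#build and RangeAggregator#adaptIntoFiltersOrNull
+     * apply the same guard and likewise fall back to the standard
+     * implementation.)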
+ */ + // TODO make filter by filter produce the correct result or skip this in canUseFilterbyFilter + return null; + } + return adapted; } private final boolean showTermDocCountError; diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java index c96d3ea270075..a2e78dd2c8889 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java @@ -13,6 +13,7 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.CollectionTerminatedException; @@ -27,6 +28,7 @@ import org.elasticsearch.common.CheckedConsumer; import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader; import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.index.cache.bitset.BitsetFilterCache; import org.elasticsearch.index.mapper.CustomTermFreqField; import org.elasticsearch.index.mapper.DateFieldMapper; @@ -35,6 +37,8 @@ import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType; import org.elasticsearch.index.mapper.ObjectMapper; import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; @@ -50,6 +54,10 @@ import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator.KeyedFilter; import org.elasticsearch.search.aggregations.bucket.nested.NestedAggregatorTests; +import org.elasticsearch.search.aggregations.metrics.InternalMax; +import org.elasticsearch.search.aggregations.metrics.InternalSum; +import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.SumAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator.PipelineTree; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; @@ -57,17 +65,23 @@ import org.junit.Before; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; +import static org.hamcrest.Matchers.both; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasEntry; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.lessThan; import static org.mockito.Mockito.mock; public class FiltersAggregatorTests extends AggregatorTestCase { @@ -102,6 +116,24 @@ 
public void testEmpty() throws Exception { directory.close(); } + public void testNoFilters() throws IOException { + testCase(new FiltersAggregationBuilder("test", new KeyedFilter[0]), new MatchAllDocsQuery(), iw -> { + iw.addDocument(org.elasticsearch.common.collect.List.of()); + }, (InternalFilters result) -> { + assertThat(result.getBuckets(), hasSize(0)); + }); + } + + public void testNoFiltersWithSubAggs() throws IOException { + testCase( + new FiltersAggregationBuilder("test", new KeyedFilter[0]).subAggregation(new MaxAggregationBuilder("m").field("i")), + new MatchAllDocsQuery(), + iw -> { iw.addDocument(org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("i", 1))); }, + (InternalFilters result) -> { assertThat(result.getBuckets(), hasSize(0)); }, + new NumberFieldMapper.NumberFieldType("m", NumberFieldMapper.NumberType.INTEGER) + ); + } + public void testKeyedFilter() throws Exception { Directory directory = newDirectory(); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); @@ -543,6 +575,221 @@ public void testTermQuery() throws IOException { }, ft); } + public void testSubAggs() throws IOException { + MappedFieldType dateFt = new DateFieldMapper.DateFieldType( + "test", + true, + false, + false, + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + Resolution.MILLISECONDS, + null, + null + ); + MappedFieldType intFt = new NumberFieldMapper.NumberFieldType("int", NumberType.INTEGER); + AggregationBuilder builder = new FiltersAggregationBuilder( + "test", + new KeyedFilter("q1", new RangeQueryBuilder("test").from("2010-01-01").to("2010-03-01").includeUpper(false)), + new KeyedFilter("q2", new RangeQueryBuilder("test").from("2020-01-01").to("2020-03-01").includeUpper(false)) + ).subAggregation(new MaxAggregationBuilder("m").field("int")).subAggregation(new SumAggregationBuilder("s").field("int")); + List> docs = new ArrayList<>(); + docs.add( + org.elasticsearch.common.collect.List.of( + new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2010-01-02")), + new SortedNumericDocValuesField("int", 100) + ) + ); + docs.add( + org.elasticsearch.common.collect.List.of( + new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-02")), + new SortedNumericDocValuesField("int", 5) + ) + ); + docs.add( + org.elasticsearch.common.collect.List.of( + new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-03")), + new SortedNumericDocValuesField("int", 10) + ) + ); + /* + * Shuffle the docs so we collect them in a random order which causes + * bad implementations of filter-by-filter aggregation to fail with + * assertion errors while executing. 
+ */ + Collections.shuffle(docs, random()); + testCase(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), result -> { + InternalFilters filters = (InternalFilters) result; + assertThat(filters.getBuckets(), hasSize(2)); + + InternalFilters.InternalBucket b = filters.getBucketByKey("q1"); + assertThat(b.getDocCount(), equalTo(1L)); + InternalMax max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(100.0)); + InternalSum sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(100.0)); + + b = filters.getBucketByKey("q2"); + assertThat(b.getDocCount(), equalTo(2L)); + max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(10.0)); + sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(15.0)); + }, dateFt, intFt); + withAggregator(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), (searcher, aggregator) -> { + assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class)); + FiltersAggregator.FilterByFilter filterByFilter = (FiltersAggregator.FilterByFilter) aggregator; + int maxDoc = searcher.getIndexReader().maxDoc(); + assertThat(filterByFilter.estimateCost(maxDoc), equalTo(3L)); + Map debug = new HashMap<>(); + filterByFilter.filters().get(0).collectDebugInfo(debug::put); + assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); + debug = new HashMap<>(); + filterByFilter.filters().get(1).collectDebugInfo(debug::put); + assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); + }, dateFt, intFt); + } + + public void testSubAggsManyDocs() throws IOException { + MappedFieldType dateFt = new DateFieldMapper.DateFieldType( + "test", + true, + false, + false, + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + Resolution.MILLISECONDS, + null, + null + ); + MappedFieldType intFt = new NumberFieldMapper.NumberFieldType("int", NumberType.INTEGER); + AggregationBuilder builder = new FiltersAggregationBuilder( + "test", + new KeyedFilter("q1", new RangeQueryBuilder("test").from("2010-01-01").to("2010-03-01").includeUpper(false)), + new KeyedFilter("q2", new RangeQueryBuilder("test").from("2020-01-01").to("2020-03-01").includeUpper(false)) + ).subAggregation(new MaxAggregationBuilder("m").field("int")).subAggregation(new SumAggregationBuilder("s").field("int")); + List> docs = new ArrayList<>(); + long[] times = new long[] { + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2010-01-02"), + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-02"), + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-03"), + }; + for (int i = 0; i < 10000; i++) { + docs.add( + org.elasticsearch.common.collect.List.of(new LongPoint("test", times[i % 3]), new SortedNumericDocValuesField("int", i)) + ); + } + /* + * Shuffle the docs so we collect them in a random order which causes + * bad implementations of filter-by-filter aggregation to fail with + * assertion errors while executing. 
+ */ + Collections.shuffle(docs, random()); + testCase(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), result -> { + InternalFilters filters = (InternalFilters) result; + assertThat(filters.getBuckets(), hasSize(2)); + + InternalFilters.InternalBucket b = filters.getBucketByKey("q1"); + assertThat(b.getDocCount(), equalTo(3334L)); + InternalMax max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(9999.0)); + InternalSum sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(16668333.0)); + + b = filters.getBucketByKey("q2"); + assertThat(b.getDocCount(), equalTo(6666L)); + max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(9998.0)); + sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(33326667.0)); + }, dateFt, intFt); + withAggregator(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), (searcher, aggregator) -> { + assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class)); + FiltersAggregator.FilterByFilter filterByFilter = (FiltersAggregator.FilterByFilter) aggregator; + int maxDoc = searcher.getIndexReader().maxDoc(); + assertThat(filterByFilter.estimateCost(maxDoc), both(greaterThanOrEqualTo(10000L)).and(lessThan(20000L))); + Map debug = new HashMap<>(); + filterByFilter.filters().get(0).collectDebugInfo(debug::put); + assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); + debug = new HashMap<>(); + filterByFilter.filters().get(1).collectDebugInfo(debug::put); + assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); + }, dateFt, intFt); + } + + public void testSubAggsManyFilters() throws IOException { + MappedFieldType dateFt = new DateFieldMapper.DateFieldType( + "test", + true, + false, + false, + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + Resolution.MILLISECONDS, + null, + null + ); + MappedFieldType intFt = new NumberFieldMapper.NumberFieldType("int", NumberType.INTEGER); + List buckets = new ArrayList<>(); + DateFormatter formatter = DateFormatter.forPattern("strict_date"); + long start = formatter.parseMillis("2010-01-01"); + long lastRange = formatter.parseMillis("2020-03-01"); + while (start < lastRange) { + long end = start + TimeUnit.DAYS.toMillis(30); + String key = formatter.formatMillis(start) + " to " + formatter.formatMillis(end); + buckets.add(new KeyedFilter(key, new RangeQueryBuilder("test").from(start).to(end).includeUpper(false))); + start = end; + } + AggregationBuilder builder = new FiltersAggregationBuilder( + "test", + buckets.toArray(new KeyedFilter[0]) + ).subAggregation(new MaxAggregationBuilder("m").field("int")).subAggregation(new SumAggregationBuilder("s").field("int")); + List> docs = new ArrayList<>(); + long[] times = new long[] { + formatter.parseMillis("2010-01-02"), + formatter.parseMillis("2020-01-02"), + formatter.parseMillis("2020-01-03"), }; + for (int i = 0; i < 10000; i++) { + docs.add( + org.elasticsearch.common.collect.List.of(new LongPoint("test", times[i % 3]), new SortedNumericDocValuesField("int", i)) + ); + } + /* + * Shuffle the docs so we collect them in a random order which causes + * bad implementations of filter-by-filter aggregation to fail with + * assertion errors while executing. 
+ */ + Collections.shuffle(docs, random()); + testCase(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), result -> { + InternalFilters filters = (InternalFilters) result; + assertThat(filters.getBuckets(), hasSize(buckets.size())); + + InternalFilters.InternalBucket b = filters.getBucketByKey("2010-01-01 to 2010-01-31"); + assertThat(b.getDocCount(), equalTo(3334L)); + InternalMax max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(9999.0)); + InternalSum sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(16668333.0)); + + b = filters.getBucketByKey("2019-12-10 to 2020-01-09"); + assertThat(b.getDocCount(), equalTo(6666L)); + max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(9998.0)); + sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(33326667.0)); + }, dateFt, intFt); + withAggregator(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), (searcher, aggregator) -> { + assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class)); + FiltersAggregator.FilterByFilter filterByFilter = (FiltersAggregator.FilterByFilter) aggregator; + int maxDoc = searcher.getIndexReader().maxDoc(); + assertThat(filterByFilter.estimateCost(maxDoc), both(greaterThanOrEqualTo(10000L)).and(lessThan(20000L))); + for (int b = 0; b < buckets.size(); b++) { + Map debug = new HashMap<>(); + filterByFilter.filters().get(0).collectDebugInfo(debug::put); + assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); + } + }, dateFt, intFt); + } + + + @Override protected List objectMappers() { return MOCK_OBJECT_MAPPERS; diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridAggregatorTestCase.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridAggregatorTestCase.java index eb593ed1d77bb..8e20816eaa03c 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridAggregatorTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridAggregatorTestCase.java @@ -7,6 +7,7 @@ */ package org.elasticsearch.search.aggregations.bucket.geogrid; +import org.apache.lucene.document.Field; import org.apache.lucene.document.LatLonDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.geo.GeoEncodingUtils; @@ -24,6 +25,7 @@ import org.elasticsearch.common.geo.GeoBoundingBoxTests; import org.elasticsearch.common.geo.GeoUtils; import org.elasticsearch.index.mapper.GeoPointFieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.search.aggregations.Aggregation; import org.elasticsearch.search.aggregations.Aggregator; @@ -137,6 +139,7 @@ public void testAsSubAgg() throws IOException { docs.add(doc); doc.add(new LatLonDocValuesField(FIELD_NAME, latLng[0], latLng[1])); doc.add(new SortedSetDocValuesField("t", new BytesRef(t))); + doc.add(new Field("t", new BytesRef(t), KeywordFieldMapper.Defaults.FIELD_TYPE)); String hash = hashAsString(latLng[1], latLng[0], precision); Map expectedCountPerGeoHash = expectedCountPerTPerGeoHash.get(t); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregatorTests.java index 
7a3ccea93de93..460252f512fa3 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregatorTests.java @@ -9,6 +9,7 @@ package org.elasticsearch.search.aggregations.bucket.histogram; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; @@ -28,6 +29,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.search.aggregations.Aggregation; @@ -272,6 +274,8 @@ public void testAsSubAggWithIncreasedRounding() throws IOException { new SortedNumericDocValuesField(AGGREGABLE_DATE, d), new SortedSetDocValuesField("k1", aBytes), new SortedSetDocValuesField("k1", d < useC ? bBytes : cBytes), + new Field("k1", aBytes, KeywordFieldMapper.Defaults.FIELD_TYPE), + new Field("k1", d < useC ? bBytes : cBytes, KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedNumericDocValuesField("n", n++) )); } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTestCase.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTestCase.java index 6c5eddae582a5..b3dd5e0bff131 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTestCase.java @@ -8,6 +8,7 @@ package org.elasticsearch.search.aggregations.bucket.histogram; +import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.RandomIndexWriter; @@ -42,31 +43,41 @@ protected final void asSubAggTestCase(Aggregatio iw.addDocument(org.elasticsearch.common.collect.List.of( new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2020-02-01T00:00:00Z")), new SortedSetDocValuesField("k1", new BytesRef("a")), + new Field("k1", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedSetDocValuesField("k2", new BytesRef("a")), + new Field("k2", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedNumericDocValuesField("n", 1) )); iw.addDocument(org.elasticsearch.common.collect.List.of( new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2020-03-01T00:00:00Z")), new SortedSetDocValuesField("k1", new BytesRef("a")), + new Field("k1", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedSetDocValuesField("k2", new BytesRef("a")), + new Field("k2", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedNumericDocValuesField("n", 2) )); iw.addDocument(org.elasticsearch.common.collect.List.of( new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2021-02-01T00:00:00Z")), new SortedSetDocValuesField("k1", new BytesRef("a")), + new Field("k1", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedSetDocValuesField("k2", new 
BytesRef("a")), + new Field("k2", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedNumericDocValuesField("n", 3) )); iw.addDocument(org.elasticsearch.common.collect.List.of( new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2021-03-01T00:00:00Z")), new SortedSetDocValuesField("k1", new BytesRef("a")), + new Field("k1", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedSetDocValuesField("k2", new BytesRef("b")), + new Field("k2", new BytesRef("b"), KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedNumericDocValuesField("n", 4) )); iw.addDocument(org.elasticsearch.common.collect.List.of( new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2020-02-01T00:00:00Z")), new SortedSetDocValuesField("k1", new BytesRef("b")), + new Field("k1", new BytesRef("b"), KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedSetDocValuesField("k2", new BytesRef("b")), + new Field("k2", new BytesRef("b"), KeywordFieldMapper.Defaults.FIELD_TYPE), new SortedNumericDocValuesField("n", 5) )); }; diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java index f7a18ea421173..c2fce0c65dffd 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java @@ -568,7 +568,11 @@ private A executeTestCase(Query query, document.add(new SortedNumericDocValuesField(LONG_FIELD, value)); document.add(new LongPoint(LONG_FIELD, value)); document.add(new SortedSetDocValuesField(KEYWORD_FIELD, new BytesRef(Long.toString(value)))); + document.add(new Field(KEYWORD_FIELD, new BytesRef(Long.toString(value)), KeywordFieldMapper.Defaults.FIELD_TYPE)); document.add(new SortedSetDocValuesField("even_odd", new BytesRef(value % 2 == 0 ? "even" : "odd"))); + document.add( + new Field("even_odd", new BytesRef(value % 2 == 0 ? "even" : "odd"), KeywordFieldMapper.Defaults.FIELD_TYPE) + ); indexWriter.addDocument(document); document.clear(); } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java index da0021063c177..5a8b502233dd6 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java @@ -25,6 +25,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.mapper.BinaryFieldMapper; import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MockFieldMapper; import org.elasticsearch.index.mapper.TextFieldMapper; @@ -304,6 +305,7 @@ private void indexDocuments(IndexWriter writer) throws IOException { " }"; doc.add(new StoredField("_source", new BytesRef(json))); doc.add(new SortedSetDocValuesField("kwd", i % 2 == 0 ? new BytesRef("even") : new BytesRef("odd"))); + doc.add(new Field("kwd", i % 2 == 0 ? 
new BytesRef("even") : new BytesRef("odd"), KeywordFieldMapper.Defaults.FIELD_TYPE)); writer.addDocument(doc); } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 11ac56f664ad3..4c6a1de218884 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -29,9 +29,11 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; +import org.elasticsearch.Version; import org.elasticsearch.common.CheckedConsumer; import org.elasticsearch.common.breaker.CircuitBreaker; import org.elasticsearch.common.geo.GeoPoint; +import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.network.InetAddresses; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.BigArrays; @@ -322,9 +324,9 @@ private List doc(MappedFieldType ft, String... values) { List doc = new ArrayList(); for (String v : values) { BytesRef bytes = new BytesRef(v); - doc.add(new SortedSetDocValuesField("string", bytes)); + doc.add(new SortedSetDocValuesField(ft.name(), bytes)); if (ft.isSearchable()) { - doc.add(new KeywordField("string", bytes, KeywordFieldMapper.Defaults.FIELD_TYPE)); + doc.add(new KeywordField(ft.name(), bytes, KeywordFieldMapper.Defaults.FIELD_TYPE)); } } return doc; @@ -650,7 +652,7 @@ public void testNumericIncludeExclude() throws Exception { } public void testStringTermsAggregator() throws Exception { - MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("field"); + MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("field", randomBoolean(), true, null); BiFunction> luceneFieldFactory = (val, mv) -> { List result = new ArrayList<>(2); if (mv) { @@ -665,8 +667,7 @@ public void testStringTermsAggregator() throws Exception { }; termsAggregator(ValueType.STRING, fieldType, i -> Integer.toString(i), String::compareTo, luceneFieldFactory); - termsAggregatorWithNestedMaxAgg(ValueType.STRING, fieldType, i -> Integer.toString(i), - val -> new SortedDocValuesField("field", new BytesRef(val))); + termsAggregatorWithNestedMaxAgg(ValueType.STRING, fieldType, i -> Integer.toString(i), val -> luceneFieldFactory.apply(val, false)); } public void testLongTermsAggregator() throws Exception { @@ -680,7 +681,7 @@ public void testLongTermsAggregator() throws Exception { MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.LONG); termsAggregator(ValueType.LONG, fieldType, Integer::longValue, Long::compareTo, luceneFieldFactory); - termsAggregatorWithNestedMaxAgg(ValueType.LONG, fieldType, Integer::longValue, val -> new NumericDocValuesField("field", val)); + termsAggregatorWithNestedMaxAgg(ValueType.LONG, fieldType, Integer::longValue, val -> luceneFieldFactory.apply(val, false)); } public void testDoubleTermsAggregator() throws Exception { @@ -695,7 +696,7 @@ public void testDoubleTermsAggregator() throws Exception { = new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.DOUBLE); termsAggregator(ValueType.DOUBLE, fieldType, Integer::doubleValue, Double::compareTo, luceneFieldFactory); termsAggregatorWithNestedMaxAgg(ValueType.DOUBLE, fieldType, Integer::doubleValue, - val -> new 
NumericDocValuesField("field", Double.doubleToRawLongBits(val)));
+            val -> luceneFieldFactory.apply(val, false));
     }
 
     public void testIpTermsAggregator() throws Exception {
@@ -862,7 +863,7 @@ private void termsAggregator(ValueType valueType, MappedFieldType fieldType,
 
     private void termsAggregatorWithNestedMaxAgg(ValueType valueType, MappedFieldType fieldType,
                                                  Function<Integer, Object> valueFactory,
-                                                 Function<Object, IndexableField> luceneFieldFactory) throws Exception {
+                                                 Function<Object, List<Field>> luceneFieldFactory) throws Exception {
         final Map<Object, Long> counts = new HashMap<>();
         int numTerms = scaledRandomIntBetween(8, 128);
         for (int i = 0; i < numTerms; i++) {
@@ -872,8 +873,8 @@ private void termsAggregatorWithNestedMaxAgg(ValueType valueType, MappedFiel
         try (Directory directory = newDirectory()) {
             try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
                 for (Map.Entry<Object, Long> entry : counts.entrySet()) {
-                    Document document = new Document();
-                    document.add(luceneFieldFactory.apply(entry.getKey()));
+                    List<Field> document = new ArrayList<>();
+                    document.addAll(luceneFieldFactory.apply(entry.getKey()));
                     document.add(new NumericDocValuesField("value", entry.getValue()));
                     indexWriter.addDocument(document);
                 }
@@ -906,7 +907,7 @@ private void termsAggregatorWithNestedMaxAgg(ValueType valueType, MappedFiel
                     MappedFieldType fieldType2
                         = new NumberFieldMapper.NumberFieldType("value", NumberFieldMapper.NumberType.LONG);
 
-                    AggregationContext context = createAggregationContext(indexSearcher, null, fieldType, fieldType2);
+                    AggregationContext context = createAggregationContext(indexSearcher, new MatchAllDocsQuery(), fieldType, fieldType2);
                     Aggregator aggregator = createAggregator(aggregationBuilder, context);
                     aggregator.preCollection();
                     indexSearcher.search(new MatchAllDocsQuery(), aggregator);
@@ -1096,19 +1097,21 @@ public void testIpField() throws Exception {
     }
 
     public void testNestedTermsAgg() throws Exception {
+        MappedFieldType fieldType1 = new KeywordFieldMapper.KeywordFieldType("field1", randomBoolean(), true, null);
+        MappedFieldType fieldType2 = new KeywordFieldMapper.KeywordFieldType("field2", randomBoolean(), true, null);
         try (Directory directory = newDirectory()) {
             try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
-                Document document = new Document();
-                document.add(new SortedDocValuesField("field1", new BytesRef("a")));
-                document.add(new SortedDocValuesField("field2", new BytesRef("b")));
+                List<Field> document = new ArrayList<>();
+                document.addAll(doc(fieldType1, "a"));
+                document.addAll(doc(fieldType2, "b"));
                 indexWriter.addDocument(document);
-                document = new Document();
-                document.add(new SortedDocValuesField("field1", new BytesRef("c")));
-                document.add(new SortedDocValuesField("field2", new BytesRef("d")));
+                document = new ArrayList<>();
+                document.addAll(doc(fieldType1, "c"));
+                document.addAll(doc(fieldType2, "d"));
                 indexWriter.addDocument(document);
-                document = new Document();
-                document.add(new SortedDocValuesField("field1", new BytesRef("e")));
-                document.add(new SortedDocValuesField("field2", new BytesRef("f")));
+                document = new ArrayList<>();
+                document.addAll(doc(fieldType1, "e"));
+                document.addAll(doc(fieldType2, "f"));
                 indexWriter.addDocument(document);
                 try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) {
                     IndexSearcher indexSearcher = newIndexSearcher(indexReader);
@@ -1126,10 +1129,7 @@ public void testNestedTermsAgg() throws Exception {
                         .field("field2")
                         .order(BucketOrder.key(true))
                     );
-                    MappedFieldType fieldType1 = new KeywordFieldMapper.KeywordFieldType("field1");
-                    MappedFieldType fieldType2 = new KeywordFieldMapper.KeywordFieldType("field2");
-
-                    AggregationContext context = createAggregationContext(indexSearcher, null, fieldType1, fieldType2);
+                    AggregationContext context = createAggregationContext(indexSearcher, new MatchAllDocsQuery(), fieldType1, fieldType2);
                     Aggregator aggregator = createAggregator(aggregationBuilder, context);
                     aggregator.preCollection();
                     indexSearcher.search(new MatchAllDocsQuery(), aggregator);
@@ -1314,12 +1314,14 @@ public void testWithNestedAggregations() throws IOException {
     }
 
     public void testHeisenpig() throws IOException {
+        MappedFieldType nestedFieldType = new NumberFieldMapper.NumberFieldType("number", NumberFieldMapper.NumberType.LONG);
+        KeywordFieldType animalFieldType = new KeywordFieldType("str", randomBoolean(), true, null);
         try (Directory directory = newDirectory()) {
             try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
                 String[] tags = new String[] {"danger", "fluffiness"};
-                indexWriter.addDocuments(generateAnimalDocsWithNested("1", "sheep", tags, new int[] {1, 10}));
-                indexWriter.addDocuments(generateAnimalDocsWithNested("2", "cow", tags, new int[] {3, 1}));
-                indexWriter.addDocuments(generateAnimalDocsWithNested("3", "pig", tags, new int[] {100, 1}));
+                indexWriter.addDocuments(generateAnimalDocsWithNested("1", animalFieldType, "sheep", tags, new int[] {1, 10}));
+                indexWriter.addDocuments(generateAnimalDocsWithNested("2", animalFieldType, "cow", tags, new int[] {3, 1}));
+                indexWriter.addDocuments(generateAnimalDocsWithNested("3", animalFieldType, "pig", tags, new int[] {100, 1}));
                 indexWriter.commit();
                 NestedAggregationBuilder nested = new NestedAggregationBuilder("nested", "nested_object")
                     .subAggregation(
@@ -1331,12 +1333,10 @@ public void testHeisenpig() throws IOException {
                     .shardSize(10)
                     .size(10)
                     .order(BucketOrder.aggregation("nested>max_number", false));
-                MappedFieldType nestedFieldType = new NumberFieldMapper.NumberFieldType("number", NumberFieldMapper.NumberType.LONG);
-                MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("str");
                 try (IndexReader indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
                     StringTerms result = searchAndReduce(newSearcher(indexReader, false, true),
                         // match root document only
-                        new DocValuesFieldExistsQuery(PRIMARY_TERM_NAME), terms, fieldType, nestedFieldType);
+                        Queries.newNonNestedFilter(Version.CURRENT), terms, animalFieldType, nestedFieldType);
                     assertThat(result.getBuckets().get(0).getKeyAsString(), equalTo("pig"));
                     assertThat(result.getBuckets().get(0).docCount, equalTo(1L));
                     assertThat(((InternalMax) (((InternalNested)result.getBuckets().get(0).getAggregations().get("nested"))
@@ -1409,15 +1409,19 @@ public void testThreeLayerStringViaMap() throws IOException {
     }
 
     private void threeLayerStringTestCase(String executionHint) throws IOException {
+        MappedFieldType ift = new KeywordFieldType("i", randomBoolean(), true, null);
+        MappedFieldType jft = new KeywordFieldType("j", randomBoolean(), true, null);
+        MappedFieldType kft = new KeywordFieldType("k", randomBoolean(), true, null);
+
         try (Directory dir = newDirectory()) {
             try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
                 for (int i = 0; i < 10; i++) {
                     for (int j = 0; j < 10; j++) {
                         for (int k = 0; k < 10; k++) {
-                            Document d = new Document();
-                            d.add(new SortedDocValuesField("i", new BytesRef(Integer.toString(i))));
-                            d.add(new SortedDocValuesField("j", new BytesRef(Integer.toString(j))));
-                            d.add(new SortedDocValuesField("k", new BytesRef(Integer.toString(k))));
+                            List<Field> d = new ArrayList<>();
+                            d.addAll(doc(ift, Integer.toString(i)));
+                            d.addAll(doc(jft, Integer.toString(j)));
+                            d.addAll(doc(kft, Integer.toString(k)));
                             writer.addDocument(d);
                         }
                     }
@@ -1427,8 +1431,7 @@ private void threeLayerStringTestCase(String executionHint) throws IOException {
                 TermsAggregationBuilder request = new TermsAggregationBuilder("i").field("i").executionHint(executionHint)
                     .subAggregation(new TermsAggregationBuilder("j").field("j").executionHint(executionHint)
                         .subAggregation(new TermsAggregationBuilder("k").field("k").executionHint(executionHint)));
-                StringTerms result = searchAndReduce(searcher, new MatchAllDocsQuery(), request,
-                    keywordField("i"), keywordField("j"), keywordField("k"));
+                StringTerms result = searchAndReduce(searcher, new MatchAllDocsQuery(), request, ift, jft, kft);
                 for (int i = 0; i < 10; i++) {
                     StringTerms.Bucket iBucket = result.getBucketByKey(Integer.toString(i));
                     assertThat(iBucket.getDocCount(), equalTo(100L));
@@ -1717,11 +1720,17 @@ private List<Document> generateDocsWithNested(String id, int value, int[] nested
         return documents;
     }
 
-    private List<Document> generateAnimalDocsWithNested(String id, String animal, String[] tags, int[] nestedValues) {
-        List<Document> documents = new ArrayList<>();
+    private List<List<Field>> generateAnimalDocsWithNested(
+        String id,
+        KeywordFieldType animalFieldType,
+        String animal,
+        String[] tags,
+        int[] nestedValues
+    ) {
+        List<List<Field>> documents = new ArrayList<>();
         for (int i = 0; i < tags.length; i++) {
-            Document document = new Document();
+            List<Field> document = new ArrayList<>();
             document.add(new Field(IdFieldMapper.NAME, Uid.encodeId(id), IdFieldMapper.Defaults.NESTED_FIELD_TYPE));
             document.add(new Field(TypeFieldMapper.NAME, "__nested_object", TypeFieldMapper.Defaults.NESTED_FIELD_TYPE));
@@ -1730,9 +1739,9 @@ private List<Document> generateAnimalDocsWithNested(String id, String animal, St
             documents.add(document);
         }
 
-        Document document = new Document();
+        List<Field> document = new ArrayList<>();
         document.add(new Field(IdFieldMapper.NAME, Uid.encodeId(id), IdFieldMapper.Defaults.FIELD_TYPE));
-        document.add(new SortedDocValuesField("str", new BytesRef(animal)));
+        document.addAll(doc(animalFieldType, animal));
         document.add(sequenceIDFields.primaryTerm);
         documents.add(document);
 
diff --git a/x-pack/plugin/rollup/src/test/java/org/elasticsearch/xpack/rollup/RollupResponseTranslationTests.java b/x-pack/plugin/rollup/src/test/java/org/elasticsearch/xpack/rollup/RollupResponseTranslationTests.java
index 75a56879b88a0..73359d26aed11 100644
--- a/x-pack/plugin/rollup/src/test/java/org/elasticsearch/xpack/rollup/RollupResponseTranslationTests.java
+++ b/x-pack/plugin/rollup/src/test/java/org/elasticsearch/xpack/rollup/RollupResponseTranslationTests.java
@@ -1165,6 +1165,7 @@ private Document stringValueDoc(String stringValue) {
 
     private Document stringValueRollupDoc(String stringValue, long docCount) {
         Document doc = new Document();
         doc.add(new SortedSetDocValuesField("stringfield.terms." + RollupField.VALUE, new BytesRef(stringValue)));
+        doc.add(new Field("stringfield.terms." + RollupField.VALUE, new BytesRef(stringValue), KeywordFieldMapper.Defaults.FIELD_TYPE));
         doc.add(new SortedNumericDocValuesField("stringfield.terms." + RollupField.COUNT_FIELD, docCount));
         return doc;
     }
diff --git a/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineAggregatorTests.java b/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineAggregatorTests.java
index b25b299b78d0c..e322fccd7a572 100644
--- a/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineAggregatorTests.java
+++ b/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineAggregatorTests.java
@@ -388,7 +388,7 @@ private void testCase(Query query, TermsAggregationBuilder aggregationBuilder,
 
         try {
             MappedFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType("value_field");
-            MappedFieldType groupFieldType = new KeywordFieldMapper.KeywordFieldType("group_id");
+            MappedFieldType groupFieldType = new KeywordFieldMapper.KeywordFieldType("group_id", false, true, null);
             MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType("sort_field", fieldNumberType);
 
             Terms terms = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), aggregationBuilder,
diff --git a/x-pack/qa/runtime-fields/build.gradle b/x-pack/qa/runtime-fields/build.gradle
index c6c8f3a88c6fa..ee0ef3c4cc284 100644
--- a/x-pack/qa/runtime-fields/build.gradle
+++ b/x-pack/qa/runtime-fields/build.gradle
@@ -72,7 +72,8 @@ subprojects {
             'search.aggregation/10_histogram/*',
             'suggest/50_completion_with_multi_fields/Search by suggestion on geofield-hash on sub field should work',
             // Runtime fields don't have global ords
-            'search.aggregation/20_terms/string profiler via global ordinals',
+            'search.aggregation/20_terms/string profiler via global ordinals filters implementation',
+            'search.aggregation/20_terms/string profiler via global ordinals native implementation',
             'search.aggregation/20_terms/Global ordinals are loaded with the global_ordinals execution hint',
             'search.aggregation/170_cardinality_metric/profiler string',
             //dynamic template causes a type _doc to be created, these tests use another type but only one type is allowed
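A note on the fixture pattern the test changes above share: each keyword value is now
indexed twice, once as doc values (which the aggregator collects from) and once as an
indexed term (which gives the filters-based terms implementation something to build
term queries against). Below is a minimal sketch of that pattern in plain Lucene. The
keywordDoc helper and the local KEYWORD FieldType are illustrative assumptions standing
in for the tests' doc(...) helper and KeywordFieldMapper.Defaults.FIELD_TYPE; this is
not code from the patch.

    import java.util.List;

    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.document.SortedSetDocValuesField;
    import org.apache.lucene.index.IndexOptions;
    import org.apache.lucene.util.BytesRef;

    class KeywordFixture {
        // Roughly what an indexed keyword field looks like to Lucene:
        // untokenized, unstored, present in the inverted index with no norms.
        private static final FieldType KEYWORD = new FieldType();
        static {
            KEYWORD.setIndexOptions(IndexOptions.DOCS);
            KEYWORD.setOmitNorms(true);
            KEYWORD.setTokenized(false);
            KEYWORD.freeze();
        }

        // One keyword value indexed both ways: the doc values side feeds ordinary
        // bucket collection, the indexed side lets a per-term filter match it.
        static List<Field> keywordDoc(String field, String value) {
            BytesRef bytes = new BytesRef(value);
            return List.of(
                new SortedSetDocValuesField(field, bytes), // doc values
                new Field(field, bytes, KEYWORD)           // inverted index
            );
        }
    }

Both halves matter: without doc values the collector has nothing to read, and without
the indexed terms there is nothing for per-term filters to match, which is presumably
why every fixture touched by this patch adds the indexed twin alongside the existing
doc values field rather than replacing it.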