From 92e63cfca5833696127bcddfb917981b342260ab Mon Sep 17 00:00:00 2001 From: Paul Sanwald Date: Fri, 17 Aug 2018 11:43:16 -0400 Subject: [PATCH] backport of #32723 --- .../AutoDateHistogramAggregationBuilder.java | 2 +- .../histogram/InternalAutoDateHistogram.java | 8 +- .../InternalAutoDateHistogramTests.java | 111 +++++++++++++++--- 3 files changed, 97 insertions(+), 24 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregationBuilder.java index 75bcd8689cea6..a07e3302f20c0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregationBuilder.java @@ -170,7 +170,7 @@ public int getNumBuckets() { return new AutoDateHistogramAggregatorFactory(name, config, numBuckets, roundings, context, parent, subFactoriesBuilder, metaData); } - private static Rounding createRounding(DateTimeUnit interval, DateTimeZone timeZone) { + static Rounding createRounding(DateTimeUnit interval, DateTimeZone timeZone) { Rounding.Builder tzRoundingBuilder = Rounding.builder(interval); if (timeZone != null) { tzRoundingBuilder.timeZone(timeZone); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalAutoDateHistogram.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalAutoDateHistogram.java index 6a78ca6724988..7bc2b9a317838 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalAutoDateHistogram.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalAutoDateHistogram.java @@ -418,7 +418,7 @@ private BucketReduceResult addEmptyBuckets(BucketReduceResult currentResult, Red return currentResult; } int roundingIdx = getAppropriateRounding(list.get(0).key, list.get(list.size() - 1).key, currentResult.roundingIdx, - bucketInfo.roundingInfos); + bucketInfo.roundingInfos, targetBuckets); RoundingInfo roundingInfo = bucketInfo.roundingInfos[roundingIdx]; Rounding rounding = roundingInfo.rounding; // merge buckets using the new rounding @@ -447,8 +447,8 @@ private BucketReduceResult addEmptyBuckets(BucketReduceResult currentResult, Red return new BucketReduceResult(list, roundingInfo, roundingIdx); } - private int getAppropriateRounding(long minKey, long maxKey, int roundingIdx, - RoundingInfo[] roundings) { + static int getAppropriateRounding(long minKey, long maxKey, int roundingIdx, + RoundingInfo[] roundings, int targetBuckets) { if (roundingIdx == roundings.length - 1) { return roundingIdx; } @@ -480,7 +480,7 @@ private int getAppropriateRounding(long minKey, long maxKey, int roundingIdx, currentKey = currentRounding.nextRoundingValue(currentKey); } currentRoundingIdx++; - } while (requiredBuckets > (targetBuckets * roundings[roundingIdx].getMaximumInnerInterval()) + } while (requiredBuckets > (targetBuckets * roundings[currentRoundingIdx - 1].getMaximumInnerInterval()) && currentRoundingIdx < roundings.length); // The loop will increase past the correct rounding index here so we // need to subtract one to get the rounding index we need diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalAutoDateHistogramTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalAutoDateHistogramTests.java index a14fca63154d7..b7c5bf03ac552 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalAutoDateHistogramTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalAutoDateHistogramTests.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.aggregations.bucket.histogram; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.rounding.DateTimeUnit; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation; @@ -28,7 +29,11 @@ import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.test.InternalMultiBucketAggregationTestCase; import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import java.time.Instant; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -39,6 +44,8 @@ import static org.elasticsearch.common.unit.TimeValue.timeValueHours; import static org.elasticsearch.common.unit.TimeValue.timeValueMinutes; import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds; +import static org.elasticsearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder.createRounding; +import static org.hamcrest.Matchers.equalTo; public class InternalAutoDateHistogramTests extends InternalMultiBucketAggregationTestCase { @@ -56,11 +63,12 @@ protected InternalAutoDateHistogram createTestInstance(String name, List pipelineAggregators, Map metaData, InternalAggregations aggregations) { - + roundingInfos = AutoDateHistogramAggregationBuilder.buildRoundings(null); int nbBuckets = randomNumberOfBuckets(); int targetBuckets = randomIntBetween(1, nbBuckets * 2 + 1); List buckets = new ArrayList<>(nbBuckets); + long startingDate = System.currentTimeMillis(); long interval = randomIntBetween(1, 3); @@ -72,23 +80,41 @@ protected InternalAutoDateHistogram createTestInstance(String name, } InternalAggregations subAggregations = new InternalAggregations(Collections.emptyList()); BucketInfo bucketInfo = new BucketInfo(roundingInfos, randomIntBetween(0, roundingInfos.length - 1), subAggregations); + return new InternalAutoDateHistogram(name, buckets, targetBuckets, bucketInfo, format, pipelineAggregators, metaData); + } + /* + This test was added to reproduce a bug where getAppropriateRounding was only ever using the first innerIntervals + passed in, instead of using the interval associated with the loop. + */ + public void testGetAppropriateRoundingUsesCorrectIntervals() { + RoundingInfo[] roundings = new RoundingInfo[6]; + DateTimeZone timeZone = DateTimeZone.UTC; + // Since we pass 0 as the starting index to getAppropriateRounding, we'll also use + // an innerInterval that is quite large, such that targetBuckets * roundings[i].getMaximumInnerInterval() + // will be larger than the estimate. + roundings[0] = new RoundingInfo(createRounding(DateTimeUnit.SECOND_OF_MINUTE, timeZone), + 1000L, 1000); + roundings[1] = new RoundingInfo(createRounding(DateTimeUnit.MINUTES_OF_HOUR, timeZone), + 60 * 1000L, 1, 5, 10, 30); + roundings[2] = new RoundingInfo(createRounding(DateTimeUnit.HOUR_OF_DAY, timeZone), + 60 * 60 * 1000L, 1, 3, 12); - return new InternalAutoDateHistogram(name, buckets, targetBuckets, bucketInfo, format, pipelineAggregators, metaData); + OffsetDateTime timestamp = Instant.parse("2018-01-01T00:00:01.000Z").atOffset(ZoneOffset.UTC); + // We want to pass a roundingIdx of zero, because in order to reproduce this bug, we need the function + // to increment the rounding (because the bug was that the function would not use the innerIntervals + // from the new rounding. + int result = InternalAutoDateHistogram.getAppropriateRounding(timestamp.toEpochSecond()*1000, + timestamp.plusDays(1).toEpochSecond()*1000, 0, roundings, 25); + assertThat(result, equalTo(2)); } @Override protected void assertReduced(InternalAutoDateHistogram reduced, List inputs) { - int roundingIdx = 0; - for (InternalAutoDateHistogram histogram : inputs) { - if (histogram.getBucketInfo().roundingIdx > roundingIdx) { - roundingIdx = histogram.getBucketInfo().roundingIdx; - } - } - RoundingInfo roundingInfo = roundingInfos[roundingIdx]; long lowest = Long.MAX_VALUE; long highest = 0; + for (InternalAutoDateHistogram histogram : inputs) { for (Histogram.Bucket bucket : histogram.getBuckets()) { long bucketKey = ((DateTime) bucket.getKey()).getMillis(); @@ -100,35 +126,72 @@ protected void assertReduced(InternalAutoDateHistogram reduced, List= 0; j--) { + int interval = roundingInfo.innerIntervals[j]; + if (normalizedDuration / interval < reduced.getBuckets().size()) { + innerIntervalToUse = interval; + innerIntervalIndex = j; + } } } + + long intervalInMillis = innerIntervalToUse * roundingInfo.getRoughEstimateDurationMillis(); + int bucketCount = getBucketCount(lowest, highest, roundingInfo, intervalInMillis); + + //Next, if our bucketCount is still above what we need, we'll go back and determine the interval + // based on a size calculation. + if (bucketCount > reduced.getBuckets().size()) { + for (int i = innerIntervalIndex; i < roundingInfo.innerIntervals.length; i++) { + long newIntervalMillis = roundingInfo.innerIntervals[i] * roundingInfo.getRoughEstimateDurationMillis(); + if (getBucketCount(lowest, highest, roundingInfo, newIntervalMillis) <= reduced.getBuckets().size()) { + innerIntervalToUse = roundingInfo.innerIntervals[i]; + intervalInMillis = innerIntervalToUse * roundingInfo.getRoughEstimateDurationMillis(); + } + } + } + Map expectedCounts = new TreeMap<>(); - long intervalInMillis = innerIntervalToUse*roundingInfo.getRoughEstimateDurationMillis(); for (long keyForBucket = roundingInfo.rounding.round(lowest); - keyForBucket <= highest; + keyForBucket <= roundingInfo.rounding.round(highest); keyForBucket = keyForBucket + intervalInMillis) { expectedCounts.put(keyForBucket, 0L); + // Iterate through the input buckets, and for each bucket, determine if it's inside + // the range of the bucket in the outer loop. if it is, add the doc count to the total + // for that bucket. + for (InternalAutoDateHistogram histogram : inputs) { for (Histogram.Bucket bucket : histogram.getBuckets()) { - long bucketKey = ((DateTime) bucket.getKey()).getMillis(); - long roundedBucketKey = roundingInfo.rounding.round(bucketKey); + long roundedBucketKey = roundingInfo.rounding.round(((DateTime) bucket.getKey()).getMillis()); + long docCount = bucket.getDocCount(); if (roundedBucketKey >= keyForBucket && roundedBucketKey < keyForBucket + intervalInMillis) { - long count = bucket.getDocCount(); expectedCounts.compute(keyForBucket, - (key, oldValue) -> (oldValue == null ? 0 : oldValue) + count); + (key, oldValue) -> (oldValue == null ? 0 : oldValue) + docCount); } } } } + // If there is only a single bucket, and we haven't added it above, add a bucket with no documents. + // this step is necessary because of the roundedBucketKey < keyForBucket + intervalInMillis above. + if (roundingInfo.rounding.round(lowest) == roundingInfo.rounding.round(highest) && expectedCounts.isEmpty()) { + expectedCounts.put(roundingInfo.rounding.round(lowest), 0L); + } + + // pick out the actual reduced values to the make the assertion more readable Map actualCounts = new TreeMap<>(); for (Histogram.Bucket bucket : reduced.getBuckets()) { actualCounts.compute(((DateTime) bucket.getKey()).getMillis(), @@ -137,6 +200,16 @@ protected void assertReduced(InternalAutoDateHistogram reduced, List instanceReader() { return InternalAutoDateHistogram::new;