Fix doc_count on HistoBackedHistogramAggregator #74650

Merged 1 commit on Jun 28, 2021
@@ -86,9 +86,10 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
         } else {
             collectBucket(sub, doc, bucketOrd);
         }
-        // We have added the document already. We should increment doc_count by count - 1
-        // so that we have added it count times.
-        incrementBucketDocCount(bucketOrd, count - 1);
+        // We have already added the document, and the bucket doc_count was incremented
+        // by _doc_count. To compensate, increment doc_count by (count - _doc_count) so
+        // that, in total, the document is counted count times.
+        incrementBucketDocCount(bucketOrd, count - docCountProvider.getDocCount(doc));
     }
     previousKey = key;
 }

Review comment (Member):
No matter what _doc_count is, it was previously added to the bucket_count via the collectBucket methods. It could be ANY number.

Consequently, this incrementBucketDocCount may actually be decrementing the bucket_count to adjust for the difference.

I think this is fine.

The other potential solution is to override collectBucket... so the bucket count is not incremented. But that may prove too complicated.

I think this is a good solution for now 👍

Somebody else from the aggs team should take a look as well.

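To make the compensation arithmetic concrete, here is a minimal, self-contained sketch in plain Java. It is not Elasticsearch code: the class name is hypothetical, and count / docCount are stand-ins for the sum of a document's histogram counts and its _doc_count field, using the values from the new unit test below.

// A minimal sketch (not Elasticsearch code) of the doc_count arithmetic in this fix.
public class DocCountCompensation {
    public static void main(String[] args) {
        long count = 8;     // sum of the histogram value counts for one document
        long docCount = 8;  // the document's _doc_count field

        // collectBucket has already incremented the bucket by _doc_count.
        long bucket = docCount;

        // Old compensation assumed the bucket had only been incremented by 1:
        // 8 + (8 - 1) = 15, which over-counts.
        long buggy = bucket + (count - 1);

        // Fixed compensation subtracts what was actually added:
        // 8 + (8 - 8) = 8, the correct total.
        long fixed = bucket + (count - docCount);

        // When _doc_count exceeds count, the adjustment is negative, so the
        // "increment" effectively decrements the bucket -- the reviewer's point.
        System.out.println("buggy = " + buggy + ", fixed = " + fixed);
    }
}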
@@ -7,17 +7,12 @@

 package org.elasticsearch.xpack.analytics.aggregations.bucket.histogram;

-import static java.util.Collections.singleton;
-import static org.elasticsearch.xpack.analytics.AnalyticsTestsUtils.histogramFieldDocValues;
-
-import java.util.Collections;
-import java.util.List;
-
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.store.Directory;
+import org.elasticsearch.index.mapper.CustomTermFreqField;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.plugins.SearchPlugin;
 import org.elasticsearch.search.aggregations.AggregationBuilder;
@@ -30,6 +25,12 @@
 import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
 import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper;

+import java.util.Collections;
+import java.util.List;
+
+import static java.util.Collections.singleton;
+import static org.elasticsearch.xpack.analytics.AnalyticsTestsUtils.histogramFieldDocValues;
+
 public class HistoBackedHistogramAggregatorTests extends AggregatorTestCase {

     private static final String FIELD_NAME = "field";
@@ -99,6 +100,27 @@ public void testMinDocCount() throws Exception {
         }
     }

+    public void testHistogramWithDocCountField() throws Exception {
+        try (Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
+            w.addDocument(List.of(
+                // Add the _doc_count field
+                new CustomTermFreqField("_doc_count", "_doc_count", 8),
+                histogramFieldDocValues(FIELD_NAME, new double[] {0, 1.2, 10, 10, 12, 24, 24, 24}))
+            );
+
+            HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
+                .field(FIELD_NAME)
+                .interval(100);
+
+            try (IndexReader reader = w.getReader()) {
+                IndexSearcher searcher = new IndexSearcher(reader);
+                InternalHistogram histogram = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, defaultFieldType(FIELD_NAME));
+                assertTrue(AggregationInspectionHelper.hasValue(histogram));
+                assertEquals(8, histogram.getBuckets().get(0).getDocCount());
+            }
+        }
+    }
+
     public void testRandomOffset() throws Exception {
         try (Directory dir = newDirectory();
             RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
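A note on the expected value in testHistogramWithDocCountField: assuming histogramFieldDocValues assigns a count of 1 to each of the 8 values, the document's total histogram count equals its _doc_count of 8, so the compensation (count - _doc_count) is 0 and the bucket's doc_count stays at 8. Under the old (count - 1) compensation, the same document would have produced 8 + 7 = 15.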
@@ -74,10 +74,46 @@ setup:
                 field: "latency"
                 interval: 0.3

   - match: { hits.total.value: 2 }
   - length: { aggregations.histo.buckets: 2 }
   - match: { aggregations.histo.buckets.0.key: 0.0 }
   - match: { aggregations.histo.buckets.0.doc_count: 20 }
   - match: { aggregations.histo.buckets.1.key: 0.3 }
   - match: { aggregations.histo.buckets.1.doc_count: 60 }

+---
+"Histogram with _doc_count":
+  - do:
+      indices.create:
+        index: "histo_with_doc_count"
+        body:
+          mappings:
+            properties:
+              latency:
+                type: "histogram"
+  - do:
+      headers:
+        Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser
+      bulk:
+        index: "histo_with_doc_count"
+        refresh: true
+        body:
+          - '{"index": {}}'
+          - '{"_doc_count": 50, "latency": {"values" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 7, 23, 12, 5]}}'
+          - '{"index": {}}'
+          - '{"_doc_count": 10, "latency": {"values" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [1, 1, 1, 1, 6]}}'
+  - do:
+      search:
+        index: "histo_with_doc_count"
+        body:
+          size: 0
+          aggs:
+            histo:
+              histogram:
+                field: "latency"
+                interval: 1
+
+  - match: { hits.total.value: 2 }
+  - length: { aggregations.histo.buckets: 1 }
+  - match: { aggregations.histo.buckets.0.key: 0.0 }
+  - match: { aggregations.histo.buckets.0.doc_count: 60 }
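The expected assertions follow directly from the bulk data: each document's _doc_count matches the sum of its counts array (3+7+23+12+5 = 50 and 1+1+1+1+6 = 10), and with interval 1 every latency value falls into the single [0.0, 1.0) bucket, so its doc_count is 50 + 10 = 60.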