elastic · polyfractal · Jan 29, 2020 · Jan 16, 2020 · Jan 21, 2020 · Jan 21, 2020
diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java
@@ -381,7 +381,8 @@ private void registerAggregations(List<SearchPlugin> plugins) {
                     .addResultReader(StringTerms.NAME, StringTerms::new)
                     .addResultReader(UnmappedTerms.NAME, UnmappedTerms::new)
                     .addResultReader(LongTerms.NAME, LongTerms::new)
-                    .addResultReader(DoubleTerms.NAME, DoubleTerms::new));
+                    .addResultReader(DoubleTerms.NAME, DoubleTerms::new)
+            .setAggregatorRegistrar(TermsAggregationBuilder::registerAggregators));
         registerAggregation(new AggregationSpec(RareTermsAggregationBuilder.NAME, RareTermsAggregationBuilder::new,
                 RareTermsAggregationBuilder::parse)
                     .addResultReader(StringRareTerms.NAME, StringRareTerms::new)

diff --git a/...main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregationBuilder.java b/...main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregationBuilder.java
@@ -42,12 +42,14 @@
 import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
 import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
 import org.elasticsearch.search.aggregations.support.ValuesSourceParserHelper;
+import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry;
 import org.elasticsearch.search.aggregations.support.ValuesSourceType;
 
 import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 public class TermsAggregationBuilder extends ValuesSourceAggregationBuilder<TermsAggregationBuilder>
         implements MultiBucketAggregationBuilder {
@@ -100,6 +102,13 @@ public static AggregationBuilder parse(String aggregationName, XContentParser pa
         return PARSER.parse(parser, new TermsAggregationBuilder(aggregationName), null);
     }
 
+    /**
+     * Class-wide guard for {@link #registerAggregators(ValuesSourceRegistry)}: starts {@code false} and is
+     * flipped to {@code true} by the first call, so the delegation below runs at most once.
+     */
+    private static final AtomicBoolean wasRegistered = new AtomicBoolean(false);
+
+    /**
+     * Registers the terms aggregators by delegating to
+     * {@code TermsAggregatorFactory.registerAggregators(valuesSourceRegistry)}.
+     * <p>
+     * Only the call that atomically flips {@code wasRegistered} from {@code false} to {@code true} performs
+     * the delegation. Every later call returns without touching the registry it was handed, even when that
+     * registry is a different instance from the one passed to the first call.
+     * <p>
+     * NOTE(review): because the guard is static, a second registry created in the same JVM would never be
+     * populated through this method; confirm that callers only ever build one registry per JVM.
+     *
+     * @param valuesSourceRegistry registry forwarded to {@code TermsAggregatorFactory.registerAggregators}
+     */
+    public static void registerAggregators(ValuesSourceRegistry valuesSourceRegistry) {
+        // compareAndSet returns true only for the single caller that wins the false -> true transition.
+        if (wasRegistered.compareAndSet(false, true)) {
+            TermsAggregatorFactory.registerAggregators(valuesSourceRegistry);
+        }
+    }
+
     private BucketOrder order = BucketOrder.compound(BucketOrder.count(false)); // automatically adds tie-breaker key asc order
     private IncludeExclude includeExclude = null;
     private String executionHint = null;

diff --git a/.../main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/.../main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java
diff --git a/...main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorSupplier.java b/...main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorSupplier.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.aggregations.bucket.terms;
+
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.Aggregator;
+import org.elasticsearch.search.aggregations.AggregatorFactories;
+import org.elasticsearch.search.aggregations.BucketOrder;
+import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
+import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+import org.elasticsearch.search.aggregations.support.AggregatorSupplier;
+import org.elasticsearch.search.aggregations.support.ValuesSource;
+import org.elasticsearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+interface TermsAggregatorSupplier extends AggregatorSupplier {
+    Aggregator build(String name,
+                     AggregatorFactories factories,
+                     ValuesSource valuesSource,
+                     BucketOrder order,
+                     DocValueFormat format,
+                     TermsAggregator.BucketCountThresholds bucketCountThresholds,
+                     IncludeExclude includeExclude,
+                     String executionHint,
+                     SearchContext context,
+                     Aggregator parent,
+                     Aggregator.SubAggCollectionMode subAggCollectMode,
+                     boolean showTermDocCountError,
+                     List<PipelineAggregator> pipelineAggregators,
+                     Map<String, Object> metaData) throws IOException;
+}
diff --git a/...t/java/org/elasticsearch/search/aggregations/bucket/terms/BinaryTermsAggregatorTests.java b/...t/java/org/elasticsearch/search/aggregations/bucket/terms/BinaryTermsAggregatorTests.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.aggregations.bucket.terms;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.RegExp;
+import org.elasticsearch.common.Numbers;
+import org.elasticsearch.index.mapper.BinaryFieldMapper;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.AggregationExecutionException;
+import org.elasticsearch.search.aggregations.AggregatorTestCase;
+import org.elasticsearch.search.aggregations.support.ValueType;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Consumer;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class BinaryTermsAggregatorTests extends AggregatorTestCase {
+    private static final String BINARY_FIELD = "binary";
+
+    private static final List<Long> dataset;
+    static {
+        List<Long> d = new ArrayList<>(45);
+        for (int i = 0; i < 10; i++) {
+            for (int j = 0; j < i; j++) {
+                d.add((long) i);
+            }
+        }
+        dataset  = d;
+    }
+
+    public void testMatchNoDocs() throws IOException {
+        testBothCases(new MatchNoDocsQuery(), dataset,
+            aggregation -> aggregation.field(BINARY_FIELD),
+            agg -> assertEquals(0, agg.getBuckets().size()), ValueType.STRING
+        );
+    }
+
+    public void testMatchAllDocs() throws IOException {
+        Query query = new MatchAllDocsQuery();
+
+        testBothCases(query, dataset,
+            aggregation -> aggregation.field(BINARY_FIELD),
+            agg -> {
+                assertEquals(9, agg.getBuckets().size());
+                for (int i = 0; i < 9; i++) {
+                    StringTerms.Bucket bucket = (StringTerms.Bucket) agg.getBuckets().get(i);
+                    byte[] bytes = Numbers.longToBytes(9L - i);
+                    String bytesAsString = (String) DocValueFormat.BINARY.format(new BytesRef(bytes));
+                    assertThat(bucket.getKey(), equalTo(bytesAsString));
+                    assertThat(bucket.getDocCount(), equalTo(9L - i));
+                }
+            }, null);
+    }
+
+    public void testBadIncludeExclude() throws IOException {
+        IncludeExclude includeExclude = new IncludeExclude(new RegExp("foo"), null);
+
+        // Make sure the include/exclude fails regardless of how the user tries to type hint the agg
+        AggregationExecutionException e = expectThrows(AggregationExecutionException.class,
+            () -> testBothCases(new MatchNoDocsQuery(), dataset,
+                aggregation -> aggregation.field(BINARY_FIELD).includeExclude(includeExclude).format("yyyy-MM-dd"),
+                agg -> fail("test should have failed with exception"), null // default, no hint
+            ));
+        assertThat(e.getMessage(), equalTo("Aggregation [_name] cannot support regular expression style include/exclude settings as " +
+            "they can only be applied to string fields. Use an array of values for include/exclude clauses"));
+
+        e = expectThrows(AggregationExecutionException.class,
+            () -> testBothCases(new MatchNoDocsQuery(), dataset,
+                aggregation -> aggregation.field(BINARY_FIELD).includeExclude(includeExclude).format("yyyy-MM-dd"),
+                agg -> fail("test should have failed with exception"), ValueType.STRING // string type hint
+            ));
+        assertThat(e.getMessage(), equalTo("Aggregation [_name] cannot support regular expression style include/exclude settings as " +
+            "they can only be applied to string fields. Use an array of values for include/exclude clauses"));
+
+        e = expectThrows(AggregationExecutionException.class, () -> testBothCases(new MatchNoDocsQuery(), dataset,
+            aggregation -> aggregation.field(BINARY_FIELD).includeExclude(includeExclude),
+            agg -> fail("test should have failed with exception"), ValueType.NUMERIC // numeric type hint
+        ));
+        assertThat(e.getMessage(), equalTo("Aggregation [_name] cannot support regular expression style include/exclude settings as " +
+            "they can only be applied to string fields. Use an array of values for include/exclude clauses"));
+    }
+
+    private void testSearchCase(Query query, List<Long> dataset,
+                                Consumer<TermsAggregationBuilder> configure,
+                                Consumer<InternalMappedTerms> verify, ValueType valueType) throws IOException {
+        executeTestCase(false, query, dataset, configure, verify, valueType);
+    }
+
+    private void testSearchAndReduceCase(Query query, List<Long> dataset,
+                                         Consumer<TermsAggregationBuilder> configure,
+                                         Consumer<InternalMappedTerms> verify, ValueType valueType) throws IOException {
+        executeTestCase(true, query, dataset, configure, verify, valueType);
+    }
+
+    private void testBothCases(Query query, List<Long> dataset,
+                               Consumer<TermsAggregationBuilder> configure,
+                               Consumer<InternalMappedTerms> verify, ValueType valueType) throws IOException {
+        testSearchCase(query, dataset, configure, verify, valueType);
+        testSearchAndReduceCase(query, dataset, configure, verify, valueType);
+    }
+
+    private void executeTestCase(boolean reduced, Query query, List<Long> dataset,
+                                 Consumer<TermsAggregationBuilder> configure,
+                                 Consumer<InternalMappedTerms> verify, ValueType valueType) throws IOException {
+
+        try (Directory directory = newDirectory()) {
+            try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
+                Document document = new Document();
+                for (Long value : dataset) {
+                    if (frequently()) {
+                        indexWriter.commit();
+                    }
+
+                    document.add(new BinaryFieldMapper.CustomBinaryDocValuesField(BINARY_FIELD, Numbers.longToBytes(value)));
+                    indexWriter.addDocument(document);
+                    document.clear();
+                }
+            }
+
+            try (IndexReader indexReader = DirectoryReader.open(directory)) {
+                IndexSearcher indexSearcher = newIndexSearcher(indexReader);
+
+                TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name");
+                if (valueType != null) {
+                    aggregationBuilder.userValueTypeHint(valueType);
+                }
+                if (configure != null) {
+                    configure.accept(aggregationBuilder);
+                }
+
+                MappedFieldType binaryFieldType = new BinaryFieldMapper.Builder(BINARY_FIELD).fieldType();
+                binaryFieldType.setName(BINARY_FIELD);
+                binaryFieldType.setHasDocValues(true);
+
+                InternalMappedTerms rareTerms;
+                if (reduced) {
+                    rareTerms = searchAndReduce(indexSearcher, query, aggregationBuilder, binaryFieldType);
+                } else {
+                    rareTerms = search(indexSearcher, query, aggregationBuilder, binaryFieldType);
+                }
+                verify.accept(rareTerms);
+            }
+        }
+    }
+
+}