Skip to content

Commit

Permalink
Implement buildEmptyAggregations for MultiTermsAggregator (#7089) (#7318
Browse files Browse the repository at this point in the history
)

* Implement buildEmptyAggregations for MultiTermsAggregator (#7089)

Signed-off-by: Austin Lee <[email protected]>

* Address Spotless check issue

Signed-off-by: Austin Lee <[email protected]>

* Add a unit test for MultiTermsAggregator.buildEmptyAggregations (#7089)

Signed-off-by: Austin Lee <[email protected]>

* Update changelog

Update version check and reason

Signed-off-by: Austin Lee <[email protected]>

---------

Signed-off-by: Austin Lee <[email protected]>
  • Loading branch information
austintlee authored Jun 29, 2023
1 parent 976048d commit 86f955c
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Enforce 512 byte document ID limit in bulk updates ([#8039](https://github.com/opensearch-project/OpenSearch/pull/8039))
- With only GlobalAggregation in request causes unnecessary wrapping with MultiCollector ([#8125](https://github.com/opensearch-project/OpenSearch/pull/8125))
- Fix mapping char_filter when mapping a hashtag ([#7591](https://github.com/opensearch-project/OpenSearch/pull/7591))
- Fix NPE in multiterms aggregations involving empty buckets ([#7318](https://github.com/opensearch-project/OpenSearch/pull/7318))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -712,3 +712,51 @@ setup:
- match: { aggregations.m_terms.buckets.0.key: ["a", 1] }
- match: { aggregations.m_terms.buckets.0.key_as_string: "a|1" }
- match: { aggregations.m_terms.buckets.0.doc_count: 4 }

---
"aggregate over multi-terms test":
- skip:
version: "- 2.9.99"
reason: "multi_terms aggregation was introduced in 2.1.0, NPE bug checked by this test case will manifest in any version < 3.0"

- do:
bulk:
index: test_1
refresh: true
body:
- '{"index": {}}'
- '{"str": "a", "ip": "127.0.0.1", "date": "2022-03-23"}'
- '{"index": {}}'
- '{"str": "a", "ip": "127.0.0.1", "date": "2022-03-25"}'
- '{"index": {}}'
- '{"str": "b", "ip": "127.0.0.1", "date": "2022-03-23"}'
- '{"index": {}}'
- '{"str": "b", "ip": "127.0.0.1", "date": "2022-03-25"}'

- do:
search:
index: test_1
size: 0
body:
aggs:
histo:
date_histogram:
field: date
calendar_interval: day
aggs:
m_terms:
multi_terms:
terms:
- field: str
- field: ip

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2022-03-23T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.m_terms.buckets.0.key: ["a", "127.0.0.1"] }
- match: { aggregations.histo.buckets.0.m_terms.buckets.1.key: ["b", "127.0.0.1"] }
- match: { aggregations.histo.buckets.1.key_as_string: "2022-03-24T00:00:00.000Z" }
- length: { aggregations.histo.buckets.1.m_terms.buckets: 0 }
- match: { aggregations.histo.buckets.2.key_as_string: "2022-03-25T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.m_terms.buckets.0.key: [ "a", "127.0.0.1" ] }
- match: { aggregations.histo.buckets.2.m_terms.buckets.1.key: [ "b", "127.0.0.1" ] }
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,20 @@ InternalMultiTerms buildResult(long owningBucketOrd, long otherDocCount, Interna

@Override
public InternalAggregation buildEmptyAggregation() {
return null;
return new InternalMultiTerms(
name,
order,
order,
bucketCountThresholds.getRequiredSize(),
bucketCountThresholds.getMinDocCount(),
metadata(),
bucketCountThresholds.getShardSize(),
showTermDocCountError,
0,
0,
formats,
Collections.emptyList()
);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,36 +28,51 @@
import org.opensearch.common.network.InetAddresses;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.time.DateFormatter;
import org.opensearch.common.util.BigArrays;
import org.opensearch.common.util.MockPageCacheRecycler;
import org.opensearch.index.IndexService;
import org.opensearch.index.cache.IndexCache;
import org.opensearch.index.mapper.BooleanFieldMapper;
import org.opensearch.index.mapper.DateFieldMapper;
import org.opensearch.index.mapper.GeoPointFieldMapper;
import org.opensearch.index.mapper.IpFieldMapper;
import org.opensearch.index.mapper.KeywordFieldMapper;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.NumberFieldMapper;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.index.shard.IndexShard;
import org.opensearch.indices.breaker.NoneCircuitBreakerService;
import org.opensearch.script.MockScriptEngine;
import org.opensearch.script.Script;
import org.opensearch.script.ScriptEngine;
import org.opensearch.script.ScriptModule;
import org.opensearch.script.ScriptService;
import org.opensearch.script.ScriptType;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.AggregationBuilder;
import org.opensearch.search.aggregations.Aggregator;
import org.opensearch.search.aggregations.AggregatorFactories;
import org.opensearch.search.aggregations.AggregatorTestCase;
import org.opensearch.search.aggregations.BucketOrder;
import org.opensearch.search.aggregations.CardinalityUpperBound;
import org.opensearch.search.aggregations.InternalAggregation;
import org.opensearch.search.aggregations.metrics.InternalMax;
import org.opensearch.search.aggregations.metrics.MaxAggregationBuilder;
import org.opensearch.search.aggregations.support.CoreValuesSourceType;
import org.opensearch.search.aggregations.support.MultiTermsValuesSourceConfig;
import org.opensearch.search.aggregations.support.ValueType;
import org.opensearch.search.aggregations.support.ValuesSourceType;
import org.opensearch.search.internal.SearchContext;
import org.opensearch.search.lookup.LeafDocLookup;
import org.opensearch.test.TestSearchContext;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.function.Consumer;
import java.util.function.Function;

Expand All @@ -68,8 +83,12 @@
import static java.util.stream.Collectors.toList;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.instanceOf;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class MultiTermsAggregatorTests extends AggregatorTestCase {
private static final String FIELD_NAME = "field";
Expand Down Expand Up @@ -852,6 +871,56 @@ public void testIncludeExclude() throws IOException {
);
}

public void testEmptyAggregations() throws IOException {
QueryShardContext queryShardContext = mock(QueryShardContext.class);
IndexShard indexShard = mock(IndexShard.class);
BigArrays bigArrays = new BigArrays(new MockPageCacheRecycler(Settings.EMPTY), new NoneCircuitBreakerService(), "");
IndexService indexService = mock(IndexService.class);
when(indexService.getShardOrNull(0)).thenReturn(indexShard);
IndexCache cache = mock(IndexCache.class);
when(cache.bitsetFilterCache()).thenReturn(null);
when(indexService.cache()).thenReturn(cache);
SearchContext context = new TestSearchContext(bigArrays, indexService);
when(indexService.newQueryShardContext(0, null, () -> 0L, null)).thenReturn(queryShardContext);
AggregatorFactories factories = AggregatorFactories.EMPTY;
boolean showTermDocCountError = true;
MultiTermsAggregator.InternalValuesSource internalValuesSources = mock(MultiTermsAggregator.InternalValuesSource.class);
DocValueFormat format = mock(DocValueFormat.class);
BucketOrder order = mock(BucketOrder.class);
Aggregator.SubAggCollectionMode collectMode = Aggregator.SubAggCollectionMode.BREADTH_FIRST;
TermsAggregator.BucketCountThresholds bucketCountThresholds = mock(TermsAggregator.BucketCountThresholds.class);
Aggregator parent = mock(Aggregator.class);
CardinalityUpperBound cardinality = CardinalityUpperBound.ONE;
Map<String, Object> metadata = new HashMap<>();
String k1 = UUID.randomUUID().toString();
String v1 = UUID.randomUUID().toString();
metadata.put(k1, v1);

MultiTermsAggregator mAgg = new MultiTermsAggregator(
AGG_NAME,
factories,
showTermDocCountError,
List.of(internalValuesSources),
List.of(format),
order,
collectMode,
bucketCountThresholds,
context,
parent,
cardinality,
metadata
);
InternalAggregation emptyAgg = mAgg.buildEmptyAggregation();

MatcherAssert.assertThat(emptyAgg.getName(), equalTo(AGG_NAME));
MatcherAssert.assertThat(emptyAgg, instanceOf(InternalMultiTerms.class));

InternalMultiTerms mt = (InternalMultiTerms) emptyAgg;
MatcherAssert.assertThat(mt.getMetadata().keySet(), contains(k1));
MatcherAssert.assertThat(mt.getMetadata().get(k1), equalTo(v1));
MatcherAssert.assertThat(mt.getBuckets(), empty());
}

private void testAggregation(
Query query,
List<MultiTermsValuesSourceConfig> terms,
Expand Down

0 comments on commit 86f955c

Please sign in to comment.