Skip to content

Commit

Permalink
Fix a sneaky bug in rare_terms (elastic#51868)
Browse files Browse the repository at this point in the history
When the `rare_terms` aggregation contained another aggregation it would
break them most of the time. This happened because the process that it
uses to remove buckets that turn out not to be rare was incorrectly
merging results from multiple leaves, which caused array-index-out-of-
bounds issues. We didn't catch it in the test because the issue doesn't
happen on the very first bucket, and the tests generated data in such a
way that the first bucket always contained the rare terms. Randomizing
the order of the generated data fixed the test so it caught the issue.

Closes elastic#51020
  • Loading branch information
nik9000 committed Feb 5, 2020
1 parent f8c8a10 commit ce9e325
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -313,4 +313,50 @@ setup:
- match: { hits.total.value: 1 }
- length: { aggregations.long_terms.buckets: 0 }

---
# Regression test for elastic#51020: rare_terms used to break any
# sub-aggregation because pruning of non-rare buckets incorrectly merged
# results from multiple leaves.
"sub aggs":
- skip:
version: " - 7.99.99"
reason: Sub aggs fixed in 8.0 (to be backported to 7.6.1)

# Index two docs sharing the common term "abc" and one doc with the
# rare term "bcd"; refresh each so they are all searchable.
- do:
index:
refresh: true
index: test_1
id: 1
body: { "str" : "abc", "number": 1 }

- do:
index:
refresh: true
index: test_1
id: 2
body: { "str": "abc", "number": 2 }

- do:
index:
refresh: true
index: test_1
id: 3
body: { "str": "bcd", "number": 3 }

# rare_terms with max_doc_count: 1 should keep only the "bcd" bucket,
# and the max sub-aggregation inside it must still be computed.
- do:
search:
body:
size: 0
aggs:
str_terms:
rare_terms:
field: str
max_doc_count: 1
aggs:
max_n:
max:
field: number

# Exactly one rare bucket ("bcd", doc_count 1) whose max_n sub-agg
# picked up that document's number field (3.0).
- match: { hits.total.value: 3 }
- length: { aggregations.str_terms.buckets: 1 }
- match: { aggregations.str_terms.buckets.0.key: "bcd" }
- is_false: aggregations.str_terms.buckets.0.key_as_string
- match: { aggregations.str_terms.buckets.0.doc_count: 1 }
- match: { aggregations.str_terms.buckets.0.max_n.value: 3.0 }
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ public abstract class AbstractRareTermsAggregator<T extends ValuesSource,
protected final U includeExclude;

MergingBucketsDeferringCollector deferringCollector;
LeafBucketCollector subCollectors;
final SetBackedScalingCuckooFilter filter;

AbstractRareTermsAggregator(String name, AggregatorFactories factories, SearchContext context,
Expand Down Expand Up @@ -115,14 +114,14 @@ private String descendsFromNestedAggregator(Aggregator parent) {
return null;
}

protected void doCollect(V val, int docId) throws IOException {
protected void doCollect(LeafBucketCollector subCollector, V val, int docId) throws IOException {
long bucketOrdinal = addValueToOrds(val);

if (bucketOrdinal < 0) { // already seen
bucketOrdinal = -1 - bucketOrdinal;
collectExistingBucket(subCollectors, docId, bucketOrdinal);
collectExistingBucket(subCollector, docId, bucketOrdinal);
} else {
collectBucket(subCollectors, docId, bucketOrdinal);
collectBucket(subCollector, docId, bucketOrdinal);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,6 @@ protected SortedNumericDocValues getValues(ValuesSource.Numeric valuesSource, Le
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
final LeafBucketCollector sub) throws IOException {
final SortedNumericDocValues values = getValues(valuesSource, ctx);
if (subCollectors == null) {
subCollectors = sub;
}
return new LeafBucketCollectorBase(sub, values) {

@Override
Expand All @@ -78,7 +75,7 @@ public void collect(int docId, long owningBucketOrdinal) throws IOException {
final long val = values.nextValue();
if (previous != val || i == 0) {
if ((includeExclude == null) || (includeExclude.accept(val))) {
doCollect(val, docId);
doCollect(sub, val, docId);
}
previous = val;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@ public class StringRareTermsAggregator extends AbstractRareTermsAggregator<Value
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
final LeafBucketCollector sub) throws IOException {
final SortedBinaryDocValues values = valuesSource.bytesValues(ctx);
if (subCollectors == null) {
subCollectors = sub;
}
return new LeafBucketCollectorBase(sub, values) {
final BytesRefBuilder previous = new BytesRefBuilder();

Expand All @@ -84,7 +81,7 @@ public void collect(int docId, long bucket) throws IOException {
continue;
}

doCollect(bytes, docId);
doCollect(sub, bytes, docId);
previous.copyBytes(bytes);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,9 @@ private void executeTestCase(boolean reduced, Query query, List<Long> dataset,
try (Directory directory = newDirectory()) {
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
Document document = new Document();
for (Long value : dataset) {
List<Long> shuffledDataset = new ArrayList<>(dataset);
Collections.shuffle(shuffledDataset, random());
for (Long value : shuffledDataset) {
if (frequently()) {
indexWriter.commit();
}
Expand Down

0 comments on commit ce9e325

Please sign in to comment.