Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly calculate doc count error at the slice level for concurrent segment search #11732

Merged
merged 1 commit into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Restore support for Java 8 for RestClient ([#11562](https://github.com/opensearch-project/OpenSearch/pull/11562))
- Add deleted doc count in _cat/shards ([#11678](https://github.com/opensearch-project/OpenSearch/pull/11678))
- Capture information for additional query types and aggregation types ([#11582](https://github.com/opensearch-project/OpenSearch/pull/11582))
- Use slice_size == shard_size heuristic in terms aggs for concurrent segment search and properly calculate the doc_count_error ([#11732](https://github.com/opensearch-project/OpenSearch/pull/11732))

### Deprecated

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ public void testShardSizeEqualsSizeString() throws Exception {
terms("keys").field("key")
.size(3)
.shardSize(3)
.showTermDocCountError(true)
.collectMode(randomFrom(SubAggCollectionMode.values()))
.order(BucketOrder.count(false))
)
Expand All @@ -98,8 +99,11 @@ public void testShardSizeEqualsSizeString() throws Exception {
expected.put("1", 8L);
expected.put("3", 8L);
expected.put("2", 4L);
Long expectedDocCount;
for (Terms.Bucket bucket : buckets) {
assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsString())));
expectedDocCount = expected.get(bucket.getKeyAsString());
// Doc count can vary when using concurrent segment search. See https://github.com/opensearch-project/OpenSearch/issues/11680
assertTrue((bucket.getDocCount() == expectedDocCount) || bucket.getDocCount() + bucket.getDocCountError() >= expectedDocCount);
}
}

Expand Down Expand Up @@ -221,6 +225,7 @@ public void testShardSizeEqualsSizeLong() throws Exception {
terms("keys").field("key")
.size(3)
.shardSize(3)
.showTermDocCountError(true)
.collectMode(randomFrom(SubAggCollectionMode.values()))
.order(BucketOrder.count(false))
)
Expand All @@ -233,8 +238,11 @@ public void testShardSizeEqualsSizeLong() throws Exception {
expected.put(1, 8L);
expected.put(3, 8L);
expected.put(2, 4L);
Long expectedDocCount;
for (Terms.Bucket bucket : buckets) {
assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsNumber().intValue())));
expectedDocCount = expected.get(bucket.getKeyAsNumber().intValue());
// Doc count can vary when using concurrent segment search. See https://github.com/opensearch-project/OpenSearch/issues/11680
assertTrue((bucket.getDocCount() == expectedDocCount) || bucket.getDocCount() + bucket.getDocCountError() >= expectedDocCount);
}
}

Expand Down Expand Up @@ -355,6 +363,7 @@ public void testShardSizeEqualsSizeDouble() throws Exception {
terms("keys").field("key")
.size(3)
.shardSize(3)
.showTermDocCountError(true)
.collectMode(randomFrom(SubAggCollectionMode.values()))
.order(BucketOrder.count(false))
)
Expand All @@ -367,8 +376,11 @@ public void testShardSizeEqualsSizeDouble() throws Exception {
expected.put(1, 8L);
expected.put(3, 8L);
expected.put(2, 4L);
Long expectedDocCount;
for (Terms.Bucket bucket : buckets) {
assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsNumber().intValue())));
expectedDocCount = expected.get(bucket.getKeyAsNumber().intValue());
// Doc count can vary when using concurrent segment search. See https://github.com/opensearch-project/OpenSearch/issues/11680
assertTrue((bucket.getDocCount() == expectedDocCount) || bucket.getDocCount() + bucket.getDocCountError() >= expectedDocCount);
jed326 marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,16 @@ public void setupSuiteScopeCluster() throws Exception {
}

indexRandom(true, builders);
indexRandomForMultipleSlices("idx");
ensureSearchable();

// Force merge each shard down to 1 segment to verify results are the same between concurrent and non-concurrent search paths, else
// for concurrent segment search there will be additional error introduced during the slice level reduce and thus different buckets,
// doc_counts, and doc_count_errors may be returned. This test serves to verify that the doc_count_error is the same between
// concurrent and non-concurrent search in the 1 slice case. TermsFixedDocCountErrorIT verifies that the doc count error is
// correctly calculated for concurrent segment search at the slice level.
// See https://github.com/opensearch-project/OpenSearch/issues/11680"
forceMerge(1);
Thread.sleep(5000); // Sleep 5s to ensure force merge completes
}

private void assertDocCountErrorWithinBounds(int size, SearchResponse accurateResponse, SearchResponse testResponse) {
Expand Down
Loading
Loading