Skip to content

Commit

Permalink
Correctly calculate doc_count_error at the slice level for concurrent…
Browse files Browse the repository at this point in the history
… segment search. Change slice_size heuristic to be equal to shard_size. (opensearch-project#11732)

Signed-off-by: Jay Deng <jayd0104@gmail.com>
  • Loading branch information
jed326 authored Jan 11, 2024
1 parent bbe790b commit b042688
Show file tree
Hide file tree
Showing 7 changed files with 403 additions and 21 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Restore support for Java 8 for RestClient ([#11562](https://github.com/opensearch-project/OpenSearch/pull/11562))
- Add deleted doc count in _cat/shards ([#11678](https://github.com/opensearch-project/OpenSearch/pull/11678))
- Capture information for additional query types and aggregation types ([#11582](https://github.com/opensearch-project/OpenSearch/pull/11582))
- Use slice_size == shard_size heuristic in terms aggs for concurrent segment search and properly calculate the doc_count_error ([#11732](https://github.com/opensearch-project/OpenSearch/pull/11732))

### Deprecated

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ public void testShardSizeEqualsSizeString() throws Exception {
terms("keys").field("key")
.size(3)
.shardSize(3)
.showTermDocCountError(true)
.collectMode(randomFrom(SubAggCollectionMode.values()))
.order(BucketOrder.count(false))
)
Expand All @@ -98,8 +99,11 @@ public void testShardSizeEqualsSizeString() throws Exception {
expected.put("1", 8L);
expected.put("3", 8L);
expected.put("2", 4L);
Long expectedDocCount;
for (Terms.Bucket bucket : buckets) {
assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsString())));
expectedDocCount = expected.get(bucket.getKeyAsString());
// Doc count can vary when using concurrent segment search. See https://github.com/opensearch-project/OpenSearch/issues/11680
assertTrue((bucket.getDocCount() == expectedDocCount) || bucket.getDocCount() + bucket.getDocCountError() >= expectedDocCount);
}
}

Expand Down Expand Up @@ -221,6 +225,7 @@ public void testShardSizeEqualsSizeLong() throws Exception {
terms("keys").field("key")
.size(3)
.shardSize(3)
.showTermDocCountError(true)
.collectMode(randomFrom(SubAggCollectionMode.values()))
.order(BucketOrder.count(false))
)
Expand All @@ -233,8 +238,11 @@ public void testShardSizeEqualsSizeLong() throws Exception {
expected.put(1, 8L);
expected.put(3, 8L);
expected.put(2, 4L);
Long expectedDocCount;
for (Terms.Bucket bucket : buckets) {
assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsNumber().intValue())));
expectedDocCount = expected.get(bucket.getKeyAsNumber().intValue());
// Doc count can vary when using concurrent segment search. See https://github.com/opensearch-project/OpenSearch/issues/11680
assertTrue((bucket.getDocCount() == expectedDocCount) || bucket.getDocCount() + bucket.getDocCountError() >= expectedDocCount);
}
}

Expand Down Expand Up @@ -355,6 +363,7 @@ public void testShardSizeEqualsSizeDouble() throws Exception {
terms("keys").field("key")
.size(3)
.shardSize(3)
.showTermDocCountError(true)
.collectMode(randomFrom(SubAggCollectionMode.values()))
.order(BucketOrder.count(false))
)
Expand All @@ -367,8 +376,11 @@ public void testShardSizeEqualsSizeDouble() throws Exception {
expected.put(1, 8L);
expected.put(3, 8L);
expected.put(2, 4L);
Long expectedDocCount;
for (Terms.Bucket bucket : buckets) {
assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsNumber().intValue())));
expectedDocCount = expected.get(bucket.getKeyAsNumber().intValue());
// Doc count can vary when using concurrent segment search. See https://github.com/opensearch-project/OpenSearch/issues/11680
assertTrue((bucket.getDocCount() == expectedDocCount) || bucket.getDocCount() + bucket.getDocCountError() >= expectedDocCount);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,16 @@ public void setupSuiteScopeCluster() throws Exception {
}

indexRandom(true, builders);
indexRandomForMultipleSlices("idx");
ensureSearchable();

// Force merge each shard down to 1 segment so that results are the same between the concurrent and non-concurrent search paths.
// Otherwise, concurrent segment search introduces additional error during the slice-level reduce, so different buckets,
// doc_counts, and doc_count_errors may be returned. This test verifies that the doc_count_error is the same between
// concurrent and non-concurrent search in the 1 slice case. TermsFixedDocCountErrorIT verifies that the doc count error is
// correctly calculated for concurrent segment search at the slice level.
// See https://github.com/opensearch-project/OpenSearch/issues/11680
forceMerge(1);
Thread.sleep(5000); // Sleep 5s to ensure force merge completes
}

private void assertDocCountErrorWithinBounds(int size, SearchResponse accurateResponse, SearchResponse testResponse) {
Expand Down
Loading

0 comments on commit b042688

Please sign in to comment.