Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgraded to t-digest 3.3. #3634

Merged
merged 8 commits into from
Jun 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,16 @@ setup:
string_field: foo

---
"Basic test":
"Basic 2.x test":

- skip:
version: "3.0.0 -"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "- 2.9.99"
search:
rest_total_hits_as_int: true
body:
Expand Down Expand Up @@ -77,7 +84,58 @@ setup:
- match: { aggregations.percentiles_double.values.95\.0: 151.0 }
- match: { aggregations.percentiles_double.values.99\.0: 151.0 }

---
"Basic 3.x test":

- skip:
version: "- 2.9.99"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "3.0.0 -"
search:
rest_total_hits_as_int: true
body:
aggs:
percentiles_int:
percentiles:
field: int_field
percentiles_double:
percentiles:
field: double_field

- match: { hits.total: 4 }
- length: { hits.hits: 4 }

- match: { aggregations.percentiles_int.values.1\.0: 1.0 }
- match: { aggregations.percentiles_int.values.5\.0: 1.0 }
- match: { aggregations.percentiles_int.values.25\.0: 51.0 }
- match: { aggregations.percentiles_int.values.50\.0: 101.0 }
- match: { aggregations.percentiles_int.values.75\.0: 151.0 }
- match: { aggregations.percentiles_int.values.95\.0: 151.0 }
- match: { aggregations.percentiles_int.values.99\.0: 151.0 }

- match: { aggregations.percentiles_double.values.1\.0: 1.0 }
- match: { aggregations.percentiles_double.values.5\.0: 1.0 }
- match: { aggregations.percentiles_double.values.25\.0: 51.0 }
- match: { aggregations.percentiles_double.values.50\.0: 101.0 }
- match: { aggregations.percentiles_double.values.75\.0: 151.0 }
- match: { aggregations.percentiles_double.values.95\.0: 151.0 }
- match: { aggregations.percentiles_double.values.99\.0: 151.0 }

---
"Compression test":

- skip:
version: "- 2.9.99"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "3.0.0 -"
search:
rest_total_hits_as_int: true
body:
Expand All @@ -93,31 +151,36 @@ setup:
tdigest:
compression: 200


- match: { hits.total: 4 }
- length: { hits.hits: 4 }

- match: { aggregations.percentiles_int.values.1\.0: 1.0 }
- match: { aggregations.percentiles_int.values.5\.0: 1.0 }
- match: { aggregations.percentiles_int.values.25\.0: 26.0 }
- match: { aggregations.percentiles_int.values.50\.0: 76.0 }
- match: { aggregations.percentiles_int.values.75\.0: 126.0 }
- match: { aggregations.percentiles_int.values.25\.0: 51.0 }
- match: { aggregations.percentiles_int.values.50\.0: 101.0 }
- match: { aggregations.percentiles_int.values.75\.0: 151.0 }
- match: { aggregations.percentiles_int.values.95\.0: 151.0 }
- match: { aggregations.percentiles_int.values.99\.0: 151.0 }

- match: { aggregations.percentiles_double.values.1\.0: 1.0 }
- match: { aggregations.percentiles_double.values.5\.0: 1.0 }
- match: { aggregations.percentiles_double.values.25\.0: 26.0 }
- match: { aggregations.percentiles_double.values.50\.0: 76.0 }
- match: { aggregations.percentiles_double.values.75\.0: 126.0 }
- match: { aggregations.percentiles_double.values.25\.0: 51.0 }
- match: { aggregations.percentiles_double.values.50\.0: 101.0 }
- match: { aggregations.percentiles_double.values.75\.0: 151.0 }
- match: { aggregations.percentiles_double.values.95\.0: 151.0 }
- match: { aggregations.percentiles_double.values.99\.0: 151.0 }


---
"Only aggs test":

- skip:
version: "- 2.9.99"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "3.0.0 -"
search:
rest_total_hits_as_int: true
body:
Expand All @@ -135,26 +198,31 @@ setup:

- match: { aggregations.percentiles_int.values.1\.0: 1.0 }
- match: { aggregations.percentiles_int.values.5\.0: 1.0 }
- match: { aggregations.percentiles_int.values.25\.0: 26.0 }
- match: { aggregations.percentiles_int.values.50\.0: 76.0 }
- match: { aggregations.percentiles_int.values.75\.0: 126.0 }
- match: { aggregations.percentiles_int.values.25\.0: 51.0 }
- match: { aggregations.percentiles_int.values.50\.0: 101.0 }
- match: { aggregations.percentiles_int.values.75\.0: 151.0 }
- match: { aggregations.percentiles_int.values.95\.0: 151.0 }
- match: { aggregations.percentiles_int.values.99\.0: 151.0 }

- match: { aggregations.percentiles_double.values.1\.0: 1.0 }
- match: { aggregations.percentiles_double.values.5\.0: 1.0 }
- match: { aggregations.percentiles_double.values.25\.0: 26.0 }
- match: { aggregations.percentiles_double.values.50\.0: 76.0 }
- match: { aggregations.percentiles_double.values.75\.0: 126.0 }
- match: { aggregations.percentiles_double.values.25\.0: 51.0 }
- match: { aggregations.percentiles_double.values.50\.0: 101.0 }
- match: { aggregations.percentiles_double.values.75\.0: 151.0 }
- match: { aggregations.percentiles_double.values.95\.0: 151.0 }
- match: { aggregations.percentiles_double.values.99\.0: 151.0 }



---
"Filtered test":

- skip:
version: "- 2.9.99"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "3.0.0 -"
search:
rest_total_hits_as_int: true
body:
Expand All @@ -177,17 +245,17 @@ setup:

- match: { aggregations.percentiles_int.values.1\.0: 51.0 }
- match: { aggregations.percentiles_int.values.5\.0: 51.0 }
- match: { aggregations.percentiles_int.values.25\.0: 63.5 }
- match: { aggregations.percentiles_int.values.25\.0: 51.0 }
- match: { aggregations.percentiles_int.values.50\.0: 101.0 }
- match: { aggregations.percentiles_int.values.75\.0: 138.5 }
- match: { aggregations.percentiles_int.values.75\.0: 151.0 }
- match: { aggregations.percentiles_int.values.95\.0: 151.0 }
- match: { aggregations.percentiles_int.values.99\.0: 151.0 }

- match: { aggregations.percentiles_double.values.1\.0: 51.0 }
- match: { aggregations.percentiles_double.values.5\.0: 51.0 }
- match: { aggregations.percentiles_double.values.25\.0: 63.5 }
- match: { aggregations.percentiles_double.values.25\.0: 51.0 }
- match: { aggregations.percentiles_double.values.50\.0: 101.0 }
- match: { aggregations.percentiles_double.values.75\.0: 138.5 }
- match: { aggregations.percentiles_double.values.75\.0: 151.0 }
- match: { aggregations.percentiles_double.values.95\.0: 151.0 }
- match: { aggregations.percentiles_double.values.99\.0: 151.0 }

Expand Down Expand Up @@ -234,7 +302,14 @@ setup:
---
"Metadata test":

- skip:
version: "- 2.9.99"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "3.0.0 -"
search:
rest_total_hits_as_int: true
body:
Expand All @@ -252,9 +327,9 @@ setup:

- match: { aggregations.percentiles_int.values.1\.0: 1.0 }
- match: { aggregations.percentiles_int.values.5\.0: 1.0 }
- match: { aggregations.percentiles_int.values.25\.0: 26.0 }
- match: { aggregations.percentiles_int.values.50\.0: 76.0 }
- match: { aggregations.percentiles_int.values.75\.0: 126.0 }
- match: { aggregations.percentiles_int.values.25\.0: 51.0 }
- match: { aggregations.percentiles_int.values.50\.0: 101.0 }
- match: { aggregations.percentiles_int.values.75\.0: 151.0 }
- match: { aggregations.percentiles_int.values.95\.0: 151.0 }
- match: { aggregations.percentiles_int.values.99\.0: 151.0 }

Expand Down Expand Up @@ -319,7 +394,14 @@ setup:
---
"Explicit Percents test":

- skip:
version: "- 2.9.99"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "3.0.0 -"
search:
rest_total_hits_as_int: true
body:
Expand All @@ -338,17 +420,24 @@ setup:
- length: { hits.hits: 4 }

- match: { aggregations.percentiles_int.values.5\.0: 1.0 }
- match: { aggregations.percentiles_int.values.25\.0: 26.0 }
- match: { aggregations.percentiles_int.values.50\.0: 76.0 }
- match: { aggregations.percentiles_int.values.25\.0: 51.0 }
- match: { aggregations.percentiles_int.values.50\.0: 101.0 }

- match: { aggregations.percentiles_double.values.5\.0: 1.0 }
- match: { aggregations.percentiles_double.values.25\.0: 26.0 }
- match: { aggregations.percentiles_double.values.50\.0: 76.0 }
- match: { aggregations.percentiles_double.values.25\.0: 51.0 }
- match: { aggregations.percentiles_double.values.50\.0: 101.0 }

---
"Non-keyed test":

- skip:
version: "- 2.9.99"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "3.0.0 -"
search:
rest_total_hits_as_int: true
body:
Expand All @@ -366,6 +455,6 @@ setup:
- match: { aggregations.percentiles_int.values.0.key: 5.0 }
- match: { aggregations.percentiles_int.values.0.value: 1.0 }
- match: { aggregations.percentiles_int.values.1.key: 25.0 }
- match: { aggregations.percentiles_int.values.1.value: 26.0 }
- match: { aggregations.percentiles_int.values.1.value: 51.0 }
- match: { aggregations.percentiles_int.values.2.key: 50.0 }
- match: { aggregations.percentiles_int.values.2.value: 76.0 }
- match: { aggregations.percentiles_int.values.2.value: 101.0 }
2 changes: 1 addition & 1 deletion server/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ dependencies {
api "joda-time:joda-time:${versions.joda}"

// percentiles aggregation
api 'com.tdunning:t-digest:3.2'
api 'com.tdunning:t-digest:3.3'
// precentil ranks aggregation
api 'org.hdrhistogram:HdrHistogram:2.1.12'

Expand Down
1 change: 0 additions & 1 deletion server/licenses/t-digest-3.2.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/t-digest-3.3.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
5e96c4fd7d63b05828cf5ef41da20649195b1b78
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ protected InternalTDigestPercentileRanks createTestInstance(
final TDigestState state = new TDigestState(100);
Arrays.stream(values).forEach(state::add);

assertEquals(state.centroidCount(), values.length);
// the number of centroids is defined as <= the number of samples inserted
assertTrue(state.centroidCount() <= values.length);
return new InternalTDigestPercentileRanks(name, percents, state, keyed, format, metadata);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ protected InternalTDigestPercentiles createTestInstance(
final TDigestState state = new TDigestState(100);
Arrays.stream(values).forEach(state::add);

assertEquals(state.centroidCount(), values.length);
// the number of centroids is defined as <= the number of samples inserted
assertTrue(state.centroidCount() <= values.length);
return new InternalTDigestPercentiles(name, percents, state, keyed, format, metadata);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,10 @@ public void testSomeMatchesSortedNumericDocValues() throws IOException {
}, tdigest -> {
assertEquals(7L, tdigest.state.size());
assertEquals(7L, tdigest.state.centroidCount());
assertEquals(4.5d, tdigest.percentile(75), 0.0d);
assertEquals("4.5", tdigest.percentileAsString(75));
assertEquals(5.0d, tdigest.percentile(75), 0.0d);
assertEquals("5.0", tdigest.percentileAsString(75));
assertEquals(3.0d, tdigest.percentile(71), 0.0d);
assertEquals("3.0", tdigest.percentileAsString(71));
assertEquals(2.0d, tdigest.percentile(50), 0.0d);
assertEquals("2.0", tdigest.percentileAsString(50));
assertEquals(1.0d, tdigest.percentile(22), 0.0d);
Expand All @@ -126,11 +128,11 @@ public void testSomeMatchesNumericDocValues() throws IOException {
iw.addDocument(singleton(new NumericDocValuesField("number", 0)));
}, tdigest -> {
assertEquals(tdigest.state.size(), 7L);
assertEquals(tdigest.state.centroidCount(), 7L);
assertTrue(tdigest.state.centroidCount() <= 7L);
assertEquals(8.0d, tdigest.percentile(100), 0.0d);
assertEquals("8.0", tdigest.percentileAsString(100));
assertEquals(6.98d, tdigest.percentile(88), 0.0d);
assertEquals("6.98", tdigest.percentileAsString(88));
assertEquals(8.0d, tdigest.percentile(88), 0.0d);
assertEquals("8.0", tdigest.percentileAsString(88));
assertEquals(1.0d, tdigest.percentile(33), 0.0d);
assertEquals("1.0", tdigest.percentileAsString(33));
assertEquals(1.0d, tdigest.percentile(25), 0.0d);
Expand All @@ -157,7 +159,7 @@ public void testQueryFiltering() throws IOException {
assertEquals(4L, tdigest.state.centroidCount());
assertEquals(2.0d, tdigest.percentile(100), 0.0d);
assertEquals(1.0d, tdigest.percentile(50), 0.0d);
assertEquals(0.5d, tdigest.percentile(25), 0.0d);
assertEquals(1.0d, tdigest.percentile(25), 0.0d);
assertTrue(AggregationInspectionHelper.hasValue(tdigest));
});

Expand Down