From 89125fce9d502d7641b25e5b05f65bfb8c21ac76 Mon Sep 17 00:00:00 2001 From: dblock Date: Mon, 20 Jun 2022 15:29:26 +0000 Subject: [PATCH 1/8] Upgraded to t-digest 3.3. Signed-off-by: dblock --- server/build.gradle | 2 +- server/licenses/t-digest-3.2.jar.sha1 | 1 - server/licenses/t-digest-3.3.jar.sha1 | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 server/licenses/t-digest-3.2.jar.sha1 create mode 100644 server/licenses/t-digest-3.3.jar.sha1 diff --git a/server/build.gradle b/server/build.gradle index 4490b2ea170cf..9d9d12e798eab 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -119,7 +119,7 @@ dependencies { api "joda-time:joda-time:${versions.joda}" // percentiles aggregation - api 'com.tdunning:t-digest:3.2' + api 'com.tdunning:t-digest:3.3' // precentil ranks aggregation api 'org.hdrhistogram:HdrHistogram:2.1.12' diff --git a/server/licenses/t-digest-3.2.jar.sha1 b/server/licenses/t-digest-3.2.jar.sha1 deleted file mode 100644 index de6e848545f38..0000000000000 --- a/server/licenses/t-digest-3.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -2ab94758b0276a8a26102adf8d528cf6d0567b9a \ No newline at end of file diff --git a/server/licenses/t-digest-3.3.jar.sha1 b/server/licenses/t-digest-3.3.jar.sha1 new file mode 100644 index 0000000000000..79319da60ead6 --- /dev/null +++ b/server/licenses/t-digest-3.3.jar.sha1 @@ -0,0 +1 @@ +5e96c4fd7d63b05828cf5ef41da20649195b1b78 \ No newline at end of file From 3c204344f9574317030de0c0b3b7848f9a1f01fd Mon Sep 17 00:00:00 2001 From: dblock Date: Mon, 20 Jun 2022 16:26:46 +0000 Subject: [PATCH 2/8] Remove centroid count assertion. Signed-off-by: dblock --- .../metrics/InternalTDigestPercentilesRanksTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesRanksTests.java b/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesRanksTests.java index aea81fd5d1c78..a55f99309ae16 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesRanksTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesRanksTests.java @@ -53,7 +53,6 @@ protected InternalTDigestPercentileRanks createTestInstance( final TDigestState state = new TDigestState(100); Arrays.stream(values).forEach(state::add); - assertEquals(state.centroidCount(), values.length); return new InternalTDigestPercentileRanks(name, percents, state, keyed, format, metadata); } From 564aa432c33ed2838cbfb7a26717d4ea7421f39e Mon Sep 17 00:00:00 2001 From: dblock Date: Tue, 21 Jun 2022 15:17:53 +0000 Subject: [PATCH 3/8] The number of centroids is defined as <= the number of samples inserted. Signed-off-by: dblock --- .../metrics/InternalTDigestPercentilesRanksTests.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesRanksTests.java b/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesRanksTests.java index a55f99309ae16..78296eddbdc2c 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesRanksTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesRanksTests.java @@ -53,6 +53,8 @@ protected InternalTDigestPercentileRanks createTestInstance( final TDigestState state = new TDigestState(100); Arrays.stream(values).forEach(state::add); + // the number of centroids is defined as <= the number of samples inserted + assertTrue(state.centroidCount() <= values.length); return new InternalTDigestPercentileRanks(name, percents, state, keyed, format, metadata); } From 37651e9b5fe914a99f0abe0a36e10bd46d958691 Mon Sep 17 00:00:00 2001 From: dblock Date: Tue, 21 Jun 2022 17:50:11 +0000 Subject: [PATCH 4/8] Updated percentiles generated by t-digest 3.3. Signed-off-by: dblock --- .../180_percentiles_tdigest_metric.yml | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml index 9ed414f6b8439..d26b6645e9aca 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml @@ -63,20 +63,23 @@ setup: - match: { aggregations.percentiles_int.values.1\.0: 1.0 } - match: { aggregations.percentiles_int.values.5\.0: 1.0 } - - match: { aggregations.percentiles_int.values.25\.0: 26.0 } - - match: { aggregations.percentiles_int.values.50\.0: 76.0 } - - match: { aggregations.percentiles_int.values.75\.0: 126.0 } + - match: { aggregations.percentiles_int.values.25\.0: 51.0 } + - match: { aggregations.percentiles_int.values.50\.0: 101.0 } + - match: { aggregations.percentiles_int.values.75\.0: 151.0 } - match: { aggregations.percentiles_int.values.95\.0: 151.0 } - match: { aggregations.percentiles_int.values.99\.0: 151.0 } - match: { aggregations.percentiles_double.values.1\.0: 1.0 } - match: { aggregations.percentiles_double.values.5\.0: 1.0 } - - match: { aggregations.percentiles_double.values.25\.0: 26.0 } - - match: { aggregations.percentiles_double.values.50\.0: 76.0 } - - match: { aggregations.percentiles_double.values.75\.0: 126.0 } + - match: { aggregations.percentiles_double.values.25\.0: 51.0 } + - match: { aggregations.percentiles_double.values.50\.0: 101.0 } + - match: { aggregations.percentiles_double.values.75\.0: 151.0 } - match: { aggregations.percentiles_double.values.95\.0: 151.0 } - match: { aggregations.percentiles_double.values.99\.0: 151.0 } +--- +"Compression test": + - do: search: rest_total_hits_as_int: true @@ -135,17 +138,17 @@ setup: - match: { aggregations.percentiles_int.values.1\.0: 1.0 } - match: { aggregations.percentiles_int.values.5\.0: 1.0 } - - match: { aggregations.percentiles_int.values.25\.0: 26.0 } - - match: { aggregations.percentiles_int.values.50\.0: 76.0 } - - match: { aggregations.percentiles_int.values.75\.0: 126.0 } + - match: { aggregations.percentiles_int.values.25\.0: 51.0 } + - match: { aggregations.percentiles_int.values.50\.0: 101.0 } + - match: { aggregations.percentiles_int.values.75\.0: 151.0 } - match: { aggregations.percentiles_int.values.95\.0: 151.0 } - match: { aggregations.percentiles_int.values.99\.0: 151.0 } - match: { aggregations.percentiles_double.values.1\.0: 1.0 } - match: { aggregations.percentiles_double.values.5\.0: 1.0 } - - match: { aggregations.percentiles_double.values.25\.0: 26.0 } - - match: { aggregations.percentiles_double.values.50\.0: 76.0 } - - match: { aggregations.percentiles_double.values.75\.0: 126.0 } + - match: { aggregations.percentiles_double.values.25\.0: 51.0 } + - match: { aggregations.percentiles_double.values.50\.0: 101.0 } + - match: { aggregations.percentiles_double.values.75\.0: 151.0 } - match: { aggregations.percentiles_double.values.95\.0: 151.0 } - match: { aggregations.percentiles_double.values.99\.0: 151.0 } @@ -252,9 +255,9 @@ setup: - match: { aggregations.percentiles_int.values.1\.0: 1.0 } - match: { aggregations.percentiles_int.values.5\.0: 1.0 } - - match: { aggregations.percentiles_int.values.25\.0: 26.0 } - - match: { aggregations.percentiles_int.values.50\.0: 76.0 } - - match: { aggregations.percentiles_int.values.75\.0: 126.0 } + - match: { aggregations.percentiles_int.values.25\.0: 51.0 } + - match: { aggregations.percentiles_int.values.50\.0: 101.0 } + - match: { aggregations.percentiles_int.values.75\.0: 151.0 } - match: { aggregations.percentiles_int.values.95\.0: 151.0 } - match: { aggregations.percentiles_int.values.99\.0: 151.0 } @@ -338,12 +341,12 @@ setup: - length: { hits.hits: 4 } - match: { aggregations.percentiles_int.values.5\.0: 1.0 } - - match: { aggregations.percentiles_int.values.25\.0: 26.0 } - - match: { aggregations.percentiles_int.values.50\.0: 76.0 } + - match: { aggregations.percentiles_int.values.25\.0: 51.0 } + - match: { aggregations.percentiles_int.values.50\.0: 101.0 } - match: { aggregations.percentiles_double.values.5\.0: 1.0 } - - match: { aggregations.percentiles_double.values.25\.0: 26.0 } - - match: { aggregations.percentiles_double.values.50\.0: 76.0 } + - match: { aggregations.percentiles_double.values.25\.0: 51.0 } + - match: { aggregations.percentiles_double.values.50\.0: 101.0 } --- "Non-keyed test": @@ -366,6 +369,6 @@ setup: - match: { aggregations.percentiles_int.values.0.key: 5.0 } - match: { aggregations.percentiles_int.values.0.value: 1.0 } - match: { aggregations.percentiles_int.values.1.key: 25.0 } - - match: { aggregations.percentiles_int.values.1.value: 26.0 } + - match: { aggregations.percentiles_int.values.1.value: 51.0 } - match: { aggregations.percentiles_int.values.2.key: 50.0 } - - match: { aggregations.percentiles_int.values.2.value: 76.0 } + - match: { aggregations.percentiles_int.values.2.value: 101.0 } From 6b0388206edda8c6fa6118bd132bf8b7b6f9c956 Mon Sep 17 00:00:00 2001 From: dblock Date: Tue, 21 Jun 2022 19:05:28 +0000 Subject: [PATCH 5/8] The number of centroids is defined as <= the number of samples inserted. Signed-off-by: dblock --- .../aggregations/metrics/InternalTDigestPercentilesTests.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesTests.java b/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesTests.java index 4d88f8fecd709..101583f1f37c9 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesTests.java @@ -53,7 +53,8 @@ protected InternalTDigestPercentiles createTestInstance( final TDigestState state = new TDigestState(100); Arrays.stream(values).forEach(state::add); - assertEquals(state.centroidCount(), values.length); + // the number of centroids is defined as <= the number of samples inserted + assertTrue(state.centroidCount() <= values.length); return new InternalTDigestPercentiles(name, percents, state, keyed, format, metadata); } From bb9e8f2076d1063e9431e19a722841ae81642620 Mon Sep 17 00:00:00 2001 From: dblock Date: Tue, 21 Jun 2022 19:50:49 +0000 Subject: [PATCH 6/8] Fixed more unit tests. Signed-off-by: dblock --- .../metrics/TDigestPercentilesAggregatorTests.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/server/src/test/java/org/opensearch/search/aggregations/metrics/TDigestPercentilesAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/metrics/TDigestPercentilesAggregatorTests.java index 50415dc10df7e..fd98a090367b2 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/metrics/TDigestPercentilesAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/metrics/TDigestPercentilesAggregatorTests.java @@ -105,8 +105,10 @@ public void testSomeMatchesSortedNumericDocValues() throws IOException { }, tdigest -> { assertEquals(7L, tdigest.state.size()); assertEquals(7L, tdigest.state.centroidCount()); - assertEquals(4.5d, tdigest.percentile(75), 0.0d); - assertEquals("4.5", tdigest.percentileAsString(75)); + assertEquals(5.0d, tdigest.percentile(75), 0.0d); + assertEquals("5.0", tdigest.percentileAsString(75)); + assertEquals(3.0d, tdigest.percentile(71), 0.0d); + assertEquals("3.0", tdigest.percentileAsString(71)); assertEquals(2.0d, tdigest.percentile(50), 0.0d); assertEquals("2.0", tdigest.percentileAsString(50)); assertEquals(1.0d, tdigest.percentile(22), 0.0d); @@ -126,11 +128,11 @@ public void testSomeMatchesNumericDocValues() throws IOException { iw.addDocument(singleton(new NumericDocValuesField("number", 0))); }, tdigest -> { assertEquals(tdigest.state.size(), 7L); - assertEquals(tdigest.state.centroidCount(), 7L); + assertTrue(tdigest.state.centroidCount() <= 7L); assertEquals(8.0d, tdigest.percentile(100), 0.0d); assertEquals("8.0", tdigest.percentileAsString(100)); - assertEquals(6.98d, tdigest.percentile(88), 0.0d); - assertEquals("6.98", tdigest.percentileAsString(88)); + assertEquals(8.0d, tdigest.percentile(88), 0.0d); + assertEquals("8.0", tdigest.percentileAsString(88)); assertEquals(1.0d, tdigest.percentile(33), 0.0d); assertEquals("1.0", tdigest.percentileAsString(33)); assertEquals(1.0d, tdigest.percentile(25), 0.0d); @@ -157,7 +159,7 @@ public void testQueryFiltering() throws IOException { assertEquals(4L, tdigest.state.centroidCount()); assertEquals(2.0d, tdigest.percentile(100), 0.0d); assertEquals(1.0d, tdigest.percentile(50), 0.0d); - assertEquals(0.5d, tdigest.percentile(25), 0.0d); + assertEquals(1.0d, tdigest.percentile(25), 0.0d); assertTrue(AggregationInspectionHelper.hasValue(tdigest)); }); From 78e4c08087ded8018dbc91ec0fad5a4a9804bbef Mon Sep 17 00:00:00 2001 From: dblock Date: Tue, 21 Jun 2022 22:02:16 +0000 Subject: [PATCH 7/8] Experimenting with flipping these back? Signed-off-by: dblock --- .../180_percentiles_tdigest_metric.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml index d26b6645e9aca..a523614de4f3d 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml @@ -102,17 +102,17 @@ setup: - match: { aggregations.percentiles_int.values.1\.0: 1.0 } - match: { aggregations.percentiles_int.values.5\.0: 1.0 } - - match: { aggregations.percentiles_int.values.25\.0: 26.0 } - - match: { aggregations.percentiles_int.values.50\.0: 76.0 } - - match: { aggregations.percentiles_int.values.75\.0: 126.0 } + - match: { aggregations.percentiles_int.values.25\.0: 51.0 } + - match: { aggregations.percentiles_int.values.50\.0: 101.0 } + - match: { aggregations.percentiles_int.values.75\.0: 151.0 } - match: { aggregations.percentiles_int.values.95\.0: 151.0 } - match: { aggregations.percentiles_int.values.99\.0: 151.0 } - match: { aggregations.percentiles_double.values.1\.0: 1.0 } - match: { aggregations.percentiles_double.values.5\.0: 1.0 } - - match: { aggregations.percentiles_double.values.25\.0: 26.0 } - - match: { aggregations.percentiles_double.values.50\.0: 76.0 } - - match: { aggregations.percentiles_double.values.75\.0: 126.0 } + - match: { aggregations.percentiles_double.values.25\.0: 51.0 } + - match: { aggregations.percentiles_double.values.50\.0: 101.0 } + - match: { aggregations.percentiles_double.values.75\.0: 151.0 } - match: { aggregations.percentiles_double.values.95\.0: 151.0 } - match: { aggregations.percentiles_double.values.99\.0: 151.0 } From 83be47b42c85548c17305e1501f37f26f2f842cf Mon Sep 17 00:00:00 2001 From: dblock Date: Wed, 22 Jun 2022 15:28:32 +0000 Subject: [PATCH 8/8] Select a 2.x vs. a 3.x node with predictable results. Signed-off-by: dblock --- .../180_percentiles_tdigest_metric.yml | 104 ++++++++++++++++-- 1 file changed, 95 insertions(+), 9 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml index a523614de4f3d..53d0ed1b2d05f 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml @@ -44,9 +44,57 @@ setup: string_field: foo --- -"Basic test": +"Basic 2.x test": + + - skip: + version: "3.0.0 -" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" - do: + node_selector: + version: "- 2.9.99" + search: + rest_total_hits_as_int: true + body: + aggs: + percentiles_int: + percentiles: + field: int_field + percentiles_double: + percentiles: + field: double_field + + - match: { hits.total: 4 } + - length: { hits.hits: 4 } + + - match: { aggregations.percentiles_int.values.1\.0: 1.0 } + - match: { aggregations.percentiles_int.values.5\.0: 1.0 } + - match: { aggregations.percentiles_int.values.25\.0: 26.0 } + - match: { aggregations.percentiles_int.values.50\.0: 76.0 } + - match: { aggregations.percentiles_int.values.75\.0: 126.0 } + - match: { aggregations.percentiles_int.values.95\.0: 151.0 } + - match: { aggregations.percentiles_int.values.99\.0: 151.0 } + + - match: { aggregations.percentiles_double.values.1\.0: 1.0 } + - match: { aggregations.percentiles_double.values.5\.0: 1.0 } + - match: { aggregations.percentiles_double.values.25\.0: 26.0 } + - match: { aggregations.percentiles_double.values.50\.0: 76.0 } + - match: { aggregations.percentiles_double.values.75\.0: 126.0 } + - match: { aggregations.percentiles_double.values.95\.0: 151.0 } + - match: { aggregations.percentiles_double.values.99\.0: 151.0 } + +--- +"Basic 3.x test": + + - skip: + version: "- 2.9.99" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" + + - do: + node_selector: + version: "3.0.0 -" search: rest_total_hits_as_int: true body: @@ -80,7 +128,14 @@ setup: --- "Compression test": + - skip: + version: "- 2.9.99" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" + - do: + node_selector: + version: "3.0.0 -" search: rest_total_hits_as_int: true body: @@ -96,7 +151,6 @@ setup: tdigest: compression: 200 - - match: { hits.total: 4 } - length: { hits.hits: 4 } @@ -116,11 +170,17 @@ setup: - match: { aggregations.percentiles_double.values.95\.0: 151.0 } - match: { aggregations.percentiles_double.values.99\.0: 151.0 } - --- "Only aggs test": + - skip: + version: "- 2.9.99" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" + - do: + node_selector: + version: "3.0.0 -" search: rest_total_hits_as_int: true body: @@ -152,12 +212,17 @@ setup: - match: { aggregations.percentiles_double.values.95\.0: 151.0 } - match: { aggregations.percentiles_double.values.99\.0: 151.0 } - - --- "Filtered test": + - skip: + version: "- 2.9.99" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" + - do: + node_selector: + version: "3.0.0 -" search: rest_total_hits_as_int: true body: @@ -180,17 +245,17 @@ setup: - match: { aggregations.percentiles_int.values.1\.0: 51.0 } - match: { aggregations.percentiles_int.values.5\.0: 51.0 } - - match: { aggregations.percentiles_int.values.25\.0: 63.5 } + - match: { aggregations.percentiles_int.values.25\.0: 51.0 } - match: { aggregations.percentiles_int.values.50\.0: 101.0 } - - match: { aggregations.percentiles_int.values.75\.0: 138.5 } + - match: { aggregations.percentiles_int.values.75\.0: 151.0 } - match: { aggregations.percentiles_int.values.95\.0: 151.0 } - match: { aggregations.percentiles_int.values.99\.0: 151.0 } - match: { aggregations.percentiles_double.values.1\.0: 51.0 } - match: { aggregations.percentiles_double.values.5\.0: 51.0 } - - match: { aggregations.percentiles_double.values.25\.0: 63.5 } + - match: { aggregations.percentiles_double.values.25\.0: 51.0 } - match: { aggregations.percentiles_double.values.50\.0: 101.0 } - - match: { aggregations.percentiles_double.values.75\.0: 138.5 } + - match: { aggregations.percentiles_double.values.75\.0: 151.0 } - match: { aggregations.percentiles_double.values.95\.0: 151.0 } - match: { aggregations.percentiles_double.values.99\.0: 151.0 } @@ -237,7 +302,14 @@ setup: --- "Metadata test": + - skip: + version: "- 2.9.99" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" + - do: + node_selector: + version: "3.0.0 -" search: rest_total_hits_as_int: true body: @@ -322,7 +394,14 @@ setup: --- "Explicit Percents test": + - skip: + version: "- 2.9.99" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" + - do: + node_selector: + version: "3.0.0 -" search: rest_total_hits_as_int: true body: @@ -351,7 +430,14 @@ setup: --- "Non-keyed test": + - skip: + version: "- 2.9.99" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" + - do: + node_selector: + version: "3.0.0 -" search: rest_total_hits_as_int: true body: