From 8138702a033fa8b47ff956a5843c897306aca38e Mon Sep 17 00:00:00 2001
From: Rishikesh1159
Date: Tue, 6 Sep 2022 19:22:57 +0000
Subject: [PATCH 1/3] Fix timeout issue by calculating time needed to process getSegmentFiles.

Signed-off-by: Rishikesh1159
---
 CHANGELOG.md                           |  3 ++-
 .../PrimaryShardReplicationSource.java | 23 ++++++++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d04b754531b0e..c11886066ac2c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -47,6 +47,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - Fix NoSuchFileExceptions with segment replication when computing primary metadata snapshots ([#4366](https://github.com/opensearch-project/OpenSearch/pull/4366))
 - [Segment Replication] Update flaky testOnNewCheckpointFromNewPrimaryCancelOngoingReplication unit test ([#4414](https://github.com/opensearch-project/OpenSearch/pull/4414))
 - Fixed the `_cat/shards/10_basic.yml` test cases fix.
+- Fix timeout issue by calculating time needed to process getSegmentFiles
 
 ### Security
 - CVE-2022-25857 org.yaml:snakeyaml DOS vulnerability ([#4341](https://github.com/opensearch-project/OpenSearch/pull/4341))
@@ -71,4 +72,4 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 
 
 [Unreleased]: https://github.com/opensearch-project/OpenSearch/compare/2.2.0...HEAD
-[2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.2.0...2.x
\ No newline at end of file
+[2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.2.0...2.x
diff --git a/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java
index aa0b5416dd0ff..76919186be63e 100644
--- a/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java
+++ b/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java
@@ -13,11 +13,13 @@
 import org.opensearch.action.ActionListener;
 import org.opensearch.cluster.node.DiscoveryNode;
 import org.opensearch.common.io.stream.Writeable;
+import org.opensearch.common.unit.TimeValue;
 import org.opensearch.index.store.Store;
 import org.opensearch.index.store.StoreFileMetadata;
 import org.opensearch.indices.recovery.RecoverySettings;
 import org.opensearch.indices.recovery.RetryableTransportClient;
 import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint;
+import org.opensearch.transport.TransportRequestOptions;
 import org.opensearch.transport.TransportService;
 
 import java.util.List;
@@ -78,6 +80,22 @@ public void getSegmentFiles(
     ) {
         final Writeable.Reader reader = GetSegmentFilesResponse::new;
         final ActionListener responseListener = ActionListener.map(listener, r -> r);
+        // Few of the below assumptions and calculations are added for experimental release of segment replication feature in 2.3
+        // version.These will be changed in next release.
+
+        // Storing the size of files to fetch in bytes.
+        long sizeOfSegmentFiles = 0;
+        for (int i = 0; i < filesToFetch.size(); i++) {
+            sizeOfSegmentFiles += filesToFetch.get(i).length();
+        }
+        // Making sure files size is in correct format to perform time calculation.
+        sizeOfSegmentFiles = Math.abs(sizeOfSegmentFiles);
+        // Maximum size of files to fetch (segment files), that can be processed in 1 minute for a m5.xlarge machine.
+        long baseSegmentFilesSize = 300000000;
+
+        long timeToGetSegmentFiles = 1;
+        // Formula for calculating time needed to process a replication event's files to fetch process
+        timeToGetSegmentFiles += sizeOfSegmentFiles / baseSegmentFilesSize;
         final GetSegmentFilesRequest request = new GetSegmentFilesRequest(
             replicationId,
             targetAllocationId,
@@ -85,7 +103,10 @@ public void getSegmentFiles(
             filesToFetch,
             checkpoint
         );
-        transportClient.executeRetryableAction(GET_SEGMENT_FILES, request, responseListener, reader);
+        final TransportRequestOptions options = TransportRequestOptions.builder()
+            .withTimeout(TimeValue.timeValueMinutes(timeToGetSegmentFiles))
+            .build();
+        transportClient.executeRetryableAction(GET_SEGMENT_FILES, request, options, responseListener, reader);
     }
 
     @Override

From 3fb0e25c623725d4bba0ae0b3eb05ee8c22ae6e8 Mon Sep 17 00:00:00 2001
From: Rishikesh1159
Date: Tue, 6 Sep 2022 19:45:47 +0000
Subject: [PATCH 2/3] Formatting sizeOfSegmentFiles for time calculation.

Signed-off-by: Rishikesh1159
---
 CHANGELOG.md                                                | 2 +-
 .../indices/replication/PrimaryShardReplicationSource.java | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c11886066ac2c..900c52b81f891 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -47,7 +47,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - Fix NoSuchFileExceptions with segment replication when computing primary metadata snapshots ([#4366](https://github.com/opensearch-project/OpenSearch/pull/4366))
 - [Segment Replication] Update flaky testOnNewCheckpointFromNewPrimaryCancelOngoingReplication unit test ([#4414](https://github.com/opensearch-project/OpenSearch/pull/4414))
 - Fixed the `_cat/shards/10_basic.yml` test cases fix.
-- Fix timeout issue by calculating time needed to process getSegmentFiles
+- [Segment Replication] Fix timeout issue by calculating time needed to process getSegmentFiles ([#4426](https://github.com/opensearch-project/OpenSearch/pull/4426))
 
 ### Security
 - CVE-2022-25857 org.yaml:snakeyaml DOS vulnerability ([#4341](https://github.com/opensearch-project/OpenSearch/pull/4341))
diff --git a/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java
index 76919186be63e..7ade588638c0d 100644
--- a/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java
+++ b/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java
@@ -89,7 +89,9 @@ public void getSegmentFiles(
             sizeOfSegmentFiles += filesToFetch.get(i).length();
         }
         // Making sure files size is in correct format to perform time calculation.
-        sizeOfSegmentFiles = Math.abs(sizeOfSegmentFiles);
+        if(sizeOfSegmentFiles<0){
+            sizeOfSegmentFiles *=-1;
+        }
         // Maximum size of files to fetch (segment files), that can be processed in 1 minute for a m5.xlarge machine.
         long baseSegmentFilesSize = 300000000;
 

From c2db07d17dbd494957fcfca3269ffd7d7d14978f Mon Sep 17 00:00:00 2001
From: Rishikesh1159
Date: Wed, 7 Sep 2022 03:41:29 +0000
Subject: [PATCH 3/3] Addressing comments and applying spotless check.

Signed-off-by: Rishikesh1159
---
 .../PrimaryShardReplicationSource.java | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java
index 7ade588638c0d..8107f99723eaf 100644
--- a/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java
+++ b/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java
@@ -81,23 +81,16 @@ public void getSegmentFiles(
         final Writeable.Reader reader = GetSegmentFilesResponse::new;
         final ActionListener responseListener = ActionListener.map(listener, r -> r);
         // Few of the below assumptions and calculations are added for experimental release of segment replication feature in 2.3
-        // version.These will be changed in next release.
+        // version. These will be changed in next release.
 
         // Storing the size of files to fetch in bytes.
-        long sizeOfSegmentFiles = 0;
-        for (int i = 0; i < filesToFetch.size(); i++) {
-            sizeOfSegmentFiles += filesToFetch.get(i).length();
-        }
-        // Making sure files size is in correct format to perform time calculation.
-        if(sizeOfSegmentFiles<0){
-            sizeOfSegmentFiles *=-1;
-        }
-        // Maximum size of files to fetch (segment files), that can be processed in 1 minute for a m5.xlarge machine.
-        long baseSegmentFilesSize = 300000000;
+        final long sizeOfSegmentFiles = filesToFetch.stream().mapToLong(file -> file.length()).sum();
+
+        // Maximum size of files to fetch (segment files) in bytes, that can be processed in 1 minute for a m5.xlarge machine.
+        long baseSegmentFilesSize = 100000000;
 
-        long timeToGetSegmentFiles = 1;
         // Formula for calculating time needed to process a replication event's files to fetch process
-        timeToGetSegmentFiles += sizeOfSegmentFiles / baseSegmentFilesSize;
+        final long timeToGetSegmentFiles = 1 + (sizeOfSegmentFiles / baseSegmentFilesSize);
         final GetSegmentFilesRequest request = new GetSegmentFilesRequest(
             replicationId,
             targetAllocationId,
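
For reviewers who want to sanity-check the math outside the OpenSearch codebase, the following is a minimal standalone sketch of the timeout formula from the final revision (PATCH 3/3): one minute of budget per 100,000,000 bytes of segment files, plus a one-minute floor, applied as a per-request timeout in minutes. The class name, method name, and sample sizes below are illustrative and not part of the patch; only the constant and the formula mirror the committed code.

import java.util.List;

// Illustrative sketch only: mirrors the dynamic timeout formula from PATCH 3/3.
// Names and sample inputs are hypothetical, not part of the OpenSearch change.
public class SegmentFilesTimeoutSketch {

    // Bytes assumed transferable in one minute on an m5.xlarge (value taken from the patch).
    private static final long BASE_SEGMENT_FILES_SIZE = 100_000_000L;

    // Integer division floors the quotient, so the leading "1 +" guarantees
    // every replication event gets at least a one-minute timeout.
    static long timeoutMinutes(List<Long> fileSizesInBytes) {
        long sizeOfSegmentFiles = fileSizesInBytes.stream().mapToLong(Long::longValue).sum();
        return 1 + (sizeOfSegmentFiles / BASE_SEGMENT_FILES_SIZE);
    }

    public static void main(String[] args) {
        // ~250 MB of segment files -> 1 + 2 = 3 minutes of timeout budget.
        System.out.println(timeoutMinutes(List.of(200_000_000L, 50_000_000L)));
        // An empty files-to-fetch list still gets the 1-minute floor.
        System.out.println(timeoutMinutes(List.of()));
    }
}

In the patch itself the computed minute count is passed to TransportRequestOptions via TimeValue.timeValueMinutes, so larger replication events scale the GET_SEGMENT_FILES timeout instead of hitting the previous fixed limit.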