Skip to content

Commit

Permalink
Handling threadpool scheduling failures in remote cluster state purge
Browse files Browse the repository at this point in the history
Signed-off-by: Gaurav Bafna <gbbafna@amazon.com>
  • Loading branch information
gbbafna committed Feb 9, 2024
1 parent 3d08b4c commit 50dcbf2
Showing 1 changed file with 19 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1312,21 +1312,25 @@ private void deleteStalePaths(String clusterName, String clusterUUID, List<Strin
* @param committedManifest last committed ClusterMetadataManifest
*/
public void deleteStaleClusterUUIDs(ClusterState clusterState, ClusterMetadataManifest committedManifest) {
threadpool.executor(ThreadPool.Names.REMOTE_PURGE).execute(() -> {
String clusterName = clusterState.getClusterName().value();
logger.debug("Deleting stale cluster UUIDs data from remote [{}]", clusterName);
Set<String> allClustersUUIDsInRemote;
try {
allClustersUUIDsInRemote = new HashSet<>(getAllClusterUUIDs(clusterState.getClusterName().value()));
} catch (IOException e) {
logger.info(String.format(Locale.ROOT, "Error while fetching all cluster UUIDs for [%s]", clusterName));
return;
}
// Retain last 2 cluster uuids data
allClustersUUIDsInRemote.remove(committedManifest.getClusterUUID());
allClustersUUIDsInRemote.remove(committedManifest.getPreviousClusterUUID());
deleteStaleUUIDsClusterMetadata(clusterName, new ArrayList<>(allClustersUUIDsInRemote));
});
try {
threadpool.executor(ThreadPool.Names.REMOTE_PURGE).execute(() -> {
String clusterName = clusterState.getClusterName().value();
logger.debug("Deleting stale cluster UUIDs data from remote [{}]", clusterName);
Set<String> allClustersUUIDsInRemote;
try {
allClustersUUIDsInRemote = new HashSet<>(getAllClusterUUIDs(clusterState.getClusterName().value()));
} catch (IOException e) {
logger.info(String.format(Locale.ROOT, "Error while fetching all cluster UUIDs for [%s]", clusterName));
return;
}
// Retain last 2 cluster uuids data
allClustersUUIDsInRemote.remove(committedManifest.getClusterUUID());
allClustersUUIDsInRemote.remove(committedManifest.getPreviousClusterUUID());
deleteStaleUUIDsClusterMetadata(clusterName, new ArrayList<>(allClustersUUIDsInRemote));
});
} catch (Exception e) {
logger.error("Exception occurred while scheduling deletion of stale cluster UUIDs from remote store", e);
}
}

public RemotePersistenceStats getStats() {
Expand Down

0 comments on commit 50dcbf2

Please sign in to comment.