From 51bb6c786e293b046a8372d9d311e0ba00e72bca Mon Sep 17 00:00:00 2001 From: Simon Dudley Date: Fri, 26 Jul 2024 12:17:08 +1000 Subject: [PATCH] TrieLogPruner preload with 30 second timeout (#7365) Also reduce pruning window from 30_000 to 5_000 --------- Signed-off-by: Simon Dudley Co-authored-by: Sally MacFarlane --- CHANGELOG.md | 2 + .../common/trielog/TrieLogPruner.java | 71 ++++++++++++++++--- .../worldstate/DataStorageConfiguration.java | 2 +- 3 files changed, 64 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 813cf6804b8..4d3f9eb4012 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,11 +26,13 @@ - Add trie log pruner metrics [#7352](https://github.com/hyperledger/besu/pull/7352) - Force bonsai-limit-trie-logs-enabled=false when sync-mode=FULL instead of startup error [#7357](https://github.com/hyperledger/besu/pull/7357) - `--Xbonsai-parallel-tx-processing-enabled` option enables executing transactions in parallel during block processing for Bonsai nodes +- Reduce default trie log pruning window size from 30,000 to 5,000 [#7365](https://github.com/hyperledger/besu/pull/7365) - Add option `--poa-discovery-retry-bootnodes` for PoA networks to always use bootnodes during peer refresh, not just on first start [#7314](https://github.com/hyperledger/besu/pull/7314) ### Bug fixes - Fix `eth_call` deserialization to correctly ignore unknown fields in the transaction object. [#7323](https://github.com/hyperledger/besu/pull/7323) - Prevent Besu from starting up with sync-mode=FULL and bonsai-limit-trie-logs-enabled=true for private networks [#7357](https://github.com/hyperledger/besu/pull/7357) +- Add 30 second timeout to trie log pruner preload [#7365](https://github.com/hyperledger/besu/pull/7365) - Avoid executing pruner preload during trie log subcommands [#7366](https://github.com/hyperledger/besu/pull/7366) ## 24.7.0 diff --git a/ethereum/core/src/main/java/org/hyperledger/besu/ethereum/trie/diffbased/common/trielog/TrieLogPruner.java b/ethereum/core/src/main/java/org/hyperledger/besu/ethereum/trie/diffbased/common/trielog/TrieLogPruner.java index cea5c1a327d..98bc4246ebe 100644 --- a/ethereum/core/src/main/java/org/hyperledger/besu/ethereum/trie/diffbased/common/trielog/TrieLogPruner.java +++ b/ethereum/core/src/main/java/org/hyperledger/besu/ethereum/trie/diffbased/common/trielog/TrieLogPruner.java @@ -26,6 +26,12 @@ import java.util.Comparator; import java.util.Optional; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Consumer; import java.util.stream.Stream; @@ -40,6 +46,7 @@ public class TrieLogPruner implements TrieLogEvent.TrieLogObserver { private static final Logger LOG = LoggerFactory.getLogger(TrieLogPruner.class); + private static final int PRELOAD_TIMEOUT_IN_SECONDS = 30; private final int pruningLimit; private final int loadingLimit; @@ -83,25 +90,60 @@ public TrieLogPruner( BesuMetricCategory.PRUNER, "trie_log_pruned_orphan", "trie log pruned orphan"); } - public int initialize() { - return preloadQueue(); + public void initialize() { + preloadQueueWithTimeout(); } - private int preloadQueue() { + private void preloadQueueWithTimeout() { + LOG.atInfo() - .setMessage("Loading first {} trie logs from database...") + .setMessage("Attempting to load first {} trie logs from database...") .addArgument(loadingLimit) .log(); + + try (final ScheduledExecutorService preloadExecutor = Executors.newScheduledThreadPool(1)) { + + final AtomicBoolean timeoutOccurred = new AtomicBoolean(false); + final Runnable timeoutTask = + () -> { + timeoutOccurred.set(true); + LOG.atWarn() + .setMessage( + "Timeout occurred while loading and processing {} trie logs from database") + .addArgument(loadingLimit) + .log(); + }; + + final ScheduledFuture timeoutFuture = + preloadExecutor.schedule(timeoutTask, PRELOAD_TIMEOUT_IN_SECONDS, TimeUnit.SECONDS); + LOG.atInfo() + .setMessage( + "Trie log pruning will timeout after {} seconds. If this is timing out, consider using `besu storage trie-log prune` subcommand, see https://besu.hyperledger.org/public-networks/how-to/bonsai-limit-trie-logs") + .addArgument(PRELOAD_TIMEOUT_IN_SECONDS) + .log(); + + preloadQueue(timeoutOccurred, timeoutFuture); + } + } + + private void preloadQueue( + final AtomicBoolean timeoutOccurred, final ScheduledFuture timeoutFuture) { + try (final Stream trieLogKeys = rootWorldStateStorage.streamTrieLogKeys(loadingLimit)) { - final AtomicLong count = new AtomicLong(); + + final AtomicLong addToPruneQueueCount = new AtomicLong(); final AtomicLong orphansPruned = new AtomicLong(); trieLogKeys.forEach( blockHashAsBytes -> { + if (timeoutOccurred.get()) { + throw new RuntimeException( + new TimeoutException("Timeout occurred while preloading trie log prune queue")); + } final Hash blockHash = Hash.wrap(Bytes32.wrap(blockHashAsBytes)); final Optional header = blockchain.getBlockHeader(blockHash); if (header.isPresent()) { addToPruneQueue(header.get().getNumber(), blockHash); - count.getAndIncrement(); + addToPruneQueueCount.getAndIncrement(); } else { // prune orphaned blocks (sometimes created during block production) rootWorldStateStorage.pruneTrieLog(blockHash); @@ -109,12 +151,21 @@ private int preloadQueue() { prunedOrphanCounter.inc(); } }); + + timeoutFuture.cancel(true); LOG.atDebug().log("Pruned {} orphaned trie logs from database...", orphansPruned.intValue()); - LOG.atInfo().log("Loaded {} trie logs from database", count); - return pruneFromQueue() + orphansPruned.intValue(); + LOG.atInfo().log( + "Added {} trie logs to prune queue. Commencing pruning of eligible trie logs...", + addToPruneQueueCount.intValue()); + int prunedCount = pruneFromQueue(); + LOG.atInfo().log("Pruned {} trie logs.", prunedCount); } catch (Exception e) { - LOG.error("Error loading trie logs from database, nothing pruned", e); - return 0; + if (e.getCause() != null && e.getCause() instanceof TimeoutException) { + int prunedCount = pruneFromQueue(); + LOG.atInfo().log("Operation timed out, but still pruned {} trie logs.", prunedCount); + } else { + LOG.error("Error loading trie logs from database, nothing pruned", e); + } } } diff --git a/ethereum/core/src/main/java/org/hyperledger/besu/ethereum/worldstate/DataStorageConfiguration.java b/ethereum/core/src/main/java/org/hyperledger/besu/ethereum/worldstate/DataStorageConfiguration.java index 615b5bad6eb..8d767f442aa 100644 --- a/ethereum/core/src/main/java/org/hyperledger/besu/ethereum/worldstate/DataStorageConfiguration.java +++ b/ethereum/core/src/main/java/org/hyperledger/besu/ethereum/worldstate/DataStorageConfiguration.java @@ -25,7 +25,7 @@ public interface DataStorageConfiguration { long DEFAULT_BONSAI_MAX_LAYERS_TO_LOAD = 512; boolean DEFAULT_BONSAI_LIMIT_TRIE_LOGS_ENABLED = true; long MINIMUM_BONSAI_TRIE_LOG_RETENTION_LIMIT = DEFAULT_BONSAI_MAX_LAYERS_TO_LOAD; - int DEFAULT_BONSAI_TRIE_LOG_PRUNING_WINDOW_SIZE = 30_000; + int DEFAULT_BONSAI_TRIE_LOG_PRUNING_WINDOW_SIZE = 5_000; boolean DEFAULT_RECEIPT_COMPACTION_ENABLED = false; DataStorageConfiguration DEFAULT_CONFIG =