Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add experimental x-trie-log subcommand for one-off backlog prune #6188

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@
description = "This command provides storage related actions.",
mixinStandardHelpOptions = true,
versionProvider = VersionProvider.class,
subcommands = {StorageSubCommand.RevertVariablesStorage.class})
subcommands = {StorageSubCommand.RevertVariablesStorage.class, TrieLogSubCommand.class})
public class StorageSubCommand implements Runnable {

/** The constant COMMAND_NAME. */
public static final String COMMAND_NAME = "storage";

@SuppressWarnings("unused")
@ParentCommand
private BesuCommand parentCommand;
BesuCommand parentCommand;

@SuppressWarnings("unused")
@Spec
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
/*
* Copyright contributors to Hyperledger Besu.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
*/

package org.hyperledger.besu.cli.subcommands.storage;

import static com.google.common.base.Preconditions.checkArgument;
import static org.hyperledger.besu.ethereum.worldstate.DataStorageConfiguration.Unstable.MINIMUM_BONSAI_TRIE_LOG_RETENTION_THRESHOLD;

import org.hyperledger.besu.controller.BesuController;
import org.hyperledger.besu.datatypes.Hash;
import org.hyperledger.besu.ethereum.bonsai.storage.BonsaiWorldStateKeyValueStorage;
import org.hyperledger.besu.ethereum.bonsai.trielog.TrieLogPruner;
import org.hyperledger.besu.ethereum.chain.Blockchain;
import org.hyperledger.besu.ethereum.chain.MutableBlockchain;
import org.hyperledger.besu.ethereum.core.BlockHeader;
import org.hyperledger.besu.ethereum.worldstate.DataStorageConfiguration;

import java.io.PrintWriter;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.tuweni.bytes.Bytes32;

/** Helper class for counting and pruning trie logs */
public class TrieLogHelper {

static void countAndPrune(
final PrintWriter out,
final DataStorageConfiguration config,
final BonsaiWorldStateKeyValueStorage rootWorldStateStorage,
final MutableBlockchain blockchain,
final BesuController besuController) {
TrieLogHelper.validatePruneConfiguration(config);

final TrieLogCount count = getCount(rootWorldStateStorage, Integer.MAX_VALUE, blockchain);

out.println("Counting trie logs before prune...");
printCount(out, count);
out.println();

final int layersToRetain = (int) config.getUnstable().getBonsaiTrieLogRetentionThreshold();
final int batchSize = config.getUnstable().getBonsaiTrieLogPruningLimit();
final boolean isProofOfStake =
besuController.getGenesisConfigOptions().getTerminalTotalDifficulty().isPresent();
TrieLogPruner pruner =
new TrieLogPruner(
rootWorldStateStorage, blockchain, layersToRetain, batchSize, isProofOfStake);

final int totalToPrune = count.total() - layersToRetain;
out.printf(
"""
Total to prune = %d (total) - %d (retention threshold) =
=> %d
""",
count.total(), layersToRetain, totalToPrune);
final long numBatches = Math.max(totalToPrune / batchSize, 1);
out.println();
out.printf(
"Estimated number of batches = max(%d (total to prune) / %d (batch size), 1) = %d\n",
totalToPrune, batchSize, numBatches);
out.println();

int noProgressCounter = 0;
int prevTotalNumberPruned = 0;
int totalNumberPruned = 0;
int numberPrunedInBatch;
int batchNumber = 1;
while (totalNumberPruned < totalToPrune) {
Fixed Show fixed Hide fixed
out.printf(
"""
Pruning batch %d
-----------------
""", batchNumber++);
// do prune
numberPrunedInBatch = pruner.initialize();

out.printf("Number pruned in batch = %d \n", numberPrunedInBatch);
totalNumberPruned += numberPrunedInBatch;
out.printf(
"""
Running total number pruned =
=> %d of %d
""",
totalNumberPruned, totalToPrune);

if (totalNumberPruned == prevTotalNumberPruned) {
if (noProgressCounter++ == 5) {
out.println("No progress in 5 batches, exiting");
return;
}
}

prevTotalNumberPruned = totalNumberPruned;
out.println();
}
out.println("Trie log prune complete!");
out.println();

out.println("Counting trie logs after prune...");
TrieLogHelper.printCount(
out, TrieLogHelper.getCount(rootWorldStateStorage, Integer.MAX_VALUE, blockchain));
}

/**
 * Verifies that the unstable Bonsai trie-log settings are usable for a prune run.
 *
 * <p>Three conditions are checked in order: the retention threshold is at least {@code
 * MINIMUM_BONSAI_TRIE_LOG_RETENTION_THRESHOLD}, the pruning limit is positive, and the pruning
 * limit is strictly greater than the retention threshold.
 *
 * @param config data storage configuration whose unstable options are inspected
 * @throws IllegalArgumentException on the first condition that does not hold
 */
private static void validatePruneConfiguration(final DataStorageConfiguration config) {
  final long retentionThreshold = config.getUnstable().getBonsaiTrieLogRetentionThreshold();
  final int pruningLimit = config.getUnstable().getBonsaiTrieLogPruningLimit();

  final boolean thresholdAtLeastMinimum =
      retentionThreshold >= MINIMUM_BONSAI_TRIE_LOG_RETENTION_THRESHOLD;
  checkArgument(
      thresholdAtLeastMinimum,
      "--Xbonsai-trie-log-retention-threshold minimum value is %d"
          .formatted(MINIMUM_BONSAI_TRIE_LOG_RETENTION_THRESHOLD));

  final boolean limitIsPositive = pruningLimit > 0;
  checkArgument(
      limitIsPositive,
      "--Xbonsai-trie-log-pruning-limit=%d must be greater than 0".formatted(pruningLimit));

  final boolean limitExceedsThreshold = pruningLimit > retentionThreshold;
  checkArgument(
      limitExceedsThreshold,
      "--Xbonsai-trie-log-pruning-limit=%d must greater than --Xbonsai-trie-log-retention-threshold=%d"
          .formatted(pruningLimit, retentionThreshold));
}

/**
 * Counts trie logs and classifies each one against the blockchain.
 *
 * <p>Streams at most {@code limit} trie log keys from storage and treats each key as a block
 * hash. Every key increments the total. A key whose header is found by hash is counted as
 * canonical when the header stored at the same block number carries that same hash, otherwise
 * as a fork. A key with no header at all is counted as orphaned.
 *
 * @param rootWorldStateStorage storage whose trie log keys are streamed
 * @param limit maximum number of trie log keys to stream
 * @param blockchain blockchain used to look up headers by hash and by number
 * @return the total, canonical, fork and orphan tallies
 */
static TrieLogCount getCount(
final BonsaiWorldStateKeyValueStorage rootWorldStateStorage,
final int limit,
final Blockchain blockchain) {
// Atomic counters because the tallies are updated from inside lambdas.
final AtomicInteger total = new AtomicInteger();
final AtomicInteger canonicalCount = new AtomicInteger();
final AtomicInteger forkCount = new AtomicInteger();
final AtomicInteger orphanCount = new AtomicInteger();
rootWorldStateStorage
.streamTrieLogKeys(limit)
.map(Bytes32::wrap)
.map(Hash::wrap)
.forEach(
Comment on lines +145 to +149
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am guessing that most users of this subcommand are going to want to prune pretty close to head. Meaning there will be far more trielogs deleted than retained. This implementation is safe, but probably going to be pretty slow in most cases.

IMO we could/should add an alternate implementation that checks that the number of retained trielogs is below a certain reasonable threshold. We copy those trielogs to an alternate column family, truncate the trielog family, and move the trielogs back into the newly truncated CF. That should make this operation markedly faster for what I suspect will be the typical use case (offline pruning a huge number of trielogs)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to be clear, I would start from the blockchain storage and get the latest 'x' hashes that we want to retain rather than streaming all of the keys in the trielog CF

hash -> {
total.getAndIncrement();
blockchain
.getBlockHeader(hash)
.ifPresentOrElse(
(header) -> {
// Header known by hash: compare against the header stored at the same height.
long number = header.getNumber();
final Optional<BlockHeader> headerByNumber =
blockchain.getBlockHeader(number);
if (headerByNumber.isPresent()
&& headerByNumber.get().getHash().equals(hash)) {
canonicalCount.getAndIncrement();
} else {
forkCount.getAndIncrement();
}
},
// No header for this hash at all.
orphanCount::getAndIncrement);
});

return new TrieLogCount(total.get(), canonicalCount.get(), forkCount.get(), orphanCount.get());
}

/**
 * Writes the four trie log tallies to the given writer, one per line.
 *
 * @param out writer that receives the formatted counts
 * @param count tallies to print
 */
static void printCount(final PrintWriter out, final TrieLogCount count) {
  final int everything = count.total();
  final int canonical = count.canonicalCount();
  final int forks = count.forkCount();
  final int orphans = count.orphanCount();
  out.printf(
      "trieLog count: %s\n - canonical count: %s\n - fork count: %s\n - orphaned count: %s\n",
      everything, canonical, forks, orphans);
}

/**
 * Immutable tally of trie logs.
 *
 * @param total number of trie log keys seen
 * @param canonicalCount number counted as canonical
 * @param forkCount number counted as forks
 * @param orphanCount number counted as orphaned
 */
record TrieLogCount(
    int total,
    int canonicalCount,
    int forkCount,
    int orphanCount) {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* Copyright Hyperledger Besu Contributors.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
*/
package org.hyperledger.besu.cli.subcommands.storage;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import org.hyperledger.besu.cli.util.VersionProvider;
import org.hyperledger.besu.controller.BesuController;
import org.hyperledger.besu.ethereum.bonsai.storage.BonsaiWorldStateKeyValueStorage;
import org.hyperledger.besu.ethereum.bonsai.trielog.TrieLogPruner;
import org.hyperledger.besu.ethereum.chain.MutableBlockchain;
import org.hyperledger.besu.ethereum.storage.StorageProvider;
import org.hyperledger.besu.ethereum.worldstate.DataStorageConfiguration;
import org.hyperledger.besu.ethereum.worldstate.DataStorageFormat;

import java.io.PrintWriter;

import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.core.config.Configurator;
import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.ParentCommand;

/** The Trie Log subcommand. */
@Command(
name = "x-trie-log",
description = "Manipulate trie logs",
mixinStandardHelpOptions = true,
versionProvider = VersionProvider.class,
subcommands = {TrieLogSubCommand.CountTrieLog.class, TrieLogSubCommand.PruneTrieLog.class})
public class TrieLogSubCommand implements Runnable {

@SuppressWarnings("UnusedVariable")
@ParentCommand
private static StorageSubCommand parentCommand;

@SuppressWarnings("unused")
@CommandLine.Spec
private CommandLine.Model.CommandSpec spec; // Picocli injects reference to command spec

/** Prints this command's usage text to the command line's output writer. */
@Override
public void run() {
  final CommandLine commandLine = spec.commandLine();
  final PrintWriter usageWriter = commandLine.getOut();
  spec.commandLine().usage(usageWriter);
}

/**
 * Builds a controller through the command that sits above the storage subcommand.
 *
 * @return the controller produced by that command's {@code buildController()}
 */
private static BesuController createBesuController() {
  final StorageSubCommand storageSubCommand = parentCommand;
  return storageSubCommand.parentCommand.buildController();
}

@Command(
name = "count",
description = "This command counts all the trie logs",
mixinStandardHelpOptions = true,
versionProvider = VersionProvider.class)
static class CountTrieLog implements Runnable {

@SuppressWarnings("unused")
@ParentCommand
private TrieLogSubCommand parentCommand;

@SuppressWarnings("unused")
@CommandLine.Spec
private CommandLine.Model.CommandSpec spec; // Picocli injects reference to command spec

@Override
public void run() {
TrieLogContext context = getTrieLogContext();

final PrintWriter out = spec.commandLine().getOut();

out.println("Counting trie logs...");
TrieLogHelper.printCount(
out,
TrieLogHelper.getCount(
context.rootWorldStateStorage, Integer.MAX_VALUE, context.blockchain));
}
}

/** Subcommand that runs the count-and-prune routine over the trie logs. */
@Command(
    name = "prune",
    description =
        "This command prunes all trie log layers below the retention threshold, including orphaned trie logs.",
    mixinStandardHelpOptions = true,
    versionProvider = VersionProvider.class)
static class PruneTrieLog implements Runnable {

  @SuppressWarnings("unused")
  @ParentCommand
  private TrieLogSubCommand parentCommand;

  @SuppressWarnings("unused")
  @CommandLine.Spec
  private CommandLine.Model.CommandSpec spec; // Picocli injects reference to command spec

  /** Resolves the trie log context and hands its parts to {@code countAndPrune}. */
  @Override
  public void run() {
    final TrieLogContext trieLogContext = getTrieLogContext();
    final PrintWriter writer = spec.commandLine().getOut();

    TrieLogHelper.countAndPrune(
        writer,
        trieLogContext.config(),
        trieLogContext.rootWorldStateStorage(),
        trieLogContext.blockchain(),
        trieLogContext.besuController());
  }
}

/**
 * Bundle of the objects the trie log subcommands operate on.
 *
 * @param besuController controller the other components were obtained from
 * @param config data storage configuration taken from the controller
 * @param rootWorldStateStorage Bonsai world state storage created from the storage provider
 * @param blockchain blockchain taken from the controller's protocol context
 */
record TrieLogContext(
BesuController besuController,
DataStorageConfiguration config,
BonsaiWorldStateKeyValueStorage rootWorldStateStorage,
MutableBlockchain blockchain) {}

/**
 * Builds the controller and collects the storage objects the trie log subcommands need.
 *
 * <p>Sets the {@code TrieLogPruner} logger to DEBUG first, then requires the parent command to
 * be set and the configured storage format to be BONSAI.
 *
 * @return the controller, its data storage configuration, the Bonsai world state storage and
 *     the blockchain
 * @throws NullPointerException if the parent command is not set
 * @throws IllegalArgumentException if the data storage format is not BONSAI
 */
@SuppressWarnings("BannedMethod")
private static TrieLogContext getTrieLogContext() {
  final String prunerLoggerName = LogManager.getLogger(TrieLogPruner.class).getName();
  Configurator.setLevel(prunerLoggerName, Level.DEBUG);

  checkNotNull(parentCommand);
  final BesuController controller = createBesuController();
  final DataStorageConfiguration storageConfig = controller.getDataStorageConfiguration();
  final boolean isBonsai = DataStorageFormat.BONSAI.equals(storageConfig.getDataStorageFormat());
  checkArgument(isBonsai, "Subcommand only works with data-storage-format=BONSAI");

  final StorageProvider provider = controller.getStorageProvider();
  final BonsaiWorldStateKeyValueStorage worldStateStorage =
      (BonsaiWorldStateKeyValueStorage) provider.createWorldStateStorage(DataStorageFormat.BONSAI);
  return new TrieLogContext(
      controller,
      storageConfig,
      worldStateStorage,
      controller.getProtocolContext().getBlockchain());
}
}
Loading
Loading