-
Notifications
You must be signed in to change notification settings - Fork 810
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Import export trie log #6363
Import export trie log #6363
Changes from all commits
16c0a49
7dd4928
bf2b098
e67ae51
9b4e0c9
0b9fe83
426848e
7401b59
1b7fb72
11e6b05
f2d01e2
04f1aaa
2f01c5a
56e4c8e
78561b0
42c72cf
9961fc2
9389540
e3d4fbc
c7144fe
20b0ba5
586ab25
67e6f3d
b9640e5
3bc1878
d47ddf5
999edb6
1699fe4
e679cb3
5d3b4f2
f839b75
0caa4cf
2d5d31d
37df23e
5ce1800
98423dc
cf3a5e6
c759bba
5fb9413
087c54b
4b033a3
3a89ac3
9be7d13
75d1c3b
5eb4cda
5425075
2de0b19
248a776
55d653e
c9d38f0
b9d7620
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,7 +22,11 @@ | |
import org.hyperledger.besu.ethereum.chain.Blockchain; | ||
import org.hyperledger.besu.ethereum.chain.MutableBlockchain; | ||
import org.hyperledger.besu.ethereum.core.BlockHeader; | ||
import org.hyperledger.besu.ethereum.rlp.BytesValueRLPInput; | ||
import org.hyperledger.besu.ethereum.rlp.RLP; | ||
import org.hyperledger.besu.ethereum.trie.bonsai.storage.BonsaiWorldStateKeyValueStorage; | ||
import org.hyperledger.besu.ethereum.trie.bonsai.trielog.TrieLogFactoryImpl; | ||
import org.hyperledger.besu.ethereum.trie.bonsai.trielog.TrieLogLayer; | ||
import org.hyperledger.besu.ethereum.worldstate.DataStorageConfiguration; | ||
|
||
import java.io.File; | ||
|
@@ -32,13 +36,15 @@ | |
import java.io.ObjectInputStream; | ||
import java.io.ObjectOutputStream; | ||
import java.io.PrintWriter; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.util.ArrayList; | ||
import java.util.IdentityHashMap; | ||
import java.util.List; | ||
import java.util.Optional; | ||
import java.util.concurrent.atomic.AtomicInteger; | ||
|
||
import org.apache.tuweni.bytes.Bytes; | ||
import org.apache.tuweni.bytes.Bytes32; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
@@ -97,16 +103,15 @@ private static void processTrieLogBatches( | |
final String batchFileNameBase) { | ||
|
||
for (long batchNumber = 1; batchNumber <= numberOfBatches; batchNumber++) { | ||
|
||
final String batchFileName = batchFileNameBase + "-" + batchNumber; | ||
final long firstBlockOfBatch = chainHeight - ((batchNumber - 1) * BATCH_SIZE); | ||
|
||
final long lastBlockOfBatch = | ||
Math.max(chainHeight - (batchNumber * BATCH_SIZE), lastBlockNumberToRetainTrieLogsFor); | ||
|
||
final List<Hash> trieLogKeys = | ||
getTrieLogKeysForBlocks(blockchain, firstBlockOfBatch, lastBlockOfBatch); | ||
|
||
saveTrieLogBatches(batchFileNameBase, rootWorldStateStorage, batchNumber, trieLogKeys); | ||
LOG.info("Saving trie logs to retain in file (batch {})...", batchNumber); | ||
siladu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
saveTrieLogBatches(batchFileName, rootWorldStateStorage, trieLogKeys); | ||
} | ||
|
||
LOG.info("Clear trie logs..."); | ||
|
@@ -118,15 +123,12 @@ private static void processTrieLogBatches( | |
} | ||
|
||
private static void saveTrieLogBatches( | ||
final String batchFileNameBase, | ||
final String batchFileName, | ||
final BonsaiWorldStateKeyValueStorage rootWorldStateStorage, | ||
final long batchNumber, | ||
final List<Hash> trieLogKeys) { | ||
|
||
LOG.info("Saving trie logs to retain in file (batch {})...", batchNumber); | ||
|
||
try { | ||
saveTrieLogsInFile(trieLogKeys, rootWorldStateStorage, batchNumber, batchFileNameBase); | ||
saveTrieLogsInFile(trieLogKeys, rootWorldStateStorage, batchFileName); | ||
} catch (IOException e) { | ||
LOG.error("Error saving trie logs to file: {}", e.getMessage()); | ||
throw new RuntimeException(e); | ||
|
@@ -210,9 +212,8 @@ private static void recreateTrieLogs( | |
final String batchFileNameBase) | ||
throws IOException { | ||
// process in chunk to avoid OOM | ||
|
||
IdentityHashMap<byte[], byte[]> trieLogsToRetain = | ||
readTrieLogsFromFile(batchFileNameBase, batchNumber); | ||
final String batchFileName = batchFileNameBase + "-" + batchNumber; | ||
IdentityHashMap<byte[], byte[]> trieLogsToRetain = readTrieLogsFromFile(batchFileName); | ||
final int chunkSize = ROCKSDB_MAX_INSERTS_PER_TRANSACTION; | ||
List<byte[]> keys = new ArrayList<>(trieLogsToRetain.keySet()); | ||
|
||
|
@@ -265,11 +266,10 @@ private static void validatePruneConfiguration(final DataStorageConfiguration co | |
private static void saveTrieLogsInFile( | ||
final List<Hash> trieLogsKeys, | ||
final BonsaiWorldStateKeyValueStorage rootWorldStateStorage, | ||
final long batchNumber, | ||
final String batchFileNameBase) | ||
final String batchFileName) | ||
throws IOException { | ||
|
||
File file = new File(batchFileNameBase + "-" + batchNumber); | ||
File file = new File(batchFileName); | ||
if (file.exists()) { | ||
LOG.error("File already exists, skipping file creation"); | ||
return; | ||
|
@@ -285,24 +285,67 @@ private static void saveTrieLogsInFile( | |
} | ||
|
||
@SuppressWarnings("unchecked") | ||
private static IdentityHashMap<byte[], byte[]> readTrieLogsFromFile( | ||
final String batchFileNameBase, final long batchNumber) { | ||
static IdentityHashMap<byte[], byte[]> readTrieLogsFromFile(final String batchFileName) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Non-blocking feedback: I am all for code reuse, but if we are going to allow arbitrary import and export, the import files should be more readable and "creatable". The ObjectOutputStream format seems fine for backup/recovery of a pruning process, but as part of a general import/export process this file format is too inscrutable, IMO. At least for import/export, we should serialize/deserialize these as JSON maps, with the key as the hash string and the trie log itself as hex (or as a rich JSON object if we wanted to be super transparent). In addition to being a bit more introspectable, it would allow us to create and import our own handcrafted trie logs when debugging. |
||
|
||
IdentityHashMap<byte[], byte[]> trieLogs; | ||
try (FileInputStream fis = new FileInputStream(batchFileNameBase + "-" + batchNumber); | ||
try (FileInputStream fis = new FileInputStream(batchFileName); | ||
ObjectInputStream ois = new ObjectInputStream(fis)) { | ||
|
||
trieLogs = (IdentityHashMap<byte[], byte[]>) ois.readObject(); | ||
|
||
} catch (IOException | ClassNotFoundException e) { | ||
|
||
LOG.error(e.getMessage()); | ||
throw new RuntimeException(e); | ||
} | ||
|
||
return trieLogs; | ||
} | ||
|
||
private static void saveTrieLogsAsRlpInFile( | ||
final List<Hash> trieLogsKeys, | ||
final BonsaiWorldStateKeyValueStorage rootWorldStateStorage, | ||
final String batchFileName) { | ||
File file = new File(batchFileName); | ||
if (file.exists()) { | ||
LOG.error("File already exists, skipping file creation"); | ||
return; | ||
} | ||
|
||
final IdentityHashMap<byte[], byte[]> trieLogs = | ||
getTrieLogs(trieLogsKeys, rootWorldStateStorage); | ||
final Bytes rlp = | ||
RLP.encode( | ||
o -> | ||
o.writeList( | ||
trieLogs.entrySet(), (val, out) -> out.writeRaw(Bytes.wrap(val.getValue())))); | ||
try { | ||
Files.write(file.toPath(), rlp.toArrayUnsafe()); | ||
} catch (IOException e) { | ||
LOG.error(e.getMessage()); | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
static IdentityHashMap<byte[], byte[]> readTrieLogsAsRlpFromFile(final String batchFileName) { | ||
try { | ||
final Bytes file = Bytes.wrap(Files.readAllBytes(Path.of(batchFileName))); | ||
final BytesValueRLPInput input = new BytesValueRLPInput(file, false); | ||
|
||
input.enterList(); | ||
final IdentityHashMap<byte[], byte[]> trieLogs = new IdentityHashMap<>(); | ||
while (!input.isEndOfCurrentList()) { | ||
final Bytes trieLogBytes = input.currentListAsBytes(); | ||
TrieLogLayer trieLogLayer = | ||
TrieLogFactoryImpl.readFrom(new BytesValueRLPInput(Bytes.wrap(trieLogBytes), false)); | ||
trieLogs.put(trieLogLayer.getBlockHash().toArrayUnsafe(), trieLogBytes.toArrayUnsafe()); | ||
} | ||
input.leaveList(); | ||
|
||
return trieLogs; | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
private static IdentityHashMap<byte[], byte[]> getTrieLogs( | ||
final List<Hash> trieLogKeys, final BonsaiWorldStateKeyValueStorage rootWorldStateStorage) { | ||
IdentityHashMap<byte[], byte[]> trieLogsToRetain = new IdentityHashMap<>(); | ||
|
@@ -357,5 +400,25 @@ static void printCount(final PrintWriter out, final TrieLogCount count) { | |
count.total, count.canonicalCount, count.forkCount, count.orphanCount); | ||
} | ||
|
||
static void importTrieLog( | ||
final BonsaiWorldStateKeyValueStorage rootWorldStateStorage, final Path trieLogFilePath) { | ||
|
||
var trieLog = readTrieLogsAsRlpFromFile(trieLogFilePath.toString()); | ||
|
||
var updater = rootWorldStateStorage.updater(); | ||
trieLog.forEach((key, value) -> updater.getTrieLogStorageTransaction().put(key, value)); | ||
jframe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
updater.getTrieLogStorageTransaction().commit(); | ||
} | ||
|
||
static void exportTrieLog( | ||
final BonsaiWorldStateKeyValueStorage rootWorldStateStorage, | ||
final List<Hash> trieLogHash, | ||
final Path directoryPath) | ||
throws IOException { | ||
final String trieLogFile = directoryPath.toString(); | ||
|
||
saveTrieLogsAsRlpInFile(trieLogHash, rootWorldStateStorage, trieLogFile); | ||
} | ||
|
||
/**
 * Immutable tally of trie logs, printed by {@code printCount}. Presumably {@code total} is the
 * overall number and the other three break it down into canonical, fork and orphan trie logs -
 * confirm against the code that builds the record.
 */
record TrieLogCount(int total, int canonicalCount, int forkCount, int orphanCount) {} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It might make more sense to include the first/last block numbers in the filename; otherwise it won't be clear what is actually in the files after an export.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The batch filenames aren't used by the import/export subcommands; there, the filename is taken from the command-line arguments instead. The batch filenames are only used by the prune subcommand.