diff --git a/cmd/geth/verkle.go b/cmd/geth/verkle.go index a16a716fe014..c8b209f2780e 100644 --- a/cmd/geth/verkle.go +++ b/cmd/geth/verkle.go @@ -18,11 +18,11 @@ package main import ( "bytes" + "context" "encoding/binary" "encoding/hex" "errors" "fmt" - "io" "io/ioutil" "os" "runtime" @@ -33,6 +33,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state/snapshot" + "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/internal/flags" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" @@ -41,6 +42,7 @@ import ( "github.com/gballet/go-verkle" "github.com/holiman/uint256" cli "github.com/urfave/cli/v2" + "golang.org/x/sync/errgroup" ) var ( @@ -732,89 +734,187 @@ func dumpKeys(ctx *cli.Context) error { } func sortKeys(ctx *cli.Context) error { - // Get list of files - files, _ := ioutil.ReadDir(".") + // Run precomp preparation now to avoid racing in parallel goroutines if the + // precomputed table doesn't exist. + _ = verkle.GetConfig() + + // Open database. + stack, _ := makeConfigNode(ctx) + defer stack.Close() + chaindb := utils.MakeChainDatabase(ctx, stack, false) + if chaindb == nil { + return errors.New("nil chaindb") + } + start := time.Now() - root := verkle.New() - // Iterate over files - for _, file := range files { + // The migration code converts trees partitioned by the first two bytes of + // the stem. We'll collect in secondLevelCommitment[a][b] the commitment of + // the InternalNode with stem [a, b, ...]. We'll then use this to build the + // first two layers of the tree. 
+ var secondLevelCommitment [256][256][32]byte + + // List files and iterate on them + files, _ := ioutil.ReadDir(".") + for fileIdx, file := range files { // Check if file is a binary file fname := file.Name() - if !bytes.HasSuffix([]byte(fname), []byte(".bin")) || bytes.HasPrefix([]byte(fname), []byte("sorted-")) || len(fname) != 6 { + if !bytes.HasSuffix([]byte(fname), []byte(".bin")) || len(fname) != 6 { continue } - log.Info("Processing file", "name", file.Name()) - data, _ := ioutil.ReadFile(file.Name()) - numTuples := len(data) / 64 - tuples := make([][64]byte, 0, numTuples) - reader := bytes.NewReader(data) - for { - var tuple [64]byte - err := binary.Read(reader, binary.LittleEndian, &tuple) - if errors.Is(err, io.EOF) { - break + startFile := time.Now() + + // Read the file grouping leaf values by the first two bytes of the stem, and send them to secondLvlLeaves. + secondLvlLeaves := make(chan []verkle.BatchNewLeafNodeData) + go func() { + if err := getSortedLeavesData(fname, secondLvlLeaves); err != nil { + log.Crit("Failed to get sorted leaves data", "error", err) } - if err != nil { - panic(err) + close(secondLvlLeaves) + }() + + // Process secondLvlLeaves items and pipe the results to serializedTrees. + log.Info("Building tree", "name", file.Name()) + serializedTrees := make(chan []verkle.SerializedNode) + go func() { + // We read from the channel, and allow up to runtime.NumCPU() goroutines to process the data. + // This tries to use as many CPUs as possible, while also putting some backpressure on the channel + // to avoid using too much memory. + group, _ := errgroup.WithContext(context.Background()) + group.SetLimit(runtime.NumCPU()) + for leavesData := range secondLvlLeaves { + leavesData := leavesData + group.Go(func() error { + // We generate the LeafNodes in an optimized way. + leaves := verkle.BatchNewLeafNode(leavesData) + // We do an optimized tree construction from all the leaves at once. 
+ // Note this is a partial tree since all the keys have the same first two bytes of the stem. + root := verkle.BatchInsertOrderedLeaves(leaves) + root.Commit() + + // Serialize all the nodes of the generated tree, which takes advantage of many optimizations. + nodes, err := root.BatchSerialize() + if err != nil { + return fmt.Errorf("failed to serialize nodes: %w", err) + } + + // Sort the serialized nodes by their CommitmentBytes, which tries to help the database with + // future compactions when inserting. + sort.Slice(nodes, func(i, j int) bool { + return bytes.Compare(nodes[i].CommitmentBytes[:], nodes[j].CommitmentBytes[:]) < 0 + }) + + // Remember: this is a partial tree where all the keys have the same first two bytes of the stem. + // We collect now all the commitments of the InternalNodes with stem [a, b, ...] + // in secondLevelCommitment[a][b]. Note that each goroutine is working on a different + // place in the array, so there's no race-condition. + stem := leavesData[0].Stem // All the leaves have the same first 2-byte stem, take the first one. + point := verkle.GetInternalNodeCommitment(root, stem[:2]) + secondLevelCommitment[stem[0]][stem[1]] = point.Bytes() + + // Send the nodes to serializedTrees which will write them to disk. + serializedTrees <- nodes + return nil + }) + } + if err := group.Wait(); err != nil { + log.Crit("Failed to build tree", "error", err) + } + close(serializedTrees) + }() + + // We receive serialized nodes from serializedTrees and write them to disk. + // We batch them into presumably optimal batches. Note this also puts backpressure + // to the previous channels if we can't write fast enough. That's useful because + // there's no reason to use more memory if things are lagging behind. Disk is slow. 
+ log.Info("Serializing tree") + batch := chaindb.NewBatchWithSize(ethdb.IdealBatchSize) + for nodes := range serializedTrees { + for _, node := range nodes { + if err := batch.Put(node.CommitmentBytes[:], node.SerializedBytes); err != nil { + log.Crit("put node to disk: %s", err) + } + if batch.ValueSize() > ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + log.Crit("write batch: %s", err) + } + batch.Reset() + } } - tuples = append(tuples, tuple) } - // Sort tuples by key - log.Info("Sorting file", "name", file.Name()) - sort.Slice(tuples, func(i, j int) bool { - return bytes.Compare(tuples[i][:32], tuples[j][:32]) < 0 - }) + // Just make sure to GC before the next file, so there's a bound of ~4GiB of memory used. + runtime.GC() - // Merge the values - log.Info("Merging file", "name", file.Name()) - file, _ := os.Create("sorted-" + file.Name()) - var ( - stem [31]byte - values = make([][]byte, 256) - last [31]byte - ) - if len(tuples) > 0 { - copy(last[:], tuples[0][:31]) - } - for i := range tuples { - copy(stem[:], tuples[i][:31]) - if stem != last { - binary.Write(file, binary.LittleEndian, last) - binary.Write(file, binary.LittleEndian, values) - - var istem [31]byte - istem = last - err := root.(*verkle.InternalNode).InsertStem(istem[:], values, nil) - if err != nil { - panic(err) - } - copy(last[:], stem[:]) - values = make([][]byte, 256) - } + historyAvgPerFile := time.Since(start) / time.Duration(fileIdx+1) + timeLeft := common.PrettyDuration(historyAvgPerFile * time.Duration(len(files)-fileIdx-1)) + log.Info("Subtree finished", "file", fname, "elapsed", common.PrettyDuration(time.Since(startFile)), "estimated_remaining", timeLeft.String()) + } - values[tuples[i][31]] = make([]byte, 32) - copy(values[tuples[i][31]], tuples[i][32:]) + // From all the commitments of the InternalNodes with stem [a, b, ...] we build + // and save the first two layers of the tree. 
+ root := verkle.BuildFirstTwoLayers(secondLevelCommitment) + log.Info("Building tree finished", "root", fmt.Sprintf("%x", root.Commit().Bytes())) + nodes, err := root.BatchSerialize() + if err != nil { + return fmt.Errorf("failed to serialize nodes: %w", err) + } + for _, node := range nodes { + if err := chaindb.Put(node.CommitmentBytes[:], node.SerializedBytes); err != nil { + log.Crit("put node to disk: %s", err) } + } - // dump the last group - binary.Write(file, binary.LittleEndian, stem) - binary.Write(file, binary.LittleEndian, values) - err := root.(*verkle.InternalNode).InsertStem(stem[:], values, nil) - if err != nil { - panic(err) - } + log.Info("Finished", "elapsed", common.PrettyDuration(time.Since(start))) + return nil +} - // Committing file - log.Info("Committing file", "name", file.Name()) - root.Commit() +func getSortedLeavesData(fname string, secondLvlLeavesData chan []verkle.BatchNewLeafNodeData) error { + log.Info("Reading file", "name", fname) + data, err := ioutil.ReadFile(fname) + if err != nil { + return fmt.Errorf("failed to read file: %w", err) + } - // Write sorted tuples back to file - log.Info("Writing file", "name", file.Name()) - file.Close() + log.Info("Processing file", "name", fname) + numTuples := len(data) / 64 + tuples := make([][]byte, numTuples) + for i := 0; i < numTuples; i++ { + tuples[i] = data[i*64 : (i+1)*64] } - log.Info("Done", "root", fmt.Sprintf("%x", root.Commit().Bytes())) - log.Info("Finished", "elapsed", common.PrettyDuration(time.Since(start))) + // Sort tuples by key + log.Info("Sorting file", "name", fname) + sort.Slice(tuples, func(i, j int) bool { + return bytes.Compare(tuples[i][:32], tuples[j][:32]) < 0 + }) + + // Merge the values + log.Info("Merging file", "name", fname) + var ( + stem []byte + values = make(map[byte][]byte, 5) + last []byte + ) + if len(tuples) > 0 { + last = tuples[0][:31] + } + var leavesData []verkle.BatchNewLeafNodeData + for i := range tuples { + stem = tuples[i][:31] + if 
!bytes.Equal(stem, last) { + leavesData = append(leavesData, verkle.BatchNewLeafNodeData{Stem: last, Values: values}) + if stem[1] != last[1] { + secondLvlLeavesData <- leavesData + leavesData = make([]verkle.BatchNewLeafNodeData, 0, len(leavesData)) + } + last = stem + values = make(map[byte][]byte) + } + + values[tuples[i][31]] = tuples[i][32:] + } + leavesData = append(leavesData, verkle.BatchNewLeafNodeData{Stem: last, Values: values}) + secondLvlLeavesData <- leavesData + return nil } diff --git a/go.mod b/go.mod index 1369c43c71c3..082f8a2a06e8 100644 --- a/go.mod +++ b/go.mod @@ -23,7 +23,7 @@ require ( github.com/fjl/gencodec v0.0.0-20220412091415-8bb9e558978c github.com/fjl/memsize v0.0.0-20190710130421-bcb5799ab5e5 github.com/gballet/go-libpcsclite v0.0.0-20190607065134-2772fd86a8ff - github.com/gballet/go-verkle v0.0.0-20230413135631-4bea2763ed0f + github.com/gballet/go-verkle v0.0.0-20230413165055-0ebfd8549906 github.com/go-stack/stack v1.8.0 github.com/golang-jwt/jwt/v4 v4.3.0 github.com/golang/protobuf v1.5.2 diff --git a/go.sum b/go.sum index 728030c6ffcb..bb2cf3e8ab85 100644 --- a/go.sum +++ b/go.sum @@ -86,8 +86,6 @@ github.com/consensys/gnark-crypto v0.4.1-0.20210426202927-39ac3d4b3f1f/go.mod h1 github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/crate-crypto/go-ipa v0.0.0-20230315201338-1643fdc2ead8 h1:2EBbIwPDRqlCD2K34Eojyy0x9d3RhOuHAZfbQm508X8= -github.com/crate-crypto/go-ipa v0.0.0-20230315201338-1643fdc2ead8/go.mod h1:gzbVz57IDJgQ9rLQwfSk696JGWof8ftznEL9GoAv3NI= github.com/crate-crypto/go-ipa v0.0.0-20230410135559-ce4a96995014 h1:bbyTlFQ12wkFA6aVL+9HrBZwVl85AN0VS/Bwam7o93U= github.com/crate-crypto/go-ipa v0.0.0-20230410135559-ce4a96995014/go.mod 
h1:gzbVz57IDJgQ9rLQwfSk696JGWof8ftznEL9GoAv3NI= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= @@ -137,10 +135,8 @@ github.com/garslo/gogen v0.0.0-20170306192744-1d203ffc1f61 h1:IZqZOB2fydHte3kUgx github.com/garslo/gogen v0.0.0-20170306192744-1d203ffc1f61/go.mod h1:Q0X6pkwTILDlzrGEckF6HKjXe48EgsY/l7K7vhY4MW8= github.com/gballet/go-libpcsclite v0.0.0-20190607065134-2772fd86a8ff h1:tY80oXqGNY4FhTFhk+o9oFHGINQ/+vhlm8HFzi6znCI= github.com/gballet/go-libpcsclite v0.0.0-20190607065134-2772fd86a8ff/go.mod h1:x7DCsMOv1taUwEWCzT4cmDeAkigA5/QCwUodaVOe8Ww= -github.com/gballet/go-verkle v0.0.0-20230317174103-141354da6b11 h1:x4hiQFgr1SlqR4IoAZiXLFZK4L7KbibqkORqa1fwKp8= -github.com/gballet/go-verkle v0.0.0-20230317174103-141354da6b11/go.mod h1:IyOnn1kujMWaT+wet/6Ix1BtvYwateOBy9puuWH/8sw= -github.com/gballet/go-verkle v0.0.0-20230413135631-4bea2763ed0f h1:gP4uR2/1qx6hsIzbRI28JWcsVuP7xyjyj6SpLnoFobc= -github.com/gballet/go-verkle v0.0.0-20230413135631-4bea2763ed0f/go.mod h1:P3bwGrLhsUNIsUDlq2yzMPvO1c/15oiB3JS85P+hNfw= +github.com/gballet/go-verkle v0.0.0-20230413165055-0ebfd8549906 h1:T/z0/Xg6VwrTdw6oZcQyw6vLjDF5+g/15ppwSWgBMP8= +github.com/gballet/go-verkle v0.0.0-20230413165055-0ebfd8549906/go.mod h1:P3bwGrLhsUNIsUDlq2yzMPvO1c/15oiB3JS85P+hNfw= github.com/getkin/kin-openapi v0.53.0/go.mod h1:7Yn5whZr5kJi6t+kShccXS8ae1APpYTW6yheSwk8Yi4= github.com/getkin/kin-openapi v0.61.0/go.mod h1:7Yn5whZr5kJi6t+kShccXS8ae1APpYTW6yheSwk8Yi4= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= @@ -558,8 +554,6 @@ golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211020174200-9d6173849985/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220919091848-fb04ddd9f9c8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys 
v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= -golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=