Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

store/cachekv, x/bank/types: algorithmically fix pathologically slow code #8719

Merged
merged 4 commits into from
Feb 27, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 35 additions & 3 deletions store/cachekv/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ import (
"bytes"
"container/list"
"io"
"reflect"
"sort"
"sync"
"time"
"unsafe"

dbm "github.com/tendermint/tm-db"

Expand Down Expand Up @@ -177,18 +179,48 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator {
return newCacheMergeIterator(parent, cache, ascending)
}

// strToByte is meant to make a zero allocation conversion
// from string -> []byte to speed up operations, it is not meant
// to be used generally, but for a specific pattern to check for available
// keys within a domain.
func strToByte(s string) []byte {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mostly memo to self: there is another implementation in types/address/hash.go. We'll eventually need to 1. find a place where to put these functions (and I hope we won't come up with another catch-all util package) 2. reduce code duplication

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack, we can do so later indeed, but for now this'll suffice and keep the PR focused :-)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Completely agree! Thanks for the great work! 🙏

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great job @odeke-em. Are you planning to merge str/bytes convertion functions? Otherwise I can do a quick PR for that.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @robert-zaremba! Please go ahead and add them, but perhaps we should put them in an internal package so that no other packages out of the cosmos-sdk can import them.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, 👍

var b []byte
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&b))
hdr.Cap = len(s)
hdr.Len = len(s)
hdr.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data
return b
}

// byteSliceToStr is meant to make a zero allocation conversion
// from []byte -> string to speed up operations, it is not meant
// to be used generally, but for a specific pattern to delete keys
// from a map.
func byteSliceToStr(b []byte) string {
hdr := (*reflect.StringHeader)(unsafe.Pointer(&b))
return *(*string)(unsafe.Pointer(hdr))
}

// Constructs a slice of dirty items, to use w/ memIterator.
func (store *Store) dirtyItems(start, end []byte) {
unsorted := make([]*kv.Pair, 0)

n := len(store.unsortedCache)
for key := range store.unsortedCache {
cacheValue := store.cache[key]

if dbm.IsKeyInDomain([]byte(key), start, end) {
if dbm.IsKeyInDomain(strToByte(key), start, end) {
cacheValue := store.cache[key]
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}
}

if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
for key := range store.unsortedCache {
delete(store.unsortedCache, key)
}
} else { // Otherwise, normally delete the unsorted keys from the map.
for _, kv := range unsorted {
delete(store.unsortedCache, byteSliceToStr(kv.Key))
}
}

sort.Slice(unsorted, func(i, j int) bool {
Expand Down
45 changes: 38 additions & 7 deletions x/bank/types/balance.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,16 +62,47 @@ func (b Balance) Validate() error {

// SanitizeGenesisBalances sorts addresses and coin sets.
func SanitizeGenesisBalances(balances []Balance) []Balance {
sort.Slice(balances, func(i, j int) bool {
addr1, _ := sdk.AccAddressFromBech32(balances[i].Address)
addr2, _ := sdk.AccAddressFromBech32(balances[j].Address)
return bytes.Compare(addr1.Bytes(), addr2.Bytes()) < 0
})

// Given that this function sorts balances, using the standard library's
// Quicksort based algorithms, we have algorithmic complexities of:
// * Best case: O(nlogn)
// * Worst case: O(n^2)
// The comparator used MUST be cheap to use lest we incur expenses like we had
// before whereby sdk.AccAddressFromBech32, which is a very expensive operation
// compared n * n elements yet discarded computations each time, as per:
// https://github.com/cosmos/cosmos-sdk/issues/7766#issuecomment-786671734
// with this change the first step is to extract out and singly produce the values
// that'll be used for comparisons and keep them cheap and fast.

// 1. Retrieve the byte equivalents for each Balance's address and maintain a mapping of
// its Balance, as the mapper will be used in sorting.
type addrToBalance struct {
// We use a pointer here to avoid averse effects of value copying
// wasting RAM all around.
balance *Balance
accAddr []byte
}
adL := make([]*addrToBalance, 0, len(balances))
for _, balance := range balances {
balance.Coins = balance.Coins.Sort()
balance := balance
addr, _ := sdk.AccAddressFromBech32(balance.Address)
adL = append(adL, &addrToBalance{
balance: &balance,
accAddr: []byte(addr),
})
}

// 2. Sort with the cheap mapping, using the mapper's
// already accAddr bytes values which is a cheaper operation.
sort.Slice(adL, func(i, j int) bool {
ai, aj := adL[i], adL[j]
return bytes.Compare(ai.accAddr, aj.accAddr) < 0
})

// 3. To complete the sorting, we have to now just insert
// back the balances in the order returned by the sort.
for i, ad := range adL {
balances[i] = *ad.balance
}
return balances
}

Expand Down
78 changes: 78 additions & 0 deletions x/bank/types/balance_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package types_test

import (
"bytes"
"testing"

"github.com/stretchr/testify/require"

"github.com/cosmos/cosmos-sdk/crypto/keys/ed25519"
sdk "github.com/cosmos/cosmos-sdk/types"
bank "github.com/cosmos/cosmos-sdk/x/bank/types"
)
Expand Down Expand Up @@ -101,3 +103,79 @@ func TestBalance_GetAddress(t *testing.T) {
})
}
}

func TestSanitizeBalances(t *testing.T) {
// 1. Generate balances
tokens := sdk.TokensFromConsensusPower(81)
coin := sdk.NewCoin("benchcoin", tokens)
coins := sdk.Coins{coin}
addrs, _ := makeRandomAddressesAndPublicKeys(20)

var balances []bank.Balance
for _, addr := range addrs {
balances = append(balances, bank.Balance{
Address: addr.String(),
Coins: coins,
})
}
// 2. Sort the values.
sorted := bank.SanitizeGenesisBalances(balances)

// 3. Compare and ensure that all the values are sorted in ascending order.
// Invariant after sorting:
// a[i] <= a[i+1...n]
for i := 0; i < len(sorted); i++ {
ai := sorted[i]
// Ensure that every single value that comes after i is less than it.
for j := i + 1; j < len(sorted); j++ {
aj := sorted[j]

if got := bytes.Compare(ai.GetAddress(), aj.GetAddress()); got > 0 {
t.Errorf("Balance(%d) > Balance(%d)", i, j)
}
}
}
}

func makeRandomAddressesAndPublicKeys(n int) (accL []sdk.AccAddress, pkL []*ed25519.PubKey) {
for i := 0; i < n; i++ {
pk := ed25519.GenPrivKey().PubKey().(*ed25519.PubKey)
pkL = append(pkL, pk)
accL = append(accL, sdk.AccAddress(pk.Address()))
}
return accL, pkL
}

var sink, revert interface{}

func BenchmarkSanitizeBalances500(b *testing.B) {
benchmarkSanitizeBalances(b, 500)
}

func BenchmarkSanitizeBalances1000(b *testing.B) {
benchmarkSanitizeBalances(b, 1000)
}

func benchmarkSanitizeBalances(b *testing.B, nAddresses int) {
b.ReportAllocs()
tokens := sdk.TokensFromConsensusPower(81)
coin := sdk.NewCoin("benchcoin", tokens)
coins := sdk.Coins{coin}
addrs, _ := makeRandomAddressesAndPublicKeys(nAddresses)

b.ResetTimer()
for i := 0; i < b.N; i++ {
var balances []bank.Balance
for _, addr := range addrs {
balances = append(balances, bank.Balance{
Address: addr.String(),
Coins: coins,
})
}
sink = bank.SanitizeGenesisBalances(balances)
}
if sink == nil {
b.Fatal("Benchmark did not run")
}
sink = revert
}