
[Badger] Add universal database operations #6465

Merged · 26 commits · Dec 12, 2024

Commits
8d98f0f
add universal database operations
zhangchiqing Sep 13, 2024
90a2426
address review comments
zhangchiqing Nov 1, 2024
326772e
consolidate PrefixUpperbound
zhangchiqing Nov 1, 2024
17972e0
update comments
zhangchiqing Nov 2, 2024
eea7a2c
add comments
zhangchiqing Dec 5, 2024
2a36b57
fix the boundary iteration case for badger
zhangchiqing Dec 5, 2024
4766421
add review comments
zhangchiqing Dec 5, 2024
a725247
fix DeleteRange
zhangchiqing Dec 5, 2024
b5eaf02
refactor StartEndPrefixToLowerUpperBound
zhangchiqing Dec 5, 2024
47ef8aa
add prefix check in NewIter
zhangchiqing Dec 5, 2024
18c0a17
add RemoveByRange
zhangchiqing Dec 5, 2024
cd39e67
refactor to use KeyCopy
zhangchiqing Dec 6, 2024
c68ab0d
Apply suggestions from code review
zhangchiqing Dec 6, 2024
dac89f2
add comments
zhangchiqing Dec 6, 2024
f69cbcc
update comments for Get method
zhangchiqing Dec 6, 2024
42f360e
making it optional to call closer when Get fails
zhangchiqing Dec 6, 2024
05862f6
refactor Retrieve with RetrieveByKey
zhangchiqing Dec 6, 2024
e5ae317
extract functions out of functors
zhangchiqing Dec 6, 2024
88d86d2
address review comments
zhangchiqing Dec 10, 2024
ef995d4
address review comments
zhangchiqing Dec 10, 2024
ffe023c
use RWMutex
zhangchiqing Dec 10, 2024
fdce574
add noop closer
zhangchiqing Dec 10, 2024
eebde06
handle close error
zhangchiqing Dec 10, 2024
9f49267
update comments
zhangchiqing Dec 10, 2024
d8f31c5
Merge branch 'master' into leo/db-ops
zhangchiqing Dec 11, 2024
24d65bc
revert closer changes
zhangchiqing Dec 12, 2024
3 changes: 2 additions & 1 deletion cmd/bootstrap/utils/md5.go
@@ -2,7 +2,8 @@ package utils

// The google storage API only provides md5 and crc32 hence overriding the linter flag for md5
import (
"crypto/md5" //nolint:gosec
// #nosec
Comment (Member Author): fix linter error

"crypto/md5"
"io"
"os"
)
8 changes: 7 additions & 1 deletion storage/batch.go
@@ -1,13 +1,19 @@
package storage

import "github.com/dgraph-io/badger/v2"
import (
"github.com/dgraph-io/badger/v2"
)

// Deprecated: Transaction is being deprecated as part of the transition from Badger to Pebble.
// Use Writer instead of Transaction for all new code.
type Transaction interface {
Set(key, val []byte) error
}

// BatchStorage serves as an abstraction over batch storage, adding the ability to register extra
// callbacks which fire after the batch is successfully flushed.
// Deprecated: BatchStorage is being deprecated as part of the transition from Badger to Pebble.
// Use ReaderBatchWriter instead of BatchStorage for all new code.
type BatchStorage interface {
GetWriter() *badger.WriteBatch

71 changes: 71 additions & 0 deletions storage/operation/badgerimpl/iterator.go
@@ -0,0 +1,71 @@
package badgerimpl

import (
"bytes"

"github.com/dgraph-io/badger/v2"

"github.com/onflow/flow-go/storage"
)

type badgerIterator struct {
iter *badger.Iterator
lowerBound []byte
upperBound []byte
}

var _ storage.Iterator = (*badgerIterator)(nil)

func newBadgerIterator(db *badger.DB, startPrefix, endPrefix []byte, ops storage.IteratorOption) *badgerIterator {
options := badger.DefaultIteratorOptions
if ops.IterateKeyOnly {
options.PrefetchValues = false
}

tx := db.NewTransaction(false)
iter := tx.NewIterator(options)

lowerBound, upperBound := storage.StartEndPrefixToLowerUpperBound(startPrefix, endPrefix)
Comment (Member Author): Note, we are applying the same approach as pebble to compute the exclusive upperBound here. This allows us to get rid of the global max value stored in the database (see: storage/badger/operation/max.go)


return &badgerIterator{
iter: iter,
lowerBound: lowerBound,
upperBound: upperBound,
}
}

// First seeks to the smallest key greater than or equal to the given key.
func (i *badgerIterator) First() {
i.iter.Seek(i.lowerBound)
}

// Valid returns whether the iterator is positioned at a valid key-value pair.
func (i *badgerIterator) Valid() bool {
// if it's beyond the upper bound, it's invalid
if !i.iter.Valid() {
return false
}
key := i.iter.Item().Key()
// "< 0" means "key < upperBound"
valid := bytes.Compare(key, i.upperBound) < 0
return valid
Comment (Member):

I don't understand the difference between

// if it's beyond the upper bound, it's invalid
if !i.iter.Valid() {
return false
}

you are documenting in line 44:

if it's beyond the upper bound, it's invalid

so why do we do the check again for the upper bound:

key := i.iter.Item().Key()
// "< 0" means "key < upperBound"
valid := bytes.Compare(key, i.upperBound) < 0
return valid

Please document the answer in the implementation (not as a reply to my PR comment). Thanks

}
Comment (Member):

⚠️❓❗

I am worried this might be incorrect:

  • my understanding is that the StartEndPrefixToLowerUpperBound function should translate the prefix bounds to bounds on the full key values:
    lowerBound, upperBound := storage.StartEndPrefixToLowerUpperBound(startPrefix, endPrefix)
  • If we specify startPrefix, endPrefix := []byte{0x10}, []byte{0xff} the function StartEndPrefixToLowerUpperBound will turn this into nil (empty list) for the endPrefix.
  • the nil slice is lexicographically smaller than any non-empty slice ⚠️


// Next advances the iterator to the next key-value pair.
func (i *badgerIterator) Next() {
i.iter.Next()
}

// IterItem returns the current key-value pair, or nil if done.
func (i *badgerIterator) IterItem() storage.IterItem {
return i.iter.Item()
}

var _ storage.IterItem = (*badger.Item)(nil)

// Close closes the iterator. The iterator must be closed; otherwise it causes a memory leak.
// No errors expected during normal operation
func (i *badgerIterator) Close() error {
i.iter.Close()
return nil
}
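For reference, a typical consumer drives this iterator with the First/Valid/Next loop and a deferred Close. The sketch below uses a toy in-memory iterator (sliceIter is hypothetical, not part of the PR) to show the idiom:

```go
package main

import "fmt"

// Iterator mirrors the storage.Iterator contract shown in this PR.
type Iterator interface {
	First()
	Valid() bool
	Next()
	Close() error
}

// sliceIter is a toy in-memory iterator used only to demonstrate the loop.
type sliceIter struct {
	keys [][]byte
	pos  int
}

func (s *sliceIter) First()       { s.pos = 0 }
func (s *sliceIter) Valid() bool  { return s.pos < len(s.keys) }
func (s *sliceIter) Next()        { s.pos++ }
func (s *sliceIter) Close() error { return nil }

// countKeys shows the canonical First/Valid/Next loop with a deferred Close.
func countKeys(it Iterator) (int, error) {
	defer it.Close() // iterators must be closed, or the backing txn leaks
	n := 0
	for it.First(); it.Valid(); it.Next() {
		n++
	}
	return n, nil
}

func main() {
	it := &sliceIter{keys: [][]byte{{0x10, 0x01}, {0x10, 0x02}}}
	n, _ := countKeys(it)
	fmt.Println(n) // 2
}
```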
63 changes: 63 additions & 0 deletions storage/operation/badgerimpl/reader.go
@@ -0,0 +1,63 @@
package badgerimpl

import (
"errors"
"io"

"github.com/dgraph-io/badger/v2"

"github.com/onflow/flow-go/module/irrecoverable"
"github.com/onflow/flow-go/storage"
)

type dbReader struct {
db *badger.DB
}

type noopCloser struct{}

var _ io.Closer = (*noopCloser)(nil)

func (noopCloser) Close() error { return nil }

// Get gets the value for the given key. It returns ErrNotFound if the DB
// does not contain the key.
// Other errors are exceptions.
//
// The caller should not modify the contents of the returned slice, but it is
// safe to modify the contents of the argument after Get returns. The
// returned slice will remain valid until the returned Closer is closed. On
// success, the caller MUST call closer.Close() or a memory leak will occur.
func (b dbReader) Get(key []byte) ([]byte, io.Closer, error) {

tx := b.db.NewTransaction(false)
defer tx.Discard()

item, err := tx.Get(key)
if err != nil {
if errors.Is(err, badger.ErrKeyNotFound) {
return nil, nil, storage.ErrNotFound
}
return nil, nil, irrecoverable.NewExceptionf("could not load data: %w", err)
}

value, err := item.ValueCopy(nil)
if err != nil {
return nil, nil, irrecoverable.NewExceptionf("could not load value: %w", err)
}

return value, noopCloser{}, nil
}

// NewIter returns a new Iterator for the given key prefix range [startPrefix, endPrefix], both inclusive.
// Specifically, all keys that meet ANY of the following conditions are included in the iteration:
// - have a prefix equal to startPrefix OR
// - have a prefix equal to the endPrefix OR
// - have a prefix that is lexicographically between startPrefix and endPrefix
func (b dbReader) NewIter(startPrefix, endPrefix []byte, ops storage.IteratorOption) (storage.Iterator, error) {

return newBadgerIterator(b.db, startPrefix, endPrefix, ops), nil
}

// ToReader is a helper function to convert a *badger.DB to a Reader
func ToReader(db *badger.DB) storage.Reader {
return dbReader{db}
}
120 changes: 120 additions & 0 deletions storage/operation/badgerimpl/writer.go
@@ -0,0 +1,120 @@
package badgerimpl

import (
"fmt"

"github.com/dgraph-io/badger/v2"

"github.com/onflow/flow-go/storage"
"github.com/onflow/flow-go/storage/operation"
op "github.com/onflow/flow-go/storage/operation"
)

type ReaderBatchWriter struct {
globalReader storage.Reader
batch *badger.WriteBatch

callbacks op.Callbacks
}

var _ storage.ReaderBatchWriter = (*ReaderBatchWriter)(nil)

// GlobalReader returns a database-backed reader which reads the latest committed global database state ("read-committed isolation").
// This reader will not read writes written to ReaderBatchWriter.Writer until the write batch is committed.
Comment (Member): typo? "read writes written"

// This reader may observe different values for the same key on subsequent reads.
func (b *ReaderBatchWriter) GlobalReader() storage.Reader {
return b.globalReader
}

// Writer returns a writer associated with a batch of writes. The batch is pending until it is committed.
// When we `Write` into the batch, that write operation is added to the pending batch, but not committed.
// The commit operation is atomic w.r.t. the batch; either all writes are applied to the database, or no writes are.
// Note:
// - The writer cannot be used concurrently for writing.
func (b *ReaderBatchWriter) Writer() storage.Writer {
return b
}

// BadgerWriteBatch returns the badger write batch
func (b *ReaderBatchWriter) BadgerWriteBatch() *badger.WriteBatch {
return b.batch
}

// AddCallback adds a callback to execute after the batch has been flushed,
// regardless of whether the batch update succeeded or failed.
// The error parameter is the error returned by the batch update.
func (b *ReaderBatchWriter) AddCallback(callback func(error)) {
b.callbacks.AddCallback(callback)
}

// Commit flushes the batch to the database.
// No errors expected during normal operation
func (b *ReaderBatchWriter) Commit() error {
err := b.batch.Flush()

b.callbacks.NotifyCallbacks(err)

return err
}

func WithReaderBatchWriter(db *badger.DB, fn func(storage.ReaderBatchWriter) error) error {
batch := NewReaderBatchWriter(db)

err := fn(batch)
if err != nil {
// fn might use lock to ensure concurrent safety while reading and writing data
// and the lock is usually released by a callback.
// in other words, fn might hold a lock to be released by a callback,
// we need to notify the callback for the locks to be released before
// returning the error.
batch.callbacks.NotifyCallbacks(err)
return err
}

return batch.Commit()
}

func NewReaderBatchWriter(db *badger.DB) *ReaderBatchWriter {
return &ReaderBatchWriter{
globalReader: ToReader(db),
batch: db.NewWriteBatch(),
}
}

var _ storage.Writer = (*ReaderBatchWriter)(nil)

// Set sets the value for the given key. It overwrites any previous value
// for that key; a DB is not a multi-map.
//
// It is safe to modify the contents of the arguments after Set returns.
// No errors expected during normal operation
func (b *ReaderBatchWriter) Set(key, value []byte) error {
return b.batch.Set(key, value)
}

// Delete deletes the value for the given key. Deletes are blind: they
// succeed even if the given key does not exist.
//
// It is safe to modify the contents of the arguments after Delete returns.
// No errors expected during normal operation
func (b *ReaderBatchWriter) Delete(key []byte) error {
return b.batch.Delete(key)
}

// DeleteByRange removes all keys with a prefix that falls within the
// range [start, end], both inclusive.
// No errors expected during normal operation
func (b *ReaderBatchWriter) DeleteByRange(globalReader storage.Reader, startPrefix, endPrefix []byte) error {
err := operation.IterateKeysInPrefixRange(startPrefix, endPrefix, func(key []byte) error {
err := b.batch.Delete(key)
if err != nil {
return fmt.Errorf("could not add key to delete batch (%v): %w", key, err)
}
return nil
})(globalReader)

if err != nil {
return fmt.Errorf("could not find keys by range to be deleted: %w", err)
}
return nil
}
24 changes: 24 additions & 0 deletions storage/operation/callbacks.go
@@ -0,0 +1,24 @@
package operation

import "sync"

type Callbacks struct {
sync.Mutex // protect callbacks
callbacks []func(error)
}

func (b *Callbacks) AddCallback(callback func(error)) {
b.Lock()
defer b.Unlock()

b.callbacks = append(b.callbacks, callback)
}

func (b *Callbacks) NotifyCallbacks(err error) {
b.Lock()
defer b.Unlock()

for _, callback := range b.callbacks {
callback(err)
}
}
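The lock-release pattern that WithReaderBatchWriter's error path protects (see the badgerimpl writer above) can be sketched with this Callbacks type: a caller holds a lock for the lifetime of the batch and registers a callback to release it, which is why callbacks must be notified on failure as well as success. A minimal sketch, with the Callbacks type copied from above:

```go
package main

import (
	"fmt"
	"sync"
)

// Callbacks mirrors the type above: a mutex-protected list of func(error).
type Callbacks struct {
	sync.Mutex
	callbacks []func(error)
}

func (b *Callbacks) AddCallback(callback func(error)) {
	b.Lock()
	defer b.Unlock()
	b.callbacks = append(b.callbacks, callback)
}

func (b *Callbacks) NotifyCallbacks(err error) {
	b.Lock()
	defer b.Unlock()
	for _, callback := range b.callbacks {
		callback(err)
	}
}

func main() {
	var cbs Callbacks
	var dataLock sync.Mutex

	// The writer takes the lock for the duration of the batch and arranges
	// for NotifyCallbacks to release it, succeed or fail - the reason
	// WithReaderBatchWriter must notify callbacks on the error path too.
	dataLock.Lock()
	cbs.AddCallback(func(err error) {
		dataLock.Unlock()
		fmt.Println("batch finished, err:", err)
	})

	cbs.NotifyCallbacks(nil) // simulate a successful Commit
	dataLock.Lock()          // the lock is free again
	dataLock.Unlock()
}
```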
34 changes: 34 additions & 0 deletions storage/operation/codec.go
@@ -0,0 +1,34 @@
package operation

import (
"encoding/binary"
"fmt"

"github.com/onflow/flow-go/model/flow"
)

// EncodeKeyPart encodes a value to be used as a part of a key to be stored in storage.
func EncodeKeyPart(v interface{}) []byte {
switch i := v.(type) {
case uint8:
return []byte{i}
case uint32:
b := make([]byte, 4)
binary.BigEndian.PutUint32(b, i)
return b
case uint64:
b := make([]byte, 8)
binary.BigEndian.PutUint64(b, i)
return b
case string:
return []byte(i)
case flow.Role:
return []byte{byte(i)}
case flow.Identifier:
return i[:]
case flow.ChainID:
return []byte(i)
default:
panic(fmt.Sprintf("unsupported type to convert (%T)", v))
}
}
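Because the integer cases use big-endian encoding, byte-wise key ordering matches numeric ordering, which is what lets the prefix-range iteration above visit, e.g., height-indexed keys in increasing order. A sketch (encodeKeyPart is a trimmed copy of the numeric cases; makeKey is a hypothetical helper, not part of the PR):

```go
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// encodeKeyPart is a trimmed copy of the numeric cases of EncodeKeyPart.
func encodeKeyPart(v interface{}) []byte {
	switch i := v.(type) {
	case uint8:
		return []byte{i}
	case uint64:
		b := make([]byte, 8)
		binary.BigEndian.PutUint64(b, i)
		return b
	default:
		panic(fmt.Sprintf("unsupported type to convert (%T)", v))
	}
}

// makeKey is a hypothetical helper concatenating a code byte with key parts.
func makeKey(code uint8, parts ...interface{}) []byte {
	key := []byte{code}
	for _, p := range parts {
		key = append(key, encodeKeyPart(p)...)
	}
	return key
}

func main() {
	// Big-endian means lexicographic byte order equals numeric order,
	// so height 5 sorts before height 10 under the same code prefix.
	k1 := makeKey(0x01, uint64(5))
	k2 := makeKey(0x01, uint64(10))
	fmt.Println(bytes.Compare(k1, k2) < 0) // true
}
```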