Skip to content

Commit

Permalink
use simple delta encoding for key offsets
Browse files Browse the repository at this point in the history
  • Loading branch information
yihuang committed Feb 24, 2023
1 parent 4d031b6 commit 3f726af
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 15 deletions.
9 changes: 5 additions & 4 deletions memiavl/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,17 +97,18 @@ IAVL snapshot is composed by four files:

The implementation will read the mmap-ed content in a zero-copy way, won't use extra node cache, it will only rely on the OS page cache.

- `keys`, sequence of leaf node keys, ordered and no duplication, the offsets are appended to the end of the file, user can look up the key offset by leaf node index.
- `keys`, sequence of leaf node keys, ordered and without duplicates; the offsets are encoded in a custom restart/delta format and appended to the end of the file, supporting lookup by leaf node index.

```
payload
*repeat*
key offset: uint32
offset restart: uint64
delta offsets: [65535]uint32
*repeat*
offset: uint64 // begin offset of the offsets table
offset: uint64 // beginning offset of the above table
```

- `values`, sequence of leaf node values, the offsets are encoded with elias-fano coding and appended to the end of the file, user can look up the key offset by leaf node index.
- `values`, sequence of leaf node values; the offsets are encoded with Elias-Fano coding and appended to the end of the file, supporting lookup by leaf node index.

```
payload
Expand Down
25 changes: 20 additions & 5 deletions memiavl/layout_little_endian.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,26 @@ type PlainOffsetTable struct {
data []byte
}

func (t PlainOffsetTable) Get2(i uint64) (uint32, uint32) {
offset := i * 4
start := binary.LittleEndian.Uint32(t.data[offset:])
end := binary.LittleEndian.Uint32(t.data[offset+4:])
return start, end
// Get2 returns the offsets stored for entry i and entry i+1.
//
// The table is read as a sequence of chunks, each covering
// OffsetRestartInteval entries and occupying (OffsetRestartInteval+1) 4-byte
// words: a little-endian uint64 restart offset (the first entry of the chunk)
// followed by little-endian uint32 deltas, relative to that restart offset,
// for the remaining entries.
//
// It panics if the table is too short to hold both requested entries.
func (t PlainOffsetTable) Get2(i uint64) (uint64, uint64) {
	const wordSize = 4

	chunk, pos := i/OffsetRestartInteval, i%OffsetRestartInteval
	buf := t.data[chunk*(OffsetRestartInteval+1)*wordSize:]

	// Touch the last byte of the restart offset plus the first delta up
	// front, so the reads below that stay inside this prefix are known to be
	// in range.
	_ = buf[3*wordSize-1]
	base := binary.LittleEndian.Uint64(buf[:8])

	switch pos {
	case 0:
		// Entry i is the restart point itself; entry i+1 is the first delta.
		first := binary.LittleEndian.Uint32(buf[8:12])
		return base, base + uint64(first)
	case OffsetRestartInteval - 1:
		// Entry i is the last delta of this chunk; entry i+1 is the restart
		// point stored at the beginning of the next chunk.
		last := binary.LittleEndian.Uint32(buf[OffsetRestartInteval*wordSize:])
		next := binary.LittleEndian.Uint64(buf[(OffsetRestartInteval+1)*wordSize:])
		return base + uint64(last), next
	default:
		// Both entries are deltas within this chunk. The restart offset
		// takes two words, so the delta for in-chunk position p is at word p+1.
		cur := binary.LittleEndian.Uint32(buf[(pos+1)*wordSize:])
		nxt := binary.LittleEndian.Uint32(buf[(pos+2)*wordSize:])
		return base + uint64(cur), base + uint64(nxt)
	}
}

func NewPlainOffsetTable(data []byte) (PlainOffsetTable, error) {
Expand Down
20 changes: 18 additions & 2 deletions memiavl/layout_native.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,24 @@ type PlainOffsetTable struct {
data []uint32
}

func (t PlainOffsetTable) Get2(i uint64) (uint32, uint32) {
return t.data[i], t.data[i+1]
// Get2 returns the offsets stored for entry i and entry i+1.
//
// The table is read as a sequence of chunks, each covering
// OffsetRestartInteval entries and occupying OffsetRestartInteval+1 uint32
// words: a 64-bit restart offset stored as two words (low word first), which
// is the first entry of the chunk, followed by uint32 deltas, relative to
// that restart offset, for the remaining entries.
//
// It panics if the table is too short to hold both requested entries.
func (t PlainOffsetTable) Get2(i uint64) (uint64, uint64) {
	chunk, pos := i/OffsetRestartInteval, i%OffsetRestartInteval
	words := t.data[chunk*(OffsetRestartInteval+1):]

	// Touch the first delta up front so the accesses to words[0..2] below
	// are known to be in range.
	_ = words[2]
	base := uint64(words[1])<<32 | uint64(words[0])

	switch pos {
	case 0:
		// Entry i is the restart point itself; entry i+1 is the first delta.
		return base, base + uint64(words[2])
	case OffsetRestartInteval - 1:
		// Entry i is the last delta of this chunk; entry i+1 is the restart
		// point stored at the beginning of the next chunk.
		next := words[OffsetRestartInteval+1:]
		_ = next[1]
		return base + uint64(words[OffsetRestartInteval]), uint64(next[1])<<32 | uint64(next[0])
	default:
		// Both entries are deltas within this chunk. The restart offset
		// takes two words, so the delta for in-chunk position p is at word p+1.
		return base + uint64(words[pos+1]), base + uint64(words[pos+2])
	}
}

func NewPlainOffsetTable(buf []byte) (PlainOffsetTable, error) {
Expand Down
21 changes: 17 additions & 4 deletions memiavl/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ const (
EmptyRootNodeIndex = math.MaxUint32

Alignment = 8

OffsetRestartInteval = 65536
)

// Snapshot manage the lifecycle of mmap-ed files for the snapshot,
Expand Down Expand Up @@ -498,14 +500,25 @@ func writePadding(w io.Writer, offset uint64) (uint64, error) {

// writePlainOffsets writes the offset table in plain little-endian format
func writePlainOffsets(w io.Writer, bitmap *roaring64.Bitmap) error {
var numBuf [4]byte
var numBuf [8]byte
it := bitmap.Iterator()
var counter, restart uint64
for it.HasNext() {
v := it.Next()
binary.LittleEndian.PutUint32(numBuf[:], uint32(v))
if _, err := w.Write(numBuf[:]); err != nil {
return err
if counter%OffsetRestartInteval == 0 {
binary.LittleEndian.PutUint64(numBuf[:], v)
restart = v

if _, err := w.Write(numBuf[:]); err != nil {
return err
}
} else {
binary.LittleEndian.PutUint32(numBuf[:], uint32(v-restart))
if _, err := w.Write(numBuf[:4]); err != nil {
return err
}
}
counter++
}
return nil
}
Expand Down

0 comments on commit 3f726af

Please sign in to comment.