From 3f726af88b6be3310e40b82fb8520166080e6bad Mon Sep 17 00:00:00 2001 From: HuangYi Date: Fri, 24 Feb 2023 17:58:08 +0800 Subject: [PATCH] use simple delta encoding for key offsets --- memiavl/README.md | 9 +++++---- memiavl/layout_little_endian.go | 25 ++++++++++++++++++++----- memiavl/layout_native.go | 20 ++++++++++++++++++-- memiavl/snapshot.go | 21 +++++++++++++++++---- 4 files changed, 60 insertions(+), 15 deletions(-) diff --git a/memiavl/README.md b/memiavl/README.md index ca90911ccc..98237f7967 100644 --- a/memiavl/README.md +++ b/memiavl/README.md @@ -97,17 +97,18 @@ IAVL snapshot is composed by four files: The implementation will read the mmap-ed content in a zero-copy way, won't use extra node cache, it will only rely on the OS page cache. -- `keys`, sequence of leaf node keys, ordered and no duplication, the offsets are appended to the end of the file, user can look up the key offset by leaf node index. +- `keys`, sequence of leaf node keys, ordered and no duplication, the offsets are encoded with custom format and appended to the end of the file, support query by leaf node index. ``` payload *repeat* - key offset: uint32 + offset restart: uint64 + delta offsets: [65535]uint32 *repeat* - offset: uint64 // begin offset of the offsets table + offset: uint64 // beginning offset of the above table ``` -- `values`, sequence of leaf node values, the offsets are encoded with elias-fano coding and appended to the end of the file, user can look up the key offset by leaf node index. +- `values`, sequence of leaf node values, the offsets are encoded with elias-fano coding and appended to the end of the file, support query by leaf node index. ``` payload diff --git a/memiavl/layout_little_endian.go b/memiavl/layout_little_endian.go index 21ee1d6af9..6e7d9e91d6 100644 --- a/memiavl/layout_little_endian.go +++ b/memiavl/layout_little_endian.go @@ -53,11 +53,26 @@ type PlainOffsetTable struct { data []byte } -func (t PlainOffsetTable) Get2(i uint64) (uint32, uint32) { - offset := i * 4 - start := binary.LittleEndian.Uint32(t.data[offset:]) - end := binary.LittleEndian.Uint32(t.data[offset+4:]) - return start, end +func (t PlainOffsetTable) Get2(i uint64) (uint64, uint64) { + ichunk := i / OffsetRestartInteval + ii := i % OffsetRestartInteval + irestart := ichunk * (OffsetRestartInteval + 1) * 4 + data := t.data[irestart:] + + _ = data[3*4-1] + restart := binary.LittleEndian.Uint64(data[:8]) + + if ii == 0 { + return restart, restart + uint64(binary.LittleEndian.Uint32(data[8:12])) + } + if ii == OffsetRestartInteval-1 { + // the next one is at the beginning of the next chunk + return restart + uint64(binary.LittleEndian.Uint32(data[OffsetRestartInteval*4:])), + binary.LittleEndian.Uint64(data[(OffsetRestartInteval+1)*4:]) + } + // the next one is in the same chunk + return restart + uint64(binary.LittleEndian.Uint32(data[(ii+1)*4:])), + restart + uint64(binary.LittleEndian.Uint32(data[(ii+2)*4:])) } func NewPlainOffsetTable(data []byte) (PlainOffsetTable, error) { diff --git a/memiavl/layout_native.go b/memiavl/layout_native.go index 18d4eb12c9..f2a8b9cd06 100644 --- a/memiavl/layout_native.go +++ b/memiavl/layout_native.go @@ -65,8 +65,24 @@ type PlainOffsetTable struct { data []uint32 } -func (t PlainOffsetTable) Get2(i uint64) (uint32, uint32) { - return t.data[i], t.data[i+1] +func (t PlainOffsetTable) Get2(i uint64) (uint64, uint64) { + ichunk := i / OffsetRestartInteval + ii := i % OffsetRestartInteval + irestart := ichunk * (OffsetRestartInteval + 1) + data := t.data[irestart:] + + _ = data[2] + restart := uint64(data[0]) | uint64(data[1])<<32 + + if ii == 0 { + return restart, restart + uint64(data[2]) + } + if ii == OffsetRestartInteval-1 { + data2 := data[OffsetRestartInteval+1:] + _ = data2[1] + return restart + uint64(data[OffsetRestartInteval]), uint64(data2[0]) | uint64(data2[1])<<32 + } + return restart + uint64(data[ii+1]), restart + uint64(data[ii+2]) } func NewPlainOffsetTable(buf []byte) (PlainOffsetTable, error) { diff --git a/memiavl/snapshot.go b/memiavl/snapshot.go index 27c95eb908..762e77d784 100644 --- a/memiavl/snapshot.go +++ b/memiavl/snapshot.go @@ -29,6 +29,8 @@ const ( EmptyRootNodeIndex = math.MaxUint32 Alignment = 8 + + OffsetRestartInteval = 65536 ) // Snapshot manage the lifecycle of mmap-ed files for the snapshot, @@ -498,14 +500,25 @@ func writePadding(w io.Writer, offset uint64) (uint64, error) { // writePlainOffsets writes the offset table in plain little-endian format func writePlainOffsets(w io.Writer, bitmap *roaring64.Bitmap) error { - var numBuf [4]byte + var numBuf [8]byte it := bitmap.Iterator() + var counter, restart uint64 for it.HasNext() { v := it.Next() - binary.LittleEndian.PutUint32(numBuf[:], uint32(v)) - if _, err := w.Write(numBuf[:]); err != nil { - return err + if counter%OffsetRestartInteval == 0 { + binary.LittleEndian.PutUint64(numBuf[:], v) + restart = v + + if _, err := w.Write(numBuf[:]); err != nil { + return err + } + } else { + binary.LittleEndian.PutUint32(numBuf[:], uint32(v-restart)) + if _, err := w.Write(numBuf[:4]); err != nil { + return err + } } + counter++ } return nil }