Skip to content

Commit

Permalink
Pre-allocate vids for branches
Browse files Browse the repository at this point in the history
Each branch node may have up to 16 sub-items - currently, these are
given VertexIDs when they are first needed, leading to a
mostly-random order of VertexIDs for each sub-item.

Here, we pre-allocate all 16 vertex ids such that when a branch sub-item
is filled, it already has a VertexID waiting for it. This brings several
important benefits:

* subitems are sorted and "close" in their id sequencing - this means
that when rocksdb stores them, they are likely to end up in the same
data block thus improving read efficiency
* because the ids are consecutive, we can store just the starting id and
a bitmap representing which sub-items are in use - this reduces disk
space usage for branches, allowing more of them to fit into a single disk
read, further improving disk read and caching performance - disk usage
at block 18M is down from 84 to 78 GB!
* the in-memory footprint of VertexRef reduced allowing more instances
to fit into caches and less memory to be used overall.

Because of the increased locality of reference, it turns out that we no
longer need to iterate over the entire database to efficiently generate
the hash key database because the normal computation is now faster -
this significantly benefits "live" chain processing as well where each
dirtied key must be accompanied by a read of all branch subitems next to
it - most of the performance benefit in this branch comes from this
locality-of-reference improvement.

On a sample resync, there's already ~20% improvement with later blocks
seeing increasing benefit (because the trie is deeper in later blocks
leading to more benefit from branch read perf improvements)

```
blocks: 18729664, baseline: 190h43m49s, contender: 153h59m0s
Time (total): -36h44m48s, -19.27%
```

Note: clients need to be resynced as the PR changes the on-disk format

R.I.P. little bloom filter - your life in the repo was short but
valuable
  • Loading branch information
arnetheduck committed Dec 3, 2024
1 parent 9da3f29 commit 38ea2f1
Show file tree
Hide file tree
Showing 31 changed files with 418 additions and 701 deletions.
185 changes: 71 additions & 114 deletions nimbus/db/aristo/aristo_blobify.nim
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ proc blobifyTo*(pyl: LeafPayload, data: var seq[byte]) =
data &= pyl.stoData.blobify().data
data &= [0x20.byte]

proc blobifyTo*(vtx: VertexRef; key: HashKey, data: var seq[byte]): Result[void,AristoError] =
proc blobifyTo*(vtx: VertexRef, key: HashKey, data: var seq[byte]) =
## This function serialises the vertex argument to a database record.
## Contrary to RLP based serialisation, these records aim to align on
## fixed byte boundaries.
Expand All @@ -181,72 +181,53 @@ proc blobifyTo*(vtx: VertexRef; key: HashKey, data: var seq[byte]): Result[void,
## ::
## 8 * n * ((access shr (n * 4)) and 15)
##
if not vtx.isValid:
return err(BlobifyNilVertex)
case vtx.vType:
of Branch:
let code = if key.isValid:
data.add byte(key.len)
data.add key.data()
# TODO using 0 here for legacy reasons - a bit flag would be easier
0'u8 shl 6
else:
2'u8 shl 6
var
lens = 0u64
pos = data.len
for n in 0..15:
if vtx.bVid[n].isValid:
let tmp = vtx.bVid[n].blobify()
lens += uint64(tmp.len) shl (n * 4)
data &= tmp.data()
if data.len == pos:
return err(BlobifyBranchMissingRefs)
doAssert vtx.isValid

let
pSegm =
if vtx.pfx.len > 0:
vtx.pfx.toHexPrefix(isleaf = false)
else:
default(HexPrefixBuf)
psLen = pSegm.len.byte
if 33 < psLen:
return err(BlobifyExtPathOverflow)

data &= pSegm.data()
data &= lens.toBytesBE
data &= [code or psLen]

of Leaf:
let
pSegm = vtx.pfx.toHexPrefix(isleaf = true)
psLen = pSegm.len.byte
if psLen == 0 or 33 < psLen:
return err(BlobifyLeafPathOverflow)
vtx.lData.blobifyTo(data)
data &= pSegm.data()
data &= [(3'u8 shl 6) or psLen]

ok()
let
bits =
case vtx.vType
of Branch:
let bits =
if key.isValid and key.len == 32:
# Shorter keys can be loaded from the vertex directly
data.add key.data()
0b10'u8
else:
0b00'u8

data.add vtx.startVid.blobify().data()
data.add toBytesBE(vtx.used)
bits
of Leaf:
vtx.lData.blobifyTo(data)
0b01'u8

pSegm =
if vtx.pfx.len > 0:
vtx.pfx.toHexPrefix(isleaf = vtx.vType == Leaf)
else:
default(HexPrefixBuf)
psLen = pSegm.len.byte

data &= pSegm.data()
data &= [(bits shl 6) or psLen]

proc blobify*(vtx: VertexRef, key: HashKey): seq[byte] =
## Variant of `blobify()`
result = newSeqOfCap[byte](128)
if vtx.blobifyTo(key, result).isErr:
result.setLen(0) # blobify only fails on invalid verticies
vtx.blobifyTo(key, result)

proc blobifyTo*(lSst: SavedState; data: var seq[byte]): Result[void,AristoError] =
proc blobifyTo*(lSst: SavedState; data: var seq[byte]) =
## Serialise a last saved state record
data.add lSst.key.data
data.add lSst.serial.toBytesBE
data.add @[0x7fu8]
ok()

proc blobify*(lSst: SavedState): Result[seq[byte],AristoError] =
proc blobify*(lSst: SavedState): seq[byte] =
## Variant of `blobify()`
var data: seq[byte]
? lSst.blobifyTo data
ok(move(data))
lSst.blobifyTo data
data

# -------------
proc deblobify(
Expand Down Expand Up @@ -296,79 +277,55 @@ proc deblobifyType*(record: openArray[byte]; T: type VertexRef):
if record.len < 3: # minimum `Leaf` record
return err(DeblobVtxTooShort)

ok case record[^1] shr 6:
of 0, 2: Branch
of 3: Leaf
ok if ((record[^1] shr 6) and 0b01'u8) > 0:
Leaf
else:
return err(DeblobUnknown)
Branch

proc deblobify*(
record: openArray[byte];
T: type VertexRef;
): Result[T,AristoError] =
## De-serialise a data record encoded with `blobify()`. The second
## argument `vtx` can be `nil`.
if record.len < 3: # minimum `Leaf` record
if record.len < 3: # minimum `Leaf` record
return err(DeblobVtxTooShort)
let kind = record[^1] shr 6
let start = if kind == 0:
int(record[0] + 1)
else:
0
ok case kind:
of 0, 2: # `Branch` vertex
if record.len - start < 11: # at least two edges
return err(DeblobBranchTooShort)
let
aInx = record.len - 9
aIny = record.len - 2
var
offs = start
lens = uint64.fromBytesBE record.toOpenArray(aInx, aIny) # bitmap
vtxList: array[16,VertexID]
n = 0
while lens != 0:
let len = lens and 0b1111
if len > 0:
vtxList[n] = VertexID(? load64(record, offs, int(len)))
inc n
lens = lens shr 4

let (isLeaf, pathSegment) =
NibblesBuf.fromHexPrefix record.toOpenArray(offs, aInx - 1)
if isLeaf:
return err(DeblobBranchGotLeafPrefix)

# End `while`
VertexRef(
vType: Branch,
pfx: pathSegment,
bVid: vtxList)

of 3: # `Leaf` vertex

let
bits = record[^1] shr 6
vType = if (bits and 0b01'u8) > 0: Leaf else: Branch
hasKey = (bits and 0b10'u8) > 0
psLen = int(record[^1] and 0b00111111)
start = if hasKey: 32 else: 0

if psLen > record.len - 2 or start > record.len - 2 - psLen:
return err(DeblobBranchTooShort)

let
psPos = record.len - psLen - 1
(_, pathSegment) =
NibblesBuf.fromHexPrefix record.toOpenArray(psPos, record.len - 2)

ok case vType
of Branch:
var pos = start
let
sLen = record[^1].int and 0x3f # length of path segment
rLen = record.len - 1 # payload + path segment
pLen = rLen - sLen # payload length
if rLen < sLen or pLen < 1:
return err(DeblobLeafSizeGarbled)
let (isLeaf, pathSegment) =
NibblesBuf.fromHexPrefix record.toOpenArray(pLen, rLen-1)
if not isLeaf:
return err(DeblobLeafGotExtPrefix)
let vtx = VertexRef(
vType: Leaf,
pfx: pathSegment)

? record.toOpenArray(start, pLen - 1).deblobify(vtx.lData)
vtx
svLen = psPos - pos - 2
startVid = VertexID(?load64(record, pos, svLen))
used = uint16.fromBytesBE(record.toOpenArray(pos, pos + 1))

else:
return err(DeblobUnknown)
pos += 2

VertexRef(vType: Branch, pfx: pathSegment, startVid: startVid, used: used)
of Leaf:
let vtx = VertexRef(vType: Leaf, pfx: pathSegment)

?record.toOpenArray(start, psPos - 1).deblobify(vtx.lData)
vtx

proc deblobify*(record: openArray[byte], T: type HashKey): Opt[HashKey] =
if record.len > 1 and ((record[^1] shr 6) == 0) and (int(record[0]) + 1) < record.len:
HashKey.fromBytes(record.toOpenArray(1, int(record[0])))
if record.len > 33 and (((record[^1] shr 6) and 0b10'u8) > 0):
HashKey.fromBytes(record.toOpenArray(0, 31))
else:
Opt.none(HashKey)

Expand Down
9 changes: 4 additions & 5 deletions nimbus/db/aristo/aristo_check/check_be.nim
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,10 @@ proc checkBE*[T: RdbBackendRef|MemBackendRef|VoidBackendRef](
of Branch:
block check42Links:
var seen = false
for n in 0 .. 15:
if vtx.bVid[n].isValid:
if seen:
break check42Links
seen = true
for _, _ in vtx.pairs():
if seen:
break check42Links
seen = true
return err((rvid.vid,CheckBeVtxBranchLinksMissing))

for (rvid,key) in T.walkKeyBe db:
Expand Down
9 changes: 4 additions & 5 deletions nimbus/db/aristo/aristo_check/check_top.nim
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,10 @@ proc checkTopCommon*(
of Branch:
block check42Links:
var seen = false
for n in 0 .. 15:
if vtx.bVid[n].isValid:
if seen:
break check42Links
seen = true
for _, _ in vtx.pairs():
if seen:
break check42Links
seen = true
return err((rvid.vid,CheckAnyVtxBranchLinksMissing))
else:
nNilVtx.inc
Expand Down
Loading

0 comments on commit 38ea2f1

Please sign in to comment.