Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Farm Hash conditioned upon nimPreviewHashFarm as 64-bit Hash #23735

Merged
merged 2 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changelogs/changelog_2_2_0.md
ringabout marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

## Standard library additions and changes

The `nimPreviewHashFarm` define has been added to `lib/pure/hashes.nim`. When
defined, it activates a 64-bit string `Hash` producer (based upon Google's Farm
Hash) which is also much faster than the present one. At present, this is
incompatible with `--jsbigint64:off` mode.

## Language changes

## Compiler changes
Expand Down
204 changes: 184 additions & 20 deletions lib/pure/hashes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,145 @@ proc hashVmImplChar(x: openArray[char], sPos, ePos: int): Hash =
proc hashVmImplByte(x: openArray[byte], sPos, ePos: int): Hash =
raiseAssert "implementation override in compiler/vmops.nim"

# Multiplicative mixing constants shared by the Farm Hash procs in this file.
const k0 = 0xc3a5c85c97cb3127u64 # Primes on (2^63, 2^64) for various uses
const k1 = 0xb492b66fbe98f273u64 # multiplier used inside the 64-byte block loop
const k2 = 0x9ae16a3b2f90404fu64 # base of the `k2 + 2*len` multiplier; hash of ""

proc load4e(s: openArray[byte], o=0): uint32 {.inline.} =
  ## Assembles `s[o ..< o+4]` into a `uint32`, least significant byte first
  ## (little-endian), using only indexing and shifts.
  ##
  ## Bytes are visited from the highest offset down, so an out-of-range read
  ## is reported for `o + 3` first.
  for i in countdown(3, 0):
    result = (result shl 8) or uint32(s[o + i])

proc load8e(s: openArray[byte], o=0): uint64 {.inline.} =
  ## Assembles `s[o ..< o+8]` into a `uint64`, least significant byte first
  ## (little-endian), using only indexing and shifts.
  ##
  ## Bytes are visited from the highest offset down, so an out-of-range read
  ## is reported for `o + 7` first.
  for i in countdown(7, 0):
    result = (result shl 8) or uint64(s[o + i])

proc load4(s: openArray[byte], o=0): uint32 {.inline.} =
  ## Loads the 4 bytes starting at `s[o]` as a little-endian `uint32`.
  ##
  ## In the VM, and wherever `copyMem` is not declared, the portable
  ## `load4e` is used. The raw `copyMem` path reads the bytes in native
  ## order, so it is only taken on little-endian CPUs; otherwise compile-time
  ## and run-time hashes of the same input would disagree on big-endian
  ## targets.
  when nimvm: result = load4e(s, o)
  else:
    when declared(copyMem) and cpuEndian == littleEndian:
      # Native byte order equals the little-endian order `load4e` produces.
      copyMem result.addr, s[o].addr, result.sizeof
    else: result = load4e(s, o)

proc load8(s: openArray[byte], o=0): uint64 {.inline.} =
  ## Loads the 8 bytes starting at `s[o]` as a little-endian `uint64`.
  ##
  ## In the VM, and wherever `copyMem` is not declared, the portable
  ## `load8e` is used. The raw `copyMem` path reads the bytes in native
  ## order, so it is only taken on little-endian CPUs; otherwise compile-time
  ## and run-time hashes of the same input would disagree on big-endian
  ## targets.
  when nimvm: result = load8e(s, o)
  else:
    when declared(copyMem) and cpuEndian == littleEndian:
      # Native byte order equals the little-endian order `load8e` produces.
      copyMem result.addr, s[o].addr, result.sizeof
    else: result = load8e(s, o)

proc lenU(s: openArray[byte]): uint64 {.inline.} =
  ## Length of `s` converted to `uint64`, for use in unsigned hash arithmetic.
  uint64(len(s))

proc shiftMix(v: uint64): uint64 {.inline.} =
  ## Folds the top 17 bits of `v` into its low bits: `v xor (v shr 47)`.
  result = v shr 47
  result = result xor v

proc rotR(v: uint64; bits: cint): uint64 {.inline.} =
  ## Rotates `v` right by `bits` positions within 64 bits.
  ##
  ## The call sites visible in this file all pass a constant in `1..63`.
  let low = v shr bits           # bits that stay, moved down
  let wrapped = v shl (64 - bits) # bits that fall off the bottom, moved to top
  low or wrapped

proc len16(u: uint64; v: uint64; mul: uint64): uint64 {.inline.} =
  ## Mixes two 64-bit words into one using the multiplier `mul`.
  ##
  ## Two multiply / xor-shift-by-47 rounds are applied, followed by a final
  ## multiplication. All arithmetic wraps modulo 2^64.
  let firstProd = (u xor v)*mul
  let firstMix = firstProd xor (firstProd shr 47)
  let secondProd = (v xor firstMix)*mul
  let secondMix = secondProd xor (secondProd shr 47)
  secondMix*mul

proc len0_16(s: openArray[byte]): uint64 {.inline.} =
  ## Farm Hash for short inputs; `hashFarm` calls this when `s.len <= 16`.
  ##
  ## Four cases by length: empty, 1..3 bytes (three sampled bytes),
  ## 4..7 bytes (two possibly overlapping 4-byte loads) and 8 or more bytes
  ## (two possibly overlapping 8-byte loads).
  let n = s.len
  if n == 0:
    return k2
  if n < 4:
    # Sample first, middle and last byte; for n == 1 all three coincide.
    let first = uint32(s[0])
    let middle = uint32(s[n shr 1])
    let last = uint32(s[n - 1])
    let y = first + (middle shl 8)
    let z = s.lenU + (last shl 2)
    return shiftMix(y*k2 xor z*k0)*k2
  let mul = k2 + 2*s.lenU # length-dependent multiplier
  if n < 8:
    let head = load4(s).uint64
    return len16(s.lenU + (head shl 3), load4(s, n - 4), mul)
  let head = load8(s) + k2
  let tail = load8(s, n - 8)
  let c = rotR(tail, 37)*mul + head
  let d = (rotR(head, 25) + tail)*mul
  len16(c, d, mul)

proc len17_32(s: openArray[byte]): uint64 {.inline.} =
  ## Farm Hash for mid-sized inputs; `hashFarm` calls this when
  ## `17 <= s.len <= 32`.
  ##
  ## Reads the first 16 and the last 16 bytes as four 8-byte words (which may
  ## overlap) and mixes them with a length-dependent multiplier.
  let
    n = s.len
    mul = k2 + 2*s.lenU
    head0 = load8(s)*k1
    head1 = load8(s, 8)
    tail0 = load8(s, n - 8)*mul
    tail1 = load8(s, n - 16)*k2
    u = rotR(head0 + head1, 43) + rotR(tail0, 30) + tail1
    v = head0 + rotR(head1 + k2, 18) + tail0
  len16(u, v, mul)

proc len33_64(s: openArray[byte]): uint64 {.inline.} =
  ## Farm Hash for inputs that `hashFarm` routes here when
  ## `33 <= s.len <= 64`.
  ##
  ## Reads the first 32 and the last 32 bytes as eight 8-byte words (which
  ## may overlap) and combines them in two `len16` rounds.
  let
    n = s.len
    mul = k2 + 2*s.lenU
    # First round: outermost 16 bytes at each end.
    a = load8(s)*k2
    b = load8(s, 8)
    c = load8(s, n - 8)*mul
    d = load8(s, n - 16)*k2
    y = rotR(a + b, 43) + rotR(c, 30) + d
    z = len16(y, a + rotR(b + k2, 18) + c, mul)
    # Second round: inner 16 bytes at each end, seeded with `y` and `z`.
    e = load8(s, 16)*mul
    f = load8(s, 24)
    g = (y + load8(s, n - 32))*mul
    h = (z + load8(s, n - 24))*mul
    u = rotR(e + f, 43) + rotR(g, 30) + h
    v = e + rotR(f + a, 18) + g
  len16(u, v, mul)

type
  Pair = tuple[first: uint64, second: uint64] ## Two 64-bit words of hash state.

proc weakLen32withSeeds2(w, x, y, z, a, b: uint64): Pair {.inline.} =
  ## Mixes four data words `w, x, y, z` with two seeds `a, b` into a `Pair`
  ## using only additions and rotations (all arithmetic wraps modulo 2^64).
  let seeded = a + w          # seed `a` combined with the first word
  let summed = seeded + x + y # running sum of the first three words
  let rotated = rotR(b + seeded + z, 21) + rotR(summed, 44)
  (first: summed + z, second: rotated + seeded)

proc weakLen32withSeeds(s: openArray[byte]; o: int; a,b: uint64): Pair {.inline.} =
  ## Loads the four 8-byte words of `s[o ..< o+32]` and mixes them with the
  ## seeds `a` and `b` via `weakLen32withSeeds2`.
  let word0 = load8(s, o)
  let word1 = load8(s, o + 8)
  let word2 = load8(s, o + 16)
  let word3 = load8(s, o + 24)
  weakLen32withSeeds2(word0, word1, word2, word3, a, b)

proc hashFarm(s: openArray[byte]): uint64 {.inline.} =
  ## 64-bit Farm Hash of the bytes in `s`.
  ##
  ## Inputs of up to 64 bytes are dispatched to `len0_16`, `len17_32` or
  ## `len33_64`. Longer inputs are consumed in 64-byte blocks, followed by one
  ## extra round over the last 64 bytes (which may overlap the previous
  ## block) and a final `len16` fold of the state.
  let n = s.len
  if n <= 16: return len0_16(s)
  if n <= 32: return len17_32(s)
  if n <= 64: return len33_64(s)
  const seed = 81u64 # fixed seed; this proc takes no seed input
  let
    # End offset of the whole 64-byte blocks handled by the loop; the last
    # 1..64 bytes are left to the tail round. Here n > 64, so blockEnd >= 64.
    blockEnd = ((n - 1) div 64)*64
    # Start of the last 64 bytes of the input.
    tailPos = blockEnd + ((n - 1) and 63) - 63
  var
    v, w: Pair # zero-initialised 2x64-bit state pairs
    y = seed*k1 + 113
    z = shiftMix(y*k2 + 113)*k2
    x = seed*k2 + load8(s)
  # Main loop: one mixing round per 64-byte block at offsets 0, 64, ...
  for pos in countup(0, blockEnd - 64, 64):
    x = rotR(x + y + v.first + load8(s, pos + 8), 37)*k1
    y = rotR(y + v.second + load8(s, pos + 48), 42)*k1
    x = x xor w.second
    y += v.first + load8(s, pos + 40)
    z = rotR(z + w.first, 33)*k1
    v = weakLen32withSeeds(s, pos, v.second*k1, x + w.first)
    w = weakLen32withSeeds(s, pos + 32, z + w.second, y + load8(s, pos + 16))
    swap z, x
  # Tail round: same shape as the loop body, but with a state-dependent
  # multiplier and the remaining length mixed in.
  let mul = k1 + ((z and 0xff) shl 1)
  w.first += (s.lenU - 1) and 63
  v.first += w.first
  w.first += v.first
  x = rotR(x + y + v.first + load8(s, tailPos + 8), 37)*mul
  y = rotR(y + v.second + load8(s, tailPos + 48), 42)*mul
  x = x xor w.second*9
  y += v.first*9 + load8(s, tailPos + 40)
  z = rotR(z + w.first, 33)*mul
  v = weakLen32withSeeds(s, tailPos, v.second*mul, x + w.first)
  w = weakLen32withSeeds(s, tailPos + 32, z + w.second,
                         y + load8(s, tailPos + 16))
  swap z, x
  # Fold the five state words into the final 64-bit hash.
  let foldA = len16(v.first, w.first, mul) + shiftMix(y)*k0 + z
  let foldB = len16(v.second, w.second, mul) + x
  len16(foldA, foldB, mul)

proc hash*(x: string): Hash =
## Efficient hashing of strings.
##
Expand All @@ -388,10 +527,13 @@ proc hash*(x: string): Hash =
runnableExamples:
doAssert hash("abracadabra") != hash("AbracadabrA")

when nimvm:
result = hashVmImpl(x, 0, high(x))
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high)))
else:
result = murmurHash(toOpenArrayByte(x, 0, high(x)))
when nimvm:
result = hashVmImpl(x, 0, high(x))
else:
result = murmurHash(toOpenArrayByte(x, 0, high(x)))

proc hash*(x: cstring): Hash =
## Efficient hashing of null-terminated strings.
Expand All @@ -400,14 +542,21 @@ proc hash*(x: cstring): Hash =
doAssert hash(cstring"AbracadabrA") == hash("AbracadabrA")
doAssert hash(cstring"abracadabra") != hash(cstring"AbracadabrA")

when nimvm:
hashVmImpl(x, 0, high(x))
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
when defined js:
let xx = $x
result = cast[Hash](hashFarm(toOpenArrayByte(xx, 0, xx.high)))
else:
result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high)))
else:
when not defined(js):
murmurHash(toOpenArrayByte(x, 0, x.high))
when nimvm:
hashVmImpl(x, 0, high(x))
else:
let xx = $x
murmurHash(toOpenArrayByte(xx, 0, high(xx)))
when not defined(js):
murmurHash(toOpenArrayByte(x, 0, x.high))
else:
let xx = $x
murmurHash(toOpenArrayByte(xx, 0, high(xx)))

proc hash*(sBuf: string, sPos, ePos: int): Hash =
## Efficient hashing of a string buffer, from starting
Expand All @@ -418,7 +567,10 @@ proc hash*(sBuf: string, sPos, ePos: int): Hash =
var a = "abracadabra"
doAssert hash(a, 0, 3) == hash(a, 7, 10)

murmurHash(toOpenArrayByte(sBuf, sPos, ePos))
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
result = cast[Hash](hashFarm(toOpenArrayByte(sBuf, sPos, ePos)))
else:
murmurHash(toOpenArrayByte(sBuf, sPos, ePos))

proc hashIgnoreStyle*(x: string): Hash =
## Efficient hashing of strings; style is ignored.
Expand Down Expand Up @@ -553,12 +705,18 @@ proc hash*[A](x: openArray[A]): Hash =
## Efficient hashing of arrays and sequences.
## There must be a `hash` proc defined for the element type `A`.
when A is byte:
result = murmurHash(x)
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
result = cast[Hash](hashFarm(x))
else:
result = murmurHash(x)
elif A is char:
when nimvm:
result = hashVmImplChar(x, 0, x.high)
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high)))
else:
result = murmurHash(toOpenArrayByte(x, 0, x.high))
when nimvm:
result = hashVmImplChar(x, 0, x.high)
else:
result = murmurHash(toOpenArrayByte(x, 0, x.high))
else:
result = 0
for a in x:
Expand All @@ -576,15 +734,21 @@ proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
doAssert hash(a, 0, 1) == hash(a, 3, 4)

when A is byte:
when nimvm:
result = hashVmImplByte(aBuf, sPos, ePos)
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
result = cast[Hash](hashFarm(toOpenArray(aBuf, sPos, ePos)))
else:
result = murmurHash(toOpenArray(aBuf, sPos, ePos))
when nimvm:
result = hashVmImplByte(aBuf, sPos, ePos)
else:
result = murmurHash(toOpenArray(aBuf, sPos, ePos))
elif A is char:
when nimvm:
result = hashVmImplChar(aBuf, sPos, ePos)
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
result = cast[Hash](hashFarm(toOpenArrayByte(aBuf, sPos, ePos)))
else:
result = murmurHash(toOpenArrayByte(aBuf, sPos, ePos))
when nimvm:
result = hashVmImplChar(aBuf, sPos, ePos)
else:
result = murmurHash(toOpenArrayByte(aBuf, sPos, ePos))
else:
for i in sPos .. ePos:
result = result !& hash(aBuf[i])
Expand Down
17 changes: 12 additions & 5 deletions tests/stdlib/thashes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -47,19 +47,22 @@ block hashes:
doAssert hashWangYi1(456) == -6421749900419628582

block empty:
const emptyStrHash = # Hash=int=4B on js even w/--jsbigint64:on => cast[Hash]
when defined nimPreviewHashFarm: cast[Hash](-7286425919675154353i64)
else: 0
var
a = ""
b = newSeq[char]()
c = newSeq[int]()
d = cstring""
e = "abcd"
doAssert hash(a) == 0
doAssert hash(b) == 0
doAssert hash(a) == emptyStrHash
doAssert hash(b) == emptyStrHash
doAssert hash(c) == 0
doAssert hash(d) == 0
doAssert hash(d) == emptyStrHash
doAssert hashIgnoreCase(a) == 0
doAssert hashIgnoreStyle(a) == 0
doAssert hash(e, 3, 2) == 0
doAssert hash(e, 3, 2) == emptyStrHash

block sameButDifferent:
doAssert hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13)
Expand Down Expand Up @@ -93,7 +96,11 @@ block largeSize: # longer than 4 characters
proc main() =
doAssert hash(0.0) == hash(0)
# bug #16061
doAssert hash(cstring"abracadabra") == 97309975
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
# Hash=int=4B on js even w/--jsbigint64:on => cast[Hash]
doAssert hash(cstring"abracadabra") == cast[Hash](-1119910118870047694i64)
else:
doAssert hash(cstring"abracadabra") == 97309975
doAssert hash(cstring"abracadabra") == hash("abracadabra")

when sizeof(int) == 8 or defined(js):
Expand Down
Loading