Skip to content

Commit

Permalink
Add Farm Hash conditioned upon nimPreviewHashFarm as 64-bit Hash (#…
Browse files Browse the repository at this point in the history
…23735)

Unlike present Nim this actually fills `Hash` for `string` & related.

For the curious, note that `hashData` remains the aboriginal Nim string
hasher & `import hashes {.all.}` allows simultaneous test/time of {orig,
murmur, farm} on your favorite CPU & back end compiler.

Update tests also conditioned upon `nimPreviewHashFarm` so they should
pass either with or without that `define` on.

In `--jsbigint64:on` mode, only the lower 32 bits of `Hash` match nimvm &
run-time values because `type Hash = int` and on JS int=int32, not int64
as for 64-bit Nim platforms. Due to the matching, `const` Table should
match run-time `Table` on all platforms.

To operate in `--jsbigint64:off` mode is feasible but needs much "double
precision mul/xor/ror/shr-arithmetic"-style work. That is distracting &
also of questionable value since JS added BigInt in 2018, ringabout
added Nim support for it in 2021 & `nimPreviewHashFarm` is unlikely to
swap from an opt-in to an opt-out default before 2025..2026 which will
have given a backward looking time window of 7..8 years for deployment
platforms - reasonably generous.

Add a changelog entry for 2.2.
  • Loading branch information
c-blake authored Jun 19, 2024
1 parent 9d08d26 commit e645120
Show file tree
Hide file tree
Showing 3 changed files with 199 additions and 25 deletions.
3 changes: 3 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ slots when enlarging a sequence.
objects the cyclic collector did free. If the number is zero that is a strong indicator that you can use `--mm:arc`
instead of `--mm:orc`.
- A `$` template is provided for `Path` in `std/paths`.
- `nimPreviewHashFarm` has been added to `lib/pure/hashes.nim` to default to a
64-bit string `Hash` (based upon Google's Farm Hash) which is also faster than
the present one. At present, this is incompatible with `--jsbigint64:off` mode.

[//]: # "Deprecations:"

Expand Down
204 changes: 184 additions & 20 deletions lib/pure/hashes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,145 @@ proc hashVmImplChar(x: openArray[char], sPos, ePos: int): Hash =
proc hashVmImplByte(x: openArray[byte], sPos, ePos: int): Hash =
raiseAssert "implementation override in compiler/vmops.nim"

const
  # Primes on (2^63, 2^64) for various uses; the Farm Hash helpers below use
  # them as multipliers and additive offsets.
  k0 = 0xc3a5c85c97cb3127u64
  k1 = 0xb492b66fbe98f273u64
  k2 = 0x9ae16a3b2f90404fu64

proc load4e(s: openArray[byte], o=0): uint32 {.inline.} =
  ## Assembles the four bytes `s[o] .. s[o+3]` into a `uint32`, with `s[o]`
  ## as the least significant byte (little-endian order), using only
  ## indexing and shifts.
  # Walk from the most significant byte down so each step shifts the
  # accumulated value left by one byte before merging the next one.
  for i in countdown(3, 0):
    result = (result shl 8) or uint32(s[o + i])

proc load8e(s: openArray[byte], o=0): uint64 {.inline.} =
  ## Assembles the eight bytes `s[o] .. s[o+7]` into a `uint64`, with `s[o]`
  ## as the least significant byte (little-endian order), using only
  ## indexing and shifts.
  # Walk from the most significant byte down so each step shifts the
  # accumulated value left by one byte before merging the next one.
  for i in countdown(7, 0):
    result = (result shl 8) or uint64(s[o + i])

proc load4(s: openArray[byte], o=0): uint32 {.inline.} =
  ## Reads the four bytes `s[o] .. s[o+3]` as a little-endian `uint32`.
  ##
  ## The compile-time VM always assembles the value with `load4e`.  At run
  ## time the raw `copyMem` shortcut is taken only when `copyMem` is declared
  ## *and* the target CPU is little-endian: a native-order copy on a
  ## big-endian CPU would yield a different value than the VM computed for
  ## the same bytes, so compile-time and run-time hashes would disagree.
  when nimvm: result = load4e(s, o)
  else:
    when declared(copyMem) and cpuEndian == littleEndian:
      # Only `s[o]` is indexed on this path; the caller must guarantee that
      # the three following bytes also lie inside `s`.
      copyMem result.addr, s[o].addr, result.sizeof
    else: result = load4e(s, o)

proc load8(s: openArray[byte], o=0): uint64 {.inline.} =
  ## Reads the eight bytes `s[o] .. s[o+7]` as a little-endian `uint64`.
  ##
  ## The compile-time VM always assembles the value with `load8e`.  At run
  ## time the raw `copyMem` shortcut is taken only when `copyMem` is declared
  ## *and* the target CPU is little-endian: a native-order copy on a
  ## big-endian CPU would yield a different value than the VM computed for
  ## the same bytes, so compile-time and run-time hashes would disagree.
  when nimvm: result = load8e(s, o)
  else:
    when declared(copyMem) and cpuEndian == littleEndian:
      # Only `s[o]` is indexed on this path; the caller must guarantee that
      # the seven following bytes also lie inside `s`.
      copyMem result.addr, s[o].addr, result.sizeof
    else: result = load8e(s, o)

proc lenU(s: openArray[byte]): uint64 {.inline.} =
  ## Length of `s` converted to `uint64`, so it can be mixed with the
  ## unsigned 64-bit hash state without further conversions.
  result = uint64(len(s))

proc shiftMix(v: uint64): uint64 {.inline.} =
  ## XORs `v` with a copy of itself shifted right by 47 bits, folding the
  ## top 17 bits into the low end of the word.
  let folded = v shr 47
  result = folded xor v

proc rotR(v: uint64; bits: cint): uint64 {.inline.} =
  ## Rotates the 64-bit word `v` right by `bits` positions.
  ## Every call in this module passes a constant strictly between 0 and 64.
  let lowPart = v shr bits           # bits that stay in the word
  let wrapped = v shl (64 - bits)    # bits that fall off and re-enter on top
  result = lowPart or wrapped

proc len16(u: uint64; v: uint64; mul: uint64): uint64 {.inline.} =
  ## Combines the two words `u` and `v` into one 64-bit value using two
  ## multiply / xor-shift rounds and a final multiply, all with `mul`.
  ## Arithmetic is unsigned and wraps modulo 2^64.
  let a0 = (u xor v) * mul
  let a1 = a0 xor (a0 shr 47)
  let b0 = (v xor a1) * mul
  let b1 = b0 xor (b0 shr 47)
  result = b1 * mul

proc len0_16(s: openArray[byte]): uint64 {.inline.} =
  ## Hash for short inputs; `hashFarm` calls it when `s.len <= 16`.
  ##
  ## Four cases by length:
  ## * 0 bytes: the constant `k2`;
  ## * 1..3 bytes: mixes the first, middle and last byte with the length;
  ## * 4..7 bytes: mixes the first and last 4 bytes (which may overlap);
  ## * 8 or more: mixes the first and last 8 bytes (which may overlap).
  let n = s.len
  if n == 0:
    return k2
  if n < 4:
    let
      first = uint32(s[0])
      middle = uint32(s[n shr 1])
      last = uint32(s[n - 1])
      y = first + (middle shl 8)
      z = s.lenU + (last shl 2)
    return shiftMix(y*k2 xor z*k0)*k2
  # Both remaining cases share a length-dependent multiplier.
  let mul = k2 + 2*s.lenU
  if n < 8:
    let head = load4(s).uint64
    return len16(s.lenU + (head shl 3), load4(s, n - 4), mul)
  let
    a = load8(s) + k2
    b = load8(s, n - 8)
    c = rotR(b, 37)*mul + a
    d = (rotR(a, 25) + b)*mul
  result = len16(c, d, mul)

proc len17_32(s: openArray[byte]): uint64 {.inline.} =
  ## Hash for mid-sized inputs; `hashFarm` calls it when `16 < s.len <= 32`.
  ## Mixes the first 16 and the last 16 bytes of `s` (the two windows may
  ## overlap) and folds them with `len16`.
  let
    n = s.len
    mul = k2 + 2*s.lenU      # length-dependent multiplier
    a = load8(s)*k1
    b = load8(s, 8)
    c = load8(s, n - 8)*mul
    d = load8(s, n - 16)*k2
    u = rotR(a + b, 43) + rotR(c, 30) + d
    v = a + rotR(b + k2, 18) + c
  result = len16(u, v, mul)

proc len33_64(s: openArray[byte]): uint64 {.inline.} =
  ## Hash for inputs that `hashFarm` routes here when `32 < s.len <= 64`.
  ## Mixes the first 32 and the last 32 bytes of `s` (the two windows may
  ## overlap) in two rounds and folds the result with `len16`.
  let
    n = s.len
    mul = k2 + 2*s.lenU      # length-dependent multiplier
    head0 = load8(s)*k2
    head1 = load8(s, 8)
    tail0 = load8(s, n - 8)*mul
    tail1 = load8(s, n - 16)*k2
    # First round: outermost 16 bytes on each side.
    y = rotR(head0 + head1, 43) + rotR(tail0, 30) + tail1
    z = len16(y, head0 + rotR(head1 + k2, 18) + tail0, mul)
    # Second round: the inner 16 bytes on each side, seeded by `y` and `z`.
    e = load8(s, 16)*mul
    f = load8(s, 24)
    g = (y + load8(s, n - 32))*mul
    h = (z + load8(s, n - 24))*mul
  result = len16(rotR(e + f, 43) + rotR(g, 30) + h,
                 e + rotR(f + head0, 18) + g, mul)

type
  Pair = tuple
    ## Two 64-bit words carried together as hash state.
    first: uint64
    second: uint64

proc weakLen32withSeeds2(w, x, y, z, a, b: uint64): Pair {.inline.} =
  ## Mixes four data words `w`, `x`, `y`, `z` with the two seeds `a`, `b`
  ## and returns the resulting pair of words.  All additions are unsigned
  ## and wrap modulo 2^64.
  let seeded = a + w                 # seed `a` after absorbing `w`
  let summed = seeded + x + y        # ... and after absorbing `x` and `y`
  let mixed = rotR(b + seeded + z, 21) + rotR(summed, 44)
  result = (first: summed + z, second: mixed + seeded)

proc weakLen32withSeeds(s: openArray[byte]; o: int; a,b: uint64): Pair {.inline.} =
  ## Loads the 32 bytes `s[o] .. s[o+31]` as four 64-bit words and mixes
  ## them with the seeds `a` and `b` via `weakLen32withSeeds2`.
  let
    w0 = load8(s, o)
    w1 = load8(s, o + 8)
    w2 = load8(s, o + 16)
    w3 = load8(s, o + 24)
  result = weakLen32withSeeds2(w0, w1, w2, w3, a, b)

proc hashFarm(s: openArray[byte]): uint64 {.inline.} =
  ## Computes a 64-bit hash of the bytes in `s`.
  ##
  ## Inputs of at most 64 bytes are delegated to `len0_16`, `len17_32` or
  ## `len33_64`.  Longer inputs are consumed in 64-byte chunks that update
  ## the running state `x`, `y`, `z`, `v`, `w`; the final 64 bytes of `s`
  ## are then mixed in once more and everything is folded with `len16`.
  if s.len <= 16: return len0_16(s)
  if s.len <= 32: return len17_32(s)
  if s.len <= 64: return len33_64(s)
  const seed = 81u64 # fixed seed; must stop being `const` to accept a caller-supplied seed
  var
    o = 0 # byte offset into `s`; stands in for pointer arithmetic on `s`
    x = seed
    y = seed*k1 + 113
    z = shiftMix(y*k2 + 113)*k2
    v, w: Pair # both pairs start out zero-initialized
  x = x*k2 + load8(s)
  let eos = ((s.len - 1) div 64)*64 # largest multiple of 64 strictly below s.len
  let last64 = eos + ((s.len - 1) and 63) - 63 # equals s.len - 64: start of the final 64 bytes
  # Main loop: one iteration per 64-byte chunk of s[0 ..< eos].  s.len > 64
  # here, so eos >= 64 and the loop runs at least once.
  while true:
    x = rotR(x + y + v[0] + load8(s, o+8), 37)*k1
    y = rotR(y + v[1] + load8(s, o+48), 42)*k1
    x = x xor w[1]
    y += v[0] + load8(s, o+40)
    z = rotR(z + w[0], 33)*k1
    v = weakLen32withSeeds(s, o+0 , v[1]*k1, x + w[0])
    w = weakLen32withSeeds(s, o+32, z + w[1], y + load8(s, o+16))
    swap z, x
    inc o, 64
    if o == eos: break
  # Final round over the last 64 bytes (which may overlap the chunk just
  # processed), using a multiplier derived from the low byte of `z`.
  let mul = k1 + ((z and 0xff) shl 1)
  o = last64
  w[0] += (s.lenU - 1) and 63
  v[0] += w[0]
  w[0] += v[0]
  x = rotR(x + y + v[0] + load8(s, o+8), 37)*mul
  y = rotR(y + v[1] + load8(s, o+48), 42)*mul
  x = x xor w[1]*9
  y += v[0]*9 + load8(s, o+40)
  z = rotR(z + w[0], 33)*mul
  v = weakLen32withSeeds(s, o+0 , v[1]*mul, x + w[0])
  w = weakLen32withSeeds(s, o+32, z + w[1], y + load8(s, o+16))
  swap z, x
  # Fold the whole state down to a single 64-bit word.
  len16 len16(v[0],w[0],mul) + shiftMix(y)*k0 + z, len16(v[1],w[1],mul) + x, mul

proc hash*(x: string): Hash =
## Efficient hashing of strings.
##
Expand All @@ -388,10 +527,13 @@ proc hash*(x: string): Hash =
runnableExamples:
doAssert hash("abracadabra") != hash("AbracadabrA")

when nimvm:
result = hashVmImpl(x, 0, high(x))
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high)))
else:
result = murmurHash(toOpenArrayByte(x, 0, high(x)))
when nimvm:
result = hashVmImpl(x, 0, high(x))
else:
result = murmurHash(toOpenArrayByte(x, 0, high(x)))

proc hash*(x: cstring): Hash =
## Efficient hashing of null-terminated strings.
Expand All @@ -400,14 +542,21 @@ proc hash*(x: cstring): Hash =
doAssert hash(cstring"AbracadabrA") == hash("AbracadabrA")
doAssert hash(cstring"abracadabra") != hash(cstring"AbracadabrA")

when nimvm:
hashVmImpl(x, 0, high(x))
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
when defined js:
let xx = $x
result = cast[Hash](hashFarm(toOpenArrayByte(xx, 0, xx.high)))
else:
result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high)))
else:
when not defined(js):
murmurHash(toOpenArrayByte(x, 0, x.high))
when nimvm:
hashVmImpl(x, 0, high(x))
else:
let xx = $x
murmurHash(toOpenArrayByte(xx, 0, high(xx)))
when not defined(js):
murmurHash(toOpenArrayByte(x, 0, x.high))
else:
let xx = $x
murmurHash(toOpenArrayByte(xx, 0, high(xx)))

proc hash*(sBuf: string, sPos, ePos: int): Hash =
## Efficient hashing of a string buffer, from starting
Expand All @@ -418,7 +567,10 @@ proc hash*(sBuf: string, sPos, ePos: int): Hash =
var a = "abracadabra"
doAssert hash(a, 0, 3) == hash(a, 7, 10)

murmurHash(toOpenArrayByte(sBuf, sPos, ePos))
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
result = cast[Hash](hashFarm(toOpenArrayByte(sBuf, sPos, ePos)))
else:
murmurHash(toOpenArrayByte(sBuf, sPos, ePos))

proc hashIgnoreStyle*(x: string): Hash =
## Efficient hashing of strings; style is ignored.
Expand Down Expand Up @@ -553,12 +705,18 @@ proc hash*[A](x: openArray[A]): Hash =
## Efficient hashing of arrays and sequences.
## There must be a `hash` proc defined for the element type `A`.
when A is byte:
result = murmurHash(x)
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
result = cast[Hash](hashFarm(x))
else:
result = murmurHash(x)
elif A is char:
when nimvm:
result = hashVmImplChar(x, 0, x.high)
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high)))
else:
result = murmurHash(toOpenArrayByte(x, 0, x.high))
when nimvm:
result = hashVmImplChar(x, 0, x.high)
else:
result = murmurHash(toOpenArrayByte(x, 0, x.high))
else:
result = 0
for a in x:
Expand All @@ -576,15 +734,21 @@ proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
doAssert hash(a, 0, 1) == hash(a, 3, 4)

when A is byte:
when nimvm:
result = hashVmImplByte(aBuf, sPos, ePos)
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
result = cast[Hash](hashFarm(toOpenArray(aBuf, sPos, ePos)))
else:
result = murmurHash(toOpenArray(aBuf, sPos, ePos))
when nimvm:
result = hashVmImplByte(aBuf, sPos, ePos)
else:
result = murmurHash(toOpenArray(aBuf, sPos, ePos))
elif A is char:
when nimvm:
result = hashVmImplChar(aBuf, sPos, ePos)
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
result = cast[Hash](hashFarm(toOpenArrayByte(aBuf, sPos, ePos)))
else:
result = murmurHash(toOpenArrayByte(aBuf, sPos, ePos))
when nimvm:
result = hashVmImplChar(aBuf, sPos, ePos)
else:
result = murmurHash(toOpenArrayByte(aBuf, sPos, ePos))
else:
for i in sPos .. ePos:
result = result !& hash(aBuf[i])
Expand Down
17 changes: 12 additions & 5 deletions tests/stdlib/thashes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -47,19 +47,22 @@ block hashes:
doAssert hashWangYi1(456) == -6421749900419628582

block empty:
const emptyStrHash = # Hash=int=4B on js even w/--jsbigint64:on => cast[Hash]
when defined nimPreviewHashFarm: cast[Hash](-7286425919675154353i64)
else: 0
var
a = ""
b = newSeq[char]()
c = newSeq[int]()
d = cstring""
e = "abcd"
doAssert hash(a) == 0
doAssert hash(b) == 0
doAssert hash(a) == emptyStrHash
doAssert hash(b) == emptyStrHash
doAssert hash(c) == 0
doAssert hash(d) == 0
doAssert hash(d) == emptyStrHash
doAssert hashIgnoreCase(a) == 0
doAssert hashIgnoreStyle(a) == 0
doAssert hash(e, 3, 2) == 0
doAssert hash(e, 3, 2) == emptyStrHash

block sameButDifferent:
doAssert hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13)
Expand Down Expand Up @@ -93,7 +96,11 @@ block largeSize: # longer than 4 characters
proc main() =
doAssert hash(0.0) == hash(0)
# bug #16061
doAssert hash(cstring"abracadabra") == 97309975
when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2`
# Hash=int=4B on js even w/--jsbigint64:on => cast[Hash]
doAssert hash(cstring"abracadabra") == cast[Hash](-1119910118870047694i64)
else:
doAssert hash(cstring"abracadabra") == 97309975
doAssert hash(cstring"abracadabra") == hash("abracadabra")

when sizeof(int) == 8 or defined(js):
Expand Down

0 comments on commit e645120

Please sign in to comment.