diff --git a/changelog.md b/changelog.md index 53d88b19fb3f..92abb26146a3 100644 --- a/changelog.md +++ b/changelog.md @@ -39,6 +39,9 @@ slots when enlarging a sequence. objects the cyclic collector did free. If the number is zero that is a strong indicator that you can use `--mm:arc` instead of `--mm:orc`. - A `$` template is provided for `Path` in `std/paths`. +- `nimPreviewHashFarm` has been added to `lib/pure/hashes.nim` to default to a +64-bit string `Hash` (based upon Google's Farm Hash) which is also faster than +the present one. At present, this is incompatible with `--jsbigint=off` mode. [//]: # "Deprecations:" diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index 51829951166b..9e16cea9eab5 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -379,6 +379,145 @@ proc hashVmImplChar(x: openArray[char], sPos, ePos: int): Hash = proc hashVmImplByte(x: openArray[byte], sPos, ePos: int): Hash = raiseAssert "implementation override in compiler/vmops.nim" +const k0 = 0xc3a5c85c97cb3127u64 # Primes on (2^63, 2^64) for various uses +const k1 = 0xb492b66fbe98f273u64 +const k2 = 0x9ae16a3b2f90404fu64 + +proc load4e(s: openArray[byte], o=0): uint32 {.inline.} = + uint32(s[o + 3]) shl 24 or uint32(s[o + 2]) shl 16 or + uint32(s[o + 1]) shl 8 or uint32(s[o + 0]) + +proc load8e(s: openArray[byte], o=0): uint64 {.inline.} = + uint64(s[o + 7]) shl 56 or uint64(s[o + 6]) shl 48 or + uint64(s[o + 5]) shl 40 or uint64(s[o + 4]) shl 32 or + uint64(s[o + 3]) shl 24 or uint64(s[o + 2]) shl 16 or + uint64(s[o + 1]) shl 8 or uint64(s[o + 0]) + +proc load4(s: openArray[byte], o=0): uint32 {.inline.} = + when nimvm: result = load4e(s, o) + else: + when declared copyMem: copyMem result.addr, s[o].addr, result.sizeof + else: result = load4e(s, o) + +proc load8(s: openArray[byte], o=0): uint64 {.inline.} = + when nimvm: result = load8e(s, o) + else: + when declared copyMem: copyMem result.addr, s[o].addr, result.sizeof + else: result = load8e(s, o) + +proc lenU(s: openArray[byte]): uint64 {.inline.} = s.len.uint64 + +proc shiftMix(v: uint64): uint64 {.inline.} = v xor (v shr 47) + +proc rotR(v: uint64; bits: cint): uint64 {.inline.} = + (v shr bits) or (v shl (64 - bits)) + +proc len16(u: uint64; v: uint64; mul: uint64): uint64 {.inline.} = + var a = (u xor v)*mul + a = a xor (a shr 47) + var b = (v xor a)*mul + b = b xor (b shr 47) + b*mul + +proc len0_16(s: openArray[byte]): uint64 {.inline.} = + if s.len >= 8: + let mul = k2 + 2*s.lenU + let a = load8(s) + k2 + let b = load8(s, s.len - 8) + let c = rotR(b, 37)*mul + a + let d = (rotR(a, 25) + b)*mul + len16 c, d, mul + elif s.len >= 4: + let mul = k2 + 2*s.lenU + let a = load4(s).uint64 + len16 s.lenU + (a shl 3), load4(s, s.len - 4), mul + elif s.len > 0: + let a = uint32(s[0]) + let b = uint32(s[s.len shr 1]) + let c = uint32(s[s.len - 1]) + let y = a + (b shl 8) + let z = s.lenU + (c shl 2) + shiftMix(y*k2 xor z*k0)*k2 + else: k2 # s.len == 0 + +proc len17_32(s: openArray[byte]): uint64 {.inline.} = + let mul = k2 + 2*s.lenU + let a = load8(s)*k1 + let b = load8(s, 8) + let c = load8(s, s.len - 8)*mul + let d = load8(s, s.len - 16)*k2 + len16 rotR(a + b, 43) + rotR(c, 30) + d, a + rotR(b + k2, 18) + c, mul + +proc len33_64(s: openArray[byte]): uint64 {.inline.} = + let mul = k2 + 2*s.lenU + let a = load8(s)*k2 + let b = load8(s, 8) + let c = load8(s, s.len - 8)*mul + let d = load8(s, s.len - 16)*k2 + let y = rotR(a + b, 43) + rotR(c, 30) + d + let z = len16(y, a + rotR(b + k2, 18) + c, mul) + let e = load8(s, 16)*mul + let f = load8(s, 24) + let g = (y + load8(s, s.len - 32))*mul + let h = (z + load8(s, s.len - 24))*mul + len16 rotR(e + f, 43) + rotR(g, 30) + h, e + rotR(f + a, 18) + g, mul + +type Pair = tuple[first, second: uint64] + +proc weakLen32withSeeds2(w, x, y, z, a, b: uint64): Pair {.inline.} = + var a = a + w + var b = rotR(b + a + z, 21) + let c = a + a += x + a += y + b += rotR(a, 44) + result[0] = a + z + result[1] = b + c + +proc weakLen32withSeeds(s: openArray[byte]; o: int; a,b: uint64): Pair {.inline.} = + weakLen32withSeeds2 load8(s, o ), load8(s, o + 8), + load8(s, o + 16), load8(s, o + 24), a, b + +proc hashFarm(s: openArray[byte]): uint64 {.inline.} = + if s.len <= 16: return len0_16(s) + if s.len <= 32: return len17_32(s) + if s.len <= 64: return len33_64(s) + const seed = 81u64 # not const to use input `h` + var + o = 0 # s[] ptr arith -> variable origin variable `o` + x = seed + y = seed*k1 + 113 + z = shiftMix(y*k2 + 113)*k2 + v, w: Pair + x = x*k2 + load8(s) + let eos = ((s.len - 1) div 64)*64 + let last64 = eos + ((s.len - 1) and 63) - 63 + while true: + x = rotR(x + y + v[0] + load8(s, o+8), 37)*k1 + y = rotR(y + v[1] + load8(s, o+48), 42)*k1 + x = x xor w[1] + y += v[0] + load8(s, o+40) + z = rotR(z + w[0], 33)*k1 + v = weakLen32withSeeds(s, o+0 , v[1]*k1, x + w[0]) + w = weakLen32withSeeds(s, o+32, z + w[1], y + load8(s, o+16)) + swap z, x + inc o, 64 + if o == eos: break + let mul = k1 + ((z and 0xff) shl 1) + o = last64 + w[0] += (s.lenU - 1) and 63 + v[0] += w[0] + w[0] += v[0] + x = rotR(x + y + v[0] + load8(s, o+8), 37)*mul + y = rotR(y + v[1] + load8(s, o+48), 42)*mul + x = x xor w[1]*9 + y += v[0]*9 + load8(s, o+40) + z = rotR(z + w[0], 33)*mul + v = weakLen32withSeeds(s, o+0 , v[1]*mul, x + w[0]) + w = weakLen32withSeeds(s, o+32, z + w[1], y + load8(s, o+16)) + swap z, x + len16 len16(v[0],w[0],mul) + shiftMix(y)*k0 + z, len16(v[1],w[1],mul) + x, mul + proc hash*(x: string): Hash = ## Efficient hashing of strings. ## @@ -388,10 +527,13 @@ proc hash*(x: string): Hash = runnableExamples: doAssert hash("abracadabra") != hash("AbracadabrA") - when nimvm: - result = hashVmImpl(x, 0, high(x)) + when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2` + result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high))) else: - result = murmurHash(toOpenArrayByte(x, 0, high(x))) + when nimvm: + result = hashVmImpl(x, 0, high(x)) + else: + result = murmurHash(toOpenArrayByte(x, 0, high(x))) proc hash*(x: cstring): Hash = ## Efficient hashing of null-terminated strings. @@ -400,14 +542,21 @@ proc hash*(x: cstring): Hash = doAssert hash(cstring"AbracadabrA") == hash("AbracadabrA") doAssert hash(cstring"abracadabra") != hash(cstring"AbracadabrA") - when nimvm: - hashVmImpl(x, 0, high(x)) + when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2` + when defined js: + let xx = $x + result = cast[Hash](hashFarm(toOpenArrayByte(xx, 0, xx.high))) + else: + result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high))) else: - when not defined(js): - murmurHash(toOpenArrayByte(x, 0, x.high)) + when nimvm: + hashVmImpl(x, 0, high(x)) else: - let xx = $x - murmurHash(toOpenArrayByte(xx, 0, high(xx))) + when not defined(js): + murmurHash(toOpenArrayByte(x, 0, x.high)) + else: + let xx = $x + murmurHash(toOpenArrayByte(xx, 0, high(xx))) proc hash*(sBuf: string, sPos, ePos: int): Hash = ## Efficient hashing of a string buffer, from starting @@ -418,7 +567,10 @@ proc hash*(sBuf: string, sPos, ePos: int): Hash = var a = "abracadabra" doAssert hash(a, 0, 3) == hash(a, 7, 10) - murmurHash(toOpenArrayByte(sBuf, sPos, ePos)) + when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2` + result = cast[Hash](hashFarm(toOpenArrayByte(sBuf, sPos, ePos))) + else: + murmurHash(toOpenArrayByte(sBuf, sPos, ePos)) proc hashIgnoreStyle*(x: string): Hash = ## Efficient hashing of strings; style is ignored. @@ -553,12 +705,18 @@ proc hash*[A](x: openArray[A]): Hash = ## Efficient hashing of arrays and sequences. ## There must be a `hash` proc defined for the element type `A`. when A is byte: - result = murmurHash(x) + when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2` + result = cast[Hash](hashFarm(x)) + else: + result = murmurHash(x) elif A is char: - when nimvm: - result = hashVmImplChar(x, 0, x.high) + when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2` + result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high))) else: - result = murmurHash(toOpenArrayByte(x, 0, x.high)) + when nimvm: + result = hashVmImplChar(x, 0, x.high) + else: + result = murmurHash(toOpenArrayByte(x, 0, x.high)) else: result = 0 for a in x: @@ -576,15 +734,21 @@ proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash = doAssert hash(a, 0, 1) == hash(a, 3, 4) when A is byte: - when nimvm: - result = hashVmImplByte(aBuf, sPos, ePos) + when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2` + result = cast[Hash](hashFarm(toOpenArray(aBuf, sPos, ePos))) else: - result = murmurHash(toOpenArray(aBuf, sPos, ePos)) + when nimvm: + result = hashVmImplByte(aBuf, sPos, ePos) + else: + result = murmurHash(toOpenArray(aBuf, sPos, ePos)) elif A is char: - when nimvm: - result = hashVmImplChar(aBuf, sPos, ePos) + when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2` + result = cast[Hash](hashFarm(toOpenArrayByte(aBuf, sPos, ePos))) else: - result = murmurHash(toOpenArrayByte(aBuf, sPos, ePos)) + when nimvm: + result = hashVmImplChar(aBuf, sPos, ePos) + else: + result = murmurHash(toOpenArrayByte(aBuf, sPos, ePos)) else: for i in sPos .. ePos: result = result !& hash(aBuf[i]) diff --git a/tests/stdlib/thashes.nim b/tests/stdlib/thashes.nim index 3974b01c81bf..04606b306206 100644 --- a/tests/stdlib/thashes.nim +++ b/tests/stdlib/thashes.nim @@ -47,19 +47,22 @@ block hashes: doAssert hashWangYi1(456) == -6421749900419628582 block empty: + const emptyStrHash = # Hash=int=4B on js even w/--jsbigint64:on => cast[Hash] + when defined nimPreviewHashFarm: cast[Hash](-7286425919675154353i64) + else: 0 var a = "" b = newSeq[char]() c = newSeq[int]() d = cstring"" e = "abcd" - doAssert hash(a) == 0 - doAssert hash(b) == 0 + doAssert hash(a) == emptyStrHash + doAssert hash(b) == emptyStrHash doAssert hash(c) == 0 - doAssert hash(d) == 0 + doAssert hash(d) == emptyStrHash doAssert hashIgnoreCase(a) == 0 doAssert hashIgnoreStyle(a) == 0 - doAssert hash(e, 3, 2) == 0 + doAssert hash(e, 3, 2) == emptyStrHash block sameButDifferent: doAssert hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13) @@ -93,7 +96,11 @@ block largeSize: # longer than 4 characters proc main() = doAssert hash(0.0) == hash(0) # bug #16061 - doAssert hash(cstring"abracadabra") == 97309975 + when defined nimPreviewHashFarm: # Default switched -> `not nimStringHash2` + # Hash=int=4B on js even w/--jsbigint64:on => cast[Hash] + doAssert hash(cstring"abracadabra") == cast[Hash](-1119910118870047694i64) + else: + doAssert hash(cstring"abracadabra") == 97309975 doAssert hash(cstring"abracadabra") == hash("abracadabra") when sizeof(int) == 8 or defined(js):