Skip to content

Commit

Permalink
fix tests using hashBiggestIntVM vm callback
Browse files Browse the repository at this point in the history
  • Loading branch information
timotheecour committed Jul 18, 2019
1 parent 93b46c4 commit 7847592
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 32 deletions.
4 changes: 4 additions & 0 deletions compiler/vmops.nim
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ from math import sqrt, ln, log10, log2, exp, round, arccos, arcsin,
from os import getEnv, existsEnv, dirExists, fileExists, putEnv, walkDir, getAppFilename
from md5 import getMD5
from sighashes import symBodyDigest
from std/hashes import hashBiggestInt

# Registers the VM callback for a single `math` routine. The callback name is
# "stdlib.math." followed by the stringified `op`; the handler is the
# identifier built by joining `op` with `Wrapper`.
# `c` is not a parameter: the template is `{.dirty.}`, so `c` is looked up at
# the expansion site.
template mathop(op) {.dirty.} =
registerCallback(c, "stdlib.math." & astToStr(op), `op Wrapper`)
Expand Down Expand Up @@ -144,3 +145,6 @@ proc registerAdditionalOps*(c: PCtx) =
let n = getNode(a, 0)
if n.kind != nkSym: raise newException(ValueError, "node is not a symbol")
setResult(a, $symBodyDigest(c.graph, n.sym))

# Registers a VM callback named "stdlib.hashes.hashBiggestIntVM": it reads
# integer argument 0 and sets the call's result to `hashBiggestInt` of it,
# using the `hashBiggestInt` this module imports from std/hashes.
registerCallback c, "stdlib.hashes.hashBiggestIntVM", proc (a: VmArgs) {.nimcall.} =
a.setResult hashBiggestInt(getInt(a, 0))
68 changes: 40 additions & 28 deletions lib/pure/hashes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -56,38 +56,11 @@ type
const
IntSize = sizeof(int)

proc preferStringHash*(T: typedesc): bool =
  ## Tells whether values of type `T` are better hashed through the
  ## string/byte hashing path (`hash($x)`-style) than through `ord`.
  ##
  ## The answer is `true` exactly when `T` occupies 4 bytes or more.
  # Background for the size threshold: #11764.
  # Exported on purpose, so user-defined `hash` procs can apply the same rule.
  result = sizeof(T) >= 4

proc hash*(x: string): Hash

proc hash*[T: SomeNumber | Ordinal | char](x: T): Hash {.inline.} =
## Efficient hashing of numbers, ordinals (eg enum), char.
##
## Types for which `preferStringHash(T)` holds are hashed from their raw
## bytes via `hashData`; every other type hashes to `ord(x)`.
# NOTE(review): indentation is lost in this view; presumably the `hashData`
# call belongs to the outer `when` and so also covers non-float types - confirm.
when preferStringHash(T): # fix #11764
when T is SomeFloat:
# 0.0 vs -0.0 should map to same hash to avoid weird behavior.
# the only non nan value that can cause clash is 0 according to
# https://stackoverflow.com/questions/31087915/are-there-denormalized-floats-that-evaluate-to-the-same-value-apart-from-0-0
# bugfix: the previous code was using `x = x + 1.0` (presumably for
# handling negative 0), however this doesn't work well for small inputs
# because `x+1.0` can become 0 with floating point accuracy, which
# leads to hash collisions.
# Note: this hit this bug: #11775:
# `let x = if x == 0.0: 0.0 else: x`
# shadow the parameter with a mutable copy so a zero can be rewritten
var x = x
# `-0.0 == 0` is true, so either zero is replaced by +0.0 here
if x == 0: x = 0
# hash the `T.sizeof` bytes that make up `x`
hashData(cast[pointer](unsafeAddr x), T.sizeof)
else:
# more efficient for small types
ord(x)

proc `!&`*(h: Hash, val: int): Hash {.inline.} =
## Mixes a hash value `h` with `val` to produce a new hash value.
##
## This is only needed if you need to implement a hash proc for a new datatype.
## Uses Jenkins hash: https://en.wikipedia.org/wiki/Jenkins_hash_function
let h = cast[uint](h)
let val = cast[uint](val)
var res = h + val
Expand Down Expand Up @@ -124,6 +97,45 @@ proc hashData*(data: pointer, size: int): Hash =
dec(s)
result = !$h

# Stub whose body is only `discard`, so as written it yields the default
# `Hash`. Per the trailing note the real behaviour is expected to come from
# compiler/vmops - presumably via a VM callback registered under this name;
# confirm against vmops.
proc hashBiggestIntVM(x: BiggestInt): Hash = discard # in vmops

# Hashes a `BiggestInt`; currently just returns `toU32(x)`.
# NOTE(review): per the system module docs `toU32` keeps only the low 32 bits
# of `x` and does no mixing, and the comment inside `hash` describes
# `toU32(x)` as producing very poor hashes. The two commented-out lines below
# look like the intended implementation (`hashBiggestIntVM` under `nimvm`,
# `hashData` otherwise) - confirm which one is meant to ship.
proc hashBiggestInt*(x: BiggestInt): Hash {.inline.} =
# ## for internal use; user code should prefer `hash` overloads
toU32(x)
# when nimvm: hashBiggestIntVM(x)
# else: hashData(cast[pointer](unsafeAddr x), type(x).sizeof)

proc hash*[T: SomeNumber | Ordinal | char](x: T): Hash {.inline.} =
  ## Efficient hashing of numbers, ordinals (eg enum), char.
  ##
  ## Types of at least 4 bytes are first mapped to a `BiggestInt` and then
  ## passed to `hashBiggestInt`; smaller types hash to `ord(x)`.
  ## `0.0` and `-0.0` produce the same hash.
  when T.sizeof >= 4:
    # fix #11764: `ord(x)`, `toU32(x)` or similar are up to 4X faster to compute
    # compared to jenkins `hashData` but result in very poor hashes, leading to
    # collisions; this can lead to several order magnitude (eg 1e3) slowdowns
    # e.g. when used in hash tables, so we prefer to use slower to compute good
    # hashes here. Murmur3 would improve speed of hash computation.
    when T is SomeFloat:
      # 0.0 vs -0.0 should map to same hash to avoid weird behavior.
      # the only non nan value that can cause clash is 0 according to
      # https://stackoverflow.com/questions/31087915/are-there-denormalized-floats-that-evaluate-to-the-same-value-apart-from-0-0
      # bugfix: the previous code was using `x = x + 1.0` (presumably for
      # handling negative 0), however this leads to collisions for small x due
      # to FP finite precision.
      let x: BiggestInt =
        if x == 0: 0.BiggestInt
        else:
          when sizeof(BiggestInt) == sizeof(T):
            # same size: reinterpret the float's bits as an integer
            cast[BiggestInt](x)
          else: # for nimvm
            # narrower float: take its 32-bit pattern, then widen
            cast[int32](x).BiggestInt
    elif T is SomeUnsignedInt:
      # Reinterpret the bits rather than convert by value: a `uint64` above
      # `high(BiggestInt)` is not representable as a `BiggestInt`, so a value
      # conversion can fail its range check. Widening to `uint64` first keeps
      # source and target of the `cast` the same size; for every value that
      # fits in `BiggestInt` the result equals `x.BiggestInt`.
      let x = cast[BiggestInt](x.uint64)
    else:
      let x = x.BiggestInt
    hashBiggestInt(x)
  else:
    # empirically better for small types, the collision risk is limited anyway
    # due to cardinality of at most 2^16=65536
    ord(x)

when defined(js):
var objectID = 0

Expand Down
7 changes: 4 additions & 3 deletions tests/collections/ttables.nim
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,10 @@ block tableconstr:
block ttables2:
proc TestHashIntInt() =
var tab = initTable[int,int]()
for i in 1..1_000_000:
const n = 1_000_000 # bottleneck: 50 seconds on OSX in debug mode
for i in 1..n:
tab[i] = i
for i in 1..1_000_000:
for i in 1..n:
var x = tab[i]
if x != i : echo "not found ", i

Expand Down Expand Up @@ -233,7 +234,7 @@ block tablesref:
for y in 0..1:
assert t[(x,y)] == $x & $y
assert($t ==
"{(x: 0, y: 1): \"01\", (x: 0, y: 0): \"00\", (x: 1, y: 0): \"10\", (x: 1, y: 1): \"11\"}")
"""{(x: 0, y: 1): "01", (x: 1, y: 0): "10", (x: 0, y: 0): "00", (x: 1, y: 1): "11"}""")

block tableTest2:
var t = newTable[string, float]()
Expand Down
2 changes: 1 addition & 1 deletion tests/collections/ttablesthreads.nim
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ block tableTest1:
for y in 0..1:
assert t[(x,y)] == $x & $y
assert($t ==
"{(x: 0, y: 1): \"01\", (x: 0, y: 0): \"00\", (x: 1, y: 0): \"10\", (x: 1, y: 1): \"11\"}")
"""{(x: 0, y: 0): "00", (x: 1, y: 0): "10", (x: 0, y: 1): "01", (x: 1, y: 1): "11"}""")

block tableTest2:
var t = initTable[string, float]()
Expand Down
26 changes: 26 additions & 0 deletions tests/vm/tcompiletimetable.nim
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,29 @@ addStuff("Hey"): echo "Hey"
addStuff("Hi"): echo "Hi"
dump()

import std/hashes
block:
  # check CT vs RT produces same results for Table
  template callFun(T) =
    # Builds the same `Table[T, string]` once in the VM and once at run time
    # and requires both to stringify identically.
    block:
      proc fun(): string =
        var t: Table[T, string]
        let n = 10
        for i in 0..<n:
          # narrow the index to 32 bits when `T` is not as wide as `i`
          let i2 = when T.sizeof == type(i).sizeof: i else: i.int32
          let k = cast[T](i2)
            # cast intentional for regression testing,
            # producing small values
          doAssert k notin t
          t[k] = $(i, k)
          doAssert k in t
        $t
      const s1 = fun() # evaluated at compile time
      let s2 = fun() # evaluated at run time
      # single check (it was duplicated); report both strings on mismatch
      doAssert s1 == s2, "compile-time: " & s1 & "; run-time: " & s2
  doAssert hash(0.0) == hash(-0.0)
  callFun(float)
  callFun(float32)
  callFun(int64)

0 comments on commit 7847592

Please sign in to comment.