Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC: new approach to efficiently hashing 1, 1.0, big(1), the same. #6624

Merged
merged 20 commits into from
May 7, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
155a6c8
WIP: new approach to efficiently hashing 1, 1.0, big(1), the same.
StefanKarpinski Feb 24, 2014
b3a7c84
decompose: fix documentation of decompose for rational hashing.
StefanKarpinski Apr 24, 2014
dadc7cf
define hash(x) = hash(x, zero(Uint)) in a single place; fix bugs.
StefanKarpinski Apr 24, 2014
316eeda
MathConst: == and hash
StefanKarpinski Apr 24, 2014
f791029
move generic rational hashing definitions into hashing2.jl
StefanKarpinski Apr 24, 2014
3696968
Work around LLVM's dickish undefined constant folding behavior.
StefanKarpinski Apr 25, 2014
ef79b13
generic hash(Real) optimization: remove type assert on `decompose`.
StefanKarpinski Apr 25, 2014
85afdbb
More streamlined hashing for smallish rational types (≤ 64-bits).
StefanKarpinski Apr 25, 2014
f5fd830
signbit: return a boolean value, instead of an Int.
StefanKarpinski Apr 27, 2014
660c018
isnan, isinf, isfinite: improved generic definitions.
StefanKarpinski Apr 27, 2014
04908d4
move hash(WeakRef) into base/hashing.jl also (not needed so early).
StefanKarpinski Apr 28, 2014
1c8b7d1
hashing: make new hashing work on 32-bit systems.
StefanKarpinski Apr 30, 2014
737ad6e
isequal, isless: bring comparison and sorting in line with new hashing.
StefanKarpinski Apr 28, 2014
f11b2e8
`d[k] = v`: replace key when new values are assigned in dicts.
StefanKarpinski Apr 30, 2014
5a834b2
Merge branch 'master' into sk/hashing
JeffBezanson May 2, 2014
f9cb1e3
workaround for the Type{()} error
JeffBezanson May 2, 2014
b5e0b73
update docs for == and isequal
JeffBezanson May 2, 2014
9a48950
doc: some wording adjustments for isequal.
StefanKarpinski May 2, 2014
9738dd3
restore former behavior of isless as a total order
JeffBezanson May 7, 2014
607558a
Merge branch 'master' of github.com:JuliaLang/julia into sk/hashing
JeffBezanson May 7, 2014
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions base/base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,9 @@ type Colon
end
const (:) = Colon()

hash(w::WeakRef) = hash(w.value)
isequal(w::WeakRef, v::WeakRef) = isequal(w.value, v.value)
isequal(w::WeakRef, v) = isequal(w.value, v)
isequal(w, v::WeakRef) = isequal(w, v.value)
==(w::WeakRef, v::WeakRef) = isequal(w.value, v.value)
==(w::WeakRef, v) = isequal(w.value, v)
==(w, v::WeakRef) = isequal(w, v.value)

function finalizer(o::ANY, f::Union(Function,Ptr))
if isimmutable(o)
Expand Down
6 changes: 0 additions & 6 deletions base/bitarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1720,9 +1720,3 @@ function cat(catdim::Integer, X::Union(BitArray, Integer)...)
end

# hvcat -> use fallbacks in abstractarray.jl

isequal(A::BitArray, B::BitArray) = (A == B)

# Hashing

hash(B::BitArray) = hash((size(B), B.chunks))
2 changes: 1 addition & 1 deletion base/bool.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ typemax(::Type{Bool}) = true
(|)(x::Bool, y::Bool) = box(Bool,or_int(unbox(Bool,x),unbox(Bool,y)))
($)(x::Bool, y::Bool) = (x!=y)

signbit(x::Bool) = 0
signbit(x::Bool) = false
sign(x::Bool) = x
abs(x::Bool) = x
abs2(x::Bool) = x
Expand Down
Empty file added base/comparison.jl
Empty file.
2 changes: 0 additions & 2 deletions base/complex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,6 @@ end

isequal(z::Complex, w::Complex) = isequal(real(z),real(w)) & isequal(imag(z),imag(w))

hash(z::Complex) = bitmix(hash(real(z)),hash(imag(z)))

conj(z::Complex) = Complex(real(z),-imag(z))
abs(z::Complex) = hypot(real(z), imag(z))
abs2(z::Complex) = real(z)*real(z) + imag(z)*imag(z)
Expand Down
5 changes: 5 additions & 0 deletions base/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ convert(::Type{Float16}, x::MathConst) = float16(float32(x))
convert{T<:Real}(::Type{Complex{T}}, x::MathConst) = convert(Complex{T}, float64(x))
convert{T<:Integer}(::Type{Rational{T}}, x::MathConst) = convert(Rational{T}, float64(x))

=={s}(::MathConst{s}, ::MathConst{s}) = true
==(::MathConst, ::MathConst) = false

hash(x::MathConst, h::Uint) = hash(object_id(x), h)

-(x::MathConst) = -float64(x)
for op in {:+, :-, :*, :/, :^}
@eval $op(x::MathConst, y::MathConst) = $op(float64(x),float64(y))
Expand Down
99 changes: 1 addition & 98 deletions base/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,6 @@ filter(f::Function, d::Associative) = filter!(f,copy(d))

eltype{K,V}(a::Associative{K,V}) = (K,V)

function hash(d::Associative)
h::Uint = 0
for (k,v) in d
h $= bitmix(hash(k),~hash(v))
end
h
end

function isequal(l::Associative, r::Associative)
if isa(l,ObjectIdDict) != isa(r,ObjectIdDict)
return false
Expand Down Expand Up @@ -201,96 +193,6 @@ function length(d::ObjectIdDict)
n
end

# hashing

function int32hash(n::Uint32)
local a::Uint32 = n
a = (a + 0x7ed55d16) + a << 12
a = (a $ 0xc761c23c) $ a >> 19
a = (a + 0x165667b1) + a << 5
a = (a + 0xd3a2646c) $ a << 9
a = (a + 0xfd7046c5) + a << 3
a = (a $ 0xb55a4f09) $ a >> 16
return a
end

function int64hash(n::Uint64)
local a::Uint64 = n
a = ~a + (a << 21)
a = a $ (a >> 24)
a = (a + (a << 3)) + (a << 8)
a = a $ (a >> 14)
a = (a + (a << 2)) + (a << 4)
a = a $ (a >> 28)
a = a + (a << 31)
return a
end

function int64to32hash(n::Uint64)
local key::Uint64 = n
key = ~key + (key << 18)
key = key $ (key >> 31)
key = key * 21
key = key $ (key >> 11)
key = key + (key << 6 )
key = key $ (key >> 22)
return uint32(key)
end

bitmix(a::Union(Int32,Uint32), b::Union(Int32,Uint32)) = int64to32hash((uint64(a)<<32)|uint64(b))
bitmix(a::Union(Int64,Uint64), b::Union(Int64, Uint64)) = int64hash(uint64(a$((b<<32)|(b>>>32))))

if WORD_SIZE == 64
hash64(x::Float64) = int64hash(reinterpret(Uint64,x))
hash64(x::Union(Int64,Uint64)) = int64hash(reinterpret(Uint64,x))
else
hash64(x::Float64) = int64to32hash(reinterpret(Uint64,x))
hash64(x::Union(Int64,Uint64)) = int64to32hash(reinterpret(Uint64,x))
end

hash(x::Union(Bool,Char,Int8,Uint8,Int16,Uint16,Int32,Uint32,Int64,Uint64)) =
hash64(uint64(x))

function hash(x::Integer)
h::Uint = hash(uint64(x&0xffffffffffffffff))
if typemin(Int64) <= x <= typemax(Uint64)
return h
end
x >>>= 64
while x != 0 && x != -1
h = bitmix(h, hash(uint64(x&0xffffffffffffffff)))
x >>>= 64
end
return h
end

hash(x::Float32) = hash(reinterpret(Uint32, ifelse(isnan(x), NaN32, x)))
hash(x::Float64) = hash(reinterpret(Uint64, ifelse(isnan(x), NaN, x)))

function hash(t::Tuple)
h::Uint = 0
for i=1:length(t)
h = bitmix(h,int(hash(t[i]))+42)
end
return h
end

function hash(a::AbstractArray)
h::Uint = hash(size(a))+1
for i=1:length(a)
h = bitmix(h,int(hash(a[i])))
end
return h
end

# make sure Array{Bool} and BitArray can be equivalent
hash(a::AbstractArray{Bool}) = hash(bitpack(a))

hash(x::ANY) = object_id(x)

hash(x::Expr) = bitmix(hash(x.head),hash(x.args)+43)


# dict

type Dict{K,V} <: Associative{K,V}
Expand Down Expand Up @@ -538,6 +440,7 @@ function setindex!{K,V}(h::Dict{K,V}, v0, key0)
index = ht_keyindex2(h, key)

if index > 0
h.keys[index] = key
h.vals[index] = v
else
_setindex!(h, v, key, -index)
Expand Down
1 change: 0 additions & 1 deletion base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,6 @@ export
atanh,
big,
binomial,
bitmix,
bool,
bswap,
cbrt,
Expand Down
4 changes: 2 additions & 2 deletions base/expr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ astcopy(x::Union(SymbolNode,GetfieldNode,Expr)) = copy(x)
astcopy(x::Array{Any,1}) = map(astcopy, x)
astcopy(x) = x

isequal(x::Expr, y::Expr) = (is(x.head,y.head) && isequal(x.args,y.args))
isequal(x::QuoteNode, y::QuoteNode) = isequal(x.value, y.value)
==(x::Expr, y::Expr) = x.head === y.head && x.args == y.args
==(x::QuoteNode, y::QuoteNode) = x.value == y.value

function show(io::IO, tv::TypeVar)
if !is(tv.lb, None)
Expand Down
23 changes: 6 additions & 17 deletions base/float.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,19 +150,11 @@ mod{T<:FloatingPoint}(x::T, y::T) = rem(y+rem(x,y),y)
<=(x::Float32, y::Float32) = le_float(unbox(Float32,x),unbox(Float32,y))
<=(x::Float64, y::Float64) = le_float(unbox(Float64,x),unbox(Float64,y))

isequal{T<:FloatingPoint}(x::T, y::T) =
((x==y) & (signbit(x)==signbit(y))) | (isnan(x)&isnan(y))

isequal(x::Float32, y::Float32) = fpiseq(unbox(Float32,x),unbox(Float32,y))
isequal(x::Float64, y::Float64) = fpiseq(unbox(Float64,x),unbox(Float64,y))
isless (x::Float32, y::Float32) = fpislt(unbox(Float32,x),unbox(Float32,y))
isless (x::Float64, y::Float64) = fpislt(unbox(Float64,x),unbox(Float64,y))

isless(a::FloatingPoint, b::FloatingPoint) =
(a<b) | (!isnan(a) & (isnan(b) | (signbit(a)>signbit(b))))
isless(a::Real, b::FloatingPoint) = (a<b) | isless(float(a),b)
isless(a::FloatingPoint, b::Real) = (a<b) | isless(a,float(b))

function cmp(x::FloatingPoint, y::FloatingPoint)
(isnan(x) || isnan(y)) && throw(DomainError())
ifelse(x<y, -1, ifelse(x>y, 1, 0))
Expand Down Expand Up @@ -220,18 +212,15 @@ end
abs(x::Float64) = box(Float64,abs_float(unbox(Float64,x)))
abs(x::Float32) = box(Float32,abs_float(unbox(Float32,x)))

isnan(x::FloatingPoint) = (x != x)
isnan(x::Real) = isnan(float(x))
isnan(x::Integer) = false
isnan(x::FloatingPoint) = x != x
isnan(x::Real) = false

isinf(x::FloatingPoint) = (abs(x) == Inf)
isinf(x::Real) = isinf(float(x))
isinf(x::Integer) = false

isfinite(x::FloatingPoint) = (x-x == 0)
isfinite(x::Real) = isfinite(float(x))
isfinite(x::FloatingPoint) = x - x == 0
isfinite(x::Real) = decompose(x)[3] != 0
isfinite(x::Integer) = true

isinf(x::Real) = !isnan(x) & !isfinite(x)

## floating point traits ##

const Inf16 = box(Float16,unbox(Uint16,0x7c00))
Expand Down
2 changes: 0 additions & 2 deletions base/float16.jl
Original file line number Diff line number Diff line change
Expand Up @@ -139,5 +139,3 @@ hypot(a::Float16, b::Float16) = float16(hypot(float32(a), float32(b)))
ldexp(a::Float16, b::Integer) = float16(ldexp(float32(a), b))
exponent(x::Float16) = exponent(float32(x))
^(x::Float16, y::Integer) = x^float16(y)

hash(x::Float16) = hash(reinterpret(Uint16, isnan(x) ? NaN16 : x))
1 change: 0 additions & 1 deletion base/gmp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,6 @@ end
binomial(n::BigInt, k::Integer) = k < 0 ? throw(DomainError()) : binomial(n, uint(k))

==(x::BigInt, y::BigInt) = cmp(x,y) == 0
isequal(x::BigInt, y::BigInt) = cmp(x,y) == 0
<=(x::BigInt, y::BigInt) = cmp(x,y) <= 0
>=(x::BigInt, y::BigInt) = cmp(x,y) >= 0
<(x::BigInt, y::BigInt) = cmp(x,y) < 0
Expand Down
81 changes: 81 additions & 0 deletions base/hashing.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
## hashing a single value ##

# One-argument entry point: hash any value starting from a zero seed of
# type `Uint`. All type-specific behaviour lives in the two-argument methods.
function hash(x::Any)
    return hash(x, zero(Uint))
end

# A weak reference hashes exactly like the value it currently refers to.
function hash(w::WeakRef, h::Uint)
    return hash(w.value, h)
end

## core data hashing functions ##

# Mix the 64 bits of `n` into a 64-bit hash value.
#
# Runs a fixed sequence of add-shift and xor-shift rounds over a working
# copy of `n`. The accumulator is declared `Uint64`, so every round is
# carried out in unsigned 64-bit arithmetic.
function hash_64_64(n::Uint64)
    local acc::Uint64 = n
    acc = ~acc + (acc << 21)
    acc $= acc >> 24
    acc += (acc << 3) + (acc << 8)
    acc $= acc >> 14
    acc += (acc << 2) + (acc << 4)
    acc $= acc >> 28
    acc += acc << 31
    return acc
end

# Reduce the 64 bits of `n` to a 32-bit hash value.
#
# The mixing rounds run on a `Uint64` accumulator; the final `uint32`
# conversion produces the 32-bit result that is returned.
function hash_64_32(n::Uint64)
    local acc::Uint64 = n
    acc = ~acc + (acc << 18)
    acc $= acc >> 31
    acc *= 21
    acc $= acc >> 11
    acc += acc << 6
    acc $= acc >> 22
    return uint32(acc)
end

# Mix the 32 bits of `n` into a 32-bit hash value.
#
# Six rounds, each combining the accumulator with a fixed 32-bit constant
# and with a shifted copy of itself. Shifts bind tighter than `+` and `$`,
# which share one precedence level and group left to right; the
# parentheses below spell that grouping out.
function hash_32_32(n::Uint32)
    local acc::Uint32 = n
    acc = (acc + 0x7ed55d16) + (acc << 12)
    acc = (acc $ 0xc761c23c) $ (acc >> 19)
    acc = (acc + 0x165667b1) + (acc << 5)
    acc = (acc + 0xd3a2646c) $ (acc << 9)
    acc = (acc + 0xfd7046c5) + (acc << 3)
    acc = (acc $ 0xb55a4f09) $ (acc >> 16)
    return acc
end

# Pick the mixers that match the native word size:
#   hash_uint64 - takes a Uint64 on every platform
#   hash_uint   - takes a native-word Uint
if Uint == Uint64
    # 64-bit word: both entry points use the 64 -> 64 mixer.
    function hash_uint64(x::Uint64)
        return hash_64_64(x)
    end
    function hash_uint(x::Uint)
        return hash_64_64(x)
    end
else
    # Narrower word: Uint64 input goes through the 64 -> 32 mixer,
    # native-word input through the 32 -> 32 mixer.
    function hash_uint64(x::Uint64)
        return hash_64_32(x)
    end
    function hash_uint(x::Uint)
        return hash_32_32(x)
    end
end

## hashing small, built-in numeric types ##

# Shared mixer for the fixed-size numeric hashes. Combines an integer bit
# pattern `a`, the raw bits of the Float64 `b`, and the running hash `h`
# into one Uint64, then passes it through `hash_uint64`.
function hx(a::Uint64, b::Float64, h::Uint)
    float_bits = reinterpret(Uint64, b)
    return hash_uint64((3a + float_bits) - h)
end

# Precomputed `hx` result for NaN with a zero integer part and zero seed.
const hx_NaN = hx(uint64(0), NaN, uint(0))

# Each 64-bit method hands `hx` two views of the same number: an integer
# bit pattern (as Uint64) and the value as a Float64, plus the running hash.
# Uint64: the value itself and its Float64 conversion.
hash(x::Uint64, h::Uint) = hx(x, float64(x), h)
# Int64: the two's-complement bits reinterpreted as Uint64, and its Float64 conversion.
hash(x::Int64, h::Uint) = hx(reinterpret(Uint64,x), float64(x), h)
# Float64: every NaN bypasses `hx` and yields `hx_NaN $ h`. Otherwise the
# integer view is `fptosi` of the value, boxed as Uint64.
# NOTE(review): presumably this makes an integer-valued Float64 hash like the
# equal Int64; the fptosi result for values outside the Int64 range (and for
# Inf) is not visible from this code - confirm.
hash(x::Float64, h::Uint) = isnan(x) ? (hx_NaN $ h) : hx(box(Uint64,fptosi(unbox(Float64,x))), x, h)

# Narrower integers are widened to Int64 and hashed by the Int64 method.
hash(x::Union(Int8,Uint8,Int16,Uint16,Int32,Uint32), h::Uint) = hash(int64(x), h)
# Float32 is widened to Float64 and hashed by the Float64 method.
hash(x::Float32, h::Uint) = hash(float64(x), h)

## hashing complex numbers ##

# Seed used when hashing the imaginary part, and the hash of an integer
# zero under that seed.
const h_imag = uint(0x32a7a07f3e7cd1f9)
const hash_0_imag = hash(0, h_imag)

# Hash a complex number by folding the imaginary part into the seed used
# for the real part. When `hash(imag(z), h_imag)` equals `hash_0_imag` the
# two xor terms cancel and the result is simply `hash(real(z), h)`.
function hash(z::Complex, h::Uint)
    # TODO: with default argument specialization, this would be better:
    # hash(real(z), h $ hash(imag(z), h $ h_imag) $ hash(0, h $ h_imag))
    imag_mix = hash(imag(z), h_imag) $ hash_0_imag
    return hash(real(z), h $ imag_mix)
end

## special hashing for booleans and characters ##

# Booleans hash as their integer value, with a Bool-specific constant
# added to the seed first.
function hash(x::Bool, h::Uint)
    return hash(int(x), h + uint(0x4cd135a1755139a5))
end

# Characters hash as their integer value, with a Char-specific constant
# added to the seed first.
function hash(x::Char, h::Uint)
    return hash(int(x), h + uint(0x10f989ff0f886f11))
end

## symbol & expression hashing ##

# Symbols are hashed via their object id.
function hash(x::Symbol, h::Uint)
    return hash(object_id(x), h)
end

# Expressions: hash the head under a seed offset by an Expr-specific
# constant, then use that result as the seed for hashing the argument list.
function hash(x::Expr, h::Uint)
    head_hash = hash(x.head, h + uint(0x83c7900696d26dc6))
    return hash(x.args, head_hash)
end
Loading