From 0be5cdaa858ab15b97b7877d584e7e2a7a4775dc Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Sat, 4 Feb 2017 17:19:35 -0500 Subject: [PATCH 1/4] Refactor IntSets * Complete deprecation of stored zeros; IntSets now only support integers in the range `1:typemax(Int)` * Complete deprecation of `complement`; removes all support for inverted IntSets * Refactor internals to rely on a BitVector, allowing the use of highly optimized `map` methods. `IntSet` is now immutable. This significantly improves performance across varying [densities](http://imgur.com/a/uqv8A) and [sizes](http://imgur.com/a/iEgcr). These are compared against a modified Base with deprecation warnings removed for a fairer comparison. Testing code [available here](https://github.com/mbauman/IntSets.jl/tree/b50a7c97abbe9786e33221f723e107e266f31fe4/test). * Add more tests and organize into testsets. * Improve hashing; `hash(IntSet([1]))` is now distinct from `hash(IntSet([65]))` This is a continuation of #10065. Now that complements are fully removed, making IntSet immutable solves the performance issue. I am keeping the name the same within this PR as it vastly simplifies comparisons between the two implementations; the name can later be changed to `IndexSet` if still desired. The naming story is now a bit more complicated since we support offset indices, but a future change could perhaps allow wrapping any `AbstractVector{Bool}` and base the supported `Int`s on those indices. Very few methods depend upon BitArray internals. --- base/bitarray.jl | 14 ++ base/coreimg.jl | 1 + base/inference.jl | 4 +- base/intset.jl | 440 +++++++++++++++------------------------ base/multidimensional.jl | 15 -- test/intset.jl | 395 ++++++++++++++++++++++++----------- 6 files changed, 451 insertions(+), 418 deletions(-) diff --git a/base/bitarray.jl b/base/bitarray.jl index efb2381a5c53a..924eb4712e23a 100644 --- a/base/bitarray.jl +++ b/base/bitarray.jl @@ -738,6 +738,20 @@ end return B end +indexoffset(i) = first(i)-1 +indexoffset(::Colon) = 0 + +@inline function setindex!(B::BitArray, x, J0::Union{Colon,UnitRange{Int}}) + I0 = to_indices(B, (J0,))[1] + @boundscheck checkbounds(B, I0) + y = Bool(x) + l0 = length(I0) + l0 == 0 && return B + f0 = indexoffset(I0)+1 + fill_chunks!(B.chunks, y, f0, l0) + return B +end + # logical indexing # When indexing with a BitArray, we can operate whole chunks at a time for a ~100x gain diff --git a/base/coreimg.jl b/base/coreimg.jl index 3f1ea2b1ead62..c6ef6fd5de137 100644 --- a/base/coreimg.jl +++ b/base/coreimg.jl @@ -51,6 +51,7 @@ end include("reduce.jl") ## core structures +include("bitarray.jl") include("intset.jl") include("associative.jl") diff --git a/base/inference.jl b/base/inference.jl index b6fc5a4987412..1667535b92a54 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -2586,8 +2586,8 @@ function typeinf_frame(frame) local pc´::Int = pc + 1 # next program-counter (after executing instruction) if pc == frame.pc´´ # need to update pc´´ to point at the new lowest instruction in W - min_pc = next(W, Int64(pc) + 1)[1] - if min_pc >= W.limit + min_pc = next(W, Int64(pc))[2] + if done(W, min_pc) frame.pc´´ = max(min_pc, n + 1) else frame.pc´´ = min_pc diff --git a/base/intset.jl b/base/intset.jl index 2200c73d6ca5b..9b8456ef0a260 100644 --- a/base/intset.jl +++ b/base/intset.jl @@ -1,339 +1,231 @@ # This file is a part of Julia. License is MIT: http://julialang.org/license -type IntSet <: AbstractSet{Int} - bits::Array{UInt32,1} - limit::Int - fill1s::Bool - - IntSet() = new(zeros(UInt32,256>>>5), 256, false) +immutable IntSet <: AbstractSet{Int} + bits::BitVector + IntSet() = new(fill!(BitVector(256), false)) end -IntSet(itr) = (s=IntSet(); for a in itr; push!(s,a); end; s) +IntSet(itr) = union!(IntSet(), itr) eltype(::Type{IntSet}) = Int64 similar(s::IntSet) = IntSet() - -function show(io::IO, s::IntSet) - print(io, "IntSet([") - first = true - for n in s - if n > s.limit - break - end - if !first - print(io, ", ") - end - print(io, n) - first = false - end - if s.fill1s - print(io, ", ..., ", typemax(Int)-1) - end - print(io, "])") +copy(s1::IntSet) = copy!(IntSet(), s1) +function copy!(to::IntSet, from::IntSet) + resize!(to.bits, length(from.bits)) + copy!(to.bits, from.bits) + to end +eltype(s::IntSet) = Int +sizehint!(s::IntSet, n::Integer) = (_resize0!(s.bits, n); s) -copy(s::IntSet) = union!(IntSet(), s) - -function sizehint!(s::IntSet, top::Integer) - if top >= s.limit - lim = ((top+31) & -32)>>>5 - olsz = length(s.bits) - if olsz < lim - resize!(s.bits, lim) - fill = s.fill1s ? UInt32(-1) : UInt32(0) - for i=(olsz+1):lim; s.bits[i] = fill; end - end - s.limit = top +# An internal function for setting the inclusion bit for a given integer n >= 0 +@inline function _setint!(s::IntSet, idx::Integer, b::Bool) + if idx > length(s.bits) + !b && return s # setting a bit to zero outside the set's bits is a no-op + newlen = idx + idx>>1 # This operation may overflow; we want saturation + _resize0!(s.bits, ifelse(newlen<0, typemax(Int), newlen)) end + unsafe_setindex!(s.bits, b, idx) # Use @inbounds once available s end -function push!(s::IntSet, n::Integer) - if n >= s.limit - if s.fill1s - return s +# An internal function to resize a bitarray and ensure the newly allocated +# elements are zeroed (will become unnecessary if this behavior changes) +@inline function _resize0!(b::BitVector, newlen::Integer) + len = length(b) + resize!(b, newlen) + len < newlen && unsafe_setindex!(b, false, len+1:newlen) # resize! gives dirty memory + b +end + +# An internal function that takes a pure function `f` and maps across two BitArrays +# allowing the lengths to be different and altering b1 with the result +function _matched_map!{F}(f::F, b1::BitArray, b2::BitArray) + l1, l2 = length(b1), length(b2) + if l1 == l2 + map!(f, b1, b1, b2) + elseif l1 < l2 + _resize0!(b1, l2) + map!(f, b1, b1, b2) + elseif l1 > l2 + if f(false, false) == f(true, false) == false + # We don't need to worry about the trailing bits — they're all false + resize!(b1, l2) + map!(f, b1, b1, b2) else - lim = Int(n + div(n,2)) - sizehint!(s, lim) - end - elseif n <= 0 - if n < 0 - throw(ArgumentError("IntSet elements cannot be negative")) - else - depwarn("storing zero in IntSets is deprecated", :push!) + # We transiently extend b2 — as IntSet internal storage this is unobservable + _resize0!(b2, l1) + map!(f, b1, b1, b2) + resize!(b2, l2) end end - s.bits[n>>5 + 1] |= (UInt32(1)<<(n&31)) - return s + b1 end -function union!(s::IntSet, ns) - for n in ns - push!(s, n) - end - return s -end +@noinline _throw_intset_bounds_err() = throw(ArgumentError("elements of IntSet must be between 1 and typemax(Int)")) +@noinline _throw_keyerror(n) = throw(KeyError(n)) -function pop!(s::IntSet, n::Integer, deflt) - if n >= s.limit - if s.fill1s - lim = Int(n + div(n,2)) - sizehint!(s, lim) - else - return deflt - end - end - if n <= 0 - if n < 0 - return deflt - else - depwarn("stored zeros in IntSet is deprecated", :pop!) - end - end - mask = UInt32(1)<<(n&31) - idx = n>>5 + 1 - b = s.bits[idx] - if (b&mask)==0; return deflt; end - s.bits[idx] = b&~mask - return n +@inline function push!(s::IntSet, n::Integer) + 0 < n <= typemax(Int) || _throw_intset_bounds_err() + _setint!(s, n, true) end +push!(s::IntSet, ns::Integer...) = (for n in ns; push!(s, n); end; s) -function pop!(s::IntSet, n::Integer) - if pop!(s, n, n+1) == n+1 - throw(KeyError(n)) - end - return n +@inline function pop!(s::IntSet) + pop!(s, last(s)) end +@inline function pop!(s::IntSet, n::Integer) + n in s ? (_delete!(s, n); n) : _throw_keyerror(n) +end +@inline function pop!(s::IntSet, n::Integer, default) + n in s ? (_delete!(s, n); n) : default +end +@inline _delete!(s::IntSet, n::Integer) = _setint!(s, n, false) +@inline delete!(s::IntSet, n::Integer) = n < 0 ? s : _delete!(s, n) +shift!(s::IntSet) = pop!(s, first(s)) + +empty!(s::IntSet) = (fill!(s.bits, false); s) +isempty(s::IntSet) = !any(s.bits) -# TODO: what should happen when fill1s == true? -pop!(s::IntSet) = pop!(s, last(s)) +# Mathematical set functions: union!, intersect!, setdiff!, symdiff! -function delete!(s::IntSet, n::Integer) - pop!(s, n, n) - return s +union(s::IntSet) = copy(s) +union(s1::IntSet, s2::IntSet) = union!(copy(s1), s2) +union(s1::IntSet, ss::IntSet...) = union(s1, union(ss...)) +union(s::IntSet, ns) = union!(copy(s), ns) +union!(s::IntSet, ns) = (for n in ns; push!(s, n); end; s) +function union!(s1::IntSet, s2::IntSet) + _matched_map!(|, s1.bits, s2.bits) + s1 end -function setdiff!(s::IntSet, ns) +intersect(s1::IntSet) = copy(s1) +intersect(s1::IntSet, ss::IntSet...) = intersect(s1, intersect(ss...)) +function intersect(s1::IntSet, ns) + s = IntSet() for n in ns - delete!(s, n) + n in s1 && push!(s, n) end - return s -end - -setdiff(a::IntSet, b::IntSet) = setdiff!(copy(a),b) -symdiff(s1::IntSet, s2::IntSet) = - (s1.limit >= s2.limit ? symdiff!(copy(s1), s2) : symdiff!(copy(s2), s1)) - -function empty!(s::IntSet) - fill!(s.bits, 0) - return s + s end - +intersect(s1::IntSet, s2::IntSet) = + (length(s1.bits) >= length(s2.bits) ? intersect!(copy(s1), s2) : intersect!(copy(s2), s1)) """ - symdiff!(s, n) + intersect!(s1::IntSet, s2::IntSet) -The set `s` is destructively modified to toggle the inclusion of integer `n`. +Intersects sets `s1` and `s2` and overwrites the set `s1` with the result. If needed, `s1` +will be expanded to the size of `s2`. """ -function symdiff!(s::IntSet, n::Integer) - if n >= s.limit - lim = Int(n + div(n,2)) - sizehint!(s, lim) - elseif n < 0 - throw(ArgumentError("IntSet elements cannot be negative")) - end - s.bits[n>>5 + 1] ⊻= UInt32(1)<<(n&31) - return s +function intersect!(s1::IntSet, s2::IntSet) + _matched_map!(&, s1.bits, s2.bits) + s1 end +setdiff(s::IntSet, ns) = setdiff!(copy(s), ns) +setdiff!(s::IntSet, ns) = (for n in ns; _delete!(s, n); end; s) +function setdiff!(s1::IntSet, s2::IntSet) + _matched_map!(>, s1.bits, s2.bits) + s1 +end + +symdiff(s::IntSet, ns) = symdiff!(copy(s), ns) """ symdiff!(s, itr) For each element in `itr`, destructively toggle its inclusion in set `s`. """ -function symdiff!(s::IntSet, ns) - for n in ns - symdiff!(s, n) - end - return s -end +symdiff!(s::IntSet, ns) = (for n in ns; symdiff!(s, n); end; s) +""" + symdiff!(s, n) -function copy!(to::IntSet, from::IntSet) - if to === from - return to - else - empty!(to) - return union!(to, from) - end +The set `s` is destructively modified to toggle the inclusion of integer `n`. +""" +function symdiff!(s::IntSet, n::Integer) + 0 <= n < typemax(Int) || _throw_intset_bounds_err() + val = !(n in s) + _setint!(s, n, val) + s end - -in(n, s::IntSet) = n < 0 ? false : (n > typemax(Int) ? s.fill1s : in(convert(Int, n), s)) -function in(n::Int, s::IntSet) - if n >= s.limit - # max IntSet length is typemax(Int), so highest possible element is - # typemax(Int)-1 - return s.fill1s && n >= 0 && n < typemax(Int) - elseif n <= 0 - if n < 0 - return false - else - depwarn("stored zeros in IntSet is deprecated", :in) - end - end - (s.bits[n>>5 + 1] & (UInt32(1)<<(n&31))) != 0 +function symdiff!(s1::IntSet, s2::IntSet) + _matched_map!(xor, s1.bits, s2.bits) + s1 end -start(s::IntSet) = Int64(0) -done(s::IntSet, i) = (!s.fill1s && next(s,i)[1] >= s.limit) || i == typemax(Int) -function next(s::IntSet, i) - if i >= s.limit - n = Int64(i) +@inline function in(n::Integer, s::IntSet) + if 1 <= n <= length(s.bits) + unsafe_getindex(s.bits, n) else - n = Int64(ccall(:bitvector_next, UInt64, (Ptr{UInt32}, UInt64, UInt64), s.bits, i, s.limit)) + false end - (n, n+1) end -isempty(s::IntSet) = - !s.fill1s && ccall(:bitvector_any1, UInt32, (Ptr{UInt32}, UInt64, UInt64), s.bits, 0, s.limit)==0 - -function first(s::IntSet) - n = next(s,0)[1] - if n >= s.limit - throw(ArgumentError("set must be non-empty")) - end - return n +# Use the next-set index as the state to prevent looking it up again in done +start(s::IntSet) = next(s, 0)[2] +function next(s::IntSet, i) + nextidx = i == typemax(Int) ? 0 : findnext(s.bits, i+1) + (i, nextidx) end +done(s::IntSet, i) = i <= 0 -shift!(s::IntSet) = pop!(s, first(s)) +@noinline _throw_intset_notempty_error() = throw(ArgumentError("collection must be non-empty")) function last(s::IntSet) - if !s.fill1s - for i = length(s.bits):-1:1 - w = s.bits[i] - if w != 0 - return (i-1)<<5 + (31-leading_zeros(w)) - end - end - end - throw(ArgumentError("set has no last element")) -end - -length(s::IntSet) = Int(ccall(:bitvector_count, UInt64, (Ptr{UInt32}, UInt64, UInt64), s.bits, 0, s.limit)) + - (s.fill1s ? typemax(Int) - s.limit : 0) - - -# Math functions -function union!(s::IntSet, s2::IntSet) - if s2.limit > s.limit - sizehint!(s, s2.limit) - end - lim = length(s2.bits) - for n = 1:lim - s.bits[n] |= s2.bits[n] - end - if s2.fill1s - for n=lim+1:length(s.bits) - s.bits[n] = UInt32(-1) - end - end - s.fill1s |= s2.fill1s - s + idx = findprev(s.bits, length(s.bits)) + idx == 0 ? _throw_intset_notempty_error() : idx end -union(s1::IntSet) = copy(s1) -union(s1::IntSet, s2::IntSet) = (s1.limit >= s2.limit ? union!(copy(s1), s2) : union!(copy(s2), s1)) -union(s1::IntSet, ss::IntSet...) = union(s1, union(ss...)) - -""" - intersect!(s1::IntSet, s2::IntSet) - -Intersects sets `s1` and `s2` and overwrites the set `s1` with the result. If needed, `s1` -will be expanded to the size of `s2`. -""" -function intersect!(s::IntSet, s2::IntSet) - if s2.limit > s.limit - sizehint!(s, s2.limit) - end - lim = length(s2.bits) - for n = 1:lim - s.bits[n] &= s2.bits[n] - end - if !s2.fill1s - for n=lim+1:length(s.bits) - s.bits[n] = UInt32(0) - end - end - s.fill1s &= s2.fill1s - s -end - -intersect(s1::IntSet) = copy(s1) -intersect(s1::IntSet, s2::IntSet) = - (s1.limit >= s2.limit ? intersect!(copy(s1), s2) : intersect!(copy(s2), s1)) -intersect(s1::IntSet, ss::IntSet...) = intersect(s1, intersect(ss...)) - -""" - symdiff!(s1, s2) +length(s::IntSet) = sum(s.bits) -Construct the symmetric difference of sets `s1` and `s2`, storing the result in `s1`. -""" -function symdiff!(s::IntSet, s2::IntSet) - if s2.limit > s.limit - sizehint!(s, s2.limit) - end - lim = length(s2.bits) - for n = 1:lim - s.bits[n] ⊻= s2.bits[n] - end - if s2.fill1s - for n=lim+1:length(s.bits) - s.bits[n] = ~s.bits[n] - end +function show(io::IO, s::IntSet) + print(io, "IntSet([") + first = true + for n in s + !first && print(io, ", ") + print(io, n) + first = false end - s.fill1s ⊻= s2.fill1s - s + print(io, "])") end function ==(s1::IntSet, s2::IntSet) - if s1.fill1s != s2.fill1s - return false - end - lim1 = length(s1.bits) - lim2 = length(s2.bits) - for i = 1:min(lim1,lim2) - if s1.bits[i] != s2.bits[i] - return false - end - end - filln = s1.fill1s ? reinterpret(UInt32, Int32(-1)) : UInt32(0) - if lim1 > lim2 - for i = lim2:lim1 - if s1.bits[i] != filln - return false - end - end - else - for i = lim1+1:lim2 - if s2.bits[i] != filln - return false - end - end + l1 = length(s1.bits) + l2 = length(s2.bits) + # If the lengths are the same, simply punt to bitarray comparison + l1 == l2 && return s1.bits == s2.bits + + # Swap so s1 is always longer + if l1 < l2 + s2, s1 = s1, s2 + l2, l1 = l1, l2 + end + # Iteratively check the chunks of the bitarrays + c1 = s1.bits.chunks + c2 = s2.bits.chunks + @inbounds for i in 1:length(c2) + c1[i] == c2[i] || return false + end + # Ensure remaining chunks are zero + @inbounds for i in length(c2)+1:length(c1) + c1[i] == UInt64(0) || return false end return true end +issubset(a::IntSet, b::IntSet) = isequal(a, intersect(a,b)) +<(a::IntSet, b::IntSet) = (a<=b) && !isequal(a,b) +<=(a::IntSet, b::IntSet) = issubset(a, b) + const hashis_seed = UInt === UInt64 ? 0x88989f1fc7dea67d : 0xc7dea67d function hash(s::IntSet, h::UInt) - h += hashis_seed - h += hash(s.fill1s) - filln = s.fill1s ? ~zero(eltype(s.bits)) : zero(eltype(s.bits)) - for x in s.bits - if x != filln - h = hash(x, h) - end + h ⊻= hashis_seed + bc = s.bits.chunks + i = length(bc) + while i > 0 && bc[i] == UInt64(0) + # Skip trailing empty bytes to prevent extra space from changing the hash + i -= 1 + end + while i > 0 + h = hash(bc[i], h) + i -= 1 end - return h + h end - -issubset(a::IntSet, b::IntSet) = isequal(a, intersect(a,b)) -<(a::IntSet, b::IntSet) = (a<=b) && !isequal(a,b) -<=(a::IntSet, b::IntSet) = issubset(a, b) diff --git a/base/multidimensional.jl b/base/multidimensional.jl index b7b305dd621db..74f475fdf177a 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -971,17 +971,6 @@ end return B end -@inline function setindex!(B::BitArray, x, J0::Union{Colon,UnitRange{Int}}) - I0 = to_indices(B, (J0,))[1] - @boundscheck checkbounds(B, I0) - y = Bool(x) - l0 = length(I0) - l0 == 0 && return B - f0 = indexoffset(I0)+1 - fill_chunks!(B.chunks, y, f0, l0) - return B -end - @inline function setindex!(B::BitArray, X::Union{BitArray,Array}, I0::Union{Colon,UnitRange{Int}}, I::Union{Int,UnitRange{Int},Colon}...) J = to_indices(B, (I0, I...)) @@ -1276,10 +1265,6 @@ julia> unique(A, 3) end end -indexoffset(i) = first(i)-1 -indexoffset(::Colon) = 0 - - """ extrema(A, dims) -> Array{Tuple} diff --git a/test/intset.jl b/test/intset.jl index 410daf0ae1437..a3510791869d2 100644 --- a/test/intset.jl +++ b/test/intset.jl @@ -2,61 +2,61 @@ # Test functionality of IntSet +@testset "Construction, collect" begin + data_in = (1,5,100) + s = IntSet(data_in) + data_out = collect(s) + @test all(map(d->in(d,data_out), data_in)) + @test length(data_out) == length(data_in) +end -## IntSet - -# Construction, collect -data_in = (1,5,100) -s = IntSet(data_in) -data_out = collect(s) -@test all(map(d->in(d,data_out), data_in)) -@test length(data_out) == length(data_in) - -# eltype, similar -@test eltype(IntSet()) === Int64 -@test eltype(IntSet) === Int64 -@test isequal(similar(IntSet([1,2,3])), IntSet()) - -# show -@test sprint(show, IntSet()) == "IntSet([])" -@test sprint(show, IntSet([1,2,3])) == "IntSet([1, 2, 3])" - - -s = IntSet([1,2,10,20,200,300,1000,10000,10002]) -@test last(s) == 10002 -@test first(s) == 1 -@test length(s) == 9 -@test pop!(s) == 10002 -@test_throws KeyError pop!(s, -1) -@test length(s) == 8 -@test shift!(s) == 1 -@test length(s) == 7 -@test !in(1,s) -@test !in(10002,s) -@test in(10000,s) -@test in(10000.0,s) -@test !in(10002.0,s) -@test_throws ArgumentError first(IntSet()) -@test_throws ArgumentError last(IntSet()) -t = copy(s) -sizehint!(t, 20000) #check that hash does not depend on size of internal Array{UInt32, 1} -@test hash(s) == hash(t) - -@test setdiff(IntSet([1, 2, 3, 4]), IntSet([2, 4, 5, 6])) == IntSet([1, 3]) -@test symdiff(IntSet([1, 2, 3, 4]), IntSet([2, 4, 5, 6])) == IntSet([1, 3, 5, 6]) - -s2 = IntSet([1, 2, 3, 4]) -setdiff!(s2, IntSet([2, 4, 5, 6])) - -@test s2 == IntSet([1, 3]) - -# == with last-bit set (groups.google.com/forum/#!topic/julia-users/vZNjiIEG_sY) -s = IntSet(255) -@test s == s - -# issue #7851 -@test_throws ArgumentError IntSet(-1) -@test !(-1 in IntSet(1:10)) +@testset "eltype, similar" begin + @test eltype(IntSet()) === Int64 + @test eltype(IntSet) === Int64 + @test isequal(similar(IntSet([1,2,3])), IntSet()) +end + +@testset "show" begin + @test sprint(show, IntSet()) == "IntSet([])" + @test sprint(show, IntSet([1,2,3])) == "IntSet([1, 2, 3])" + show(IOBuffer(), IntSet()) +end + +@testset "in, hashing" begin + s = IntSet([1,2,10,20,200,300,1000,10000,10002]) + @test last(s) == 10002 + @test first(s) == 1 + @test length(s) == 9 + @test pop!(s) == 10002 + @test_throws KeyError pop!(s, -1) + @test length(s) == 8 + @test shift!(s) == 1 + @test length(s) == 7 + @test !in(0,s) + @test !in(1,s) + @test in(2,s) + @test !in(10002,s) + @test in(10000,s) + @test in(10000.0,s) + @test !in(10002.0,s) + @test_throws ArgumentError first(IntSet()) + @test_throws ArgumentError last(IntSet()) + t = copy(s) + sizehint!(t, 20000) #check that hash does not depend on size of internal storage + @test hash(s) == hash(t) + push!(t, 20000) + pop!(t, 20000) + @test hash(s) == hash(t) + # Ensure empty chunks don't affect hash + @test hash(IntSet([1])) != hash(IntSet([17])) + @test hash(IntSet([1])) != hash(IntSet([33])) + @test hash(IntSet([1])) != hash(IntSet([65])) + @test hash(IntSet([1])) != hash(IntSet([129])) + + # issue #7851 + @test_throws ArgumentError IntSet(-1) + @test !(-1 in IntSet(1:10)) +end # # issue #8570 # This requires 2^29 bytes of storage, which is too much for a simple test @@ -64,76 +64,217 @@ s = IntSet(255) # @test length(s) == 1 # for b in s; b; end -i = IntSet([1, 2, 3]) - -union!(i, [1, 2]) -@test length(i) == 3 -union!(i, [3, 4, 5]) -@test length(i) == 5 - -@test_throws KeyError pop!(i, 10) - -empty!(i) -@test length(i) == 0 - -@test_throws ArgumentError symdiff!(i, -3) -@test symdiff!(i, 3) == IntSet([3]) -@test symdiff!(i, 257) == IntSet([3, 257]) -@test symdiff!(i, [3, 6]) == IntSet([6, 257]) - -i = IntSet(1:6) -@test symdiff!(i, IntSet([6, 513])) == IntSet([1:5; 513]) - -i = IntSet([1, 2, 3]) -k = IntSet([4, 5]) -copy!(k, i) -@test k == i -@test !(k === i) -copy!(k, k) -@test k == i - -# unions -i = IntSet([1, 2, 3]) -j = union(i) -@test j == i -@test !(j === i) - -j = IntSet([4, 5, 6]) -@test union(i, j) == IntSet(1:6) - -k = IntSet([7, 8, 9]) -@test union(i, j, k) == IntSet(1:9) - - -## intersections -i = IntSet([1, 2, 3]) -j = IntSet([4, 5, 6]) - -@test intersect(i) == i -@test !(intersect(i) === i) - -@test intersect(i, j) == IntSet([]) -push!(j, 257) -@test intersect(i, j) == IntSet([]) -push!(j, 2, 3, 17) -@test intersect(i, j) == IntSet([2, 3]) -k = IntSet([1, 2, 3, 4, 5, 6, 7]) -@test intersect(i, j, k) == IntSet([2, 3]) - -## equality -i = IntSet([1, 2, 3]) -j = IntSet([1, 2, 4]) -@test i != j - -push!(j, 257) -pop!(j, 257) -@test i != j -@test j != i - -@test issubset(IntSet([1, 2, 4]), IntSet(1:10)) -@test issubset(IntSet([]), IntSet([])) -@test IntSet([1, 2, 4]) < IntSet(1:10) -@test !(IntSet([]) < IntSet([])) -@test IntSet([1, 2, 4]) <= IntSet(1:10) -@test IntSet([1, 2, 4]) <= IntSet([1, 2, 4]) -@test IntSet([]) <= IntSet([]) +@testset "union!, symdiff!" begin + i = IntSet([1, 2, 3]) + union!(i, [1, 2]) + @test length(i) == 3 + union!(i, [3, 4, 5]) + @test length(i) == 5 + + @test_throws KeyError pop!(i, 10) + + empty!(i) + @test length(i) == 0 + + @test_throws ArgumentError symdiff!(i, -3) + @test symdiff!(i, 3) == IntSet([3]) + @test symdiff!(i, 257) == IntSet([3, 257]) + @test symdiff!(i, [3, 6]) == IntSet([6, 257]) + + i = IntSet(1:6) + @test symdiff!(i, IntSet([6, 513])) == IntSet([1:5; 513]) +end + +@testset "copy, copy!, similar" begin + s1 = IntSet([1,2,3]) + s2 = similar(s1) + copy!(s2, s1) + s3 = copy(s2) + @test s3 == s2 == s1 + @test collect(s3) == collect(s2) == [1,2,3] +end + +@testset "push!, union" begin + i = IntSet([1, 2, 3]) + j = union(i) + @test j == i + @test !(j === i) + + j = IntSet([4, 5, 6]) + @test union(i, j) == IntSet(1:6) + + k = IntSet([7, 8, 9]) + @test union(i, j, k) == IntSet(1:9) + i = IntSet([1, 2, 3]) + j = union(i) + @test j == i + @test !(j === i) + + j = IntSet([4, 5, 6]) + @test union(i, j) == IntSet(1:6) + + k = IntSet([7, 8, 9]) + @test union(i, j, k) == IntSet(1:9) + + s1 = IntSet() + @test_throws ArgumentError push!(s1, -1) + push!(s1, 1, 10, 100, 1000) + @test collect(s1) == [1, 10, 100, 1000] + push!(s1, 606) + @test collect(s1) == [1, 10, 100, 606, 1000] + s2 = IntSet() + @test s2 === union!(s2, s1) + s3 = IntSet([1, 10, 100]) + union!(s3, [1, 606, 1000]) + s4 = union(IntSet([1, 100, 1000]), IntSet([10, 100, 606])) + @test s1 == s2 == s3 == s4 +end + +@testset "pop!, delete!" begin + s = IntSet(1:2:10) + @test pop!(s, 1) === 1 + @test !(1 in s) + @test_throws KeyError pop!(s, 1) + @test_throws KeyError pop!(s, -1) + @test pop!(s, -1, 1) === 1 + @test pop!(s, 1, 0) === 0 + @test s === delete!(s, 1) + for i in s; pop!(s, i); end + @test isempty(s) + push!(s, 100) + @test pop!(s, 100) == 100 + push!(s, 1:2:10...) + @test pop!(s) == 9 + @test pop!(s) == 7 + @test shift!(s) == 1 + @test shift!(s) == 3 + @test collect(s) == [5] + empty!(s) + @test isempty(s) +end + +@testset "intersect" begin + i = IntSet([1, 2, 3]) + j = IntSet([4, 5, 6]) + + @test intersect(i) == i + @test !(intersect(i) === i) + @test intersect(i, j) == IntSet([]) + push!(j, 257) + @test intersect(i, j) == IntSet([]) + push!(j, 2, 3, 17) + @test intersect(i, j) == IntSet([2, 3]) + k = IntSet([1, 2, 3, 4, 5, 6, 7]) + @test intersect(i, j, k) == IntSet([2, 3]) + + @test isempty(intersect(IntSet())) + @test isempty(intersect(IntSet(1:10), IntSet())) + @test isempty(intersect(IntSet(), IntSet(1:10))) + + @test intersect(IntSet([1,2,3])) == IntSet([1,2,3]) + @test intersect(IntSet(1:7), IntSet(3:10)) == + intersect(IntSet(3:10), IntSet(1:7)) == IntSet(3:7) + @test intersect(IntSet(1:10), IntSet(1:4), 1:5, [2,3,10]) == [2,3] +end + +@testset "setdiff, symdiff" begin + @test setdiff(IntSet([1, 2, 3, 4]), IntSet([2, 4, 5, 6])) == IntSet([1, 3]) + @test symdiff(IntSet([1, 2, 3, 4]), IntSet([2, 4, 5, 6])) == IntSet([1, 3, 5, 6]) + + s2 = IntSet([1, 2, 3, 4]) + setdiff!(s2, IntSet([2, 4, 5, 6])) + @test s2 == IntSet([1, 3]) + + s1 = IntSet(1:100) + setdiff!(s1, IntSet(1:2:100)) + s2 = setdiff(IntSet(1:100), IntSet(1:2:100)) + @test s1 == s2 == IntSet(2:2:100) + @test collect(s1) == collect(2:2:100) + + @test symdiff(IntSet([1, 2, 3, 4]), IntSet([2, 4, 5, 6])) == + symdiff(IntSet([2, 4, 5, 6]), IntSet([1, 2, 3, 4])) == + symdiff(IntSet([1, 2, 3, 4]), [2, 4, 5, 6]) == + symdiff(IntSet([2, 4, 5, 6]), [1, 2, 3, 4]) == IntSet([1, 3, 5, 6]) +end + +@testset "subsets, equality" begin + i = IntSet([1, 2, 3]) + k = IntSet([4, 5]) + copy!(k, i) + @test k == i + @test !(k === i) + copy!(k, k) + @test k == i + + i = IntSet([1, 2, 3]) + j = IntSet([1, 2, 4]) + @test i != j + + push!(j, 257) + pop!(j, 257) + @test i != j + @test j != i + + @test issubset(IntSet([1, 2, 4]), IntSet(1:10)) + @test issubset(IntSet([]), IntSet([])) + @test IntSet([1, 2, 4]) < IntSet(1:10) + @test !(IntSet([]) < IntSet([])) + @test IntSet([1, 2, 4]) <= IntSet(1:10) + @test IntSet([1, 2, 4]) <= IntSet([1, 2, 4]) + @test IntSet([]) <= IntSet([]) + + @test IntSet(2:2:10) < IntSet(1:10) + @test !(IntSet(2:2:10) < IntSet(2:2:10)) + @test IntSet(2:2:10) <= IntSet(2:10) + @test IntSet(2:2:10) <= IntSet(2:2:10) + + # == with last-bit set (groups.google.com/forum/#!topic/julia-users/vZNjiIEG_sY) + s = IntSet(255) + @test s == s +end + +@testset "setlike" begin + p = IntSet([1,2,5,6]) + resize!(p.bits, 6) + q = IntSet([1,3,5,7]) + resize!(q.bits, 8) + a = Set(p) + b = Set(q) + for f in (union, intersect, setdiff, symdiff) + @test collect(f(p, p)) == sort(collect(f(a, a))) + @test collect(f(q, q)) == sort(collect(f(b, b))) + @test collect(f(p, q)) == sort(collect(f(a, b))) + @test collect(f(q, p)) == sort(collect(f(b, a))) + end +end + +@testset "misc" begin + s = IntSet() + push!(s, 1, 2, 100) + @test !(0 in s) + @test 1 in s + @test 2 in s + @test !(3 in s) + @test 100 in s + @test !(101 in s) + @test !(1000 in s) + @test first(s) == 1 + @test last(s) == 100 + @test s == IntSet([1, 2, 100]) + push!(s, 1000) + @test [i for i in s] == [1, 2, 100, 1000] + @test pop!(s) == 1000 + @test s == IntSet([1, 2, 100]) + @test hash(s) == hash(IntSet([1, 2, 100])) + + b = 1:1000 + s = IntSet(b) + @test collect(s) == collect(b) + @test length(s) == length(b) + @test pop!(s, 100) == 100 + @test collect(s) == [1:99; 101:1000] + @test_throws KeyError pop!(s, 100) + @test_throws KeyError pop!(s, 0) + @test pop!(s, 100, 0) == 0 + @test pop!(s, 99, 0) == 99 +end From bb3198a7c1947c03cdf209ec3f3f3eab7fe703a5 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Sat, 4 Feb 2017 18:54:54 -0500 Subject: [PATCH 2/4] Address review comments; use === where possible --- base/inference.jl | 2 +- base/intset.jl | 27 ++++++++++---------- test/intset.jl | 63 +++++++++++++++++++++++++---------------------- 3 files changed, 48 insertions(+), 44 deletions(-) diff --git a/base/inference.jl b/base/inference.jl index 1667535b92a54..3e76d2d196a58 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -2586,7 +2586,7 @@ function typeinf_frame(frame) local pc´::Int = pc + 1 # next program-counter (after executing instruction) if pc == frame.pc´´ # need to update pc´´ to point at the new lowest instruction in W - min_pc = next(W, Int64(pc))[2] + min_pc = next(W, pc)[2] if done(W, min_pc) frame.pc´´ = max(min_pc, n + 1) else diff --git a/base/intset.jl b/base/intset.jl index 9b8456ef0a260..8c25556966914 100644 --- a/base/intset.jl +++ b/base/intset.jl @@ -2,29 +2,29 @@ immutable IntSet <: AbstractSet{Int} bits::BitVector - IntSet() = new(fill!(BitVector(256), false)) + IntSet() = new(falses(256)) end IntSet(itr) = union!(IntSet(), itr) -eltype(::Type{IntSet}) = Int64 +eltype(::Type{IntSet}) = Int similar(s::IntSet) = IntSet() copy(s1::IntSet) = copy!(IntSet(), s1) -function copy!(to::IntSet, from::IntSet) - resize!(to.bits, length(from.bits)) - copy!(to.bits, from.bits) - to +function copy!(dest::IntSet, src::IntSet) + resize!(dest.bits, length(src.bits)) + copy!(dest.bits, src.bits) + dest end eltype(s::IntSet) = Int -sizehint!(s::IntSet, n::Integer) = (_resize0!(s.bits, n); s) +sizehint!(s::IntSet, n::Integer) = (_resize0!(s.bits, max(n, length(s.bits))); s) # An internal function for setting the inclusion bit for a given integer n >= 0 @inline function _setint!(s::IntSet, idx::Integer, b::Bool) if idx > length(s.bits) - !b && return s # setting a bit to zero outside the set's bits is a no-op + b || return s # setting a bit to zero outside the set's bits is a no-op newlen = idx + idx>>1 # This operation may overflow; we want saturation _resize0!(s.bits, ifelse(newlen<0, typemax(Int), newlen)) end - unsafe_setindex!(s.bits, b, idx) # Use @inbounds once available + @inbounds s.bits[idx] = b s end @@ -33,7 +33,7 @@ end @inline function _resize0!(b::BitVector, newlen::Integer) len = length(b) resize!(b, newlen) - len < newlen && unsafe_setindex!(b, false, len+1:newlen) # resize! gives dirty memory + len < newlen && @inbounds b[len+1:newlen] = false # resize! gives dirty memory b end @@ -152,10 +152,11 @@ end @inline function in(n::Integer, s::IntSet) if 1 <= n <= length(s.bits) - unsafe_getindex(s.bits, n) + @inbounds b = s.bits[n] else - false + b = false end + b end # Use the next-set index as the state to prevent looking it up again in done @@ -225,7 +226,7 @@ function hash(s::IntSet, h::UInt) end while i > 0 h = hash(bc[i], h) - i -= 1 + i -= 1 end h end diff --git a/test/intset.jl b/test/intset.jl index a3510791869d2..559d6a7e52459 100644 --- a/test/intset.jl +++ b/test/intset.jl @@ -7,12 +7,12 @@ s = IntSet(data_in) data_out = collect(s) @test all(map(d->in(d,data_out), data_in)) - @test length(data_out) == length(data_in) + @test length(data_out) === length(data_in) end @testset "eltype, similar" begin - @test eltype(IntSet()) === Int64 - @test eltype(IntSet) === Int64 + @test eltype(IntSet()) === Int + @test eltype(IntSet) === Int @test isequal(similar(IntSet([1,2,3])), IntSet()) end @@ -24,14 +24,14 @@ end @testset "in, hashing" begin s = IntSet([1,2,10,20,200,300,1000,10000,10002]) - @test last(s) == 10002 - @test first(s) == 1 - @test length(s) == 9 - @test pop!(s) == 10002 + @test last(s) === 10002 + @test first(s) === 1 + @test length(s) === 9 + @test pop!(s) === 10002 @test_throws KeyError pop!(s, -1) - @test length(s) == 8 - @test shift!(s) == 1 - @test length(s) == 7 + @test length(s) === 8 + @test shift!(s) === 1 + @test length(s) === 7 @test !in(0,s) @test !in(1,s) @test in(2,s) @@ -43,10 +43,13 @@ end @test_throws ArgumentError last(IntSet()) t = copy(s) sizehint!(t, 20000) #check that hash does not depend on size of internal storage - @test hash(s) == hash(t) + @test hash(s) === hash(t) push!(t, 20000) - pop!(t, 20000) - @test hash(s) == hash(t) + @test 20000 in t + sizehint!(t, 200) # ensure that sizehint!'ing a small amount isn't destructive + @test 20000 in t + @test pop!(t, 20000) === 20000 + @test hash(s) === hash(t) # Ensure empty chunks don't affect hash @test hash(IntSet([1])) != hash(IntSet([17])) @test hash(IntSet([1])) != hash(IntSet([33])) @@ -60,21 +63,21 @@ end # # issue #8570 # This requires 2^29 bytes of storage, which is too much for a simple test -# s = IntSet(2^32) -# @test length(s) == 1 +# s = IntSet(typemax(Int32)) +# @test length(s) === 1 # for b in s; b; end @testset "union!, symdiff!" begin i = IntSet([1, 2, 3]) union!(i, [1, 2]) - @test length(i) == 3 + @test length(i) === 3 union!(i, [3, 4, 5]) - @test length(i) == 5 + @test length(i) === 5 @test_throws KeyError pop!(i, 10) empty!(i) - @test length(i) == 0 + @test length(i) === 0 @test_throws ArgumentError symdiff!(i, -3) @test symdiff!(i, 3) == IntSet([3]) @@ -144,10 +147,10 @@ end push!(s, 100) @test pop!(s, 100) == 100 push!(s, 1:2:10...) - @test pop!(s) == 9 - @test pop!(s) == 7 - @test shift!(s) == 1 - @test shift!(s) == 3 + @test pop!(s) === 9 + @test pop!(s) === 7 + @test shift!(s) === 1 + @test shift!(s) === 3 @test collect(s) == [5] empty!(s) @test isempty(s) @@ -258,23 +261,23 @@ end @test 100 in s @test !(101 in s) @test !(1000 in s) - @test first(s) == 1 - @test last(s) == 100 + @test first(s) === 1 + @test last(s) === 100 @test s == IntSet([1, 2, 100]) push!(s, 1000) @test [i for i in s] == [1, 2, 100, 1000] - @test pop!(s) == 1000 + @test pop!(s) === 1000 @test s == IntSet([1, 2, 100]) - @test hash(s) == hash(IntSet([1, 2, 100])) + @test hash(s) === hash(IntSet([1, 2, 100])) b = 1:1000 s = IntSet(b) @test collect(s) == collect(b) - @test length(s) == length(b) - @test pop!(s, 100) == 100 + @test length(s) === length(b) + @test pop!(s, 100) === 100 @test collect(s) == [1:99; 101:1000] @test_throws KeyError pop!(s, 100) @test_throws KeyError pop!(s, 0) - @test pop!(s, 100, 0) == 0 - @test pop!(s, 99, 0) == 99 + @test pop!(s, 100, 0) === 0 + @test pop!(s, 99, 0) === 99 end From bb78c47de050f5f2d0bbe1f40cb5fa565b380657 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Sun, 5 Feb 2017 22:28:39 -0500 Subject: [PATCH 3/4] Remove no-longer needed C bitvector functions --- src/support/bitvector.c | 141 ---------------------------------------- src/support/bitvector.h | 7 -- 2 files changed, 148 deletions(-) diff --git a/src/support/bitvector.c b/src/support/bitvector.c index 046216b5aca00..de13c8e285732 100644 --- a/src/support/bitvector.c +++ b/src/support/bitvector.c @@ -56,147 +56,6 @@ uint32_t bitvector_get(uint32_t *b, uint64_t n) return b[n>>5] & (1<<(n&31)); } -// a mask with n set lo or hi bits -#define lomask(n) (uint32_t)((((uint32_t)1)<<(n))-1) -#define ONES32 ((uint32_t)0xffffffff) - -#if defined(__INTEL_COMPILER) && !defined(__clang__) -#define count_bits(b) _popcnt32(b) -#else -STATIC_INLINE uint32_t count_bits(uint32_t b) -{ - b = b - ((b>>1)&0x55555555); - b = ((b>>2)&0x33333333) + (b&0x33333333); - b = ((b>>4)+b)&0x0f0f0f0f; - b += (b>>8); - b += (b>>16); - return b & 0x3f; - // here is the non-optimized version, for clarity: - /* - b = ((b>> 1)&0x55555555) + (b&0x55555555); - b = ((b>> 2)&0x33333333) + (b&0x33333333); - b = ((b>> 4)&0x0f0f0f0f) + (b&0x0f0f0f0f); - b = ((b>> 8)&0x00ff00ff) + (b&0x00ff00ff); - b = ((b>>16)&0x0000ffff) + (b&0x0000ffff); - return b & 0x3f; - */ -} -#endif - -static int ntz(uint32_t x) -{ - int n; - - if (x == 0) return 32; - n = 1; - if ((x & 0x0000FFFF) == 0) {n = n +16; x = x >>16;} - if ((x & 0x000000FF) == 0) {n = n + 8; x = x >> 8;} - if ((x & 0x0000000F) == 0) {n = n + 4; x = x >> 4;} - if ((x & 0x00000003) == 0) {n = n + 2; x = x >> 2;} - return n - (x & 1); -} - -// given a bitvector of n bits, starting at bit n0 find the next -// set bit, including n0. -// returns n if no set bits. -uint64_t bitvector_next(uint32_t *b, uint64_t n0, uint64_t n) -{ - if (n0 >= n) return n; - - uint32_t i = n0>>5; - uint32_t nb = n0&31; - uint32_t nw = (n+31)>>5; - uint32_t w; - - if (i < nw-1 || (n&31)==0) - w = b[i]>>nb; - else - w = (b[i]&lomask(n&31))>>nb; - if (w != 0) - return ntz(w)+n0; - if (i == nw-1) - return n; - i++; - while (i < nw-1) { - w = b[i]; - if (w != 0) { - return ntz(w) + (((uint64_t)i)<<5); - } - i++; - } - w = b[i]; - nb = n&31; - i = ntz(w); - if (nb == 0) - return i + (n-32); - if (i >= nb) - return n; - return i + (n-nb); -} - -uint64_t bitvector_count(uint32_t *b, uint64_t offs, uint64_t nbits) -{ - size_t i, nw; - uint32_t ntail; - uint64_t ans; - - if (nbits == 0) return 0; - nw = (offs+nbits+31)>>5; - - if (nw == 1) { - if (nbits == 32) - return count_bits(b[0] & (ONES32<>offs); // first end cap - - for(i=1; i < nw-1; i++) { - ans += count_bits(b[i]); - } - - ntail = (offs+nbits)&31; - ans += count_bits(b[i]&(ntail>0?lomask(ntail):ONES32)); // last end cap - - return ans; -} - -uint32_t bitvector_any1(uint32_t *b, uint64_t offs, uint64_t nbits) -{ - size_t i; - uint32_t nw, tail; - uint32_t mask; - - if (nbits == 0) return 0; - nw = (offs+nbits+31)>>5; - - if (nw == 1) { - if (nbits == 32) - mask = (ONES32< Date: Mon, 6 Feb 2017 00:44:44 -0500 Subject: [PATCH 4/4] Remove TypeVar in _matched_map! --- base/intset.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/intset.jl b/base/intset.jl index 8c25556966914..cecbb0f2a365d 100644 --- a/base/intset.jl +++ b/base/intset.jl @@ -39,7 +39,7 @@ end # An internal function that takes a pure function `f` and maps across two BitArrays # allowing the lengths to be different and altering b1 with the result -function _matched_map!{F}(f::F, b1::BitArray, b2::BitArray) +function _matched_map!(f, b1::BitArray, b2::BitArray) l1, l2 = length(b1), length(b2) if l1 == l2 map!(f, b1, b1, b2)