diff --git a/base/coreimg.jl b/base/coreimg.jl index c54633d8e2d71..e3c9690bfdee0 100644 --- a/base/coreimg.jl +++ b/base/coreimg.jl @@ -57,6 +57,7 @@ include("functors.jl") include("reduce.jl") ## core structures +include("bitarray.jl") include("intset.jl") include("dict.jl") include("iterator.jl") diff --git a/base/hashing.jl b/base/hashing.jl index babc03efb4bb6..85cfe11fa299b 100644 --- a/base/hashing.jl +++ b/base/hashing.jl @@ -62,7 +62,6 @@ else hash(x::Expr, h::UInt) = hash(x.args, hash(x.head, h + 0x96d26dc6)) end - # hashing ranges by component at worst leads to collisions for very similar ranges const hashr_seed = UInt === UInt64 ? 0x80707b6821b70087 : 0x21b70087 function hash(r::Range, h::UInt) diff --git a/base/intset.jl b/base/intset.jl index 64192c9e6cfcb..3b0a05d364880 100644 --- a/base/intset.jl +++ b/base/intset.jl @@ -1,307 +1,236 @@ # This file is a part of Julia. License is MIT: http://julialang.org/license type IntSet - bits::Array{UInt32,1} - limit::Int - fill1s::Bool - - IntSet() = new(zeros(UInt32,256>>>5), 256, false) + bits::BitVector + inverse::Bool + IntSet() = new(fill!(BitVector(256), false), false) end -IntSet(itr) = (s=IntSet(); for a in itr; push!(s,a); end; s) +IntSet(itr) = union!(IntSet(), itr) -eltype(::Type{IntSet}) = Int64 similar(s::IntSet) = IntSet() - -function show(io::IO, s::IntSet) - print(io, "IntSet([") - first = true - for n in s - if n > s.limit - break - end - if !first - print(io, ", ") - end - print(io, n) - first = false - end - if s.fill1s - print(io, ", ..., ", typemax(Int)-1) - end - print(io, "])") -end - -copy(s::IntSet) = union!(IntSet(), s) - -function sizehint!(s::IntSet, top::Integer) - if top >= s.limit - lim = ((top+31) & -32)>>>5 - olsz = length(s.bits) - if olsz < lim - resize!(s.bits, lim) - fill = s.fill1s ? 
UInt32(-1) : UInt32(0) - for i=(olsz+1):lim; s.bits[i] = fill; end - end - s.limit = top - end +copy(s1::IntSet) = copy!(IntSet(), s1) +function copy!(to::IntSet, from::IntSet) + resize!(to.bits, length(from.bits)) + copy!(to.bits, from.bits) + to.inverse = from.inverse + to +end +eltype(::Type{IntSet}) = Int # dispatch on the type, as the replaced definition did +sizehint!(s::IntSet, n::Integer) = (n+1 > length(s.bits) && _resize0!(s.bits, n+1); s) # grow only: shrinking the bits would change the set's members + +# An internal function for setting the inclusion bit for a given integer n >= 0 +@inline function _setint!(s::IntSet, n::Integer, b::Bool) + idx = n+1 + if idx > length(s.bits) + !b && return s # setting a bit to zero outside the set's bits is a no-op + newlen = idx + idx>>1 # This operation may overflow; we want saturation + _resize0!(s.bits, ifelse(newlen<0, typemax(Int), newlen)) + end + unsafe_setindex!(s.bits, b, idx) # Use @inbounds once available s end -function push!(s::IntSet, n::Integer) - if n >= s.limit - if s.fill1s - return s - else - lim = Int(n + div(n,2)) - sizehint!(s, lim) - end - elseif n < 0 - throw(ArgumentError("IntSet elements cannot be negative")) - end - s.bits[n>>5 + 1] |= (UInt32(1)<<(n&31)) - return s +# An internal function to resize a bitarray and ensure the newly allocated +# elements are zeroed (will become unnecessary if this behavior changes) +@inline function _resize0!(b::BitVector, newlen::Integer) + len = length(b) + resize!(b, newlen) + len < newlen && unsafe_setindex!(b, false, len+1:newlen) # resize! 
gives dirty memory + b end -function union!(s::IntSet, ns) - for n in ns - push!(s, n) - end - return s +# An internal function that resizes a bitarray so it matches the length newlen +# Returns a bitvector of the removed elements (empty if none were removed) +function _matchlength!(b::BitArray, newlen::Integer) + len = length(b) + len > newlen && return splice!(b, newlen+1:len) + len < newlen && _resize0!(b, newlen) + return BitVector(0) end -function pop!(s::IntSet, n::Integer, deflt) - if n >= s.limit - if s.fill1s - lim = Int(n + div(n,2)) - sizehint!(s, lim) - else - return deflt - end - end - mask = UInt32(1)<<(n&31) - idx = n>>5 + 1 - b = s.bits[idx] - if (b&mask)==0; return deflt; end - s.bits[idx] = b&~mask - return n +const _intset_bounds_err_msg = "elements of IntSet must be between 0 and typemax(Int)-1" + +function push!(s::IntSet, n::Integer) + 0 <= n < typemax(Int) || throw(ArgumentError(_intset_bounds_err_msg)) + _setint!(s, n, !s.inverse) end +push!(s::IntSet, ns::Integer...) = (for n in ns; push!(s, n); end; s) +function pop!(s::IntSet) + s.inverse && throw(ArgumentError("cannot pop the last element of complement IntSet")) + pop!(s, last(s)) +end function pop!(s::IntSet, n::Integer) - if pop!(s, n, n+1) == n+1 - throw(KeyError(n)) - end - return n + 0 <= n < typemax(Int) || throw(ArgumentError(_intset_bounds_err_msg)) + n in s ? (_delete!(s, n); n) : throw(KeyError(n)) end +function pop!(s::IntSet, n::Integer, default) + 0 <= n < typemax(Int) || throw(ArgumentError(_intset_bounds_err_msg)) + n in s ? (_delete!(s, n); n) : default +end +function pop!(f::Function, s::IntSet, n::Integer) + 0 <= n < typemax(Int) || throw(ArgumentError(_intset_bounds_err_msg)) + n in s ? (_delete!(s, n); n) : f() +end +_delete!(s::IntSet, n::Integer) = _setint!(s, n, s.inverse) +delete!(s::IntSet, n::Integer) = 0 <= n < typemax(Int) ? _delete!(s, n) : s # out-of-range n is never a member; n+1 would wrap in _setint! for typemax(Int) +shift!(s::IntSet) = pop!(s, first(s)) -# TODO: what should happen when fill1s == true? 
-pop!(s::IntSet) = pop!(s, last(s)) +empty!(s::IntSet) = (fill!(s.bits, false); s.inverse = false; s) +isempty(s::IntSet) = s.inverse ? length(s.bits) == typemax(Int) && all(s.bits) : !any(s.bits) -function delete!(s::IntSet, n::Integer) - pop!(s, n, n) - return s +# Mathematical set functions: union!, intersect!, setdiff!, symdiff! +# When applied to two intsets, these all have a similar form: +# - Reshape s1 to match s2, occasionally grabbing the bits that were removed +# - Use map to apply some bitwise operation across the entire bitvector +# - These operations use functors to work on the bitvector chunks, so are +# very efficient... but a little untraditional. E.g., (p > q) => (p & ~q) +# - If needed, append the removed bits back to s1 or invert the array + +union(s::IntSet, ns) = union!(copy(s), ns) +union!(s::IntSet, ns) = (for n in ns; push!(s, n); end; s) +function union!(s1::IntSet, s2::IntSet) + l = length(s2.bits) + if !s1.inverse & !s2.inverse; e = _matchlength!(s1.bits, l); map!(|, s1.bits, s1.bits, s2.bits); append!(s1.bits, e) + elseif s1.inverse & !s2.inverse; e = _matchlength!(s1.bits, l); map!(>, s1.bits, s1.bits, s2.bits); append!(s1.bits, e) + elseif !s1.inverse & s2.inverse; _resize0!(s1.bits, l); map!(<, s1.bits, s1.bits, s2.bits); s1.inverse = true + else #= s1.inverse & s2.inverse=# _resize0!(s1.bits, l); map!(&, s1.bits, s1.bits, s2.bits) + end + s1 end -function setdiff!(s::IntSet, ns) +intersect(s1::IntSet) = copy(s1) +intersect(s1::IntSet, ss...) = intersect(s1, intersect(ss...)) +function intersect(s1::IntSet, ns) + s = IntSet() for n in ns - delete!(s, n) + n in s1 && push!(s, n) end - return s + s end - -setdiff(a::IntSet, b::IntSet) = setdiff!(copy(a),b) -symdiff(s1::IntSet, s2::IntSet) = - (s1.limit >= s2.limit ? 
symdiff!(copy(s1), s2) : symdiff!(copy(s2), s1)) - -function empty!(s::IntSet) - s.bits[:] = 0 - return s +intersect(s1::IntSet, s2::IntSet) = intersect!(copy(s1), s2) +function intersect!(s1::IntSet, s2::IntSet) + l = length(s2.bits) + if !s1.inverse & !s2.inverse; _resize0!(s1.bits, l); map!(&, s1.bits, s1.bits, s2.bits) + elseif s1.inverse & !s2.inverse; _resize0!(s1.bits, l); map!(<, s1.bits, s1.bits, s2.bits); s1.inverse = false + elseif !s1.inverse & s2.inverse; e = _matchlength!(s1.bits, l); map!(>, s1.bits, s1.bits, s2.bits); append!(s1.bits, e) + else #= s1.inverse & s2.inverse=# e = _matchlength!(s1.bits, l); map!(|, s1.bits, s1.bits, s2.bits); append!(s1.bits, e) + end + s1 end -function symdiff!(s::IntSet, n::Integer) - if n >= s.limit - lim = Int(n + dim(n,2)) - sizehint!(s, lim) - elseif n < 0 - throw(ArgumentError("IntSet elements cannot be negative")) +setdiff(s::IntSet, ns) = setdiff!(copy(s), ns) +setdiff!(s::IntSet, ns) = (for n in ns; delete!(s, n); end; s) # delete! skips negative n; raw _delete! would index the bits out of bounds +function setdiff!(s1::IntSet, s2::IntSet) + l = length(s2.bits) + if !s1.inverse & !s2.inverse; e = _matchlength!(s1.bits, l); map!(>, s1.bits, s1.bits, s2.bits); append!(s1.bits, e) + elseif s1.inverse & !s2.inverse; e = _matchlength!(s1.bits, l); map!(|, s1.bits, s1.bits, s2.bits); append!(s1.bits, e) + elseif !s1.inverse & s2.inverse; _resize0!(s1.bits, l); map!(&, s1.bits, s1.bits, s2.bits) + else #= s1.inverse & s2.inverse=# _resize0!(s1.bits, l); map!(<, s1.bits, s1.bits, s2.bits); s1.inverse = false end - s.bits[n>>5 + 1] $= (UInt32(1)<<(n&31)) - return s + s1 end -function symdiff!(s::IntSet, ns) - for n in ns - symdiff!(s, n) - end - return s +symdiff(s::IntSet, ns) = symdiff!(copy(s), ns) +symdiff!(s::IntSet, ns) = (for n in ns; symdiff!(s, n); end; s) +function symdiff!(s::IntSet, n::Integer) + 0 <= n < typemax(Int) || throw(ArgumentError(_intset_bounds_err_msg)) + val = (n in s) $ !s.inverse + _setint!(s, n, val) + s end - -function copy!(to::IntSet, from::IntSet) - empty!(to) - 
union!(to, from) +function symdiff!(s1::IntSet, s2::IntSet) + e = _matchlength!(s1.bits, length(s2.bits)) + map!($, s1.bits, s1.bits, s2.bits) + s2.inverse && (s1.inverse = !s1.inverse) + append!(s1.bits, e) + s1 end function in(n::Integer, s::IntSet) - if n >= s.limit - # max IntSet length is typemax(Int), so highest possible element is - # typemax(Int)-1 - s.fill1s && n >= 0 && n < typemax(Int) - elseif n < 0 - return false + idx = n+1 + if 1 <= idx <= length(s.bits) + unsafe_getindex(s.bits, idx) != s.inverse else - (s.bits[n>>5 + 1] & (UInt32(1)<<(n&31))) != 0 + ifelse((idx <= 0) | (idx > typemax(Int)), false, s.inverse) end end -start(s::IntSet) = Int64(0) -done(s::IntSet, i) = (!s.fill1s && next(s,i)[1] >= s.limit) || i == typemax(Int) -function next(s::IntSet, i) - if i >= s.limit - n = Int64(i) +# Use the next-set index as the state to prevent looking it up again in done +start(s::IntSet) = next(s, 0)[2] +function next(s::IntSet, i, invert=false) + if s.inverse $ invert + # i+1 could rollover causing a BoundsError in findnext/findnextnot + nextidx = i == typemax(Int) ? 0 : findnextnot(s.bits, i+1) + # Extend indices beyond the length of the bits since it is inverted + nextidx = nextidx == 0 ? max(i, length(s.bits))+1 : nextidx else - n = Int64(ccall(:bitvector_next, UInt64, (Ptr{UInt32}, UInt64, UInt64), s.bits, i, s.limit)) + nextidx = i == typemax(Int) ? 
0 : findnext(s.bits, i+1) end - (n, n+1) + (i-1, nextidx) end +done(s::IntSet, i) = i <= 0 -isempty(s::IntSet) = - !s.fill1s && ccall(:bitvector_any1, UInt32, (Ptr{UInt32}, UInt64, UInt64), s.bits, 0, s.limit)==0 - -function first(s::IntSet) - n = next(s,0)[1] - if n >= s.limit - throw(ArgumentError("set must be non-empty")) - end - return n -end - -shift!(s::IntSet) = pop!(s, first(s)) +# Nextnot iterates through elements *not* in the set +nextnot(s::IntSet, i) = next(s, i, true) function last(s::IntSet) - if !s.fill1s - for i = length(s.bits):-1:1 - w = s.bits[i] - if w != 0 - return (i-1)<<5 + (31-leading_zeros(w)) - end - end - end - throw(ArgumentError("set has no last element")) -end - -length(s::IntSet) = Int(ccall(:bitvector_count, UInt64, (Ptr{UInt32}, UInt64, UInt64), s.bits, 0, s.limit)) + - (s.fill1s ? typemax(Int) - s.limit : 0) - - -# Math functions -function union!(s::IntSet, s2::IntSet) - if s2.limit > s.limit - sizehint!(s, s2.limit) - end - lim = length(s2.bits) - for n = 1:lim - s.bits[n] |= s2.bits[n] - end - if s2.fill1s - for n=lim+1:length(s.bits) - s.bits[n] = UInt32(-1) - end + l = length(s.bits) + if s.inverse + idx = l < typemax(Int) ? typemax(Int) : findprevnot(s.bits, l) + else + idx = findprev(s.bits, l) end - s.fill1s |= s2.fill1s - s + idx == 0 ? throw(ArgumentError("collection must be non-empty")) : idx - 1 end -union(s1::IntSet) = copy(s1) -union(s1::IntSet, s2::IntSet) = (s1.limit >= s2.limit ? union!(copy(s1), s2) : union!(copy(s2), s1)) -union(s1::IntSet, ss::IntSet...) = union(s1, union(ss...)) - -function intersect!(s::IntSet, s2::IntSet) - if s2.limit > s.limit - sizehint!(s, s2.limit) - end - lim = length(s2.bits) - for n = 1:lim - s.bits[n] &= s2.bits[n] - end - if !s2.fill1s - for n=lim+1:length(s.bits) - s.bits[n] = UInt32(0) - end - end - s.fill1s &= s2.fill1s - s -end - -intersect(s1::IntSet) = copy(s1) -intersect(s1::IntSet, s2::IntSet) = - (s1.limit >= s2.limit ? 
intersect!(copy(s1), s2) : intersect!(copy(s2), s1)) -intersect(s1::IntSet, ss::IntSet...) = intersect(s1, intersect(ss...)) - -function complement!(s::IntSet) - for n = 1:length(s.bits) - s.bits[n] = ~s.bits[n] - end - s.fill1s = !s.fill1s - s -end +length(s::IntSet) = (n = sum(s.bits); ifelse(s.inverse, typemax(Int) - n, n)) complement(s::IntSet) = complement!(copy(s)) +complement!(s::IntSet) = (s.inverse = !s.inverse; s) -function symdiff!(s::IntSet, s2::IntSet) - if s2.limit > s.limit - sizehint!(s, s2.limit) - end - lim = length(s2.bits) - for n = 1:lim - s.bits[n] $= s2.bits[n] - end - if s2.fill1s - for n=lim+1:length(s.bits) - s.bits[n] = ~s.bits[n] - end +function show(io::IO, s::IntSet) + print(io, "IntSet([") + first = true + for n in s + if s.inverse && n > 2 && done(s, nextnot(s, n-3)[2]) + print(io, ", ..., ", typemax(Int)-1) + break + end + !first && print(io, ", ") + print(io, n) + first = false end - s.fill1s $= s2.fill1s - s + print(io, "])") end function ==(s1::IntSet, s2::IntSet) - if s1.fill1s != s2.fill1s - return false - end - lim1 = length(s1.bits) - lim2 = length(s2.bits) - for i = 1:min(lim1,lim2) - if s1.bits[i] != s2.bits[i] - return false - end - end - filln = s1.fill1s ? UInt32(-1) : UInt32(0) - if lim1 > lim2 - for i = lim2:lim1 - if s1.bits[i] != filln - return false - end - end + l1 = length(s1.bits) + l2 = length(s2.bits) + l1 < l2 && return ==(s2, s1) # Swap so s1 is always equal-length or longer + + # Try to do this without allocating memory or checking bit-by-bit + if s1.inverse == s2.inverse + # If the lengths are the same, simply punt to bitarray comparison + l1 == l2 && return s1.bits == s2.bits + # Otherwise check the last bit. If equal, we only need to check up to l2 + return findprev(s1.bits, l1) == findprev(s2.bits, l2) && + unsafe_getindex(s1.bits, 1:l2) == s2.bits else - for i = lim1+1:lim2 - if s2.bits[i] != filln - return false - end - end + # one complement, one not. 
Could feasibly be true on 32 bit machines + # Only if all non-overlapping bits are set and overlaps are inverted + return l1 == typemax(Int) && + map!(!, unsafe_getindex(s1.bits, 1:l2)) == s2.bits && + (l1 == l2 || all(unsafe_getindex(s1.bits, l2+1:l1))) end - return true end const hashis_seed = UInt === UInt64 ? 0x88989f1fc7dea67d : 0xc7dea67d function hash(s::IntSet, h::UInt) - h += hashis_seed - h += hash(s.fill1s) - filln = s.fill1s ? ~zero(eltype(s.bits)) : zero(eltype(s.bits)) - for x in s.bits - if x != filln - h = hash(x, h) - end - end - return h + # Only hash the bits array up to the last-set bit to prevent extra empty + # bits from changing the hash result + l = findprev(s.bits, length(s.bits)) + hash(unsafe_getindex(s.bits, 1:l), h) $ hash(s.inverse) $ hashis_seed end issubset(a::IntSet, b::IntSet) = isequal(a, intersect(a,b)) diff --git a/test/choosetests.jl b/test/choosetests.jl index 8b354c70d82f1..df516340cdb71 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -27,7 +27,7 @@ function choosetests(choices = []) "floatapprox", "readdlm", "reflection", "regex", "float16", "combinatorics", "sysinfo", "rounding", "ranges", "mod2pi", "euler", "show", "lineedit", "replcompletions", "repl", - "replutil", "sets", "test", "goto", "llvmcall", "grisu", + "replutil", "sets", "intset", "test", "goto", "llvmcall", "grisu", "nullable", "meta", "profile", "libgit2", "docs", "markdown", "base64", "parser", "serialize", "functors", "char", "misc", "enums", "cmdlineargs", "i18n" diff --git a/test/intset.jl b/test/intset.jl new file mode 100644 index 0000000000000..d0875715da1fb --- /dev/null +++ b/test/intset.jl @@ -0,0 +1,283 @@ +## IntSet + +# Construction, collect +data_in = (1,5,100) +s = IntSet(data_in) +data_out = collect(s) +@test all(map(d->in(d,data_out), data_in)) +@test length(data_out) == length(data_in) + +# eltype, similar +@test is(eltype(IntSet()), Int) +@test isequal(similar(IntSet([1,2,3])), IntSet()) + +# show +@test sprint(show, IntSet()) 
== "IntSet([])" +@test sprint(show, IntSet([1,2,3])) == "IntSet([1, 2, 3])" +@test contains(sprint(show, complement(IntSet())), "...,") + + +s = IntSet([0,1,10,20,200,300,1000,10000,10002]) +@test last(s) == 10002 +@test first(s) == 0 +@test length(s) == 9 +@test pop!(s) == 10002 +@test length(s) == 8 +@test shift!(s) == 0 +@test length(s) == 7 +@test !in(0,s) +@test !in(10002,s) +@test in(10000,s) +@test_throws ArgumentError first(IntSet()) +@test_throws ArgumentError last(IntSet()) +t = copy(s) +sizehint!(t, 20000) #check that hash does not depend on size of internal Array{UInt32, 1} +@test hash(s) == hash(t) +@test hash(complement(s)) == hash(complement(t)) + +@test setdiff(IntSet([1, 2, 3, 4]), IntSet([2, 4, 5, 6])) == IntSet([1, 3]) +@test symdiff(IntSet([1, 2, 3, 4]), IntSet([2, 4, 5, 6])) == IntSet([1, 3, 5, 6]) + +s2 = IntSet([1, 2, 3, 4]) +setdiff!(s2, IntSet([2, 4, 5, 6])) + +@test s2 == IntSet([1, 3]) + +# issue #7851 +@test_throws ArgumentError IntSet(-1) +@test !(-1 in IntSet(0:10)) + +# # issue #8570 +# This requires 2^29 bytes of storage, which is too much for a simple test +# s = IntSet(2^32) +# @test length(s) == 1 +# for b in s; b; end + +# Copy, copy!, similar +s1 = IntSet([1,2,3]) +s2 = similar(s1) +copy!(s2, s1) +s3 = copy(s2) +@test s3 == s2 == s1 +@test collect(s3) == collect(s2) == [1,2,3] + +c1 = complement!(IntSet()) +pop!(c1, 1) +pop!(c1, 2) +pop!(c1, 3) +c2 = similar(c1) +copy!(c2, c1) +c3 = copy(c2) +c4 = complement(s1) +@test c1 == c2 == c3 == c4 +@test c4 === sizehint!(c4, 100) +@test c1 == c4 + +# Push, union +s1 = IntSet() +@test_throws ArgumentError push!(s1, -1) +push!(s1, 1, 10, 100, 1000) +@test collect(s1) == [1, 10, 100, 1000] +push!(s1, 606) +@test collect(s1) == [1, 10, 100, 606, 1000] +s2 = IntSet() +@test s2 === union!(s2, s1) +s3 = IntSet([1, 10, 100]) +union!(s3, [1, 606, 1000]) +s4 = union(IntSet([1, 100, 1000]), IntSet([10, 100, 606])) +@test s1 == s2 == s3 == s4 + +c1 = complement(s1) +@test !(1 in c1) +push!(c1, 1) 
+@test 1 in c1 +push!(c1, 10, 100, 10) +@test collect(complement(c1)) == [606, 1000] +c2 = complement(IntSet([606, 1000])) +@test c2 === union!(c2, c1) +c3 = union!(complement(IntSet([10, 606, 1000])), complement(IntSet([1, 606, 1000, 2000]))) +@test c3 == union(complement(IntSet([1, 606, 1000, 2000])), complement(IntSet([10, 606, 1000]))) +c4 = union!(complement(IntSet([1, 10, 606, 1000, 1001])), complement(IntSet([606, 1000]))) +@test c4 == union(complement(IntSet([606, 1000])), complement(IntSet([1, 10, 606, 1000, 1001]))) +c5 = union!(complement(IntSet([10, 606, 1000])), IntSet([0, 10, 20, 30])) +@test c5 == union(IntSet([0, 10, 20, 30]), complement(IntSet([10, 606, 1000]))) +c6 = union!(complement(IntSet([10, 606, 1000])), IntSet([10, 4000, 3])) +@test c6 == union(IntSet([10, 4000, 3]), complement(IntSet([10, 606, 1000]))) +@test c1 == c2 == c3 == c4 == c5 == c6 + +## pop!, delete! +s = IntSet(1:2:10) +@test pop!(s, 1) == 1 +@test !(1 in s) +@test_throws KeyError pop!(s, 1) +@test_throws ArgumentError pop!(s, -1) +@test_throws ArgumentError pop!(s, -1, 1) +@test_throws ArgumentError pop!(()->error(), s, -1) +@test pop!(s, 1, 0) == 0 +@test s === delete!(s, 1) +for i in s; pop!(s, i); end +@test isempty(s) +x = 0 +@test 1 == pop!(()->(global x; x+=1), s, 100) +@test x == 1 +push!(s, 100) +@test pop!(()->error(), s, 100) == 100 +push!(s, 1:2:10...) 
+@test pop!(s) == 9 +@test pop!(s) == 7 +@test shift!(s) == 1 +@test shift!(s) == 3 +@test collect(s) == [5] +empty!(s) +@test isempty(s) + +c = complement(IntSet()) +@test pop!(c, 1) == 1 +@test !(1 in c) +@test_throws KeyError pop!(c, 1) +@test_throws ArgumentError pop!(c, -1) +@test_throws ArgumentError pop!(c, -1, 1) +@test_throws ArgumentError pop!(()->error(), c, -1) +@test pop!(c, 1, 0) == 0 +@test c === delete!(c, 1) +@test shift!(c) == 0 +@test shift!(c) == 2 +@test_throws ArgumentError pop!(c) +@test collect(complement(c)) == [0,1,2] +@test empty!(c) == IntSet() + +## Intersect +@test isempty(intersect(IntSet())) +@test isempty(intersect(IntSet(1:10), IntSet())) +@test isempty(intersect(IntSet(), IntSet(1:10))) +@test isempty(intersect(IntSet(), complement(IntSet()))) +@test isempty(intersect(IntSet(), complement(IntSet(1:10)))) +@test isempty(intersect(complement(IntSet()), IntSet())) +@test isempty(intersect(complement(IntSet(1:10)), IntSet())) + +@test intersect(IntSet([1,2,3])) == IntSet([1,2,3]) +@test intersect(complement!(IntSet()), IntSet(1)) == + intersect(IntSet(1), complement!(IntSet())) == IntSet(1) + +@test intersect(IntSet(0:7), IntSet(3:10)) == + intersect(IntSet(3:10), IntSet(0:7)) == IntSet(3:7) +@test intersect(complement(IntSet([0:2; 11:16])), IntSet(0:7)) == + intersect(IntSet(0:7), complement(IntSet([0:2; 11:16]))) == IntSet(3:7) + +@test intersect(complement(IntSet(5:12)), complement(IntSet(7:10))) == + intersect(complement(IntSet(7:10)), complement(IntSet(5:12))) == complement(IntSet(5:12)) + +@test intersect(IntSet(0:10), IntSet(1:4), 0:5, [1,2,10]) == IntSet(1:2) + +## Setdiff +s1 = IntSet(1:100) +setdiff!(s1, IntSet(1:2:100)) +s2 = setdiff(IntSet(1:100), IntSet(1:2:100)) +@test s1 == s2 == IntSet(2:2:100) +@test collect(s1) == collect(2:2:100) + +s1 = IntSet(1:10) +s2 = complement(IntSet(3:5)) +@test setdiff(s1, s2) == setdiff(s1, [0:2; 6:100]) == IntSet(3:5) +@test isempty(setdiff(complement(IntSet()), complement(IntSet()))) 
+@test setdiff(complement(IntSet()), complement(IntSet(3:5))) == IntSet(3:5) +@test setdiff(complement(IntSet(1:5)), complement(IntSet(3:10))) == IntSet([6, 7, 8, 9, 10]) +@test setdiff(complement(IntSet(2:2:10)), IntSet(1:5)) == complement(IntSet([1:5; 6:2:10])) +@test setdiff!(complement(IntSet()), complement(IntSet())) == IntSet() +@test setdiff!(complement(IntSet(0:2:10)), complement(IntSet(0:10))) == IntSet(1:2:9) + +## Symdiff +@test symdiff(IntSet([1, 2, 3, 4]), IntSet([2, 4, 5, 6])) == + symdiff(IntSet([2, 4, 5, 6]), IntSet([1, 2, 3, 4])) == + symdiff(IntSet([1, 2, 3, 4]), [2, 4, 5, 6]) == + symdiff(IntSet([2, 4, 5, 6]), [1, 2, 3, 4]) == IntSet([1, 3, 5, 6]) +@test symdiff(complement(IntSet()), IntSet(2:3)) == + symdiff(IntSet(2:3), complement(IntSet())) == + symdiff(complement(IntSet()), 2:3) ==complement(IntSet(2:3)) + +@test symdiff(complement(IntSet(2:7)), IntSet(5:10)) == + symdiff(IntSet(5:10), complement(IntSet(2:7))) == complement(IntSet([2:4; 8:10])) +@test symdiff(complement(IntSet(3:7)), complement(IntSet(5:10))) == IntSet([3:4; 8:10]) + +## Subsets, equality +@test IntSet(2:2:10) < IntSet(1:10) +@test !(IntSet(2:2:10) < IntSet(2:2:10)) +@test IntSet(2:2:10) <= IntSet(2:10) +@test IntSet(2:2:10) <= IntSet(2:2:10) +@test IntSet(1) < complement!(IntSet()) +@test IntSet(1) <= complement!(IntSet()) +@test !(IntSet(1) < complement!(IntSet(1))) + +# Test logic against Set +p = IntSet([0,1,4,5]) +resize!(p.bits, 6) +q = IntSet([0,2,4,6]) +resize!(q.bits, 8) +p′ = complement(p) +q′ = complement(q) +function collect10(itr) + r = eltype(itr)[] + for i in itr + i > 10 && break + push!(r, i) + end + r +end +a = Set(p) +b = Set(q) +a′ = Set(collect10(p′)) +b′ = Set(collect10(q′)) +for f in (union, intersect, setdiff, symdiff) + @test collect(f(p, p)) == sort(collect(f(a, a))) + @test collect(f(q, q)) == sort(collect(f(b, b))) + @test collect(f(p, q)) == sort(collect(f(a, b))) + @test collect(f(q, p)) == sort(collect(f(b, a))) + + @test collect10(f(p′, p)) == 
sort(collect(f(a′, a))) + @test collect10(f(q′, q)) == sort(collect(f(b′, b))) + @test collect10(f(p′, q)) == sort(collect(f(a′, b))) + @test collect10(f(q′, p)) == sort(collect(f(b′, a))) + + @test collect10(f(p, p′)) == sort(collect(f(a, a′))) + @test collect10(f(q, q′)) == sort(collect(f(b, b′))) + @test collect10(f(p, q′)) == sort(collect(f(a, b′))) + @test collect10(f(q, p′)) == sort(collect(f(b, a′))) + + @test collect10(f(p′, p′)) == sort(collect(f(a′, a′))) + @test collect10(f(q′, q′)) == sort(collect(f(b′, b′))) + @test collect10(f(p′, q′)) == sort(collect(f(a′, b′))) + @test collect10(f(q′, p′)) == sort(collect(f(b′, a′))) +end + +## Other +s = IntSet() +push!(s, 0, 2, 100) +@test 0 in s +@test !(1 in s) +@test 2 in s +@test 100 in s +@test !(101 in s) +@test !(1000 in s) +@test first(s) == 0 +@test last(s) == 100 +@test s == IntSet([0, 2, 100]) +push!(s, 1000) +@test [i for i in s] == [0, 2, 100, 1000] +@test pop!(s) == 1000 +@test s == IntSet([0, 2, 100]) +@test hash(s) == hash(IntSet([0, 2, 100])) + +b = 0:1000 +s = IntSet(b) +@test collect(s) == collect(b) +@test length(s) == length(b) +@test pop!(s, 100) == 100 +@test collect(s) == [0:99; 101:1000] +@test_throws KeyError pop!(s, 100) +@test pop!(s, 100, 0) == 0 +@test pop!(s, 99, 0) == 99 +@test pop!(()->1, s, 99) == 1 +@test pop!(()->1, s, 98) == 98 + +show(IOBuffer(), IntSet()) +show(IOBuffer(), complement(IntSet())) diff --git a/test/sets.jl b/test/sets.jl index 0169b99b02460..e3bf4ebf77b88 100644 --- a/test/sets.jl +++ b/test/sets.jl @@ -195,62 +195,3 @@ filter!(isodd, s) @test first(Set(2)) == 2 # ########## end of set tests ########## - -## IntSet - -# Construction, collect -data_in = (1,5,100) -s = IntSet(data_in) -data_out = collect(s) -@test all(map(d->in(d,data_out), data_in)) -@test length(data_out) == length(data_in) - -# eltype, similar -@test is(eltype(IntSet()), Int64) -@test isequal(similar(IntSet([1,2,3])), IntSet()) - -# show -@test sprint(show, IntSet()) == "IntSet([])" -@test 
sprint(show, IntSet([1,2,3])) == "IntSet([1, 2, 3])" -@test contains(sprint(show, complement(IntSet())), "...,") - - -s = IntSet([0,1,10,20,200,300,1000,10000,10002]) -@test last(s) == 10002 -@test first(s) == 0 -@test length(s) == 9 -@test pop!(s) == 10002 -@test length(s) == 8 -@test shift!(s) == 0 -@test length(s) == 7 -@test !in(0,s) -@test !in(10002,s) -@test in(10000,s) -@test_throws ArgumentError first(IntSet()) -@test_throws ArgumentError last(IntSet()) -t = copy(s) -sizehint!(t, 20000) #check that hash does not depend on size of internal Array{UInt32, 1} -@test hash(s) == hash(t) -@test hash(complement(s)) == hash(complement(t)) - -@test setdiff(IntSet([1, 2, 3, 4]), IntSet([2, 4, 5, 6])) == IntSet([1, 3]) -@test symdiff(IntSet([1, 2, 3, 4]), IntSet([2, 4, 5, 6])) == IntSet([1, 3, 5, 6]) - -s2 = IntSet([1, 2, 3, 4]) -setdiff!(s2, IntSet([2, 4, 5, 6])) - -@test s2 == IntSet([1, 3]) - -# == with last-bit set (groups.google.com/forum/#!topic/julia-users/vZNjiIEG_sY) -s = IntSet(255) -@test s == s - -# issue #7851 -@test_throws ArgumentError IntSet(-1) -@test !(-1 in IntSet(0:10)) - -# # issue #8570 -# This requires 2^29 bytes of storage, which is too much for a simple test -# s = IntSet(2^32) -# @test length(s) == 1 -# for b in s; b; end