Skip to content

Commit

Permalink
Make unique(f, itr) and unique!(f, itr) faster (#30286)
Browse files Browse the repository at this point in the history
* Make `unique(f, itr)` and `unique!(f, itr)` faster

Avoid creation of a `Set{Any}`.

* Fix unique! for resizable OffsetVector
  • Loading branch information
andyferris authored Dec 9, 2018
1 parent d7c3926 commit c2fb1dc
Showing 1 changed file with 58 additions and 17 deletions.
75 changes: 58 additions & 17 deletions base/set.jl
Original file line number Diff line number Diff line change
Expand Up @@ -167,15 +167,39 @@ julia> unique(x -> x^2, [1, -1, 3, -3, 4])
"""
function unique(f, C)
    # Elements of `C` are collected in iteration order; only the first element
    # producing each distinct value of `f` is kept.
    out = Vector{eltype(C)}()

    # Nothing to iterate: return the empty, correctly typed vector.
    s = iterate(C)
    if s === nothing
        return out
    end

    # Seed the `seen` set from the first key so that it is typed as
    # `Set{typeof(y)}` rather than `Set{Any}`.
    (x, i) = s
    y = f(x)
    seen = Set{typeof(y)}()
    push!(seen, y)
    push!(out, x)

    # Process the remaining elements, resuming from iteration state `i`.
    return _unique!(f, out, C, seen, i)
end

# Continue `unique(f, C)` from iteration state `i`.
#
# `out` holds the elements kept so far and `seen` the values of `f` already
# encountered. Each remaining element `x` of `C` is appended to `out` when
# `f(x)` is not yet in `seen`. Returns `out`.
function _unique!(f, out::AbstractVector, C, seen::Set, i)
    s = iterate(C, i)
    while s !== nothing
        (x, i) = s
        y = f(x)
        if y ∉ seen
            push!(out, x)
            if y isa eltype(seen)
                push!(seen, y)
            else
                # `y` does not fit the current element type of `seen`: build a
                # set with a wider element type and restart the loop with it,
                # resuming from the current iteration state.
                seen2 = convert(Set{promote_typejoin(eltype(seen), typeof(y))}, seen)
                push!(seen2, y)
                return _unique!(f, out, C, seen2, i)
            end
        end
        s = iterate(C, i)
    end

    return out
end

"""
Expand Down Expand Up @@ -208,22 +232,39 @@ julia> unique!(iseven, [2, 3, 5, 7, 9])
```
"""
function unique!(f, A::AbstractVector)
    # With zero or one element there is nothing to remove.
    if length(A) <= 1
        return A
    end

    # The first element is always kept. Its key seeds the `seen` set so that
    # the set is typed as `Set{typeof(y)}` rather than `Set{Any}`.
    i = firstindex(A)
    x = @inbounds A[i]
    y = f(x)
    seen = Set{typeof(y)}()
    push!(seen, y)

    # `i` is the index of the last kept element; scanning resumes at `i + 1`.
    return _unique!(f, A, seen, i, i+1)
end

# Continue `unique!(f, A)` in place.
#
# `current` is the index of the last element kept so far and `i` the index of
# the next element to examine; `seen` holds the values of `f` already
# encountered. Kept elements are written to consecutive positions after
# `current`, and `A` is finally resized to the number of kept elements.
# Returns `A`.
function _unique!(f, A::AbstractVector, seen::Set, current::Integer, i::Integer)
    while i <= lastindex(A)
        x = @inbounds A[i]
        y = f(x)
        if y ∉ seen
            current += 1
            @inbounds A[current] = x
            if y isa eltype(seen)
                push!(seen, y)
            else
                # `y` does not fit the current element type of `seen`: build a
                # set with a wider element type and continue with it from the
                # next index.
                seen2 = convert(Set{promote_typejoin(eltype(seen), typeof(y))}, seen)
                push!(seen2, y)
                return _unique!(f, A, seen2, current, i+1)
            end
        end
        i += 1
    end
    # The new length is computed relative to `firstindex(A)` so that vectors
    # whose indices do not start at 1 are handled.
    return resize!(A, current - firstindex(A) + 1)
end


# When A is not grouped, every element seen so far has to be remembered, so
# defer to the general set-based `unique!` using `identity` as the key function.
function _unique!(A::AbstractVector)
    return unique!(identity, A)
end
Expand Down

0 comments on commit c2fb1dc

Please sign in to comment.