Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

unzip: the inverse of zip #33515

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Build system changes
New library functions
---------------------
* `copyuntil(out, io, delim)` and `copyline(out, io)` copy data into an `out::IO` stream ([#48273]).
* `unzip(itr)` is now provided, essentially as an inverse of `zip` ([#33515]).

New library features
--------------------
Expand Down
2 changes: 1 addition & 1 deletion base/Base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ include("iddict.jl")
include("idset.jl")

include("iterators.jl")
using .Iterators: zip, enumerate, only
using .Iterators: zip, unzip, enumerate, only
using .Iterators: Flatten, Filter, product # for generators
using .Iterators: Stateful # compat (was formerly used in reinterpretarray.jl)

Expand Down
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,7 @@ export
iterate,
enumerate, # re-exported from Iterators
zip,
unzip,
only,

# object identity and equality
Expand Down
83 changes: 81 additions & 2 deletions base/iterators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ using .Base:
SizeUnknown, HasLength, HasShape, IsInfinite, EltypeUnknown, HasEltype, OneTo,
@propagate_inbounds, @isdefined, @boundscheck, @inbounds, Generator,
AbstractRange, AbstractUnitRange, UnitRange, LinearIndices, TupleOrBottom,
(:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, missing,
(:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, missing, copyto!,
any, _counttuple, eachindex, ntuple, zero, prod, reduce, in, firstindex, lastindex,
tail, fieldtypes, min, max, minimum, zero, oneunit, promote, promote_shape
using Core: @doc
Expand All @@ -35,7 +35,7 @@ import .Base:
getindex, setindex!, get, iterate,
popfirst!, isdone, peek, intersect

export enumerate, zip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, flatmap
export enumerate, zip, unzip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, flatmap

if Base !== Core.Compiler
export partition
Expand Down Expand Up @@ -471,6 +471,85 @@ zip_iteratoreltype(a, tail...) = and_iteratoreltype(a, zip_iteratoreltype(tail..
# Reverse a `Zip` by applying `reverse` to each of the iterators stored in `z.is`.
reverse(z::Zip) = Zip(Base.map(reverse, z.is)) # n.b. we assume all iterators are the same length
# Last element of a `Zip`: index each iterator in `z.is` at the smallest of their
# `lastindex` values (so every iterator in `z.is` must support `lastindex`/`getindex`).
last(z::Zip) = getindex.(z.is, minimum(Base.map(lastindex, z.is)))

# unzip

"""
    unzip(itrs) -> NTuple{length(first(itrs)), Vector}

The `unzip` function takes an iterator of iterators and returns a tuple of
vectors such that the first vector contains the first element yielded by each
iterator, the second vector the second element yielded by each iterator, etc.
`unzip` is sort of an inverse to the `zip` operation, as the name suggests.
In particular, if we define

    ≐(a, b) = collect(collect.(a)) == collect(collect.(b))

then the following identities relating `zip` and `unzip` hold for any `itrs`
that is an iterator of iterators:

    unzip(zip(itrs...)) ≐ itrs
    zip(unzip(itrs)...) ≐ itrs

Note that `unzip` does not return an iterator: it always consumes all of
its argument and all of each iterator yielded by its argument. It is only
associated with iteration because it is the inverse of `zip`.

Each inner iterator is consumed purely by iteration (it is converted with
`Tuple`), so it does not need to support `length` or indexing. The element
type of each returned vector is widened with `Base.promote_typejoin` whenever
a later element does not fit the type seen so far.

If `itrs` is empty, the number of output vectors cannot be determined and the
empty tuple `()` is returned. If the inner iterators yield different numbers of
elements, only as many vectors are returned as the shortest inner iterator has
elements.

# Examples

```jldoctest
julia> unzip(enumerate("Hello"))
([1, 2, 3, 4, 5], ['H', 'e', 'l', 'l', 'o'])

julia> unzip([[1, "apple"], [2.5, "orange"], [0, "mango"]])
(Real[1, 2.5, 0], ["apple", "orange", "mango"])
```

!!! compat "Julia 1.11"
    The `unzip` function requires Julia 1.11 or later.
"""
function unzip(itrs)
    # Normalize a known length to `Int`: `length` may return another `Integer` type,
    # and the indexed-fill mode below must be selected whenever the vectors are pre-sized.
    n = Base.haslength(itrs) ? Int(length(itrs)) : nothing
    outer = iterate(itrs)
    outer === nothing && return ()
    itr, state = outer
    # Materialize the first row exactly like `unzip_rest` does for later rows, so rows
    # that are not indexable (generators, filters, non-ASCII strings, ...) work too.
    vals = Tuple(itr)
    vecs = ntuple(length(vals)) do k
        x = vals[k]
        # Pre-size to the known length, or start with one slot and `push!` later.
        v = Vector{typeof(x)}(undef, Base.something(n, 1))
        @inbounds v[1] = x
        return v
    end
    return unzip_rest(vecs, typeof(vals), n === nothing ? nothing : 1, itrs, state)
end

# Internal worker for `unzip`.
#
# - `vecs`:    tuple of output vectors built so far
# - `eltypes`: tuple type that a row must match to be stored without widening
# - `i`:       index of the row written last when the vectors are pre-sized
#              (indexed-fill mode), or `nothing` when rows are appended with `push!`
# - `itrs`, `state`: outer iterator and its current iteration state
#
# Returns the tuple of filled vectors once `itrs` is exhausted. When a row does not
# match `eltypes`, the vectors are re-allocated with wider element types (and truncated
# to the common number of columns) and the function recurses with the new tuple type.
function unzip_rest(vecs, eltypes, i, itrs, state)
    while true
        i isa Int && (i += 1)
        outer = iterate(itrs, state)
        outer === nothing && return vecs
        itr, state = outer
        vals = Tuple(itr)
        if vals isa eltypes
            # Fast path: same arity and every value fits its vector's element type.
            for (v, x) in zip(vecs, vals)
                if i isa Int
                    @inbounds v[i] = x
                else
                    push!(v, x)
                end
            end
        else
            # Snapshot the position so the closure below does not capture the
            # reassigned loop variable `i`.
            pos = i
            # Eagerly rebuild every column that both the old vectors and the new row
            # have, widening the element type just enough to hold the new value.
            widened = ntuple(min(length(vecs), length(vals))) do k
                v = vecs[k]
                x = vals[k]
                T = Base.promote_typejoin(eltype(v), typeof(x))
                # In `push!` mode the new vector needs one extra slot for `x`.
                w = Vector{T}(undef, length(v) + (pos === nothing))
                copyto!(w, v)
                @inbounds w[pos === nothing ? lastindex(w) : pos] = x
                return w
            end
            neweltypes = Tuple{Base.map(eltype, widened)...}
            return unzip_rest(widened, neweltypes, i, itrs, state)
        end
    end
end

# filter

struct Filter{F,I}
Expand Down
11 changes: 11 additions & 0 deletions test/iterators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1001,3 +1001,14 @@ end
end
@test v == ()
end

@testset "unzip" begin
    # Non-empty inputs: `unzip` undoes `zip`, for both known-length and
    # unknown-length (`Iterators.filter`) sources.
    for itrs in ((1:3,), (1:3, 4:6), (1:3, 4:6, 7:9),
                 (Iterators.filter(isodd, 1:6), 4:6))
        @test unzip(zip(itrs...)) == collect.(itrs)
    end
    # Empty inputs: the zip yields no row, so `unzip` cannot know how many vectors
    # to produce and returns `()`. Comparing against `collect.(itrs)` (a 2-tuple of
    # empty vectors) could never succeed, since tuples of different length are unequal.
    for itrs in (((), ()), (Bool[], Int8[]))
        @test unzip(zip(itrs...)) == ()
    end
    # Rows without any element produce no vectors at all.
    @test unzip([(), (), ()]) == ()
    # Ragged rows are truncated to the shortest row, whichever comes first.
    @test unzip([(1,2), (4,5,6)]) == ([1, 4], [2, 5])
    @test unzip([(4,5,6), (1,2)]) == ([4, 1], [5, 2])
end