diff --git a/NEWS.md b/NEWS.md
index 52180a852b0e7..d60008e2b3831 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -22,6 +22,7 @@ Build system changes
 New library functions
 ---------------------
 * `copyuntil(out, io, delim)` and `copyline(out, io)` copy data into an `out::IO` stream ([#48273]).
+* `unzip(itrs)` is now provided, essentially as an inverse of `zip` ([#33515]).
 
 New library features
 --------------------
diff --git a/base/Base.jl b/base/Base.jl
index 1fc20293aa384..4f7032a4d0868 100644
--- a/base/Base.jl
+++ b/base/Base.jl
@@ -199,7 +199,7 @@ include("iddict.jl")
 include("idset.jl")
 
 include("iterators.jl")
-using .Iterators: zip, enumerate, only
+using .Iterators: zip, unzip, enumerate, only
 using .Iterators: Flatten, Filter, product  # for generators
 using .Iterators: Stateful    # compat (was formerly used in reinterpretarray.jl)
 
diff --git a/base/exports.jl b/base/exports.jl
index 0959fa1c391e2..cda1937253ef4 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -666,6 +666,7 @@ export
     iterate,
     enumerate,  # re-exported from Iterators
     zip,
+    unzip,
     only,
 
 # object identity and equality
diff --git a/base/iterators.jl b/base/iterators.jl
index 11e94d3384de8..3b23313cbada0 100644
--- a/base/iterators.jl
+++ b/base/iterators.jl
@@ -13,7 +13,7 @@ using .Base:
     SizeUnknown, HasLength, HasShape, IsInfinite, EltypeUnknown, HasEltype, OneTo,
     @propagate_inbounds, @isdefined, @boundscheck, @inbounds, Generator,
     AbstractRange, AbstractUnitRange, UnitRange, LinearIndices, TupleOrBottom,
-    (:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, missing,
+    (:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, missing, copyto!,
     any, _counttuple, eachindex, ntuple, zero, prod, reduce, in, firstindex, lastindex,
     tail, fieldtypes, min, max, minimum, zero, oneunit, promote, promote_shape
 using Core: @doc
@@ -35,7 +35,7 @@ import .Base:
     getindex, setindex!, get, iterate,
     popfirst!, isdone, peek, intersect
 
-export enumerate, zip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, flatmap
+export enumerate, zip, unzip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, flatmap
 
 if Base !== Core.Compiler
 export partition
@@ -471,6 +471,112 @@ zip_iteratoreltype(a, tail...) = and_iteratoreltype(a, zip_iteratoreltype(tail..
 reverse(z::Zip) = Zip(Base.map(reverse, z.is)) # n.b. we assume all iterators are the same length
 last(z::Zip) = getindex.(z.is, minimum(Base.map(lastindex, z.is)))
 
+# unzip
+
+"""
+    unzip(itrs) -> NTuple{length(first(itrs)), Vector}
+
+The `unzip` function takes an iterator of iterators and returns a tuple of
+vectors such that the first vector contains the first element yielded by each
+iterator, the second vector the second element yielded by each iterator, etc.
+`unzip` is sort of an inverse to the `zip` operation, as the name suggests.
+In particular, if we define
+
+    ≐(a, b) = collect(collect.(a)) == collect(collect.(b))
+
+then the following identities relating `zip` and `unzip` hold for any `itrs`
+that is an iterator of iterators:
+
+    unzip(zip(itrs...)) ≐ itrs
+    zip(unzip(itrs)...) ≐ itrs
+
+Note that `unzip` does not return an iterator: it always consumes all of
+its argument and all of each iterator yielded by its argument. It is only
+associated with iteration because it is the inverse of `zip`.
+
+If `itrs` is empty, `unzip` returns `()`, since the number of vectors cannot be
+determined. If the inner iterators differ in length, only as many vectors as the
+shortest one has elements are returned, mirroring the truncation done by `zip`.
+
+# Examples
+
+```jldoctest
+julia> unzip(enumerate("Hello"))
+([1, 2, 3, 4, 5], ['H', 'e', 'l', 'l', 'o'])
+
+julia> unzip([[1, "apple"], [2.5, "orange"], [0, "mango"]])
+(Real[1, 2.5, 0], ["apple", "orange", "mango"])
+```
+
+!!! compat "Julia 1.11"
+    The `unzip` function requires Julia 1.11 or later.
+"""
+function unzip(itrs)
+    # Normalize to `Int` so that the known-length path below is taken for any
+    # integer type returned by `length`.
+    n = Base.haslength(itrs) ? Int(length(itrs)) : nothing
+    outer = iterate(itrs)
+    outer === nothing && return ()
+    itr, state = outer
+    # Materialize the first row exactly like every later row, so that it only
+    # needs to be iterable (not indexable) and `eltypes` is a `Tuple` type.
+    vals = Tuple(itr)
+    vecs = ntuple(length(vals)) do k
+        x = vals[k]
+        v = Vector{typeof(x)}(undef, Base.something(n, 1))
+        v[1] = x
+        return v
+    end
+    return unzip_rest(vecs, typeof(vals), n === nothing ? nothing : 1, itrs, state)
+end
+
+# Consume the remaining rows of `itrs`, storing each row's k-th value in
+# `vecs[k]`. `eltypes` is the `Tuple` type a row must have to be stored without
+# widening; `i` is the index of the last row stored when the total number of rows
+# is known in advance and `nothing` when the vectors grow by `push!`.
+function unzip_rest(vecs, eltypes, i, itrs, state)
+    ncols = length(vecs)
+    while true
+        i isa Int && (i += 1)
+        outer = iterate(itrs, state)
+        outer === nothing && return vecs
+        itr, state = outer
+        row = Tuple(itr)
+        # Drop surplus trailing values up front: they can never be stored, and
+        # would otherwise fail the type check and force a reallocation per row.
+        vals = length(row) > ncols ? row[1:ncols] : row
+        if vals isa eltypes
+            for (v, x) in zip(vecs, vals)
+                if i isa Int
+                    v[i] = x
+                else
+                    Base.push!(v, x)
+                end
+            end
+        else
+            # Some value does not fit (or the row is too short): rebuild the
+            # vectors and continue in a call specialized on the new types.
+            vecs′ = unzip_widen(vecs, vals, i)
+            eltypes′ = Tuple{Base.map(eltype, vecs′)...}
+            return unzip_rest(vecs′, eltypes′, i, itrs, state)
+        end
+    end
+end
+
+# Return copies of the leading `min(length(vecs), length(vals))` vectors whose
+# element types are widened to hold the corresponding entry of `vals`, with that
+# entry stored at index `i`, or appended when `i === nothing`.
+function unzip_widen(vecs, vals, i)
+    return ntuple(min(length(vecs), length(vals))) do k
+        v, x = vecs[k], vals[k]
+        T = Base.promote_typejoin(eltype(v), typeof(x))
+        v′ = Vector{T}(undef, length(v) + (i === nothing))
+        copyto!(v′, v)
+        v′[Base.something(i, lastindex(v′))] = x
+        return v′
+    end
+end
+
 # filter
 
 struct Filter{F,I}
diff --git a/test/iterators.jl b/test/iterators.jl
index 59588bdac9684..b361c7e8391e1 100644
--- a/test/iterators.jl
+++ b/test/iterators.jl
@@ -1001,3 +1001,25 @@ end
     end
     @test v == ()
 end
+
+@testset "unzip" begin
+    for itrs in ((1:3,), (1:3, 4:6), (1:3,4:6,7:9),
+                 (Iterators.filter(isodd,1:6), 4:6))
+        @test unzip(zip(itrs...)) == collect.(itrs)
+    end
+    # an empty input carries no information about the number of columns
+    for itrs in (((), ()), (Bool[], Int8[]))
+        @test unzip(zip(itrs...)) == ()
+    end
+    @test unzip([(), (), ()]) == ()
+    @test unzip([(1,2), (4,5,6)]) == ([1, 4], [2, 5])
+    @test unzip([(4,5,6), (1,2)]) == ([4, 1], [5, 2])
+    @test unzip([(1,2), (4,5,6), (7,8,9)]) == ([1, 4, 7], [2, 5, 8])
+    # inner iterators only need to be iterable, not indexable
+    @test unzip(["αβ", "γδ"]) == (['α', 'γ'], ['β', 'δ'])
+    @test unzip([(i + j for j in 1:2) for i in (0, 10)]) == ([1, 11], [2, 12])
+    # element types are widened as needed
+    @test unzip([[1, "apple"], [2.5, "orange"], [0, "mango"]]) ==
+        (Real[1, 2.5, 0], ["apple", "orange", "mango"])
+    @test unzip(Any[(1, 'a'), (2.5, 'b')]) == (Real[1, 2.5], ['a', 'b'])
+end