From 72343f1dcd89b902b675a372a674b878403e7b26 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Wed, 9 Oct 2019 14:11:39 -0400 Subject: [PATCH 1/6] unzip: the inverse of zip --- base/Base.jl | 2 +- base/exports.jl | 1 + base/iterators.jl | 64 ++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 65 insertions(+), 2 deletions(-) diff --git a/base/Base.jl b/base/Base.jl index 1fc20293aa384..4f7032a4d0868 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -199,7 +199,7 @@ include("iddict.jl") include("idset.jl") include("iterators.jl") -using .Iterators: zip, enumerate, only +using .Iterators: zip, unzip, enumerate, only using .Iterators: Flatten, Filter, product # for generators using .Iterators: Stateful # compat (was formerly used in reinterpretarray.jl) diff --git a/base/exports.jl b/base/exports.jl index 0959fa1c391e2..cda1937253ef4 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -666,6 +666,7 @@ export iterate, enumerate, # re-exported from Iterators zip, + unzip, only, # object identity and equality diff --git a/base/iterators.jl b/base/iterators.jl index 11e94d3384de8..b1601b011db1c 100644 --- a/base/iterators.jl +++ b/base/iterators.jl @@ -35,7 +35,7 @@ import .Base: getindex, setindex!, get, iterate, popfirst!, isdone, peek, intersect -export enumerate, zip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, flatmap +export enumerate, zip, unzip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, flatmap if Base !== Core.Compiler export partition @@ -471,6 +471,68 @@ zip_iteratoreltype(a, tail...) = and_iteratoreltype(a, zip_iteratoreltype(tail.. reverse(z::Zip) = Zip(Base.map(reverse, z.is)) # n.b. 
we assume all iterators are the same length last(z::Zip) = getindex.(z.is, minimum(Base.map(lastindex, z.is))) +# unzip + +""" + unzip(itrs) -> Vector{<:Vector} + +The `unzip` function takes an iterator of iterators and returns a vector of +vectors such that the first vector contains the first element yielded by each +iterator, the second vector the second element yielded by each iterator, etc. +`unzip` is sort of an inverse to the `zip` operation, as the name suggests. +In particular, if we define + + ≐(a, b) = collect(collect.(a)) == collect(collect.(b)) + +then the following identities relating `zip` and `unzip` hold for any `itrs` +that is an iterator of iterators: + + unzip(zip(itrs...)) ≐ itrs + zip(unzip(itrs)...) ≐ itrs + +Note that `unzip` does not return an iterator: it always consumes all of +its argument and all of each iterator yielded by its argument. It is only +associated with iteration because it is the inverse of `zip`. + +# Examples + +```jldoctest +julia> unzip(enumerate("Hello")) +2-element Array{Array{T,1} where T,1}: + [1, 2, 3] + ['a', 'b', 'c'] + +julia> unzip([[1, 'a'], [2.5, 'z'], [0, 'x']]) +2-element Array{Array{T,1} where T,1}: + Real[1, 2.5, 0] + ['a', 'z', 'x'] +``` +""" +function unzip(itrs) + n = Base.haslength(itrs) ? 
length(itrs) : -1 + vecs = Vector[] + for itr in itrs + for (j, x) in enumerate(itr) + if length(vecs) < j + v = [x] + push!(vecs, v) + n ≥ 0 && sizehint!(v, n) + else + v = vecs[j] + if !(x isa eltype(v)) + T = Base.promote_typejoin(typeof(x), eltype(v)) + v = vecs[j] = copyto!(similar(v, T), v) + n ≥ 0 && sizehint!(v, n) + end + push!(v, x) + end + end + length(first(vecs)) == length(last(vecs)) || + throw(ArgumentError("unzip called with uneven iterators")) + end + return vecs +end + # filter struct Filter{F,I} From e06bf6598a7ec2f63cd2ca3d05bc0f3c50920958 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Wed, 9 Oct 2019 16:18:26 -0400 Subject: [PATCH 2/6] unzip: return tuple of vectors --- base/iterators.jl | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/base/iterators.jl b/base/iterators.jl index b1601b011db1c..fa12367170183 100644 --- a/base/iterators.jl +++ b/base/iterators.jl @@ -474,9 +474,9 @@ last(z::Zip) = getindex.(z.is, minimum(Base.map(lastindex, z.is))) # unzip """ - unzip(itrs) -> Vector{<:Vector} + unzip(itrs) -> NTuple{length(first(itrs)), Vector} -The `unzip` function takes an iterator of iterators and returns a vector of +The `unzip` function takes an iterator of iterators and returns a tuple of vectors such that the first vector contains the first element yielded by each iterator, the second vector the second element yielded by each iterator, etc. `unzip` is sort of an inverse to the `zip` operation, as the name suggests. @@ -498,14 +498,10 @@ associated with iteration because it is the inverse of `zip`. 
```jldoctest julia> unzip(enumerate("Hello")) -2-element Array{Array{T,1} where T,1}: - [1, 2, 3] - ['a', 'b', 'c'] - -julia> unzip([[1, 'a'], [2.5, 'z'], [0, 'x']]) -2-element Array{Array{T,1} where T,1}: - Real[1, 2.5, 0] - ['a', 'z', 'x'] +([1, 2, 3, 4, 5], ['H', 'e', 'l', 'l', 'o']) + +julia> unzip([[1, "apple"], [2.5, "orange"], [0, "mango"]]) +(Real[1, 2.5, 0], ["apple", "orange", "mango"]) ``` """ function unzip(itrs) @@ -530,7 +526,7 @@ function unzip(itrs) length(first(vecs)) == length(last(vecs)) || throw(ArgumentError("unzip called with uneven iterators")) end - return vecs + return Tuple(vecs) end # filter From 1c102937bacc90c5474b273ff2dc6e457e171b6d Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Tue, 15 Oct 2019 13:18:41 -0400 Subject: [PATCH 3/6] unzip: now with widening recursion trick --- base/iterators.jl | 54 +++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/base/iterators.jl b/base/iterators.jl index fa12367170183..10a12acc355e2 100644 --- a/base/iterators.jl +++ b/base/iterators.jl @@ -505,28 +505,46 @@ julia> unzip([[1, "apple"], [2.5, "orange"], [0, "mango"]]) ``` """ function unzip(itrs) - n = Base.haslength(itrs) ? length(itrs) : -1 - vecs = Vector[] - for itr in itrs - for (j, x) in enumerate(itr) - if length(vecs) < j - v = [x] - push!(vecs, v) - n ≥ 0 && sizehint!(v, n) - else - v = vecs[j] - if !(x isa eltype(v)) - T = Base.promote_typejoin(typeof(x), eltype(v)) - v = vecs[j] = copyto!(similar(v, T), v) - n ≥ 0 && sizehint!(v, n) + n = Base.haslength(itrs) ? length(itrs) : nothing + outer = iterate(itrs) + outer === nothing && return () + vals, state = outer + vecs = ntuple(length(vals)) do i + x = vals[i] + v = Vector{typeof(x)}(undef, something(n, 1)) + @inbounds v[1] = x + return v + end + unzip_rest(vecs, typeof(vals), n isa Int ? 
1 : nothing, itrs, state) +end + +function unzip_rest(vecs, eltypes, i, itrs, state) + while true + i isa Int && (i += 1) + outer = iterate(itrs, state) + outer === nothing && return vecs + itr, state = outer + vals = Tuple(itr) + if vals isa eltypes + for (v, x) in zip(vecs, vals) + if i isa Int + @inbounds v[i] = x + else + push!(v, x) end - push!(v, x) end + else + vecs′ = map(vecs, vals) do v, x + T = Base.promote_typejoin(eltype(v), typeof(x)) + v′ = Vector{T}(undef, length(v) + !(i isa Int)) + copyto!(v′, v) + @inbounds v′[something(i, end)] = x + return v′ + end + eltypes′ = Tuple{map(eltype, vecs′)...} + return unzip_rest(Tuple(vecs′), eltypes′, i, itrs, state) end - length(first(vecs)) == length(last(vecs)) || - throw(ArgumentError("unzip called with uneven iterators")) end - return Tuple(vecs) end # filter From b35c5eb5a22b044fbccd09985cf49440e8bf8f07 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 10 Jul 2023 10:08:29 -0400 Subject: [PATCH 4/6] added tests, NEWS, and a bugfix --- NEWS.md | 1 + base/iterators.jl | 2 +- test/iterators.jl | 11 +++++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 52180a852b0e7..d60008e2b3831 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,6 +22,7 @@ Build system changes New library functions --------------------- * `copyuntil(out, io, delim)` and `copyline(out, io)` copy data into an `out::IO` stream ([#48273]). +* `unzip(itr)` is now provided, essentially as an inverse of `zip` ([#33515]). 
New library features -------------------- diff --git a/base/iterators.jl b/base/iterators.jl index 10a12acc355e2..8567fdce2514f 100644 --- a/base/iterators.jl +++ b/base/iterators.jl @@ -13,7 +13,7 @@ using .Base: SizeUnknown, HasLength, HasShape, IsInfinite, EltypeUnknown, HasEltype, OneTo, @propagate_inbounds, @isdefined, @boundscheck, @inbounds, Generator, AbstractRange, AbstractUnitRange, UnitRange, LinearIndices, TupleOrBottom, - (:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, missing, + (:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, missing, something, copyto!, any, _counttuple, eachindex, ntuple, zero, prod, reduce, in, firstindex, lastindex, tail, fieldtypes, min, max, minimum, zero, oneunit, promote, promote_shape using Core: @doc diff --git a/test/iterators.jl b/test/iterators.jl index 59588bdac9684..b361c7e8391e1 100644 --- a/test/iterators.jl +++ b/test/iterators.jl @@ -1001,3 +1001,14 @@ end end @test v == () end + +@testset "unzip" begin + for itrs in ((1:3,), (1:3, 4:6), (1:3,4:6,7:9), + ((), ()), (Bool[], Int8[]), + (Iterators.filter(isodd,1:6), 4:6)) + @test unzip(zip(itrs...)) == collect.(itrs) + end + @test unzip([(), (), ()]) == () + @test unzip([(1,2), (4,5,6)]) == ([1, 4], [2, 5]) + @test unzip([(4,5,6), (1,2)]) == ([4, 1], [5, 2]) +end From a3405c1e89f43a968327568cb201ea6f5af0e2e3 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 10 Jul 2023 10:09:54 -0400 Subject: [PATCH 5/6] added compat --- base/iterators.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/base/iterators.jl b/base/iterators.jl index 8567fdce2514f..73777953c72de 100644 --- a/base/iterators.jl +++ b/base/iterators.jl @@ -503,6 +503,9 @@ julia> unzip(enumerate("Hello")) julia> unzip([[1, "apple"], [2.5, "orange"], [0, "mango"]]) (Real[1, 2.5, 0], ["apple", "orange", "mango"]) ``` + +!!! compat "Julia 1.11" + The `unzip` function requires Julia 1.11 or later. """ function unzip(itrs) n = Base.haslength(itrs) ? 
length(itrs) : nothing From 9cc8b3d35a200be9ecd6a2dce388d92e16bb1d07 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 10 Jul 2023 10:43:39 -0400 Subject: [PATCH 6/6] fix bootstrap failure --- base/iterators.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/iterators.jl b/base/iterators.jl index 73777953c72de..3b23313cbada0 100644 --- a/base/iterators.jl +++ b/base/iterators.jl @@ -13,7 +13,7 @@ using .Base: SizeUnknown, HasLength, HasShape, IsInfinite, EltypeUnknown, HasEltype, OneTo, @propagate_inbounds, @isdefined, @boundscheck, @inbounds, Generator, AbstractRange, AbstractUnitRange, UnitRange, LinearIndices, TupleOrBottom, - (:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, missing, something, copyto!, + (:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, missing, copyto!, any, _counttuple, eachindex, ntuple, zero, prod, reduce, in, firstindex, lastindex, tail, fieldtypes, min, max, minimum, zero, oneunit, promote, promote_shape using Core: @doc @@ -514,7 +514,7 @@ function unzip(itrs) vals, state = outer vecs = ntuple(length(vals)) do i x = vals[i] - v = Vector{typeof(x)}(undef, something(n, 1)) + v = Vector{typeof(x)}(undef, Base.something(n, 1)) @inbounds v[1] = x return v end @@ -541,7 +541,7 @@ function unzip_rest(vecs, eltypes, i, itrs, state) T = Base.promote_typejoin(eltype(v), typeof(x)) v′ = Vector{T}(undef, length(v) + !(i isa Int)) copyto!(v′, v) - @inbounds v′[something(i, end)] = x + @inbounds v′[Base.something(i, end)] = x return v′ end eltypes′ = Tuple{map(eltype, vecs′)...}