From f2356178544a3d6a2f4e62f200db9f1513573399 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Wed, 1 Apr 2020 12:30:48 -0400 Subject: [PATCH 1/8] Squash everything --- src/Missings.jl | 257 +++++++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 50 +++++++++ 2 files changed, 307 insertions(+) diff --git a/src/Missings.jl b/src/Missings.jl index b7c4c50..fd8af5c 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -207,4 +207,261 @@ missing """ passmissing(f) = PassMissing{Core.Typeof(f)}(f) +""" + skipmissings(args...) + +Return a tuple of iterators wrapping each of the iterators in `args`, but +skipping elements at positions where at least one of the iterators returns `missing` +(listwise deletion of missing values). + +# Examples +``` +julia> x = [1, 2, missing, 4]; y = [1, 2, 3, missing]; + +julia> tx, ty = skipmissings(x, y) +(Missings.SkipMissings{Array{Union{Missing, Int64},1},Tuple{Array{Union{Missing, Int64},1}}} +(Union{Missing, Int64}[1, 2, missing, 4], (Union{Missing, Int64}[1, 2, 3, missing],)), Missi +ngs.SkipMissings{Array{Union{Missing, Int64},1},Tuple{Array{Union{Missing, Int64},1}}}(Union +{Missing, Int64}[1, 2, 3, missing], (Union{Missing, Int64}[1, 2, missing, 4],))) + +julia> collect(tx) +2-element Array{Int64,1}: + 1 + 2 + +``` +""" +function skipmissings(args...) 
+ if isempty(args) + throw(ArgumentError("Must input one or more arguments")) + end + + if args isa Tuple{Vararg{AbstractArray}} + if !all(x -> length(x) == length(args[1]), args) + throw(ArgumentError("All arguments must have the same length")) + end + + if !all(x -> eachindex(x) == eachindex(args[1]), args) + throw(ArgumentError("All arguments must have the same indices")) + end + end + + ntuple(length(args)) do i + s = setdiff(1:length(args), i) + SkipMissings(args[i], args[s]) + end +end + +struct SkipMissings{V, T} + x::V + others::T +end + +Base.@propagate_inbounds function _anymissingindex(others::Tuple{Vararg{AbstractArray}}, i) + for oth in others + oth[i] === missing && return true + end + + return false +end + +@inline function _anymissingiterate(others::Tuple, state) + for oth in others + y = iterate(oth, state) + y !== nothing && first(y) === missing && return true + end + + return false +end + +const SkipMissingsofArrays = SkipMissings{V, T} where + {V <: AbstractArray, T <: Tuple{Vararg{AbstractArray}}} + +function Base.show(io::IO, mime::MIME"text/plain", itr::SkipMissings{V}) where V + print(io, SkipMissings, '{', V, '}', '(', itr.x, ')', " comprised of " * + "$(length(itr.others) + 1) iterators") +end + +Base.IteratorSize(::Type{<:SkipMissings}) = Base.SizeUnknown() +Base.IteratorEltype(::Type{<:SkipMissings{V}}) where {V} = Base.IteratorEltype(V) +Base.eltype(::Type{<:SkipMissings{V}}) where {V} = nonmissingtype(eltype(V)) +Base.IndexStyle(itr::SkipMissings) = Base.IndexStyle(itr.x) + +function Base.iterate(itr::SkipMissings, state=1) + x_itr = iterate(itr.x, state) + x_itr === nothing && return nothing + x_item, x_state = x_itr + while true + x_item === missing || _anymissingiterate(itr.others, state) || break + x_itr = iterate(itr.x, x_state) + x_itr === nothing && return nothing + state = x_state + x_item, x_state = x_itr + end + return x_item, x_state +end + +function Base.iterate(itr::SkipMissingsofArrays, state=0) + eix = eachindex(itr.x) + 
ind_itr = iterate(eix, state) + ind_itr === nothing && return nothing + ind_item, ind_state = ind_itr + @inbounds x_item = itr.x[ind_item] + @inbounds while true + x_item === missing || _anymissingindex(itr.others, ind_item) || break + ind_itr = iterate(eix, ind_state) + ind_itr === nothing && return nothing + ind_item, ind_state = ind_itr + x_item = itr.x[ind_item] + end + return x_item, ind_state +end + +Base.IndexStyle(::Type{<:SkipMissings{V}}) where {V} = Base.IndexStyle(V) + +function Base.eachindex(itr::SkipMissingsofArrays) + @inbounds Iterators.filter(eachindex(itr.x)) do i + itr.x[i] !== missing && !_anymissingindex(itr.others, i) + end +end + +function Base.keys(itr::SkipMissingsofArrays) + @inbounds Iterators.filter(keys(itr.x)) do i + itr.x[i] !== missing && !_anymissingindex(itr.others, i) + end +end + +@inline function Base.getindex(itr::SkipMissingsofArrays, i) + @boundscheck checkbounds(itr.x, i) + @inbounds xi = itr.x[i] + if xi === missing || @inbounds _anymissingindex(itr.others, i) + throw(MissingException("the value at index $i is missing for some element")) + end + return xi +end + +Base.mapreduce(f, op, itr::SkipMissingsofArrays) = + Base._mapreduce(f, op, Base.IndexStyle(itr), itr) + +function Base._mapreduce(f, op, ::IndexLinear, itr::SkipMissingsofArrays) + A = itr.x + local ai + inds = LinearIndices(A) + i = first(inds) + ilast = last(inds) + @inbounds while i <= ilast + ai = A[i] + ai === missing || _anymissingindex(itr.others, i) || break + i += 1 + end + i > ilast && return Base.mapreduce_empty(f, op, Base.eltype(itr)) + a1 = ai + i += 1 + @inbounds while i <= ilast + ai = A[i] + ai === missing || _anymissingindex(itr.others, i) || break + i += 1 + end + i > ilast && return Base.mapreduce_first(f, op, a1) + # We know A contains at least two non-missing entries: the result cannot be nothing + something(Base.mapreduce_impl(f, op, itr, first(inds), last(inds))) +end + +Base._mapreduce(f, op, ::IndexCartesian, itr::SkipMissingsofArrays) = 
mapfoldl(f, op, itr) + + +Base.mapreduce_impl(f, op, A::SkipMissingsofArrays, ifirst::Integer, ilast::Integer) = + Base.mapreduce_impl(f, op, A, ifirst, ilast, Base.pairwise_blocksize(f, op)) + +# Returns nothing when the input contains only missing values, and Some(x) otherwise +@noinline function Base.mapreduce_impl(f, op, itr::SkipMissingsofArrays, + ifirst::Integer, ilast::Integer, blksize::Int) + A = itr.x + if ifirst == ilast + @inbounds a1 = A[ifirst] + if a1 === missing + return nothing + elseif _anymissingindex(itr.others, ifirst) + return nothing + else + return Some(Base.mapreduce_first(f, op, a1)) + end + elseif ifirst + blksize > ilast + # sequential portion + local ai + i = ifirst + @inbounds while i <= ilast + ai = A[i] + ai === missing || _anymissingindex(itr.others, i) || break + i += 1 + end + i > ilast && return nothing + a1 = ai::eltype(itr) + i += 1 + @inbounds while i <= ilast + ai = A[i] + ai === missing || _anymissingindex(itr.others, i) || break + i += 1 + end + i > ilast && return Some(Base.mapreduce_first(f, op, a1)) + a2 = ai::eltype(itr) + i += 1 + v = op(f(a1), f(a2)) + @simd for i = i:ilast + @inbounds ai = A[i] + ai === missing || @inbounds _anymissingindex(itr.others, i) || (v = op(v, f(ai))) + end + return Some(v) + else + # pairwise portion + imid = (ifirst + ilast) >> 1 + v1 = Base.mapreduce_impl(f, op, itr, ifirst, imid, blksize) + v2 = Base.mapreduce_impl(f, op, itr, imid+1, ilast, blksize) + if v1 === nothing && v2 === nothing + return nothing + elseif v1 === nothing + return v2 + elseif v2 === nothing + return v1 + else + return Some(op(something(v1), something(v2))) + end + end +end + +""" + filter(f, itr::SkipMissings) + +Return a vector similar to the array wrapped by the given `SkipMissings` iterator +but skipping all elements with a `missing` value in one of the iterators passed +to `skipmissings` and elements for which `f` returns `false`. This method +only applies when all iterators passed to `skipmissings` are arrays. 
+ +# Examples +``` +julia> x = [missing; 2:9]; y = [1:9; missing]; + +julia> mx, my = skipmissings(x, y); + +julia> filter(isodd, mx) +4-element Array{Int64,1}: + 3 + 5 + 7 + 9 + +``` +""" +function filter(f, itr::SkipMissingsofArrays) + x = itr.x + y = similar(x, eltype(itr), 0) + for i in eachindex(x) + @inbounds xi = x[i] + if xi !== missing && @inbounds !_anymissingindex(itr.others, i) && f(xi) + push!(y, xi) + end + end + y +end + end # module diff --git a/test/runtests.jl b/test/runtests.jl index 1aedba7..b3f8e8f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -75,6 +75,56 @@ struct CubeRooter end @test collect(x) == [1, 2, 4] @test collect(x) isa Vector{Int} + x = [1, 2, missing, 4] + y = ["a", "b", "c", missing] + z = [missing, missing, 3.1, 4.5] + l = [1, 2, 3, 4, 5] + @test_throws ArgumentError skipmissings(x, l) + mx, my = skipmissings(x, y) + iobuf = IOBuffer() + show(iobuf, MIME("text/plain"), mx) + s = String(take!(iobuf)) + @test s == "Missings.SkipMissings{Array{Union{Missing, Int64},1}}(Union{Missing, Int64"* + "}[1, 2, missing, 4]) comprised of 2 iterators" + @test collect(mx) == [1, 2] + @test collect(mx) isa Vector{Int} + @test reduce(+, mx) === reduce(+, collect(mx)) === sum(mx) === + mapreduce(identity, +, mx) === 3 + @test mapreduce(x -> x^2, +, mx) === mapreduce(x -> x^2, +, collect(mx)) === 5 + mx, my, mz = skipmissings(x, y, z) + @test eltype(mx) == Int + @test eltype(my) == String + @test eltype(mz) == Float64 + @test isempty(collect(mx)) + @test sum(mx) === 0 + x = [missing 4; 2 5; 3 6] + y = [1 4; missing 5; 3 6] + mx, my = skipmissings(x, y) + @test collect(mx) == [3, 4, 5, 6] + @test mx[3] == 3 + @test_throws MissingException mx[1] + @test reduce(+, mx) === 18 + @test isapprox(mapreduce(cos, *, collect(mx)), mapreduce(cos, *, mx)) + if VERSION >= v"1.4.0-DEV" + @inferred Union{Float64, Missing} mapreduce(cos, *, mx) + end + + x = [missing missing missing] + y = [1, 2, 3] + mx, my = skipmissings(x, y) + @test_throws 
ArgumentError reduce(x -> x/2, mx) + @test_throws ArgumentError mapreduce(x -> x/2, +, mx) + @test_throws MethodError length(mx) + @test IndexStyle(typeof(mx)) == IndexStyle(typeof(x)) + x = [isodd(i) ? missing : i for i in 1:64] + y = [isodd(i) ? missing : i for i in 65:128] + mx, my = skipmissings(x, y) + @test sum(mx) === 1056 + @inferred Union{Missing, Int} sum(mx) + if VERSION >= v"1.4.0-DEV" + @inferred Union{Missing, Int} sum(mx) + end + @test levels(1:1) == levels([1]) == levels([1, missing]) == levels([missing, 1]) == [1] @test levels(2:-1:1) == levels([2, 1]) == levels([2, missing, 1]) == [1, 2] @test levels([missing, "a", "c", missing, "b"]) == ["a", "b", "c"] From f115e46c640b7f31c3b6bc5e7eee6714db8b5f65 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Wed, 1 Apr 2020 12:40:34 -0400 Subject: [PATCH 2/8] small post-rebase fixes --- README.md | 1 + src/Missings.jl | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b9e0cdc..82c2aa3 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ This package provides additional functionality for working with `missing` values - `allowmissing` and `disallowmissing` to convert between `Vector{T}` and `Vector{Union{T, Missing}}` - `passmissing` to wrap a function so that it returns `missing` if any of its positional arguments is `missing` - `levels` to get the unique values in a vector excluding `missing` and in their preferred order +- `skipmissings` to loop through a collection of iterators excluding indices where any iterators are `missing` ## Contributing and Questions diff --git a/src/Missings.jl b/src/Missings.jl index fd8af5c..d903890 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -1,7 +1,8 @@ module Missings export allowmissing, disallowmissing, ismissing, missing, missings, - Missing, MissingException, levels, coalesce, passmissing, nonmissingtype + Missing, MissingException, levels, coalesce, passmissing, nonmissingtype, + skipmissings using Base: 
ismissing, missing, Missing, MissingException using Base: @deprecate From 8c874e801ae6a6effc5c958cebecb0f60988e476 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Wed, 1 Apr 2020 21:58:05 +0200 Subject: [PATCH 3/8] Remove duplicate test --- test/runtests.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index b3f8e8f..9740146 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -120,7 +120,6 @@ struct CubeRooter end y = [isodd(i) ? missing : i for i in 65:128] mx, my = skipmissings(x, y) @test sum(mx) === 1056 - @inferred Union{Missing, Int} sum(mx) if VERSION >= v"1.4.0-DEV" @inferred Union{Missing, Int} sum(mx) end From beab796dc3f249679e315ca3997ff22600081f0e Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Wed, 1 Apr 2020 16:48:31 -0400 Subject: [PATCH 4/8] eval in tests --- test/runtests.jl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 9740146..a9c974d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -106,7 +106,10 @@ struct CubeRooter end @test reduce(+, mx) === 18 @test isapprox(mapreduce(cos, *, collect(mx)), mapreduce(cos, *, mx)) if VERSION >= v"1.4.0-DEV" - @inferred Union{Float64, Missing} mapreduce(cos, *, mx) + t = quote + @inferred Union{Float64, Missing} mapreduce(cos, *, mx) + end + eval(t) end x = [missing missing missing] @@ -121,7 +124,10 @@ struct CubeRooter end mx, my = skipmissings(x, y) @test sum(mx) === 1056 if VERSION >= v"1.4.0-DEV" - @inferred Union{Missing, Int} sum(mx) + t = quote + @inferred Union{Missing, Int} sum(mx) + end + eval(t) end @test levels(1:1) == levels([1]) == levels([1, missing]) == levels([missing, 1]) == [1] From 6edb38a8e7f0a2614935a19799b5f351559debeb Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Wed, 1 Apr 2020 17:02:41 -0400 Subject: [PATCH 5/8] interpolate so tests work --- test/runtests.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/runtests.jl 
b/test/runtests.jl index a9c974d..51a8efb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -107,7 +107,7 @@ struct CubeRooter end @test isapprox(mapreduce(cos, *, collect(mx)), mapreduce(cos, *, mx)) if VERSION >= v"1.4.0-DEV" t = quote - @inferred Union{Float64, Missing} mapreduce(cos, *, mx) + @inferred Union{Float64, Missing} mapreduce(cos, *, $mx) end eval(t) end @@ -125,7 +125,7 @@ struct CubeRooter end @test sum(mx) === 1056 if VERSION >= v"1.4.0-DEV" t = quote - @inferred Union{Missing, Int} sum(mx) + @inferred Union{Missing, Int} sum($mx) end eval(t) end From 2d3cb693cdb3535027472e7f8ebc05b320d7a6b2 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Wed, 1 Apr 2020 17:48:28 -0400 Subject: [PATCH 6/8] make 32 bit tests pass --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 51a8efb..e0d494c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -84,7 +84,7 @@ struct CubeRooter end iobuf = IOBuffer() show(iobuf, MIME("text/plain"), mx) s = String(take!(iobuf)) - @test s == "Missings.SkipMissings{Array{Union{Missing, Int64},1}}(Union{Missing, Int64"* + @test s == "Missings.SkipMissings{Array{Union{Missing, Int$(Sys.WORD_SIZE)},1}}(Union{Missing, Int$(Sys.WORD_SIZE)" * "}[1, 2, missing, 4]) comprised of 2 iterators" @test collect(mx) == [1, 2] @test collect(mx) isa Vector{Int} From 4b20194ccb33eede16307c5ec194d9515a28e2e2 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Thu, 2 Apr 2020 09:43:19 -0400 Subject: [PATCH 7/8] Update test/runtests.jl Co-Authored-By: Milan Bouchet-Valat --- test/runtests.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index e0d494c..55c9895 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -84,8 +84,8 @@ struct CubeRooter end iobuf = IOBuffer() show(iobuf, MIME("text/plain"), mx) s = String(take!(iobuf)) - @test s == 
"Missings.SkipMissings{Array{Union{Missing, Int$(Sys.WORD_SIZE)},1}}(Union{Missing, Int$(Sys.WORD_SIZE)" * - "}[1, 2, missing, 4]) comprised of 2 iterators" + @test s == "Missings.SkipMissings{Array{Union{Missing, $Int},1}}(" * + "Union{Missing, $Int}[1, 2, missing, 4]) comprised of 2 iterators" @test collect(mx) == [1, 2] @test collect(mx) isa Vector{Int} @test reduce(+, mx) === reduce(+, collect(mx)) === sum(mx) === From 70637ead02b86aa9b488cbc9d6ca018e30aa12a1 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Fri, 3 Apr 2020 12:33:27 +0200 Subject: [PATCH 8/8] Use `@static` --- test/runtests.jl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 55c9895..79b259a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -123,11 +123,8 @@ struct CubeRooter end y = [isodd(i) ? missing : i for i in 65:128] mx, my = skipmissings(x, y) @test sum(mx) === 1056 - if VERSION >= v"1.4.0-DEV" - t = quote - @inferred Union{Missing, Int} sum($mx) - end - eval(t) + @static if VERSION >= v"1.4.0-DEV" + @inferred Union{Missing, Int} sum(mx) end @test levels(1:1) == levels([1]) == levels([1, missing]) == levels([missing, 1]) == [1]