From f2356178544a3d6a2f4e62f200db9f1513573399 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Wed, 1 Apr 2020 12:30:48 -0400
Subject: [PATCH 1/8] Squash everything
---
src/Missings.jl | 257 +++++++++++++++++++++++++++++++++++++++++++++++
test/runtests.jl | 50 +++++++++
2 files changed, 307 insertions(+)
diff --git a/src/Missings.jl b/src/Missings.jl
index b7c4c50..fd8af5c 100644
--- a/src/Missings.jl
+++ b/src/Missings.jl
@@ -207,4 +207,261 @@ missing
"""
passmissing(f) = PassMissing{Core.Typeof(f)}(f)
+"""
+ skipmissings(args...)
+
+Return a tuple of iterators wrapping each of the iterators in `args`, but
+skipping elements at positions where at least one of the iterators returns `missing`
+(listwise deletion of missing values).
+
+# Examples
+```
+julia> x = [1, 2, missing, 4]; y = [1, 2, 3, missing];
+
+julia> tx, ty = skipmissings(x, y)
+(Missings.SkipMissings{Array{Union{Missing, Int64},1},Tuple{Array{Union{Missing, Int64},1}}}
+(Union{Missing, Int64}[1, 2, missing, 4], (Union{Missing, Int64}[1, 2, 3, missing],)), Missi
+ngs.SkipMissings{Array{Union{Missing, Int64},1},Tuple{Array{Union{Missing, Int64},1}}}(Union
+{Missing, Int64}[1, 2, 3, missing], (Union{Missing, Int64}[1, 2, missing, 4],)))
+
+julia> collect(tx)
+2-element Array{Int64,1}:
+ 1
+ 2
+
+```
+"""
+function skipmissings(args...)
+ if isempty(args)
+ throw(ArgumentError("Must input one or more arguments"))
+ end
+
+ if args isa Tuple{Vararg{AbstractArray}}
+ if !all(x -> length(x) == length(args[1]), args)
+ throw(ArgumentError("All arguments must have the same length"))
+ end
+
+ if !all(x -> eachindex(x) == eachindex(args[1]), args)
+ throw(ArgumentError("All arguments must have the same indices"))
+ end
+ end
+
+ ntuple(length(args)) do i
+ s = setdiff(1:length(args), i)
+ SkipMissings(args[i], args[s])
+ end
+end
+
+struct SkipMissings{V, T}
+ x::V
+ others::T
+end
+
+Base.@propagate_inbounds function _anymissingindex(others::Tuple{Vararg{AbstractArray}}, i)
+ for oth in others
+ oth[i] === missing && return true
+ end
+
+ return false
+end
+
+@inline function _anymissingiterate(others::Tuple, state)
+ for oth in others
+ y = iterate(oth, state)
+ y !== nothing && first(y) === missing && return true
+ end
+
+ return false
+end
+
+const SkipMissingsofArrays = SkipMissings{V, T} where
+ {V <: AbstractArray, T <: Tuple{Vararg{AbstractArray}}}
+
+function Base.show(io::IO, mime::MIME"text/plain", itr::SkipMissings{V}) where V
+ print(io, SkipMissings, '{', V, '}', '(', itr.x, ')', " comprised of " *
+ "$(length(itr.others) + 1) iterators")
+end
+
+Base.IteratorSize(::Type{<:SkipMissings}) = Base.SizeUnknown()
+Base.IteratorEltype(::Type{<:SkipMissings{V}}) where {V} = Base.IteratorEltype(V)
+Base.eltype(::Type{<:SkipMissings{V}}) where {V} = nonmissingtype(eltype(V))
+Base.IndexStyle(itr::SkipMissings) = Base.IndexStyle(itr.x)
+
+function Base.iterate(itr::SkipMissings, state=1)
+ x_itr = iterate(itr.x, state)
+ x_itr === nothing && return nothing
+ x_item, x_state = x_itr
+ while true
+ x_item === missing || _anymissingiterate(itr.others, state) || break
+ x_itr = iterate(itr.x, x_state)
+ x_itr === nothing && return nothing
+ state = x_state
+ x_item, x_state = x_itr
+ end
+ return x_item, x_state
+end
+
+function Base.iterate(itr::SkipMissingsofArrays, state=0)
+ eix = eachindex(itr.x)
+ ind_itr = iterate(eix, state)
+ ind_itr === nothing && return nothing
+ ind_item, ind_state = ind_itr
+ @inbounds x_item = itr.x[ind_item]
+ @inbounds while true
+ x_item === missing || _anymissingindex(itr.others, ind_item) || break
+ ind_itr = iterate(eix, ind_state)
+ ind_itr === nothing && return nothing
+ ind_item, ind_state = ind_itr
+ x_item = itr.x[ind_item]
+ end
+ return x_item, ind_state
+end
+
+Base.IndexStyle(::Type{<:SkipMissings{V}}) where {V} = Base.IndexStyle(V)
+
+function Base.eachindex(itr::SkipMissingsofArrays)
+ @inbounds Iterators.filter(eachindex(itr.x)) do i
+ itr.x[i] !== missing && !_anymissingindex(itr.others, i)
+ end
+end
+
+function Base.keys(itr::SkipMissingsofArrays)
+ @inbounds Iterators.filter(keys(itr.x)) do i
+ itr.x[i] !== missing && !_anymissingindex(itr.others, i)
+ end
+end
+
+@inline function Base.getindex(itr::SkipMissingsofArrays, i)
+ @boundscheck checkbounds(itr.x, i)
+ @inbounds xi = itr.x[i]
+ if xi === missing || @inbounds _anymissingindex(itr.others, i)
+ throw(MissingException("the value at index $i is missing for some element"))
+ end
+ return xi
+end
+
+Base.mapreduce(f, op, itr::SkipMissingsofArrays) =
+ Base._mapreduce(f, op, Base.IndexStyle(itr), itr)
+
+function Base._mapreduce(f, op, ::IndexLinear, itr::SkipMissingsofArrays)
+ A = itr.x
+ local ai
+ inds = LinearIndices(A)
+ i = first(inds)
+ ilast = last(inds)
+ @inbounds while i <= ilast
+ ai = A[i]
+ ai === missing || _anymissingindex(itr.others, i) || break
+ i += 1
+ end
+ i > ilast && return Base.mapreduce_empty(f, op, Base.eltype(itr))
+ a1 = ai
+ i += 1
+ @inbounds while i <= ilast
+ ai = A[i]
+ ai === missing || _anymissingindex(itr.others, i) || break
+ i += 1
+ end
+ i > ilast && return Base.mapreduce_first(f, op, a1)
+ # We know A contains at least two non-missing entries: the result cannot be nothing
+ something(Base.mapreduce_impl(f, op, itr, first(inds), last(inds)))
+end
+
+Base._mapreduce(f, op, ::IndexCartesian, itr::SkipMissingsofArrays) = mapfoldl(f, op, itr)
+
+
+Base.mapreduce_impl(f, op, A::SkipMissingsofArrays, ifirst::Integer, ilast::Integer) =
+ Base.mapreduce_impl(f, op, A, ifirst, ilast, Base.pairwise_blocksize(f, op))
+
+# Returns nothing when the input contains only missing values, and Some(x) otherwise
+@noinline function Base.mapreduce_impl(f, op, itr::SkipMissingsofArrays,
+ ifirst::Integer, ilast::Integer, blksize::Int)
+ A = itr.x
+ if ifirst == ilast
+ @inbounds a1 = A[ifirst]
+ if a1 === missing
+ return nothing
+ elseif _anymissingindex(itr.others, ifirst)
+ return nothing
+ else
+ return Some(Base.mapreduce_first(f, op, a1))
+ end
+ elseif ifirst + blksize > ilast
+ # sequential portion
+ local ai
+ i = ifirst
+ @inbounds while i <= ilast
+ ai = A[i]
+ ai === missing || _anymissingindex(itr.others, i) || break
+ i += 1
+ end
+ i > ilast && return nothing
+ a1 = ai::eltype(itr)
+ i += 1
+ @inbounds while i <= ilast
+ ai = A[i]
+ ai === missing || _anymissingindex(itr.others, i) || break
+ i += 1
+ end
+ i > ilast && return Some(Base.mapreduce_first(f, op, a1))
+ a2 = ai::eltype(itr)
+ i += 1
+ v = op(f(a1), f(a2))
+ @simd for i = i:ilast
+ @inbounds ai = A[i]
+ ai === missing || @inbounds _anymissingindex(itr.others, i) || (v = op(v, f(ai)))
+ end
+ return Some(v)
+ else
+ # pairwise portion
+ imid = (ifirst + ilast) >> 1
+ v1 = Base.mapreduce_impl(f, op, itr, ifirst, imid, blksize)
+ v2 = Base.mapreduce_impl(f, op, itr, imid+1, ilast, blksize)
+ if v1 === nothing && v2 === nothing
+ return nothing
+ elseif v1 === nothing
+ return v2
+ elseif v2 === nothing
+ return v1
+ else
+ return Some(op(something(v1), something(v2)))
+ end
+ end
+end
+
+"""
+    filter(f, itr::SkipMissings)
+
+Return a vector similar to the array wrapped by the given `SkipMissings` iterator
+but skipping all elements with a `missing` value in one of the iterators passed
+to `skipmissings` and elements for which `f` returns `false`. This method
+only applies when all iterators passed to `skipmissings` are arrays.
+
+# Examples
+```
+julia> x = [missing; 2:10]; y = [1:9; missing];
+
+julia> mx, my = skipmissings(x, y);
+
+julia> filter(isodd, mx)
+4-element Array{Int64,1}:
+ 3
+ 5
+ 7
+ 9
+
+```
+"""
+function Base.filter(f, itr::SkipMissingsofArrays)  # add a method to Base.filter
+    x = itr.x
+    y = similar(x, eltype(itr), 0)  # empty result vector with the non-missing eltype
+    for i in eachindex(x)
+        @inbounds xi = x[i]  # `@inbounds` below is parenthesized so it excludes `f(xi)`
+        if xi !== missing && !(@inbounds _anymissingindex(itr.others, i)) && f(xi)
+            push!(y, xi)  # position i is non-missing in every array and accepted by `f`
+        end
+    end
+    return y
+end
+
end # module
diff --git a/test/runtests.jl b/test/runtests.jl
index 1aedba7..b3f8e8f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -75,6 +75,56 @@ struct CubeRooter end
@test collect(x) == [1, 2, 4]
@test collect(x) isa Vector{Int}
+ x = [1, 2, missing, 4]
+ y = ["a", "b", "c", missing]
+ z = [missing, missing, 3.1, 4.5]
+ l = [1, 2, 3, 4, 5]
+ @test_throws ArgumentError skipmissings(x, l)
+ mx, my = skipmissings(x, y)
+ iobuf = IOBuffer()
+ show(iobuf, MIME("text/plain"), mx)
+ s = String(take!(iobuf))
+ @test s == "Missings.SkipMissings{Array{Union{Missing, Int64},1}}(Union{Missing, Int64"*
+ "}[1, 2, missing, 4]) comprised of 2 iterators"
+ @test collect(mx) == [1, 2]
+ @test collect(mx) isa Vector{Int}
+ @test reduce(+, mx) === reduce(+, collect(mx)) === sum(mx) ===
+ mapreduce(identity, +, mx) === 3
+ @test mapreduce(x -> x^2, +, mx) === mapreduce(x -> x^2, +, collect(mx)) === 5
+ mx, my, mz = skipmissings(x, y, z)
+ @test eltype(mx) == Int
+ @test eltype(my) == String
+ @test eltype(mz) == Float64
+ @test isempty(collect(mx))
+ @test sum(mx) === 0
+ x = [missing 4; 2 5; 3 6]
+ y = [1 4; missing 5; 3 6]
+ mx, my = skipmissings(x, y)
+ @test collect(mx) == [3, 4, 5, 6]
+ @test mx[3] == 3
+ @test_throws MissingException mx[1]
+ @test reduce(+, mx) === 18
+ @test isapprox(mapreduce(cos, *, collect(mx)), mapreduce(cos, *, mx))
+ if VERSION >= v"1.4.0-DEV"
+ @inferred Union{Float64, Missing} mapreduce(cos, *, mx)
+ end
+
+ x = [missing missing missing]
+ y = [1, 2, 3]
+ mx, my = skipmissings(x, y)
+ @test_throws ArgumentError reduce(x -> x/2, mx)
+ @test_throws ArgumentError mapreduce(x -> x/2, +, mx)
+ @test_throws MethodError length(mx)
+ @test IndexStyle(typeof(mx)) == IndexStyle(typeof(x))
+ x = [isodd(i) ? missing : i for i in 1:64]
+ y = [isodd(i) ? missing : i for i in 65:128]
+ mx, my = skipmissings(x, y)
+ @test sum(mx) === 1056
+ @inferred Union{Missing, Int} sum(mx)
+ if VERSION >= v"1.4.0-DEV"
+ @inferred Union{Missing, Int} sum(mx)
+ end
+
@test levels(1:1) == levels([1]) == levels([1, missing]) == levels([missing, 1]) == [1]
@test levels(2:-1:1) == levels([2, 1]) == levels([2, missing, 1]) == [1, 2]
@test levels([missing, "a", "c", missing, "b"]) == ["a", "b", "c"]
From f115e46c640b7f31c3b6bc5e7eee6714db8b5f65 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Wed, 1 Apr 2020 12:40:34 -0400
Subject: [PATCH 2/8] small post-rebase fixes
---
README.md | 1 +
src/Missings.jl | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index b9e0cdc..82c2aa3 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,7 @@ This package provides additional functionality for working with `missing` values
- `allowmissing` and `disallowmissing` to convert between `Vector{T}` and `Vector{Union{T, Missing}}`
- `passmissing` to wrap a function so that it returns `missing` if any of its positional arguments is `missing`
- `levels` to get the unique values in a vector excluding `missing` and in their preferred order
+- `skipmissings` to loop through a collection of iterators excluding indices where any of the iterators returns `missing`
## Contributing and Questions
diff --git a/src/Missings.jl b/src/Missings.jl
index fd8af5c..d903890 100644
--- a/src/Missings.jl
+++ b/src/Missings.jl
@@ -1,7 +1,8 @@
module Missings
export allowmissing, disallowmissing, ismissing, missing, missings,
- Missing, MissingException, levels, coalesce, passmissing, nonmissingtype
+ Missing, MissingException, levels, coalesce, passmissing, nonmissingtype,
+ skipmissings
using Base: ismissing, missing, Missing, MissingException
using Base: @deprecate
From 8c874e801ae6a6effc5c958cebecb0f60988e476 Mon Sep 17 00:00:00 2001
From: Milan Bouchet-Valat
Date: Wed, 1 Apr 2020 21:58:05 +0200
Subject: [PATCH 3/8] Remove duplicate test
---
test/runtests.jl | 1 -
1 file changed, 1 deletion(-)
diff --git a/test/runtests.jl b/test/runtests.jl
index b3f8e8f..9740146 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -120,7 +120,6 @@ struct CubeRooter end
y = [isodd(i) ? missing : i for i in 65:128]
mx, my = skipmissings(x, y)
@test sum(mx) === 1056
- @inferred Union{Missing, Int} sum(mx)
if VERSION >= v"1.4.0-DEV"
@inferred Union{Missing, Int} sum(mx)
end
From beab796dc3f249679e315ca3997ff22600081f0e Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Wed, 1 Apr 2020 16:48:31 -0400
Subject: [PATCH 4/8] eval in tests
---
test/runtests.jl | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/test/runtests.jl b/test/runtests.jl
index 9740146..a9c974d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -106,7 +106,10 @@ struct CubeRooter end
@test reduce(+, mx) === 18
@test isapprox(mapreduce(cos, *, collect(mx)), mapreduce(cos, *, mx))
if VERSION >= v"1.4.0-DEV"
- @inferred Union{Float64, Missing} mapreduce(cos, *, mx)
+ t = quote
+ @inferred Union{Float64, Missing} mapreduce(cos, *, mx)
+ end
+ eval(t)
end
x = [missing missing missing]
@@ -121,7 +124,10 @@ struct CubeRooter end
mx, my = skipmissings(x, y)
@test sum(mx) === 1056
if VERSION >= v"1.4.0-DEV"
- @inferred Union{Missing, Int} sum(mx)
+ t = quote
+ @inferred Union{Missing, Int} sum(mx)
+ end
+ eval(t)
end
@test levels(1:1) == levels([1]) == levels([1, missing]) == levels([missing, 1]) == [1]
From 6edb38a8e7f0a2614935a19799b5f351559debeb Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Wed, 1 Apr 2020 17:02:41 -0400
Subject: [PATCH 5/8] interpolate so tests work
---
test/runtests.jl | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/test/runtests.jl b/test/runtests.jl
index a9c974d..51a8efb 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -107,7 +107,7 @@ struct CubeRooter end
@test isapprox(mapreduce(cos, *, collect(mx)), mapreduce(cos, *, mx))
if VERSION >= v"1.4.0-DEV"
t = quote
- @inferred Union{Float64, Missing} mapreduce(cos, *, mx)
+ @inferred Union{Float64, Missing} mapreduce(cos, *, $mx)
end
eval(t)
end
@@ -125,7 +125,7 @@ struct CubeRooter end
@test sum(mx) === 1056
if VERSION >= v"1.4.0-DEV"
t = quote
- @inferred Union{Missing, Int} sum(mx)
+ @inferred Union{Missing, Int} sum($mx)
end
eval(t)
end
From 2d3cb693cdb3535027472e7f8ebc05b320d7a6b2 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Wed, 1 Apr 2020 17:48:28 -0400
Subject: [PATCH 6/8] make 32 bit tests pass
---
test/runtests.jl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/test/runtests.jl b/test/runtests.jl
index 51a8efb..e0d494c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -84,7 +84,7 @@ struct CubeRooter end
iobuf = IOBuffer()
show(iobuf, MIME("text/plain"), mx)
s = String(take!(iobuf))
- @test s == "Missings.SkipMissings{Array{Union{Missing, Int64},1}}(Union{Missing, Int64"*
+ @test s == "Missings.SkipMissings{Array{Union{Missing, Int$(Sys.WORD_SIZE)},1}}(Union{Missing, Int$(Sys.WORD_SIZE)" *
"}[1, 2, missing, 4]) comprised of 2 iterators"
@test collect(mx) == [1, 2]
@test collect(mx) isa Vector{Int}
From 4b20194ccb33eede16307c5ec194d9515a28e2e2 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Thu, 2 Apr 2020 09:43:19 -0400
Subject: [PATCH 7/8] Update test/runtests.jl
Co-Authored-By: Milan Bouchet-Valat
---
test/runtests.jl | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/test/runtests.jl b/test/runtests.jl
index e0d494c..55c9895 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -84,8 +84,8 @@ struct CubeRooter end
iobuf = IOBuffer()
show(iobuf, MIME("text/plain"), mx)
s = String(take!(iobuf))
- @test s == "Missings.SkipMissings{Array{Union{Missing, Int$(Sys.WORD_SIZE)},1}}(Union{Missing, Int$(Sys.WORD_SIZE)" *
- "}[1, 2, missing, 4]) comprised of 2 iterators"
+ @test s == "Missings.SkipMissings{Array{Union{Missing, $Int},1}}(" *
+ "Union{Missing, $Int}[1, 2, missing, 4]) comprised of 2 iterators"
@test collect(mx) == [1, 2]
@test collect(mx) isa Vector{Int}
@test reduce(+, mx) === reduce(+, collect(mx)) === sum(mx) ===
From 70637ead02b86aa9b488cbc9d6ca018e30aa12a1 Mon Sep 17 00:00:00 2001
From: Milan Bouchet-Valat
Date: Fri, 3 Apr 2020 12:33:27 +0200
Subject: [PATCH 8/8] Use `@static`
---
test/runtests.jl | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/test/runtests.jl b/test/runtests.jl
index 55c9895..79b259a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -123,11 +123,8 @@ struct CubeRooter end
y = [isodd(i) ? missing : i for i in 65:128]
mx, my = skipmissings(x, y)
@test sum(mx) === 1056
- if VERSION >= v"1.4.0-DEV"
- t = quote
- @inferred Union{Missing, Int} sum($mx)
- end
- eval(t)
+ @static if VERSION >= v"1.4.0-DEV"
+ @inferred Union{Missing, Int} sum(mx)
end
@test levels(1:1) == levels([1]) == levels([1, missing]) == levels([missing, 1]) == [1]