diff --git a/base/missing.jl b/base/missing.jl index 6e31ee6c55bda..38322bee65b1e 100644 --- a/base/missing.jl +++ b/base/missing.jl @@ -177,7 +177,7 @@ IteratorSize(::Type{<:SkipMissing}) = SizeUnknown() IteratorEltype(::Type{SkipMissing{T}}) where {T} = IteratorEltype(T) eltype(::Type{SkipMissing{T}}) where {T} = nonmissingtype(eltype(T)) -function Base.iterate(itr::SkipMissing, state...) +function iterate(itr::SkipMissing, state...) y = iterate(itr.x, state...) y === nothing && return nothing item, state = y @@ -189,6 +189,99 @@ function Base.iterate(itr::SkipMissing, state...) item, state end +# Optimized mapreduce implementation +# The generic method is faster when !(eltype(A) >: Missing) since it does not need +# additional loops to identify the two first non-missing values of each block +mapreduce(f, op, itr::SkipMissing{<:AbstractArray}) = + _mapreduce(f, op, IndexStyle(itr.x), eltype(itr.x) >: Missing ? itr : itr.x) + +function _mapreduce(f, op, ::IndexLinear, itr::SkipMissing{<:AbstractArray}) + A = itr.x + local ai + inds = LinearIndices(A) + i = first(inds) + ilast = last(inds) + while i <= ilast + @inbounds ai = A[i] + ai === missing || break + i += 1 + end + i > ilast && return mapreduce_empty(f, op, eltype(itr)) + a1 = ai + i += 1 + while i <= ilast + @inbounds ai = A[i] + ai === missing || break + i += 1 + end + i > ilast && return mapreduce_first(f, op, a1) + # We know A contains at least two non-missing entries: the result cannot be nothing + something(mapreduce_impl(f, op, itr, first(inds), last(inds))) +end + +_mapreduce(f, op, ::IndexCartesian, itr::SkipMissing) = mapfoldl(f, op, itr) + +mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) = + mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op)) + +# Returns nothing when the input contains only missing values, and Some(x) otherwise +@noinline function mapreduce_impl(f, op, itr::SkipMissing{<:AbstractArray}, + ifirst::Integer, ilast::Integer, blksize::Int) + A = itr.x + if ifirst == ilast + @inbounds a1 = A[ifirst] + if a1 === missing + return nothing + else + return Some(mapreduce_first(f, op, a1)) + end + elseif ifirst + blksize > ilast + # sequential portion + local ai + i = ifirst + while i <= ilast + @inbounds ai = A[i] + ai === missing || break + i += 1 + end + i > ilast && return nothing + a1 = ai::eltype(itr) + i += 1 + while i <= ilast + @inbounds ai = A[i] + ai === missing || break + i += 1 + end + i > ilast && return Some(mapreduce_first(f, op, a1)) + a2 = ai::eltype(itr) + # Unexpectedly, the following assertion allows SIMD instructions to be emitted + A[i]::eltype(itr) + i += 1 + v = op(f(a1), f(a2)) + @simd for i = i:ilast + @inbounds ai = A[i] + if ai !== missing + v = op(v, f(ai)) + end + end + return Some(v) + else + # pairwise portion + imid = (ifirst + ilast) >> 1 + v1 = mapreduce_impl(f, op, itr, ifirst, imid, blksize) + v2 = mapreduce_impl(f, op, itr, imid+1, ilast, blksize) + if v1 === nothing && v2 === nothing + return nothing + elseif v1 === nothing + return v2 + elseif v2 === nothing + return v1 + else + return Some(op(something(v1), something(v2))) + end + end +end + """ coalesce(x, y...) diff --git a/test/missing.jl b/test/missing.jl index 1c37af827a985..d25751322cb2e 100644 --- a/test/missing.jl +++ b/test/missing.jl @@ -361,6 +361,57 @@ end @test eltype(x) === Any @test collect(x) == [1, 2, 4] @test collect(x) isa Vector{Int} + + @testset "mapreduce" begin + # Vary size to test splitting blocks with several configurations of missing values + for T in (Int, Float64), + A in (rand(T, 10), rand(T, 1000), rand(T, 10000)) + if T === Int + @test sum(A) === sum(skipmissing(A)) === + reduce(+, skipmissing(A)) === mapreduce(identity, +, skipmissing(A)) + else + @test sum(A) ≈ sum(skipmissing(A)) === + reduce(+, skipmissing(A)) === mapreduce(identity, +, skipmissing(A)) + end + @test mapreduce(cos, *, A) ≈ mapreduce(cos, *, skipmissing(A)) + + B = Vector{Union{T,Missing}}(A) + replace!(x -> rand(Bool) ? x : missing, B) + if T === Int + @test sum(collect(skipmissing(B))) === sum(skipmissing(B)) === + reduce(+, skipmissing(B)) === mapreduce(identity, +, skipmissing(B)) + else + @test sum(collect(skipmissing(B))) ≈ sum(skipmissing(B)) === + reduce(+, skipmissing(B)) === mapreduce(identity, +, skipmissing(B)) + end + @test mapreduce(cos, *, collect(skipmissing(A))) ≈ mapreduce(cos, *, skipmissing(A)) + + # Test block full of missing values + B[1:length(B)÷2] .= missing + if T === Int + @test sum(collect(skipmissing(B))) == sum(skipmissing(B)) == + reduce(+, skipmissing(B)) == mapreduce(identity, +, skipmissing(B)) + else + @test sum(collect(skipmissing(B))) ≈ sum(skipmissing(B)) == + reduce(+, skipmissing(B)) == mapreduce(identity, +, skipmissing(B)) + end + + @test mapreduce(cos, *, collect(skipmissing(A))) ≈ mapreduce(cos, *, skipmissing(A)) + end + + # Patterns that exercize code paths for inputs with 1 or 2 non-missing values + @test sum(skipmissing([1, missing, missing, missing])) === 1 + @test sum(skipmissing([missing, missing, missing, 1])) === 1 + @test sum(skipmissing([1, missing, missing, missing, 2])) === 3 + @test sum(skipmissing([missing, missing, missing, 1, 2])) === 3 + + for n in 0:3 + itr = skipmissing(Vector{Union{Int,Missing}}(fill(missing, n))) + @test sum(itr) == reduce(+, itr) == mapreduce(identity, +, itr) === 0 + @test_throws ArgumentError reduce(x -> x/2, itr) + @test_throws ArgumentError mapreduce(x -> x/2, +, itr) + end + end end @testset "coalesce" begin