From 2f90dded2ac6577197285fd48e20edbbd8732561 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Thu, 30 Apr 2020 14:35:23 -0700 Subject: [PATCH] Faster mapreduce for Broadcasted (#31020) * Better mapreduce for Broadcasted * Use Axes in IndexStyle for Broadcasted * Apply suggestions from code review Co-Authored-By: tkf <29282+tkf@users.noreply.github.com> * Update base/broadcast.jl Co-Authored-By: tkf <29282+tkf@users.noreply.github.com> * Fix IndexStyle for IndexLinear case * Fix LinearIndices for Broadcasted * Test that pairwise mapreduce is used * Test count(::Broadcasted) * Support Broadcasted in mapreducedim! Co-authored-by: Matt Bauman --- base/broadcast.jl | 36 +++++++++++++++++++++--------- base/reduce.jl | 20 ++++++++++------- base/reducedim.jl | 37 ++++++++++++++++++------------- test/broadcast.jl | 56 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 116 insertions(+), 33 deletions(-) diff --git a/base/broadcast.jl b/base/broadcast.jl index 4c6178b5fb84d..c33674883c1ad 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -166,7 +166,7 @@ BroadcastStyle(a::AbstractArrayStyle{M}, ::DefaultArrayStyle{N}) where {M,N} = # methods that instead specialize on `BroadcastStyle`, # copyto!(dest::AbstractArray, bc::Broadcasted{MyStyle}) -struct Broadcasted{Style<:Union{Nothing,BroadcastStyle}, Axes, F, Args<:Tuple} +struct Broadcasted{Style<:Union{Nothing,BroadcastStyle}, Axes, F, Args<:Tuple} <: Base.AbstractBroadcasted f::F args::Args axes::Axes # the axes of the resulting object (may be bigger than implied by `args` if this is nested inside a larger `Broadcasted`) @@ -193,21 +193,25 @@ function Base.show(io::IO, bc::Broadcasted{Style}) where {Style} end ## Allocating the output container -Base.similar(bc::Broadcasted{DefaultArrayStyle{N}}, ::Type{ElType}) where {N,ElType} = - similar(Array{ElType}, axes(bc)) -Base.similar(bc::Broadcasted{DefaultArrayStyle{N}}, ::Type{Bool}) where N = - similar(BitArray, axes(bc)) +Base.similar(bc::Broadcasted, ::Type{T}) where {T} = similar(bc, T, axes(bc)) +Base.similar(::Broadcasted{DefaultArrayStyle{N}}, ::Type{ElType}, dims) where {N,ElType} = + similar(Array{ElType}, dims) +Base.similar(::Broadcasted{DefaultArrayStyle{N}}, ::Type{Bool}, dims) where N = + similar(BitArray, dims) # In cases of conflict we fall back on Array -Base.similar(bc::Broadcasted{ArrayConflict}, ::Type{ElType}) where ElType = - similar(Array{ElType}, axes(bc)) -Base.similar(bc::Broadcasted{ArrayConflict}, ::Type{Bool}) = - similar(BitArray, axes(bc)) +Base.similar(::Broadcasted{ArrayConflict}, ::Type{ElType}, dims) where ElType = + similar(Array{ElType}, dims) +Base.similar(::Broadcasted{ArrayConflict}, ::Type{Bool}, dims) = + similar(BitArray, dims) @inline Base.axes(bc::Broadcasted) = _axes(bc, bc.axes) _axes(::Broadcasted, axes::Tuple) = axes @inline _axes(bc::Broadcasted, ::Nothing) = combine_axes(bc.args...) _axes(bc::Broadcasted{<:AbstractArrayStyle{0}}, ::Nothing) = () +@inline Base.axes(bc::Broadcasted{<:Any, <:NTuple{N}}, d::Integer) where N = + d <= N ? axes(bc)[d] : OneTo(1) + BroadcastStyle(::Type{<:Broadcasted{Style}}) where {Style} = Style() BroadcastStyle(::Type{<:Broadcasted{S}}) where {S<:Union{Nothing,Unknown}} = throw(ArgumentError("Broadcasted{Unknown} wrappers do not have a style assigned")) @@ -219,6 +223,12 @@ argtype(bc::Broadcasted) = argtype(typeof(bc)) _eachindex(t::Tuple{Any}) = t[1] _eachindex(t::Tuple) = CartesianIndices(t) +Base.IndexStyle(bc::Broadcasted) = IndexStyle(typeof(bc)) +Base.IndexStyle(::Type{<:Broadcasted{<:Any,<:Tuple{Any}}}) = IndexLinear() +Base.IndexStyle(::Type{<:Broadcasted{<:Any}}) = IndexCartesian() + +Base.LinearIndices(bc::Broadcasted{<:Any,<:Tuple{Any}}) = axes(bc)[1] + Base.ndims(::Broadcasted{<:Any,<:NTuple{N,Any}}) where {N} = N Base.ndims(::Type{<:Broadcasted{<:Any,<:NTuple{N,Any}}}) where {N} = N @@ -564,7 +574,13 @@ end @boundscheck checkbounds(bc, I) @inbounds _broadcast_getindex(bc, I) end -Base.@propagate_inbounds Base.getindex(bc::Broadcasted, i1::Integer, i2::Integer, I::Integer...) = bc[CartesianIndex((i1, i2, I...))] +Base.@propagate_inbounds Base.getindex( + bc::Broadcasted, + i1::Union{Integer,CartesianIndex}, + i2::Union{Integer,CartesianIndex}, + I::Union{Integer,CartesianIndex}..., +) = + bc[CartesianIndex((i1, i2, I...))] Base.@propagate_inbounds Base.getindex(bc::Broadcasted) = bc[CartesianIndex(())] @inline Base.checkbounds(bc::Broadcasted, I::Union{Integer,CartesianIndex}) = diff --git a/base/reduce.jl b/base/reduce.jl index 73b200eb77288..f95fb5ea74976 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -12,6 +12,9 @@ else const SmallUnsigned = Union{UInt8,UInt16,UInt32} end +abstract type AbstractBroadcasted end +const AbstractArrayOrBroadcasted = Union{AbstractArray, AbstractBroadcasted} + """ Base.add_sum(x, y) @@ -227,7 +230,8 @@ foldr(op, itr; kw...) = mapfoldr(identity, op, itr; kw...) # This is a generic implementation of `mapreduce_impl()`, # certain `op` (e.g. `min` and `max`) may have their own specialized versions. -@noinline function mapreduce_impl(f, op, A::AbstractArray, ifirst::Integer, ilast::Integer, blksize::Int) +@noinline function mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted, + ifirst::Integer, ilast::Integer, blksize::Int) if ifirst == ilast @inbounds a1 = A[ifirst] return mapreduce_first(f, op, a1) @@ -250,7 +254,7 @@ foldr(op, itr; kw...) = mapfoldr(identity, op, itr; kw...) end end -mapreduce_impl(f, op, A::AbstractArray, ifirst::Integer, ilast::Integer) = +mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted, ifirst::Integer, ilast::Integer) = mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op)) """ @@ -383,13 +387,13 @@ The default is `reduce_first(op, f(x))`. """ mapreduce_first(f, op, x) = reduce_first(op, f(x)) -_mapreduce(f, op, A::AbstractArray) = _mapreduce(f, op, IndexStyle(A), A) +_mapreduce(f, op, A::AbstractArrayOrBroadcasted) = _mapreduce(f, op, IndexStyle(A), A) -function _mapreduce(f, op, ::IndexLinear, A::AbstractArray{T}) where T +function _mapreduce(f, op, ::IndexLinear, A::AbstractArrayOrBroadcasted) inds = LinearIndices(A) n = length(inds) if n == 0 - return mapreduce_empty(f, op, T) + return mapreduce_empty_iter(f, op, A, IteratorEltype(A)) elseif n == 1 @inbounds a1 = A[first(inds)] return mapreduce_first(f, op, a1) @@ -410,7 +414,7 @@ end mapreduce(f, op, a::Number) = mapreduce_first(f, op, a) -_mapreduce(f, op, ::IndexCartesian, A::AbstractArray) = mapfoldl(f, op, A) +_mapreduce(f, op, ::IndexCartesian, A::AbstractArrayOrBroadcasted) = mapfoldl(f, op, A) """ reduce(op, itr; [init]) @@ -560,7 +564,7 @@ isgoodzero(::typeof(max), x) = isbadzero(min, x) isgoodzero(::typeof(min), x) = isbadzero(max, x) function mapreduce_impl(f, op::Union{typeof(max), typeof(min)}, - A::AbstractArray, first::Int, last::Int) + A::AbstractArrayOrBroadcasted, first::Int, last::Int) a1 = @inbounds A[first] v1 = mapreduce_first(f, op, a1) v2 = v3 = v4 = v1 @@ -856,7 +860,7 @@ function count(pred, itr) end return n end -function count(pred, a::AbstractArray) +function count(pred, a::AbstractArrayOrBroadcasted) n = 0 for i in eachindex(a) @inbounds n += pred(a[i])::Bool diff --git a/base/reducedim.jl b/base/reducedim.jl index fe3adef035dec..331ea9a2eb099 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -12,7 +12,7 @@ No method is implemented for reducing index range of type $(typeof(i)). Please i reduced_index for this index type or report this as an issue. """ )) -reduced_indices(a::AbstractArray, region) = reduced_indices(axes(a), region) +reduced_indices(a::AbstractArrayOrBroadcasted, region) = reduced_indices(axes(a), region) # for reductions that keep 0 dims as 0 reduced_indices0(a::AbstractArray, region) = reduced_indices0(axes(a), region) @@ -89,8 +89,8 @@ for (Op, initval) in ((:(typeof(&)), true), (:(typeof(|)), false)) end # reducedim_initarray is called by -reducedim_initarray(A::AbstractArray, region, init, ::Type{R}) where {R} = fill!(similar(A,R,reduced_indices(A,region)), init) -reducedim_initarray(A::AbstractArray, region, init::T) where {T} = reducedim_initarray(A, region, init, T) +reducedim_initarray(A::AbstractArrayOrBroadcasted, region, init, ::Type{R}) where {R} = fill!(similar(A,R,reduced_indices(A,region)), init) +reducedim_initarray(A::AbstractArrayOrBroadcasted, region, init::T) where {T} = reducedim_initarray(A, region, init, T) # TODO: better way to handle reducedim initialization # @@ -156,8 +156,8 @@ end reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(max), A::AbstractArray{T}, region) where {T} = reducedim_initarray(A, region, zero(f(zero(T))), _realtype(f, T)) -reducedim_init(f, op::typeof(&), A::AbstractArray, region) = reducedim_initarray(A, region, true) -reducedim_init(f, op::typeof(|), A::AbstractArray, region) = reducedim_initarray(A, region, false) +reducedim_init(f, op::typeof(&), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, true) +reducedim_init(f, op::typeof(|), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, false) # specialize to make initialization more efficient for common cases @@ -179,8 +179,11 @@ end ## generic (map)reduction -has_fast_linear_indexing(a::AbstractArray) = false +has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = false has_fast_linear_indexing(a::Array) = true +has_fast_linear_indexing(::Number) = true # for Broadcasted +has_fast_linear_indexing(bc::Broadcast.Broadcasted) = + all(has_fast_linear_indexing, bc.args) function check_reducedims(R, A) # Check whether R has compatible dimensions w.r.t. A for reduction @@ -233,7 +236,7 @@ _firstslice(i::OneTo) = OneTo(1) _firstslice(i::Slice) = Slice(_firstslice(i.indices)) _firstslice(i) = i[firstindex(i):firstindex(i)] -function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArray) +function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted) lsiz = check_reducedims(R,A) isempty(A) && return R @@ -271,10 +274,10 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArray) return R end -mapreducedim!(f, op, R::AbstractArray, A::AbstractArray) = +mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted) = (_mapreducedim!(f, op, R, A); R) -reducedim!(op, R::AbstractArray{RT}, A::AbstractArray) where {RT} = +reducedim!(op, R::AbstractArray{RT}, A::AbstractArrayOrBroadcasted) where {RT} = mapreducedim!(identity, op, R, A) """ @@ -304,17 +307,21 @@ julia> mapreduce(isodd, |, a, dims=1) 1 1 1 1 ``` """ -mapreduce(f, op, A::AbstractArray; dims=:, kw...) = _mapreduce_dim(f, op, kw.data, A, dims) -mapreduce(f, op, A::AbstractArray...; kw...) = reduce(op, map(f, A...); kw...) +mapreduce(f, op, A::AbstractArrayOrBroadcasted; dims=:, kw...) = + _mapreduce_dim(f, op, kw.data, A, dims) +mapreduce(f, op, A::AbstractArrayOrBroadcasted...; kw...) = + reduce(op, map(f, A...); kw...) -_mapreduce_dim(f, op, nt::NamedTuple{(:init,)}, A::AbstractArray, ::Colon) = mapfoldl(f, op, A; nt...) +_mapreduce_dim(f, op, nt::NamedTuple{(:init,)}, A::AbstractArrayOrBroadcasted, ::Colon) = + mapfoldl(f, op, A; nt...) -_mapreduce_dim(f, op, ::NamedTuple{()}, A::AbstractArray, ::Colon) = _mapreduce(f, op, IndexStyle(A), A) +_mapreduce_dim(f, op, ::NamedTuple{()}, A::AbstractArrayOrBroadcasted, ::Colon) = + _mapreduce(f, op, IndexStyle(A), A) -_mapreduce_dim(f, op, nt::NamedTuple{(:init,)}, A::AbstractArray, dims) = +_mapreduce_dim(f, op, nt::NamedTuple{(:init,)}, A::AbstractArrayOrBroadcasted, dims) = mapreducedim!(f, op, reducedim_initarray(A, dims, nt.init), A) -_mapreduce_dim(f, op, ::NamedTuple{()}, A::AbstractArray, dims) = +_mapreduce_dim(f, op, ::NamedTuple{()}, A::AbstractArrayOrBroadcasted, dims) = mapreducedim!(f, op, reducedim_init(f, op, A, dims), A) """ diff --git a/test/broadcast.jl b/test/broadcast.jl index acaafbe20d0ae..2551ee6f4925f 100644 --- a/test/broadcast.jl +++ b/test/broadcast.jl @@ -821,6 +821,7 @@ end # Broadcasted iterable/indexable APIs let bc = Broadcast.instantiate(Broadcast.broadcasted(+, zeros(5), 5)) + @test IndexStyle(bc) == IndexLinear() @test eachindex(bc) === Base.OneTo(5) @test length(bc) === 5 @test ndims(bc) === 1 @@ -831,6 +832,7 @@ let @test ndims(copy(bc)) == ndims([v for v in bc]) == ndims(collect(bc)) == ndims(bc) bc = Broadcast.instantiate(Broadcast.broadcasted(+, zeros(5), 5*ones(1, 4))) + @test IndexStyle(bc) == IndexCartesian() @test eachindex(bc) === CartesianIndices((Base.OneTo(5), Base.OneTo(4))) @test length(bc) === 20 @test ndims(bc) === 2 @@ -851,6 +853,60 @@ let a = rand(5), b = rand(5), c = copy(a) @test x == [2] end +@testset "broadcasted mapreduce" begin + xs = 1:10 + ys = 1:2:20 + bc = Broadcast.instantiate(Broadcast.broadcasted(*, xs, ys)) + @test IndexStyle(bc) == IndexLinear() + @test sum(bc) == mapreduce(Base.splat(*), +, zip(xs, ys)) + + xs2 = reshape(xs, 1, :) + ys2 = reshape(ys, 1, :) + bc = Broadcast.instantiate(Broadcast.broadcasted(*, xs2, ys2)) + @test IndexStyle(bc) == IndexCartesian() + @test sum(bc) == mapreduce(Base.splat(*), +, zip(xs, ys)) + + xs = 1:5:3*5 + ys = 1:4:3*4 + bc = Broadcast.instantiate( + Broadcast.broadcasted(iseven, Broadcast.broadcasted(-, xs, ys))) + @test count(bc) == count(iseven, map(-, xs, ys)) + + xs = reshape(1:6, (2, 3)) + ys = 1:2 + bc = Broadcast.instantiate(Broadcast.broadcasted(*, xs, ys)) + @test reduce(+, bc; dims=1, init=0) == [5 11 17] + + # Let's test that `Broadcasted` actually hits the efficient + # `mapreduce` method as intended. We are going to invoke `reduce` + # with this *NON-ASSOCIATIVE* binary operator to see what + # associativity is chosen by the implementation: + paren = (x, y) -> "($x,$y)" + # Next, we construct data `xs` such that `length(xs)` is greater + # than short array cutoff of `_mapreduce`: + alphabets = 'a':'z' + blksize = Base.pairwise_blocksize(identity, paren) รท length(alphabets) + xs = repeat(alphabets, 2 * blksize) + @test length(xs) > blksize + # So far we constructed the data `xs` and reducing function + # `paren` such that `reduce` and `foldl` results are different. + # That is to say, this `reduce` does not hit the fall-back `foldl` + # branch: + @test foldl(paren, xs) != reduce(paren, xs) + + # Now let's try it with `Broadcasted`: + bcraw = Broadcast.broadcasted(identity, xs) + bc = Broadcast.instantiate(bcraw) + # If `Broadcasted` has `IndexLinear` style, it should hit the + # `reduce` branch: + @test IndexStyle(bc) == IndexLinear() + @test reduce(paren, bc) == reduce(paren, xs) + # If `Broadcasted` does not have `IndexLinear` style, it should + # hit the `foldl` branch: + @test IndexStyle(bcraw) == IndexCartesian() + @test reduce(paren, bcraw) == foldl(paren, xs) +end + # treat Pair as scalar: @test replace.(split("The quick brown fox jumps over the lazy dog"), r"[aeiou]"i => "_") == ["Th_", "q__ck", "br_wn", "f_x", "j_mps", "_v_r", "th_", "l_zy", "d_g"]