Skip to content

Commit

Permalink
mapreduce: avoid deadlock by forcing the accumulator type.
Browse files Browse the repository at this point in the history
Otherwise we may union-split across a shfl invocation,
resulting in a deadlock.
  • Loading branch information
maleadt committed Dec 18, 2024
1 parent 03ebed7 commit 22a89f9
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/mapreduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ Base.@propagate_inbounds _map_getindex(args::Tuple{}, I) = ()
# Reduce an array across the grid. All elements to be processed can be addressed by the
# product of the two iterators `Rreduce` and `Rother`, where the latter iterator will have
# singleton entries for the dimensions that should be reduced (and vice versa).
-function partial_mapreduce_grid(f, op, neutral, Rreduce, Rother, shuffle, R, As...)
+function partial_mapreduce_grid(f, op, neutral, Rreduce, Rother, shuffle, R::AbstractArray{T}, As...) where T
assume(length(Rother) > 0)

# decompose the 1D hardware indices into separate ones for reduction (across threads
Expand All @@ -112,7 +112,7 @@ function partial_mapreduce_grid(f, op, neutral, Rreduce, Rother, shuffle, R, As.
neutral
end

-val = op(neutral, neutral)
+val::T = op(neutral, neutral)

# reduce serially across chunks of input vector that don't fit in a block
ireduce = threadIdx_reduce + (blockIdx_reduce - 1) * blockDim_reduce
Expand Down
8 changes: 8 additions & 0 deletions test/base/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -916,3 +916,11 @@ end
@test c == a′ + b
@test c === a
end

@testset "issue 2595" begin
    # Regression test: reducing a Float64 source into a Float32 destination used to hang,
    # since the accumulator was union-split across a shfl call.
    dest = CUDA.zeros(Float32, 1)
    src = CUDA.ones(Float64, 2)
    sum!(dest, src)
    @test Array(dest) == Float32[2]
end

0 comments on commit 22a89f9

Please sign in to comment.