Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Attempt at including offsets in kernel launch #399

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions src/KernelAbstractions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ end

include("nditeration.jl")
using .NDIteration
import .NDIteration: get
import .NDIteration: get, getrange

###
# Kernel closure struct
Expand Down Expand Up @@ -590,11 +590,13 @@ backend(kernel::Kernel) = kernel.backend
error(errmsg)
end

offsets = nothing

if static_ndrange <: StaticSize
if ndrange !== nothing && ndrange != get(static_ndrange)
error("Static NDRange ($static_ndrange) and launch NDRange ($ndrange) differ")
end
ndrange = get(static_ndrange)
ndrange, offsets = getrange(static_ndrange)
end

if static_workgroupsize <: StaticSize
Expand Down Expand Up @@ -623,7 +625,7 @@ backend(kernel::Kernel) = kernel.backend
workgroupsize = CartesianIndices(workgroupsize)
end

iterspace = NDRange{length(ndrange), static_blocks, static_workgroupsize}(blocks, workgroupsize)
iterspace = NDRange{length(ndrange), offsets, static_blocks, static_workgroupsize}(blocks, workgroupsize)
return iterspace, dynamic
end

Expand Down
5 changes: 5 additions & 0 deletions src/macros.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import MacroTools: splitdef, combinedef, isexpr, postwalk

"""
    contiguousrange(range, offset)

Build one `UnitRange` per dimension that covers `range[d]` consecutive indices
starting right after `offset[d]`, i.e. `(1 + offset[d]):(range[d] + offset[d])`.

`range` and `offset` are `Int` tuples of the same length `N`; the result is a
tuple of `N` ranges.

# Examples
```julia
contiguousrange((3, 5), (2, 2))  # (3:5, 3:7)
```
"""
@inline function contiguousrange(range::NTuple{N, Int}, offset::NTuple{N, Int}) where {N}
    # Mapping over the two tuples directly yields a tuple of the same length,
    # without collecting an intermediate generator into a `Tuple`.
    return map((r, o) -> (1 + o):(r + o), range, offset)
end

function find_return(stmt)
result = false
postwalk(stmt) do expr
Expand Down Expand Up @@ -54,6 +57,8 @@ function __kernel(expr, generate_cpu=true)
Core.@__doc__ $name(dev) = $name(dev, $DynamicSize(), $DynamicSize())
$name(dev, size) = $name(dev, $StaticSize(size), $DynamicSize())
$name(dev, size, range) = $name(dev, $StaticSize(size), $StaticSize(range))
$name(dev, size, range, ::Nothing) = $name(dev, size, range)
$name(dev, size, range, offset) = $name(dev, $StaticSize(size), $StaticSize($contiguousrange(range, offset)))
function $name(dev::Dev, sz::S, range::NDRange) where {Dev, S<:$_Size, NDRange<:$_Size}
if $isgpu(dev)
return $construct(dev, sz, range, $gpu_name)
Expand Down
57 changes: 43 additions & 14 deletions src/nditeration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,33 @@ abstract type _Size end
struct DynamicSize <: _Size end
struct StaticSize{S} <: _Size
function StaticSize{S}() where S
new{S::Tuple{Vararg{Int}}}()
new{S::Tuple{Vararg}}()
end
end

@pure StaticSize(s::Tuple{Vararg{Int}}) = StaticSize{s}()
@pure StaticSize(s::Int...) = StaticSize{s}()
@pure StaticSize(s::Type{<:Tuple}) = StaticSize{tuple(s.parameters...)}()
@pure StaticSize(s::Tuple{Vararg{UnitRange{Int}}}) = StaticSize{s}()

# Some @pure convenience functions for `StaticSize`
@pure get(::Type{StaticSize{S}}) where {S} = S
@pure get(::StaticSize{S}) where {S} = S
@pure Base.getindex(::StaticSize{S}, i::Int) where {S} = i <= length(S) ? S[i] : 1
@pure Base.ndims(::StaticSize{S}) where {S} = length(S)
@pure Base.length(::StaticSize{S}) where {S} = prod(S)
@pure Base.ndims(::StaticSize{S}) where {S} = length(S)
@pure Base.length(::StaticSize{S}) where {S} = prod(worksize.(S))

# Split the static size parameter `S` into `(worksize(S), offsets(S))`.
# Accepts either the `StaticSize` type or an instance of it.
@inline function getrange(::Type{StaticSize{S}}) where {S}
    return (worksize(S), offsets(S))
end
@inline getrange(::StaticSize{S}) where {S} = getrange(StaticSize{S})

# Extent of a size specification:
#   * an `Int` is its own extent,
#   * a `UnitRange` contributes its length,
#   * a tuple is handled element by element.
@inline worksize(n::Int) = n
@inline worksize(r::UnitRange) = length(r)
@inline worksize(dims::Tuple) = map(worksize, dims)

# Offset carried by a size specification:
#   * a `UnitRange` starting at `s` has offset `s - 1`,
#   * a plain `Int` (or a tuple made only of `Int`s) has no offset -> `nothing`,
#   * anything else is processed element-wise.
@inline offsets(r::UnitRange) = first(r) - 1
@inline offsets(::Int) = nothing
@inline offsets(::NTuple{N, Int}) where {N} = nothing
@inline offsets(spec) = broadcast(offsets, spec)

"""
NDRange
Expand All @@ -36,7 +48,7 @@ Encodes a blocked iteration space.

# Example
```
ndrange = NDRange{2, DynamicSize, DynamicSize}(CartesianIndices((256, 256)), CartesianIndices((32, 32)))
ndrange = NDRange{2, nothing, DynamicSize, DynamicSize}(CartesianIndices((256, 256)), CartesianIndices((32, 32)))
for block in ndrange
for items in workitems(ndrange)
I = expand(ndrange, block, items)
Expand All @@ -46,36 +58,54 @@ for block in ndrange
end
```
"""
struct NDRange{N, StaticBlocks, StaticWorkitems, DynamicBlock, DynamicWorkitems}
struct NDRange{N, Offsets, StaticBlocks, StaticWorkitems, DynamicBlock, DynamicWorkitems}
blocks::DynamicBlock
workitems::DynamicWorkitems

function NDRange{N, B, W}() where {N, B, W}
new{N, B, W, Nothing, Nothing}(nothing, nothing)
new{N, nothing, B, W, Nothing, Nothing}(nothing, nothing)
end

function NDRange{N, B, W}(blocks, workitems) where {N, B, W}
new{N, B, W, typeof(blocks), typeof(workitems)}(blocks, workitems)
new{N, nothing, B, W, typeof(blocks), typeof(workitems)}(blocks, workitems)
end

function NDRange{N, O, B, W}() where {N, O, B, W}
new{N, O, B, W, Nothing, Nothing}(nothing, nothing)
end

function NDRange{N, O, B, W}(blocks, workitems) where {N, O, B, W}
new{N, O, B, W, typeof(blocks), typeof(workitems)}(blocks, workitems)
end
end

@inline workitems(range::NDRange{N, B, W}) where {N,B,W<:DynamicSize} = range.workitems::CartesianIndices{N}
@inline workitems(range::NDRange{N, B, W}) where {N,B,W<:StaticSize} = CartesianIndices(get(W))::CartesianIndices{N}
@inline blocks(range::NDRange{N, B}) where {N,B<:DynamicSize} = range.blocks::CartesianIndices{N}
@inline blocks(range::NDRange{N, B}) where {N,B<:StaticSize} = CartesianIndices(get(B))::CartesianIndices{N}
@inline workitems(range::NDRange{N, O, B, W}) where {N,O,B,W<:DynamicSize} = range.workitems::CartesianIndices{N}
@inline workitems(range::NDRange{N, O, B, W}) where {N,O,B,W<:StaticSize} = CartesianIndices(get(W))::CartesianIndices{N}
@inline blocks(range::NDRange{N, O, B}) where {N,O,B<:DynamicSize} = range.blocks::CartesianIndices{N}
@inline blocks(range::NDRange{N, O, B}) where {N,O,B<:StaticSize} = CartesianIndices(get(B))::CartesianIndices{N}

import Base.iterate
@inline iterate(range::NDRange) = iterate(blocks(range))
@inline iterate(range::NDRange, state) = iterate(blocks(range), state)

Base.length(range::NDRange) = length(blocks(range))

@inline function expand(ndrange::NDRange{N}, groupidx::CartesianIndex{N}, idx::CartesianIndex{N}) where N
@inline function expand(ndrange::NDRange{N, nothing}, groupidx::CartesianIndex{N}, idx::CartesianIndex{N}) where N
nI = ntuple(Val(N)) do I
Base.@_inline_meta
stride = size(workitems(ndrange), I)
gidx = groupidx.I[I]
(gidx-1)*stride + idx.I[I]
(gidx-1)*stride + idx.I[I]
end
CartesianIndex(nI)
end

# Combine a workgroup index and a work-item index into one `CartesianIndex`,
# shifting every dimension by the matching entry of the `Offsets` type parameter.
@inline function expand(ndrange::NDRange{N, Offsets}, groupidx::CartesianIndex{N}, idx::CartesianIndex{N}) where {N, Offsets}
    shifted = ntuple(Val(N)) do d
        Base.@_inline_meta
        # Items per workgroup along dimension `d`.
        per_group = size(workitems(ndrange), d)
        (groupidx.I[d] - 1) * per_group + idx.I[d] + Offsets[d]
    end
    return CartesianIndex(shifted)
end
Expand Down Expand Up @@ -121,7 +151,6 @@ needs to perform dynamic bounds-checking.
dynamic[] |= mod(ndrange[I], workgroupsize[I]) != 0
return fld1(ndrange[I], workgroupsize[I])
end

return blocks, workgroupsize, dynamic[] ? DynamicCheck() : NoDynamicCheck()
end
end
Expand Down
22 changes: 22 additions & 0 deletions test/test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,28 @@ end
synchronize(Backend())
end

# Test kernel: every work item stores `i + n * j` at its global index `(i, j)`,
# where `n` is the first dimension of `a`.
@kernel function index_global_offset!(a)
# Global index of this work item as a tuple, destructured into `i` and `j`.
i, j = @index(Global, NTuple)
# Only `n` is used below; `m` is unpacked but never read.
n, m = size(a)
# NOTE(review): `@inbounds` assumes the launched range stays inside `a` — confirm at call sites.
@inbounds a[i, j] = i + n * j
end

# Checks that a launch with an offset writes only the shifted sub-block of `a`.
@conditional_testset "Offset iteration space $Backend" skip_tests begin
a = KernelAbstractions.zeros(Backend(), 7, 9)
# Arguments after the backend follow the generated `name(dev, size, range, offset)` method:
# size = (2, 2), range = size(a) .- 4 = (3, 5), offset = (2, 2).
# `contiguousrange((3, 5), (2, 2))` is `(3:5, 3:7)`, so only that window should be written.
index_global_offset!(Backend(), (2, 2), size(a) .- 4, (2, 2))(a)
synchronize(Backend())

# Bring the result into a CPU array for comparison.
# NOTE(review): presumably `b .= a` is supported for every tested backend's array type — confirm.
b = KernelAbstractions.zeros(CPU(), 7, 9)
b .= a

# Reference values: the kernel's formula `i + n * j` with n = 7 rows.
c = [i + 7 * j for i in 1:7, j in 1:9]

# The offset window matches the reference ...
@test b[3:5, 3:7] == c[3:5, 3:7]
# ... and the two-wide border on every side is still zero.
@test b[1:2, :] == zeros(2, 9)
@test b[6:7, :] == zeros(2, 9)
@test b[:, 1:2] == zeros(7, 2)
@test b[:, 8:9] == zeros(7, 2)
end

@conditional_testset "return statement" skip_tests begin
try
Expand Down