From 5427d33396e439931dc7ee7d5391b0c03af303c2 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 20 Jun 2023 18:45:03 -0400 Subject: [PATCH 1/8] Add implementation notes to host functionality (#401) --- src/KernelAbstractions.jl | 87 ++++++++++++++++++++++++++++++++------- test/convert.jl | 2 +- test/runtests.jl | 27 ++++++++++++ 3 files changed, 99 insertions(+), 17 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index 756f40f9..de8098ed 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -66,7 +66,6 @@ KernelAbstractions primitives can be used in non-kernel functions. !!! warn This is an experimental feature. - """ macro kernel(config, expr) if config isa Expr && config.head == :(=) && @@ -97,6 +96,9 @@ macro Const end copyto!(::Backend, dest::AbstractArray, src::AbstractArray) Perform a `copyto!` operation that execution ordered with respect to the backend. + +!!! note + Backend implementations **must** implement this function. """ function copyto! end @@ -104,6 +106,9 @@ function copyto! end synchronize(::Backend) Synchronize the current backend. + +!!! note + Backend implementations **must** implement this function. """ function synchronize end @@ -114,12 +119,13 @@ Release the memory of an array for reuse by future allocations and reduce pressure on the allocator. After releasing the memory of an array, it should no longer be accessed. -This function is optional both to implement and call. -If not implemented for a particular backend, default action is a no-op. -Otherwise, it should be defined for backend's array type. - !!! note On CPU backend this is always a no-op. + +!!! note + Backend implementations **may** implement this function. + If not implemented for a particular backend, default action is a no-op. + Otherwise, it should be defined for backend's array type. """ function unsafe_free! end @@ -393,9 +399,17 @@ constify(arg) = adapt(ConstAdaptor(), arg) ### """ - Abstract type for all KernelAbstractions backends. + +Abstract type for all KernelAbstractions backends. """ abstract type Backend end + +""" +Abstract type for all GPU based KernelAbstractions backends. + +!!! note + New backend implementations **must** sub-type this abstract type. +""" abstract type GPU <: Backend end """ @@ -412,6 +426,11 @@ struct CPU <: Backend CPU(;static::Bool=false) = new(static) end +""" + isgpu(::Backend)::Bool + +Returns true for all [`GPU`](@ref) backends. +""" isgpu(::GPU) = true isgpu(::CPU) = false @@ -420,6 +439,10 @@ isgpu(::CPU) = false get_backend(A::AbstractArray)::Backend Get a [`Backend`](@ref) instance suitable for array `A`. + +!!! note + Backend implementations **must** provide `get_backend` for their custom array type. + It should be the same as the return type of [`allocate`](@ref) """ function get_backend end @@ -438,39 +461,61 @@ get_backend(::Array) = CPU() Adapt.adapt_storage(::CPU, a::Array) = a """ - allocate(::Backend, Type, dims...) + allocate(::Backend, Type, dims...)::AbstractArray Allocate a storage array appropriate for the computational backend. + +!!! note + Backend implementations **must** implement `allocate(::NewBackend, T, dims::Tuple)` +""" +allocate(backend::Backend, T, dims...) = allocate(backend, T, dims) +allocate(backend::Backend, T, dims::Tuple) = throw(MethodError(allocate, (backend, T, dims))) + """ -allocate(backend, T, dims...) = return allocate(backend, T, dims) + zeros(::Backend, Type, dims...)::AbstractArray -zeros(backend, T, dims...) 
= zeros(backend, T, dims) -function zeros(backend, ::Type{T}, dims::Tuple) where T +Allocate a storage array appropriate for the computational backend filled with zeros. +""" +zeros(backend::Backend, T, dims...) = zeros(backend, T, dims) +function zeros(backend::Backend, ::Type{T}, dims::Tuple) where T data = allocate(backend, T, dims...) fill!(data, zero(T)) return data end -ones(backend, T, dims...) = ones(backend, T, dims) -function ones(backend, ::Type{T}, dims::Tuple) where T +""" + ones(::Backend, Type, dims...)::AbstractArray + +Allocate a storage array appropriate for the computational backend filled with ones. +""" +ones(backend::Backend, T, dims...) = ones(backend, T, dims) +function ones(backend::Backend, ::Type{T}, dims::Tuple) where T data = allocate(backend, T, dims) fill!(data, one(T)) return data end """ - supports_atomics(::Backend) + supports_atomics(::Backend)::Bool Returns whether `@atomic` operations are supported by the backend. + +!!! note + Backend implementations **must** implement this function, + only if they **do not** support atomic operations with Atomix. """ -supports_atomics(backend) = true +supports_atomics(::Backend) = true """ - supports_float64(::Backend) + supports_float64(::Backend)::Bool Returns whether `Float64` values are supported by the backend. + +!!! note + Backend implementations **must** implement this function, + only if they **do not** support `Float64`. """ -supports_float64(backend) = true +supports_float64(::Backend) = true """ priority!(::Backend, prio::Symbol) @@ -479,6 +524,9 @@ Set the priority for the backend stream/queue. This is an optional feature that backends may or may not implement. If a backend shall support priorities it must accept `:high`, `:normal`, `:low`. Where `:normal` is the default. + +!!! note + Backend implementations **may** implement this function. """ function priority!(::Backend, prio::Symbol) if !(prio in (:high, :normal, :low)) @@ -501,6 +549,13 @@ import .NDIteration: get Kernel closure struct that is used to represent the backend kernel on the host. `WorkgroupSize` is the number of workitems in a workgroup. + +!!! note + Backend implementations **must** implement: + ``` + (kernel::Kernel{<:NewBackend})(args...; ndrange=nothing, workgroupsize=nothing) + ``` + As well as the on-device functionality. """ struct Kernel{Backend, WorkgroupSize<:_Size, NDRange<:_Size, Fun} backend::Backend diff --git a/test/convert.jl b/test/convert.jl index 9103c9ce..95a9a822 100644 --- a/test/convert.jl +++ b/test/convert.jl @@ -44,7 +44,7 @@ using KernelAbstractions, Test end function convert_testsuite(backend, ArrayT) - ET = KernelAbstractions.supports_float64(backend) ? Float64 : Float32 + ET = KernelAbstractions.supports_float64(backend()) ? 
Float64 : Float32 N = 32 d_A = ArrayT([rand(ET)*3 for i = 1:N]) diff --git a/test/runtests.jl b/test/runtests.jl index c961aef4..d287d8b4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -20,3 +20,30 @@ kern_static(CPU(static=true), (1,))(A, ndrange=length(A)) @kernel cpu=false function my_no_cpu_kernel(a) end @test_throws ErrorException("This kernel is unavailable for backend CPU") my_no_cpu_kernel(CPU()) + +struct NewBackend <: KernelAbstractions.GPU end +@testset "Default host implementation" begin + backend = NewBackend() + @test KernelAbstractions.isgpu(backend) == true + + @test_throws MethodError KernelAbstractions.synchronize(backend) + + @test_throws MethodError KernelAbstractions.allocate(backend, Float32, 1) + @test_throws MethodError KernelAbstractions.allocate(backend, Float32, (1,)) + @test_throws MethodError KernelAbstractions.allocate(backend, Float32, 1, 2) + + @test_throws MethodError KernelAbstractions.zeros(backend, Float32, 1) + @test_throws MethodError KernelAbstractions.ones(backend, Float32, 1) + + @test KernelAbstractions.supports_atomics(backend) == true + @test KernelAbstractions.supports_float64(backend) == true + + @test KernelAbstractions.priority!(backend, :high) === nothing + @test KernelAbstractions.priority!(backend, :normal) === nothing + @test KernelAbstractions.priority!(backend, :low) === nothing + + @test_throws ErrorException KernelAbstractions.priority!(backend, :middle) + + kernel = my_no_cpu_kernel(backend) + @test_throws MethodError kernel() +end From 516ce1dcf0545603f579eb91308bbf816dcbca52 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 20 Jun 2023 18:45:27 -0400 Subject: [PATCH 2/8] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index e458b580..8d119485 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "KernelAbstractions" uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c" authors = ["Valentin Churavy and contributors"] -version = "0.9.4" +version = "0.9.5" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" From c48658633f89ccfc24ad0cb6f4c2fa2ce7a1914d Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Wed, 21 Jun 2023 18:25:13 +0200 Subject: [PATCH 3/8] Don't use Float64 in tests, as some back-ends do not support it. 
(#402) --- test/compiler.jl | 8 ++++---- test/reflection.jl | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/test/compiler.jl b/test/compiler.jl index 5df1c241..51050650 100644 --- a/test/compiler.jl +++ b/test/compiler.jl @@ -49,20 +49,20 @@ function compiler_testsuite(backend, ArrayT) @test !any(check_for_overdub, CI.code) end - A = ArrayT{Float64}(undef, 1) + A = ArrayT{Float32}(undef, 1) let (CI, rt) = @ka_code_typed square(backend())(A, A, ndrange=1) # test that there is no invoke of overdub @test !any(check_for_overdub, CI.code) end - A = ArrayT{Float64}(undef, 1) - B = ArrayT{Float64}(undef, 1) + A = ArrayT{Float32}(undef, 1) + B = ArrayT{Float32}(undef, 1) let (CI, rt) = @ka_code_typed pow(backend())(A, B, ndrange=1) # test that there is no invoke of overdub @test !any(check_for_overdub, CI.code) end - A = ArrayT{Float64}(undef, 1) + A = ArrayT{Float32}(undef, 1) B = ArrayT{Int32}(undef, 1) let (CI, rt) = @ka_code_typed pow(backend())(A, B, ndrange=1) # test that there is no invoke of overdub diff --git a/test/reflection.jl b/test/reflection.jl index 58c97c57..f2d5bd27 100644 --- a/test/reflection.jl +++ b/test/reflection.jl @@ -16,7 +16,7 @@ end end function test_typed_kernel_dynamic(backend, backend_str, ArrayT) - A = ArrayT(ones(1024, 1024)) + A = ArrayT(ones(Float32, 1024, 1024)) kernel = mul2(backend()) res = if backend == CPU @ka_code_typed kernel(A, ndrange=size(A), workgroupsize=16) @@ -32,7 +32,7 @@ function test_typed_kernel_dynamic(backend, backend_str, ArrayT) end function test_typed_kernel_dynamic_no_info(backend, backend_str, ArrayT) - A = ArrayT(ones(1024, 1024)) + A = ArrayT(ones(Float32, 1024, 1024)) B = similar(A) C = similar(A) kernel = add3(backend()) @@ -46,7 +46,7 @@ function test_typed_kernel_dynamic_no_info(backend, backend_str, ArrayT) end function test_typed_kernel_static(backend, backend_str, ArrayT) - A = ArrayT(ones(1024, 1024)) + A = ArrayT(ones(Float32, 1024, 1024)) kernel = if backend == CPU mul2(backend(), 16) else @@ -62,7 +62,7 @@ function test_typed_kernel_static(backend, backend_str, ArrayT) end function test_typed_kernel_no_optimize(backend, backend_str, ArrayT) - A = ArrayT(ones(1024, 1024)) + A = ArrayT(ones(Float32, 1024, 1024)) kernel = if backend == CPU mul2(backend(), 16) else @@ -75,7 +75,7 @@ function test_typed_kernel_no_optimize(backend, backend_str, ArrayT) end function test_expr_kernel(backend, backend_str, ArrayT) - A = ArrayT(ones(1024, 1024)) + A = ArrayT(ones(Float32, 1024, 1024)) C = similar(A) kernel = if backend == CPU addi(backend()) From 95262067fc347713390591d25d79c2885e13f4be Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Thu, 22 Jun 2023 08:43:40 +0200 Subject: [PATCH 4/8] Bump version. 
--- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 8d119485..8b62fed7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "KernelAbstractions" uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c" authors = ["Valentin Churavy and contributors"] -version = "0.9.5" +version = "0.9.6" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" From 537efe024932a0f0dd1bb5ce2d8fd1f29b30bb0a Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 7 Jul 2023 21:39:56 +0200 Subject: [PATCH 5/8] Fix CUDA CI (#406) --- .buildkite/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 4be18bd3..e71ddae7 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -19,7 +19,7 @@ steps: julia -e 'println("+++ :julia: Running tests") using Pkg - Pkg.test("CUDA"; coverage=true, test_args=["kernelabstractions"])' + Pkg.test("CUDA"; coverage=true, test_args=["base/kernelabstractions"])' agents: queue: "juliagpu" cuda: "*" From 703816affd57c2c2dc4eb069c7bcc61cd5e359c5 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 11 Jul 2023 10:36:15 +0300 Subject: [PATCH 6/8] Update CI pipelines (#407) --- .buildkite/pipeline.yml | 3 +++ .github/workflows/ci.yml | 1 + 2 files changed, 4 insertions(+) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index e71ddae7..e4d954fc 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -6,6 +6,7 @@ steps: - "1.6" - "1.7" - "1.8" + - "1.9" plugins: - JuliaCI/julia#v1: version: "{{matrix.version}}" @@ -32,6 +33,7 @@ steps: setup: version: - "1.8" + - "1.9" plugins: - JuliaCI/julia#v1: version: "{{matrix.version}}" @@ -59,6 +61,7 @@ steps: setup: version: - "1.8" + - "1.9" plugins: - JuliaCI/julia#v1: version: "{{matrix.version}}" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2f78ab86..9564cc64 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,6 +25,7 @@ jobs: - '1.6' - '1.7' - '1.8' + - '1.9' - 'nightly' os: - ubuntu-latest From a53054d5dde1a4abd4bf2e2b8b0b3388d9889e55 Mon Sep 17 00:00:00 2001 From: William Moses Date: Tue, 11 Jul 2023 04:08:37 -0400 Subject: [PATCH 7/8] Enzyme Rules (fwd for all backends, reverse for CPU) (#382) Co-authored-by: Valentin Churavy --- .buildkite/pipeline.yml | 28 ++++++++++++++ Project.toml | 14 ++++++- ext/EnzymeExt.jl | 81 +++++++++++++++++++++++++++++++++++++++ src/KernelAbstractions.jl | 18 +++++++-- src/cpu.jl | 2 +- test/Project.toml | 1 + test/extensions/enzyme.jl | 35 +++++++++++++++++ test/runtests.jl | 7 ++++ test/testsuite.jl | 5 +++ 9 files changed, 185 insertions(+), 6 deletions(-) create mode 100644 ext/EnzymeExt.jl create mode 100644 test/extensions/enzyme.jl diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index e4d954fc..d8018140 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -28,6 +28,34 @@ steps: soft_fail: - exit_status: 3 + - label: "CUDA Enzyme Julia {{matrix.version}}" + matrix: + setup: + version: + - "1.8" + - "1.9" + plugins: + - JuliaCI/julia#v1: + version: "{{matrix.version}}" + - JuliaCI/julia-coverage#v1: + codecov: true + command: | + julia -e 'println("--- :julia: Instantiating project") + using Pkg + Pkg.develop(; path=pwd()) + Pkg.add(["CUDA", "Enzyme"])' || exit 3 + + julia -e 'println("+++ :julia: Running tests") + using CUDA + include("test/extensions/enzyme.jl") + enzyme_testsuite(CUDABackend, CuArray, false)' + agents: + queue: 
"juliagpu" + cuda: "*" + timeout_in_minutes: 120 + soft_fail: + - exit_status: 3 + - label: "Metal Julia {{matrix.version}}" matrix: setup: diff --git a/Project.toml b/Project.toml index 8b62fed7..7e0322a2 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +Requires = "ae029012-a4dd-5104-9daa-d747884805df" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" @@ -19,9 +20,20 @@ UnsafeAtomicsLLVM = "d80eeb9a-aca5-4d75-85e5-170c8b632249" [compat] Adapt = "0.4, 1.0, 2.0, 3.0" Atomix = "0.1" +EnzymeCore = "0.5" MacroTools = "0.5" +PrecompileTools = "1" +Requires = "1.3" StaticArrays = "0.12, 1.0" UnsafeAtomics = "0.2.1" UnsafeAtomicsLLVM = "0.1" -PrecompileTools = "1" julia = "1.6" + +[extensions] +EnzymeExt = "EnzymeCore" + +[extras] +EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" + +[weakdeps] +EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" diff --git a/ext/EnzymeExt.jl b/ext/EnzymeExt.jl new file mode 100644 index 00000000..8f35a31c --- /dev/null +++ b/ext/EnzymeExt.jl @@ -0,0 +1,81 @@ +module EnzymeExt + if isdefined(Base, :get_extension) + using EnzymeCore + using EnzymeCore.EnzymeRules + else + using ..EnzymeCore + using ..EnzymeCore.EnzymeRules + end + import KernelAbstractions: Kernel, StaticSize, launch_config, __groupsize, __groupindex, blocks, mkcontext, CompilerMetadata, CPU + + EnzymeRules.inactive(::typeof(StaticSize), x...) = nothing + + function fwd(ctx, f, args...) + EnzymeCore.autodiff_deferred(Forward, Const(f), Const, Const(ctx), args...) + return nothing + end + + function aug_fwd(ctx, f::FT, ::Val{ModifiedBetween}, subtape, args...) where {ModifiedBetween, FT} + forward, reverse = EnzymeCore.autodiff_deferred_thunk(ReverseSplitModified(ReverseSplitWithPrimal, Val(ModifiedBetween)), Const{Core.Typeof(f)}, Const, Const{Core.Typeof(ctx)}, map(Core.Typeof, args)...) + subtape[__groupindex(ctx)] = forward(Const(f), Const(ctx), args...)[1] + return nothing + end + + function rev(ctx, f::FT, ::Val{ModifiedBetween}, subtape, args...) where {ModifiedBetween, FT} + forward, reverse = EnzymeCore.autodiff_deferred_thunk(ReverseSplitModified(ReverseSplitWithPrimal, Val(ModifiedBetween)), Const{Core.Typeof(f)}, Const, Const{Core.Typeof(ctx)}, map(Core.Typeof, args)...) 
+ tp = subtape[__groupindex(ctx)] + reverse(Const(f), Const(ctx), args..., tp) + return nothing + end + + function EnzymeRules.forward(func::Const{<:Kernel}, ::Type{Const{Nothing}}, args...; ndrange=nothing, workgroupsize=nothing) + kernel = func.val + f = kernel.f + fwd_kernel = similar(kernel, fwd) + + fwd_kernel(f, args...; ndrange, workgroupsize) + end + + function EnzymeRules.augmented_primal(config::Config, func::Const{<:Kernel{CPU}}, ::Type{Const{Nothing}}, args...; ndrange=nothing, workgroupsize=nothing) + kernel = func.val + f = kernel.f + + ndrange, workgroupsize, iterspace, dynamic = launch_config(kernel, ndrange, workgroupsize) + block = first(blocks(iterspace)) + + ctx = mkcontext(kernel, block, ndrange, iterspace, dynamic) + ctxTy = Core.Typeof(ctx) # CompilerMetadata{ndrange(kernel), Core.Typeof(dynamic)} + + # TODO autodiff_deferred on the func.val + ModifiedBetween = Val((overwritten(config)[1], false, overwritten(config)[2:end]...)) + + FT = Const{Core.Typeof(f)} + + # TODO in KA backends like CUDAKernels, etc have a version with a parent job type + TapeType = EnzymeCore.tape_type(ReverseSplitModified(ReverseSplitWithPrimal, ModifiedBetween), FT, Const, Const{ctxTy}, map(Core.Typeof, args)...) + + subtape = Array{TapeType}(undef, __groupsize(ctx)) + + aug_kernel = similar(kernel, aug_fwd) + + aug_kernel(f, ModifiedBetween, subtape, args...; ndrange, workgroupsize) + + # TODO the fact that ctxTy is type unstable means this is all type unstable. + # Since custom rules require a fixed return type, explicitly cast to Any, rather + # than returning a AugmentedReturn{Nothing, Nothing, T} where T. + + res = AugmentedReturn{Nothing, Nothing, Vector}(nothing, nothing, subtape) + return res + end + + function EnzymeRules.reverse(config::Config, func::Const{<:Kernel}, ::Type{<:EnzymeCore.Annotation}, subtape, args...; ndrange=nothing, workgroupsize=nothing) + kernel = func.val + f = kernel.f + + ModifiedBetween = Val((overwritten(config)[1], false, overwritten(config)[2:end]...)) + + rev_kernel = similar(func.val, rev) + rev_kernel(f, ModifiedBetween, subtape, args...; ndrange, workgroupsize) + return ((nothing for a in args)...,) + end +end diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index de8098ed..335d2cd9 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -153,7 +153,7 @@ the total size you can use `prod(@groupsize())`. macro groupsize() quote $groupsize($(esc(:__ctx__))) - end + end end """ @@ -165,7 +165,7 @@ a tuple corresponding to kernel configuration. 
macro ndrange() quote $size($ndrange($(esc(:__ctx__)))) - end + end end """ @@ -563,14 +563,14 @@ struct Kernel{Backend, WorkgroupSize<:_Size, NDRange<:_Size, Fun} end function Base.similar(kernel::Kernel{D, WS, ND}, f::F) where {D, WS, ND, F} - Kernel{D, WS, ND, F}(f) + Kernel{D, WS, ND, F}(kernel.backend, f) end workgroupsize(::Kernel{D, WorkgroupSize}) where {D, WorkgroupSize} = WorkgroupSize ndrange(::Kernel{D, WorkgroupSize, NDRange}) where {D, WorkgroupSize,NDRange} = NDRange backend(kernel::Kernel) = kernel.backend -function partition(kernel, ndrange, workgroupsize) +@inline function partition(kernel, ndrange, workgroupsize) static_ndrange = KernelAbstractions.ndrange(kernel) static_workgroupsize = KernelAbstractions.workgroupsize(kernel) @@ -716,4 +716,14 @@ PrecompileTools.@compile_workload begin end end +if !isdefined(Base, :get_extension) +using Requires +end + +@static if !isdefined(Base, :get_extension) + function __init__() + @require EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" include("../ext/EnzymeExt.jl") + end +end + end #module diff --git a/src/cpu.jl b/src/cpu.jl index 545a3b7a..8c3e8afd 100644 --- a/src/cpu.jl +++ b/src/cpu.jl @@ -44,7 +44,7 @@ function (obj::Kernel{CPU})(args...; ndrange=nothing, workgroupsize=nothing, ) __run(obj, ndrange, iterspace, args, dynamic, obj.backend.static) end -function launch_config(kernel::Kernel{CPU}, ndrange, workgroupsize) +@inline function launch_config(kernel::Kernel{CPU}, ndrange, workgroupsize) if ndrange isa Integer ndrange = (ndrange,) end diff --git a/test/Project.toml b/test/Project.toml index 0cc3afd2..231ca958 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,6 @@ [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/test/extensions/enzyme.jl b/test/extensions/enzyme.jl new file mode 100644 index 00000000..6bf08a64 --- /dev/null +++ b/test/extensions/enzyme.jl @@ -0,0 +1,35 @@ +using Test +using Enzyme +using KernelAbstractions + +@kernel function square!(A) + I = @index(Global, Linear) + @inbounds A[I] *= A[I] +end + +function caller(A, backend) + kernel = square!(backend) + kernel(A, ndrange=size(A)) + synchronize(backend) +end + +function enzyme_testsuite(backend, ArrayT, supports_reverse=true) + @testset "kernels" begin + A = ArrayT{Float64}(undef, 64) + A .= (1:1:64) + dA = ArrayT{Float64}(undef, 64) + dA .= 1 + + if supports_reverse + Enzyme.autodiff(Reverse, caller, Duplicated(A, dA), Const(backend())) + @test all(dA .≈ (2:2:128)) + end + + A .= (1:1:64) + dA .= 1 + + Enzyme.autodiff(Forward, caller, Duplicated(A, dA), Const(backend())) + @test all(dA .≈ 2:2:128) + + end +end diff --git a/test/runtests.jl b/test/runtests.jl index d287d8b4..2572f2aa 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -47,3 +47,10 @@ struct NewBackend <: KernelAbstractions.GPU end kernel = my_no_cpu_kernel(backend) @test_throws MethodError kernel() end + +include("extensions/enzyme.jl") +@static if VERSION >= v"1.7.0" + @testset "Enzyme" begin + enzyme_testsuite(CPU, Array) + end +end diff --git a/test/testsuite.jl b/test/testsuite.jl index 3c59f0a1..cd78e76d 100644 --- a/test/testsuite.jl +++ b/test/testsuite.jl @@ -3,6 +3,10 @@ module Testsuite using ..KernelAbstractions using ..Test +# We can't add test-dependencies withouth breaking backend packages +const Pkg = 
Base.require(Base.PkgId( + Base.UUID("44cfe95a-1eb2-52ea-b672-e2afdf69b78f"), "Pkg")) + macro conditional_testset(name, skip_tests, expr) esc(quote @testset $name begin @@ -15,6 +19,7 @@ macro conditional_testset(name, skip_tests, expr) end) end + include("test.jl") include("localmem.jl") include("private.jl") From 10b54b02017c36286072397e9017bc340fd8d46f Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 11 Jul 2023 04:11:44 -0400 Subject: [PATCH 8/8] Bump patch version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 7e0322a2..ee4340b0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "KernelAbstractions" uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c" authors = ["Valentin Churavy and contributors"] -version = "0.9.6" +version = "0.9.7" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
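
Editor's note (not part of the patches): the docstrings added in PATCH 1/8 spell out which host-side functions a new backend **must** and **may** provide, but the contract is scattered across several hunks. The sketch below collects it in one place under stated assumptions: `DemoBackend` and `DemoArray` are made-up names, the "device" memory is an ordinary `Array`, and the kernel-launch entry point `(kernel::Kernel{DemoBackend})(args...; ndrange, workgroupsize)` plus all on-device functionality required by the `Kernel` docstring are omitted.

```julia
# Illustrative only -- DemoBackend/DemoArray are hypothetical, not a real backend.
import KernelAbstractions as KA

struct DemoBackend <: KA.GPU end                 # new backends must subtype KA.GPU

struct DemoArray{T, N} <: AbstractArray{T, N}
    data::Array{T, N}                            # stand-in for device memory
end
Base.size(A::DemoArray) = size(A.data)
Base.getindex(A::DemoArray, i...) = A.data[i...]
Base.setindex!(A::DemoArray, v, i...) = (A.data[i...] = v)

# must: map the custom array type back to its backend
KA.get_backend(::DemoArray) = DemoBackend()

# must: allocate(::NewBackend, T, dims::Tuple); zeros/ones build on this
KA.allocate(::DemoBackend, ::Type{T}, dims::Tuple) where {T} =
    DemoArray(Array{T}(undef, dims))

# must: backend-ordered copy and synchronization
function KA.copyto!(::DemoBackend, dest, src)
    copyto!(dest isa DemoArray ? dest.data : dest,
            src isa DemoArray ? src.data : src)
    return dest
end
KA.synchronize(::DemoBackend) = nothing          # nothing is queued in this toy sketch

# only needed when the default answer (`true`) is wrong for this backend
KA.supports_float64(::DemoBackend) = false

# may: unsafe_free! and priority! are optional to specialize (see their docstrings)

# With these methods the generic helpers documented in this patch work as-is:
#   KA.zeros(DemoBackend(), Float32, 8)             # allocate + fill!
#   KA.get_backend(KA.ones(DemoBackend(), Int, 4))  # DemoBackend()
```

A real backend would additionally implement the host launch path and the device-side index/localmem functionality, which this sketch deliberately leaves out.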
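
Editor's note (not part of the patches): PATCH 7/8 adds the Enzyme rules and their test but no standalone usage example. The sketch below mirrors the new `test/extensions/enzyme.jl`: reverse-mode AD through a kernel on the CPU backend (forward mode is what the PR exercises on GPU backends, per the new Buildkite job calling `enzyme_testsuite(CUDABackend, CuArray, false)`). On Julia >= 1.9 the `EnzymeExt` package extension loads automatically once EnzymeCore and KernelAbstractions are both present; on older versions the Requires fallback added in `src/KernelAbstractions.jl` serves the same purpose.

```julia
# Mirrors test/extensions/enzyme.jl: differentiate through a KA kernel with Enzyme.
using Enzyme, KernelAbstractions

@kernel function square!(A)
    I = @index(Global, Linear)
    @inbounds A[I] *= A[I]
end

function caller(A, backend)
    square!(backend)(A, ndrange = size(A))
    synchronize(backend)
end

A  = collect(Float64, 1:64)
dA = fill(1.0, 64)                       # adjoint seed
Enzyme.autodiff(Reverse, caller, Duplicated(A, dA), Const(CPU()))
@assert all(dA .≈ 2 .* (1:64))           # d(A.^2)/dA, evaluated at the original A
```

Forward mode follows the same pattern with `Enzyme.autodiff(Forward, caller, Duplicated(A, dA), Const(backend()))`, which is the variant the new CUDA CI job runs.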