Check status of broken tests #742

Merged
merged 3 commits into from
Jul 1, 2024
17 changes: 13 additions & 4 deletions .buildkite/testing.yml
@@ -1,7 +1,7 @@
steps:
- group: ":julia: CUDA GPU"
steps:
- label: ":julia: Julia {{matrix.julia}} + CUDA GPU"
- label: ":julia: Julia {{matrix.julia}} + {{matrix.testing_group}} + CUDA GPU"
plugins:
- JuliaCI/julia#v1:
version: "{{matrix.julia}}"
@@ -17,12 +17,16 @@ steps:
cuda: "*"
env:
BACKEND_GROUP: "CUDA"
LUX_TEST_GROUP: "{{matrix.testing_group}}"
if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip ci\]/
timeout_in_minutes: 240
timeout_in_minutes: 60
matrix:
setup:
julia:
- "1"
testing_group:
- "!distributed"
- "distributed"

- group: ":telescope: Downstream CUDA"
steps:
@@ -42,7 +46,7 @@ steps:
env:
RETESTITEMS_NWORKERS: 2
if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip downstream\]/ && build.message !~ /\[skip ci\]/ && build.pull_request.labels includes "run downstream test"
timeout_in_minutes: 240
timeout_in_minutes: 60
matrix:
setup:
repo:
@@ -52,7 +56,7 @@ steps:

- group: ":julia: AMD GPU"
steps:
- label: ":julia: Julia: {{matrix.julia}} + AMD GPU"
- label: ":julia: Julia: {{matrix.julia}} + {{matrix.testing_group}} + AMD GPU"
plugins:
- JuliaCI/julia#v1:
version: "{{matrix.julia}}"
@@ -68,6 +72,8 @@ steps:
JULIA_AMDGPU_HIP_MUST_LOAD: "1"
JULIA_AMDGPU_DISABLE_ARTIFACTS: "1"
BACKEND_GROUP: "AMDGPU"
LUX_TEST_GROUP: "{{matrix.testing_group}}"
RETESTITEMS_NWORKERS: 2
agents:
queue: "juliagpu"
rocm: "*"
@@ -78,6 +84,9 @@
setup:
julia:
- "1"
testing_group:
- "!distributed"
- "distributed"

- group: ":telescope: Downstream AMD GPU"
steps:
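
Net effect of this file's changes: each GPU job is split into a "distributed" and a "!distributed" matrix entry, the choice is passed to the test suite through the LUX_TEST_GROUP environment variable, the AMD GPU job additionally gets RETESTITEMS_NWORKERS set to 2, and the per-job timeout drops from 240 to 60 minutes. Below is a minimal sketch of how a runner could consume these variables; only BACKEND_GROUP, LUX_TEST_GROUP, and the "distributed"/"!distributed" values come from the YAML above, while the parsing logic itself is an assumption, not the actual runtests.jl.

    # Hypothetical sketch (not part of this PR): turn the Buildkite env vars into a
    # predicate the test runner can use to pick test groups.
    const BACKEND_GROUP = lowercase(get(ENV, "BACKEND_GROUP", "all"))
    const LUX_TEST_GROUP = lowercase(get(ENV, "LUX_TEST_GROUP", "all"))

    # A leading '!' is read as "every group except this one".
    function should_run_group(group::AbstractString)
        LUX_TEST_GROUP == "all" && return true
        startswith(LUX_TEST_GROUP, "!") && return group != LUX_TEST_GROUP[2:end]
        return group == LUX_TEST_GROUP
    end

    should_run_group("distributed")   # false when LUX_TEST_GROUP == "!distributed"
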
3 changes: 0 additions & 3 deletions codecov.yml

This file was deleted.

9 changes: 6 additions & 3 deletions ext/LuxForwardDiffExt/utils.jl
@@ -1,7 +1,10 @@
# Low-Level functions
@inline function Lux.__partials(::Type{Tag}, x, i) where {Tag}
x isa ForwardDiff.Dual && return ForwardDiff.partials(Tag, x, i)
x isa AbstractArray && return ForwardDiff.partials.(Tag, x, i)
if x isa AbstractArray
bfn(xᵢ, iᵢ) = ForwardDiff.partials(Tag, xᵢ, iᵢ)
return bfn.(x, i)
end
map_fn = @closure(xᵢ->Lux.__partials(Tag, xᵢ, i))
x isa Tuple && return map(map_fn, x)
x isa NamedTuple && return NamedTuple{keys(x)}(map(map_fn, values(x)))
@@ -12,8 +15,8 @@ end

@inline function Lux.__dualify(::Type{Tag}, ::Type{T}, x, u) where {Tag, T}
if x isa AbstractArray
return ForwardDiff.Dual{
Tag, T, 1}.(x, ForwardDiff.Partials{1, T}.(tuple.(reshape(u, size(x)))))
bfn(xᵢ, uᵢ) = ForwardDiff.Dual{Tag, T, 1}(xᵢ, ForwardDiff.Partials{1, T}(uᵢ))
return bfn.(x, tuple.(reshape(u, size(x))))
end
x isa Tuple && return map((xᵢ, uᵢ) -> Lux.__dualify(Tag, T, xᵢ, uᵢ), x, u)
x isa NamedTuple &&
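
The rewrite above avoids broadcasting calls whose first argument is the Tag type (ForwardDiff.partials.(Tag, x, i) and the analogous Dual construction); instead a small function that captures Tag is defined and then broadcast over the array. A standalone sketch of the same pattern follows; MyTag and the concrete arrays are assumptions for illustration, while the ForwardDiff calls mirror the diff.

    using ForwardDiff

    struct MyTag end   # hypothetical tag type, stands in for the Tag parameter above

    x = Float32[1.0, 2.0, 3.0]
    u = Float32[0.1, 0.2, 0.3]

    # __dualify-style seeding: attach a single partial to every element
    dual_fn(xᵢ, uᵢ) = ForwardDiff.Dual{MyTag, Float32, 1}(xᵢ, ForwardDiff.Partials{1, Float32}(uᵢ))
    xd = dual_fn.(x, tuple.(u))

    # __partials-style extraction: read the i-th partial back from every element
    part_fn(xᵢ, iᵢ) = ForwardDiff.partials(MyTag, xᵢ, iᵢ)
    part_fn.(xd, 1)   # recovers u
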
3 changes: 1 addition & 2 deletions src/layers/basic.jl
@@ -247,8 +247,7 @@ function WrappedFunction(f::F) where {F}
# Not a depwarn but helpful to call this
Base.depwarn("The current default of `:direct_call` will be replaced with \
`:runtime_check` from v0.6). Please make sure that the assumptions of \
this function are correct or specific \
`WrappedFunction{:direct_call}(f)`",
this function are correct or specify `WrappedFunction{:direct_call}(f)`",
:WrappedFunction)
return WrappedFunction{:direct_call}(f)
end
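
The reworded warning now reads cleanly: verify the assumptions of direct calling, or construct the layer explicitly. A one-line usage sketch of the explicit form the message recommends (the wrapped function here is an arbitrary example):

    using Lux
    layer = WrappedFunction{:direct_call}(x -> x .^ 2)   # explicit mode, no warning
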
16 changes: 0 additions & 16 deletions test/core_tests.jl

This file was deleted.

7 changes: 1 addition & 6 deletions test/helpers/batched_ad_tests.jl
@@ -4,9 +4,6 @@
rng = StableRNG(12345)

@testset "$mode" for (mode, aType, dev, ongpu) in MODES
# FIXME: AMDGPU takes too long right now
mode === "amdgpu" && continue

models = (
Chain(Conv((3, 3), 2 => 4, gelu; pad=SamePad()),
Conv((3, 3), 4 => 2, gelu; pad=SamePad()), FlattenLayer(), Dense(18 => 2)),
@@ -84,16 +81,14 @@ end
rng = StableRNG(12345)

@testset "$mode" for (mode, aType, dev, ongpu) in MODES
# FIXME: AMDGPU takes too long right now
mode === "amdgpu" && continue

models = (
Chain(Conv((3, 3), 2 => 4, gelu; pad=SamePad()),
Conv((3, 3), 4 => 2, gelu; pad=SamePad()), FlattenLayer(), Dense(18 => 2)),
Chain(Dense(2, 4, gelu), Dense(4, 2)))
Xs = (aType(randn(rng, Float32, 3, 3, 2, 4)), aType(randn(rng, Float32, 2, 4)))

for (model, X) in zip(models, Xs), backend in (AutoZygote(), AutoForwardDiff())
model = maybe_rewrite_to_crosscor(mode, model)
ps, st = Lux.setup(rng, model) |> dev

function loss_function_batched(model, x, ps, st)
28 changes: 9 additions & 19 deletions test/helpers/nestedad_tests.jl
@@ -6,9 +6,6 @@
rng = StableRNG(1234)

@testset "$mode" for (mode, aType, dev, ongpu) in MODES
# FIXME: AMDGPU takes too long right now
mode === "amdgpu" && continue

Xs = (aType(randn(rng, Float32, 3, 3, 2, 4)), aType(randn(rng, Float32, 2, 4)),
aType(randn(rng, Float32, 2, 4)), aType(randn(rng, Float32, 3, 3, 2, 4)))
models = (
@@ -22,6 +19,7 @@
BatchNorm(2), FlattenLayer(), Dense(18 => 1)))

for (X, model) in zip(Xs, models)
model = maybe_rewrite_to_crosscor(mode, model)
ps, st = Lux.setup(rng, model) |> dev

# smodel | ForwardDiff.jacobian
@@ -52,8 +50,9 @@
(loss_function1, loss_function2, loss_function3, loss_function4)

for loss_fn in loss_fns
@test_nowarn loss_fn(model, X, ps, st)
@test loss_fn(model, X, ps, st) isa Number
l = loss_fn(model, X, ps, st)
@test l isa Number
@test isfinite(l) && !isnan(l)

_, ∂x, ∂ps, _ = Zygote.gradient(loss_fn, model, X, ps, st)

@@ -84,9 +83,6 @@ end
rng = StableRNG(1234)

@testset "$mode" for (mode, aType, dev, ongpu) in MODES
# FIXME: AMDGPU takes too long right now
mode === "amdgpu" && continue

Xs = (aType(randn(rng, Float32, 3, 3, 2, 4)), aType(randn(rng, Float32, 2, 4)),
aType(randn(rng, Float32, 2, 4)), aType(randn(rng, Float32, 3, 3, 2, 4)))
models = (
@@ -100,6 +96,7 @@ end
BatchNorm(2), FlattenLayer(), Dense(18 => 1)))

for (X, model) in zip(Xs, models)
model = maybe_rewrite_to_crosscor(mode, model)
ps, st = Lux.setup(rng, model)
ps = ps |> ComponentArray |> dev
st = st |> dev
@@ -134,8 +131,9 @@ end
(loss_function1, loss_function2, loss_function3, loss_function4)

for loss_fn in loss_fns
@test_nowarn loss_fn(model, X, ps, st)
@test loss_fn(model, X, ps, st) isa Number
l = loss_fn(model, X, ps, st)
@test l isa Number
@test isfinite(l) && !isnan(l)

_, ∂x, ∂ps, _ = Zygote.gradient(loss_fn, model, X, ps, st)

@@ -166,9 +164,6 @@ end
rng = StableRNG(1234)

@testset "$mode" for (mode, aType, dev, ongpu) in MODES
# FIXME: AMDGPU takes too long right now
mode === "amdgpu" && continue

@testset "Structured Matrix: Issue LuxDL/Lux.jl#602" begin
model = @compact(; potential=Dense(5 => 5, gelu)) do x
@return reshape(diag(only(Zygote.jacobian(potential, x))), size(x))
@@ -206,16 +201,14 @@ end
rng = StableRNG(1234)

@testset "$mode" for (mode, aType, dev, ongpu) in MODES
# FIXME: AMDGPU takes too long right now
mode === "amdgpu" && continue

models = (
Chain(Conv((3, 3), 2 => 4, gelu; pad=SamePad()), BatchNorm(4),
Conv((3, 3), 4 => 1, gelu; pad=SamePad())),
Chain(Dense(2, 4, gelu), Dense(4, 1)))
Xs = (aType(randn(rng, Float32, 3, 3, 2, 4)), aType(randn(rng, Float32, 2, 4)))

for (model, X) in zip(models, Xs)
model = maybe_rewrite_to_crosscor(mode, model)
ps, st = Lux.setup(rng, model) |> dev

vjp_input = first(model(X, ps, st))
@@ -278,9 +271,6 @@ end
rng = StableRNG(1234)

@testset "$mode" for (mode, aType, dev, ongpu) in MODES
# FIXME: AMDGPU takes too long right now
mode === "amdgpu" && continue

x = rand(rng, 3, 3) |> aType
v = vec(rand(rng, 3, 3)) |> aType

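
Besides dropping the AMDGPU skips, these tests now route models through maybe_rewrite_to_crosscor (a helper from the shared test setup, not shown in this diff) and replace the @test_nowarn double evaluation with a single call whose result must be a finite Number. For orientation, a hedged sketch of the structured-matrix case from issue LuxDL/Lux.jl#602 exercised above; the model definition is copied from the diff, while the input size and the surrounding calls are assumptions.

    using Lux, Zygote, ForwardDiff, LinearAlgebra, StableRNGs

    rng = StableRNG(1234)
    model = @compact(; potential=Dense(5 => 5, gelu)) do x
        @return reshape(diag(only(Zygote.jacobian(potential, x))), size(x))
    end
    ps, st = Lux.setup(rng, model)
    x = randn(rng, Float32, 5, 3)          # assumed input size

    y, _ = model(x, ps, st)                # forward pass runs the inner Zygote.jacobian
    # outer gradient; Lux's nested AD handles differentiating through the inner jacobian
    ∂x, ∂ps = Zygote.gradient((x, ps) -> sum(abs2, first(model(x, ps, st))), x, ps)
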
11 changes: 7 additions & 4 deletions test/layers/containers_tests.jl
@@ -4,7 +4,8 @@

@testset "$mode" for (mode, aType, device, ongpu) in MODES
@testset "zero sum" begin
layer = SkipConnection(WrappedFunction(zero), (a, b) -> a .+ b)
layer = SkipConnection(
WrappedFunction{:direct_call}(Broadcast.BroadcastFunction(zero)), .+)
display(layer)
ps, st = Lux.setup(rng, layer) .|> device
x = randn(rng, 10, 10, 10, 10) |> aType
@@ -13,7 +14,7 @@

@jet layer(x, ps, st)
__f = x -> sum(first(layer(x, ps, st)))
@eval @test_gradients $__f $x atol=1.0f-3 rtol=1.0f-3 reverse_diff_broken=true gpu_testing=$ongpu
@eval @test_gradients $__f $x atol=1.0f-3 rtol=1.0f-3 gpu_testing=$ongpu
end

@testset "concat size" begin
@@ -36,7 +37,9 @@ end

@testset "$mode" for (mode, aType, device, ongpu) in MODES
@testset "zero sum" begin
layer = Parallel(+, WrappedFunction(zero), NoOpLayer())
layer = Parallel(
+, WrappedFunction{:direct_call}(Broadcast.BroadcastFunction(zero)),
NoOpLayer())
@test :layer_1 in keys(layer) && :layer_2 in keys(layer)
display(layer)
ps, st = Lux.setup(rng, layer) .|> device
Expand All @@ -46,7 +49,7 @@ end

@jet layer(x, ps, st)
__f = x -> sum(first(layer(x, ps, st)))
@eval @test_gradients $__f $x atol=1.0f-3 rtol=1.0f-3 reverse_diff_broken=true gpu_testing=$ongpu
@eval @test_gradients $__f $x atol=1.0f-3 rtol=1.0f-3 gpu_testing=$ongpu
end

@testset "concat size" begin
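
The zero-sum tests now wrap Broadcast.BroadcastFunction(zero) in an explicit WrappedFunction{:direct_call} and use .+ as the combiner, which also removes the need for the reverse_diff_broken flag. A minimal sketch of what the two broadcast function objects do on their own (the values are illustrative):

    bzero = Broadcast.BroadcastFunction(zero)   # callable that broadcasts zero over its argument
    bzero([1.0 2.0; 3.0 4.0])                   # == [0.0 0.0; 0.0 0.0]

    bplus = (.+)                                # since Julia 1.6, (.+) is Broadcast.BroadcastFunction(+)
    bplus([1.0, 2.0], [3.0, 4.0])               # == [4.0, 6.0]
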