Commit

Massive reformat
Saransh-cpp committed Oct 16, 2022
1 parent e502dba commit 3cea17e
Showing 47 changed files with 5,189 additions and 4,866 deletions.
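The diff below is consistent with a repository-wide automated formatting pass. As a rough sketch only (the commit does not say which tool or style settings were used), such a pass could be driven by JuliaFormatter.jl:

    # Assumed sketch of a whole-repository formatting pass.
    # JuliaFormatter and the SciMLStyle choice are assumptions, not stated in this commit.
    using JuliaFormatter
    format(".", SciMLStyle(); verbose = true)   # rewrites every .jl file under the current directory
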
117 changes: 61 additions & 56 deletions docs/make.jl
@@ -1,62 +1,67 @@
using Documenter, Flux, NNlib, Functors, MLUtils, BSON, Optimisers, OneHotArrays, Zygote, ChainRulesCore

using Documenter, Flux, NNlib, Functors, MLUtils, BSON, Optimisers, OneHotArrays, Zygote,
ChainRulesCore

DocMeta.setdocmeta!(Flux, :DocTestSetup, :(using Flux); recursive = true)

makedocs(
modules = [Flux, NNlib, Functors, MLUtils, BSON, Optimisers, OneHotArrays, Zygote, ChainRulesCore, Base],
doctest = false,
sitename = "Flux",
# strict = [:cross_references,],
pages = [
"Getting Started" => [
"Welcome" => "index.md",
"Quick Start" => "models/quickstart.md",
"Fitting a Line" => "models/overview.md",
"Gradients and Layers" => "models/basics.md",
],
"Building Models" => [
"Built-in Layers 📚" => "models/layers.md",
"Recurrence" => "models/recurrence.md",
"Activation Functions 📚" => "models/activation.md",
"NNlib.jl 📚 (`softmax`, `conv`, ...)" => "models/nnlib.md",
],
"Handling Data" => [
"MLUtils.jl 📚 (`DataLoader`, ...)" => "data/mlutils.md",
"OneHotArrays.jl 📚 (`onehot`, ...)" => "data/onehot.md",
],
"Training Models" => [
"Training" => "training/training.md",
"Regularisation" => "models/regularisation.md",
"Loss Functions 📚" => "models/losses.md",
"Optimisation Rules 📚" => "training/optimisers.md", # TODO move optimiser intro up to Training
"Callback Helpers 📚" => "training/callbacks.md",
"Zygote.jl 📚 (`gradient`, ...)" => "training/zygote.md",
],
"Model Tools" => [
"GPU Support" => "gpu.md",
"Saving & Loading" => "saving.md",
"Shape Inference 📚" => "outputsize.md",
"Weight Initialisation 📚" => "utilities.md",
"Flat vs. Nested 📚" => "destructure.md",
"Functors.jl 📚 (`fmap`, ...)" => "models/functors.md",
makedocs(modules = [
Flux,
NNlib,
Functors,
MLUtils,
BSON,
Optimisers,
OneHotArrays,
Zygote,
ChainRulesCore,
Base,
],
"Performance Tips" => "performance.md",
"Flux's Ecosystem" => "ecosystem.md",
"Tutorials" => [ # TODO, maybe
"Custom Layers" => "models/advanced.md", # TODO move freezing to Training
doctest = false,
sitename = "Flux",
# strict = [:cross_references,],
pages = [
"Getting Started" => [
"Welcome" => "index.md",
"Quick Start" => "models/quickstart.md",
"Fitting a Line" => "models/overview.md",
"Gradients and Layers" => "models/basics.md",
],
"Building Models" => [
"Built-in Layers 📚" => "models/layers.md",
"Recurrence" => "models/recurrence.md",
"Activation Functions 📚" => "models/activation.md",
"NNlib.jl 📚 (`softmax`, `conv`, ...)" => "models/nnlib.md",
],
"Handling Data" => [
"MLUtils.jl 📚 (`DataLoader`, ...)" => "data/mlutils.md",
"OneHotArrays.jl 📚 (`onehot`, ...)" => "data/onehot.md",
],
"Training Models" => [
"Training" => "training/training.md",
"Regularisation" => "models/regularisation.md",
"Loss Functions 📚" => "models/losses.md",
"Optimisation Rules 📚" => "training/optimisers.md", # TODO move optimiser intro up to Training
"Callback Helpers 📚" => "training/callbacks.md",
"Zygote.jl 📚 (`gradient`, ...)" => "training/zygote.md",
],
"Model Tools" => [
"GPU Support" => "gpu.md",
"Saving & Loading" => "saving.md",
"Shape Inference 📚" => "outputsize.md",
"Weight Initialisation 📚" => "utilities.md",
"Flat vs. Nested 📚" => "destructure.md",
"Functors.jl 📚 (`fmap`, ...)" => "models/functors.md",
],
"Performance Tips" => "performance.md",
"Flux's Ecosystem" => "ecosystem.md",
"Tutorials" => [ # TODO, maybe
"Custom Layers" => "models/advanced.md", # TODO move freezing to Training
],
],
],
format = Documenter.HTML(
sidebar_sitename = false,
analytics = "UA-36890222-9",
assets = ["assets/flux.css"],
prettyurls = get(ENV, "CI", nothing) == "true"
),
)
format = Documenter.HTML(sidebar_sitename = false,
analytics = "UA-36890222-9",
assets = ["assets/flux.css"],
prettyurls = get(ENV, "CI", nothing) == "true"))

deploydocs(
repo = "github.com/FluxML/Flux.jl.git",
target = "build",
push_preview = true
)
deploydocs(repo = "github.com/FluxML/Flux.jl.git",
target = "build",
push_preview = true)
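
A minimal sketch of building these docs locally, assuming the standard Documenter.jl layout with a dedicated docs/ project (this workflow is not part of the commit):

    # Assumed local build of the Flux documentation, run from the repository root.
    using Pkg
    Pkg.activate("docs")        # docs/ carries its own Project.toml (assumed layout)
    Pkg.develop(path = ".")     # point the docs environment at the local Flux checkout
    Pkg.instantiate()
    include("docs/make.jl")     # runs makedocs and, on CI, deploydocs
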
25 changes: 14 additions & 11 deletions perf/bench_utils.jl
@@ -3,36 +3,39 @@ using Flux
using CUDA
using Zygote: pullback, ignore


fw(m, x) = m(x)
bw(back) = back(1f0)
bw(back) = back(1.0f0)
fwbw(m, ps, x) = gradient(() -> sum(fw(m, x)), ps)
pb(m, ps, x) = pullback(() -> sum(fw(m, x)), ps)

function run_benchmark(model, x; cuda=true)

if cuda
function run_benchmark(model, x; cuda = true)
if cuda
model = model |> gpu
x = x |> gpu
end

ps = Flux.params(model)
y, back = pb(model, ps, x)

y, back = pb(model, ps, x)

if cuda
CUDA.allowscalar(false)
# CUDA.device!(3)
println(" forward")
fw(model, x); GC.gc(); CUDA.reclaim(); #warmup
fw(model, x)
GC.gc()
CUDA.reclaim() #warmup
@btime CUDA.@sync(fw($model, $x)) teardown=(GC.gc(); CUDA.reclaim())

println(" backward")
bw(back); GC.gc(); CUDA.reclaim(); #warmup
bw(back)
GC.gc()
CUDA.reclaim() #warmup
@btime CUDA.@sync(bw($back)) teardown=(GC.gc(); CUDA.reclaim())

println(" forw and back")
fwbw(model, ps, x); GC.gc(); CUDA.reclaim(); #warmup
fwbw(model, ps, x)
GC.gc()
CUDA.reclaim() #warmup
@btime CUDA.@sync(fwbw($model, $ps, $x)) teardown=(GC.gc(); CUDA.reclaim())
else
println(" forward")
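
For context, a small assumed usage sketch of the benchmarking harness defined above, mirroring how the perf scripts below call it:

    # Assumed standalone use of perf/bench_utils.jl.
    # Requires BenchmarkTools (for @btime) and CUDA.jl to be installed.
    using Flux, BenchmarkTools
    include("perf/bench_utils.jl")

    model = Dense(16, 16)
    x = randn(Float32, 16, 8)
    run_benchmark(model, x, cuda = false)   # CPU-only timing of the forward and backward passes
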
6 changes: 3 additions & 3 deletions perf/conv.jl
@@ -1,8 +1,8 @@
for ch in [1, 3, 16, 64]
x = rand(Float32, 64, 64, ch, 64)
model = Conv((3,3), ch=>ch)
model = Conv((3, 3), ch => ch)
println("CPU ch=$ch")
run_benchmark(model, x, cuda=false)
run_benchmark(model, x, cuda = false)
println("CUDA ch=$ch")
run_benchmark(model, x, cuda=true)
run_benchmark(model, x, cuda = true)
end
4 changes: 2 additions & 2 deletions perf/dense.jl
@@ -2,7 +2,7 @@ for n in [2, 20, 200, 2000]
x = randn(Float32, n, n)
model = Dense(n, n)
println("CPU n=$n")
run_benchmark(model, x, cuda=false)
run_benchmark(model, x, cuda = false)
println("CUDA n=$n")
run_benchmark(model, x, cuda=true)
run_benchmark(model, x, cuda = true)
end
78 changes: 39 additions & 39 deletions perf/recurrent.jl
@@ -1,62 +1,62 @@


struct RNNWrapper{T}
rnn::T
rnn::T
end
Flux.@functor RNNWrapper

# Need to specialize for RNNWrapper.
fw(r::RNNWrapper, X::Vector{<:AbstractArray}) = begin
Flux.reset!(r.rnn)
[r.rnn(x) for x in X]
Flux.reset!(r.rnn)
[r.rnn(x) for x in X]
end

fw(r::RNNWrapper, X) = begin
Flux.reset!(r.rnn)
r.rnn(X)
Flux.reset!(r.rnn)
r.rnn(X)
end

fwbw(r::RNNWrapper, ps, X::Vector{<:AbstractArray}) = gradient(ps) do
y = fw(r, X)
sum(sum(y))
end
fwbw(r::RNNWrapper, ps, X::Vector{<:AbstractArray}) =
gradient(ps) do
y = fw(r, X)
return sum(sum(y))
end

pb(r::RNNWrapper, ps, X::Vector{<:AbstractArray}) = pullback(ps) do
y = fw(r, X)
sum(sum(y))
end
pb(r::RNNWrapper, ps, X::Vector{<:AbstractArray}) =
pullback(ps) do
y = fw(r, X)
return sum(sum(y))
end

function rnn_benchmark_sweep(data_creator::Function, rnn_type)
for n in [2, 20, 200, 1000], ts in [1, 4, 16, 64]
x, x_n = data_creator(n, ts)
model = RNNWrapper(rnn_type(n, n))

println("$rnn_type $x_n CPU n=$n, ts=$ts")
run_benchmark(model, x, cuda=false)

println("$rnn_type $x_n CUDA n=$n, ts=$ts")
try
run_benchmark(model, x, cuda=true)
catch ex
@show typeof(ex)
if ex isa OutOfGPUMemoryError
@warn "Not enough GPU memory to run test"
else
rethrow(ex)
end
for n in [2, 20, 200, 1000], ts in [1, 4, 16, 64]
x, x_n = data_creator(n, ts)
model = RNNWrapper(rnn_type(n, n))

println("$rnn_type $x_n CPU n=$n, ts=$ts")
run_benchmark(model, x, cuda = false)

println("$rnn_type $x_n CUDA n=$n, ts=$ts")
try
run_benchmark(model, x, cuda = true)
catch ex
@show typeof(ex)
if ex isa OutOfGPUMemoryError
@warn "Not enough GPU memory to run test"
else
rethrow(ex)
end
end
end
end
end

for rnn_type in [Flux.RNN, Flux.GRU, Flux.LSTM]
rnn_benchmark_sweep(rnn_type) do n, ts
[randn(Float32, n, n) for _ in 1:ts], "Vec"
end
rnn_benchmark_sweep(rnn_type) do n, ts
return [randn(Float32, n, n) for _ in 1:ts], "Vec"
end
end

for rnn_type in [Flux.RNN, Flux.GRU, Flux.LSTM]
rnn_benchmark_sweep(rnn_type) do n, ts
randn(Float32, n, n, ts), "Block"
end
rnn_benchmark_sweep(rnn_type) do n, ts
return randn(Float32, n, n, ts), "Block"
end
end

82 changes: 40 additions & 42 deletions perf/vgg.jl
@@ -6,50 +6,48 @@ using CUDA
using Zygote: pullback

function vgg16()
Chain(
Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)),
BatchNorm(64),
Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)),
BatchNorm(64),
MaxPool((2,2)),
Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)),
BatchNorm(128),
Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)),
BatchNorm(128),
MaxPool((2,2)),
Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)),
BatchNorm(256),
Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
BatchNorm(256),
Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
BatchNorm(256),
MaxPool((2,2)),
Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)),
BatchNorm(512),
Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
BatchNorm(512),
Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
BatchNorm(512),
MaxPool((2,2)),
Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
BatchNorm(512),
Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
BatchNorm(512),
Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
BatchNorm(512),
MaxPool((2,2)),
flatten,
Dense(512, 4096, relu),
Dropout(0.5),
Dense(4096, 4096, relu),
Dropout(0.5),
Dense(4096, 10)
)
return Chain(Conv((3, 3), 3 => 64, relu, pad = (1, 1), stride = (1, 1)),
BatchNorm(64),
Conv((3, 3), 64 => 64, relu, pad = (1, 1), stride = (1, 1)),
BatchNorm(64),
MaxPool((2, 2)),
Conv((3, 3), 64 => 128, relu, pad = (1, 1), stride = (1, 1)),
BatchNorm(128),
Conv((3, 3), 128 => 128, relu, pad = (1, 1), stride = (1, 1)),
BatchNorm(128),
MaxPool((2, 2)),
Conv((3, 3), 128 => 256, relu, pad = (1, 1), stride = (1, 1)),
BatchNorm(256),
Conv((3, 3), 256 => 256, relu, pad = (1, 1), stride = (1, 1)),
BatchNorm(256),
Conv((3, 3), 256 => 256, relu, pad = (1, 1), stride = (1, 1)),
BatchNorm(256),
MaxPool((2, 2)),
Conv((3, 3), 256 => 512, relu, pad = (1, 1), stride = (1, 1)),
BatchNorm(512),
Conv((3, 3), 512 => 512, relu, pad = (1, 1), stride = (1, 1)),
BatchNorm(512),
Conv((3, 3), 512 => 512, relu, pad = (1, 1), stride = (1, 1)),
BatchNorm(512),
MaxPool((2, 2)),
Conv((3, 3), 512 => 512, relu, pad = (1, 1), stride = (1, 1)),
BatchNorm(512),
Conv((3, 3), 512 => 512, relu, pad = (1, 1), stride = (1, 1)),
BatchNorm(512),
Conv((3, 3), 512 => 512, relu, pad = (1, 1), stride = (1, 1)),
BatchNorm(512),
MaxPool((2, 2)),
flatten,
Dense(512, 4096, relu),
Dropout(0.5),
Dense(4096, 4096, relu),
Dropout(0.5),
Dense(4096, 10))
end

let model=vgg16(), x=rand(Float32, 32, 32, 3, 64)
let model = vgg16(), x = rand(Float32, 32, 32, 3, 64)
println("CPU benchmark")
run_benchmark(model, x, cuda=false)
run_benchmark(model, x, cuda = false)
println("CUDA benchmark")
run_benchmark(model, x, cuda=true)
run_benchmark(model, x, cuda = true)
end
