Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix various type-instabilities #329

Merged
merged 9 commits into from
Dec 16, 2020
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/rulesets/Base/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ function rrule(::typeof(reshape), A::AbstractArray, dims::Int...)
A_dims = size(A)
function reshape_pullback(Ȳ)
∂A = reshape(Ȳ, A_dims)
return (NO_FIELDS, ∂A, fill(DoesNotExist(), length(dims))...)
∂dims = broadcast(_ -> DoesNotExist(), dims)
return (NO_FIELDS, ∂A, ∂dims...)
end
return reshape(A, dims...), reshape_pullback
end
Expand Down
12 changes: 7 additions & 5 deletions src/rulesets/Base/fastmath_able.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ let
## abs
function frule((_, Δx), ::typeof(abs), x::Union{Real, Complex})
Ω = abs(x)
signx = x isa Real ? sign(x) : x / ifelse(iszero(x), one(Ω), Ω)
# `ifelse` is applied only to denominator to ensure type-stability.
signx = x isa Real ? sign(x) : x / ifelse(iszero(x), one(Ω), Ω)
return Ω, _realconjtimes(signx, Δx)
end

Expand Down Expand Up @@ -108,7 +108,8 @@ let
function frule((_, Δx), ::typeof(angle), x)
Ω = angle(x)
# `ifelse` is applied only to denominator to ensure type-stability.
∂Ω = _imagconjtimes(x, Δx) / ifelse(iszero(x), one(x), abs2(x))
n = ifelse(iszero(x), one(real(x)), abs2(x))
∂Ω = _imagconjtimes(x, Δx) / n
return Ω, ∂Ω
end

Expand All @@ -127,8 +128,9 @@ let
function angle_pullback(ΔΩ)
x, y = reim(z)
Δu, Δv = reim(ΔΩ)
return (NO_FIELDS, (-y + im*x)*Δu/ifelse(iszero(z), one(z), abs2(z)))
# `ifelse` is applied only to denominator to ensure type-stability.
n = ifelse(iszero(z), one(real(z)), abs2(z))
return (NO_FIELDS, (-y + im*x)*Δu/n)
end
return angle(z), angle_pullback
end
Expand Down Expand Up @@ -185,14 +187,14 @@ let
# `sign`

function frule((_, Δx), ::typeof(sign), x)
n = ifelse(iszero(x), one(x), abs(x))
n = ifelse(iszero(x), one(real(x)), abs(x))
Ω = x isa Real ? sign(x) : x / n
∂Ω = Ω * (_imagconjtimes(Ω, Δx) / n) * im
return Ω, ∂Ω
end

function rrule(::typeof(sign), x)
n = ifelse(iszero(x), one(x), abs(x))
n = ifelse(iszero(x), one(real(x)), abs(x))
Ω = x isa Real ? sign(x) : x / n
function sign_pullback(ΔΩ)
∂x = Ω * (_imagconjtimes(Ω, ΔΩ) / n) * im
Expand Down
3 changes: 2 additions & 1 deletion src/rulesets/Base/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ function rrule(::typeof(getindex), x::Array{<:Number}, inds...)
@thunk(getindex_add!(zero(x))),
getindex_add!
)
return (NO_FIELDS, x̄, (DoesNotExist() for _ in inds)...)
īnds = broadcast(_ -> DoesNotExist(), inds)
return (NO_FIELDS, x̄, īnds...)
end

return y, getindex_pullback
Expand Down
14 changes: 6 additions & 8 deletions src/rulesets/LinearAlgebra/blas.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,13 @@ rrule(::typeof(BLAS.dot), x, y) = rrule(dot, x, y)

function rrule(::typeof(BLAS.dot), n, X, incx, Y, incy)
Ω = BLAS.dot(n, X, incx, Y, incy)
function blas_dot_pullback(::Zero)
return (NO_FIELDS, DoesNotExist(), Zero(), DoesNotExist(), Zero(), DoesNotExist())
end
function blas_dot_pullback(ΔΩ)
if ΔΩ isa Zero
sethaxen marked this conversation as resolved.
Show resolved Hide resolved
∂X = Zero()
∂Y = Zero()
else
ΔΩ = extern(ΔΩ)
∂X = @thunk scal!(n, ΔΩ, blascopy!(n, Y, incy, _zeros(X), incx), incx)
∂Y = @thunk scal!(n, ΔΩ, blascopy!(n, X, incx, _zeros(Y), incy), incy)
end
ΔΩ = extern(ΔΩ)
sethaxen marked this conversation as resolved.
Show resolved Hide resolved
∂X = @thunk scal!(n, ΔΩ, blascopy!(n, Y, incy, _zeros(X), incx), incx)
∂Y = @thunk scal!(n, ΔΩ, blascopy!(n, X, incx, _zeros(Y), incy), incy)
return (NO_FIELDS, DoesNotExist(), ∂X, DoesNotExist(), ∂Y, DoesNotExist())
end
return Ω, blas_dot_pullback
Expand Down
15 changes: 12 additions & 3 deletions src/rulesets/LinearAlgebra/norm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ end

function _normp_back_x(x, p, y, Δy)
c = real(Δy) / y
∂x = broadcast(x) do xi
∂x = similar(x)
broadcast!(∂x, x) do xi
a = norm(xi)
∂xi = xi * ((a / y)^(p - 2) * c)
return ifelse(isfinite(∂xi), ∂xi, zero(∂xi))
Expand Down Expand Up @@ -181,7 +182,11 @@ function rrule(
return y, norm1_pullback
end

_norm1_back(x, y, Δy) = sign.(x) .* real(Δy)
function _norm1_back(x, y, Δy)
∂x = similar(x)
∂x .= sign.(x) .* real(Δy)
return ∂x
end
Comment on lines +185 to +189
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels bad that we have to do this.
Should we open an issue on JuliaLang/julia about it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I think this is necessarily type-unstable, e.g.

julia> x = Diagonal([1.0, 2.0, 3.0]);

julia> x .* 0
3×3 Diagonal{Float64,Array{Float64,1}}:
 0.0       
     0.0   
         0.0

julia> x .* 10
3×3 Diagonal{Float64,Array{Float64,1}}:
 10.0         
      20.0    
           30.0

julia> x .* Inf
3×3 Array{Float64,2}:
  Inf  NaN   NaN
 NaN    Inf  NaN
 NaN   NaN    Inf

julia> x .* NaN
3×3 Array{Float64,2}:
 NaN  NaN  NaN
 NaN  NaN  NaN
 NaN  NaN  NaN

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that this breaks gradient(norm, [1,2,3]).


#####
##### `norm2`
Expand All @@ -206,7 +211,11 @@ function _norm2_forward(x, Δx, y)
∂y = real(dot(x, Δx)) * pinv(y)
return ∂y
end
_norm2_back(x, y, Δy) = x .* (real(Δy) * pinv(y))
function _norm2_back(x, y, Δy)
∂x = similar(x)
∂x .= x .* (real(Δy) * pinv(y))
return ∂x
end

#####
##### `normalize`
Expand Down
6 changes: 2 additions & 4 deletions src/rulesets/LinearAlgebra/structured.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@ const SquareMatrix{T} = Union{Diagonal{T}, AbstractTriangular{T}}
function rrule(::typeof(/), A::AbstractMatrix{<:Real}, B::T) where T<:SquareMatrix{<:Real}
Y = A / B
function slash_pullback(Ȳ)
S = T.name.wrapper
∂A = @thunk Ȳ / B'
∂B = @thunk S(-Y' * (Ȳ / B'))
∂B = @thunk _unionallof(T)(-Y' * (Ȳ / B'))
return (NO_FIELDS, ∂A, ∂B)
end
return Y, slash_pullback
Expand All @@ -19,8 +18,7 @@ end
function rrule(::typeof(\), A::T, B::AbstractVecOrMat{<:Real}) where T<:SquareMatrix{<:Real}
Y = A \ B
function backslash_pullback(Ȳ)
S = T.name.wrapper
∂A = @thunk S(-(A' \ Ȳ) * Y')
∂A = @thunk _unionallof(T)(-(A' \ Ȳ) * Y')
∂B = @thunk A' \ Ȳ
return NO_FIELDS, ∂A, ∂B
end
Expand Down
2 changes: 1 addition & 1 deletion src/rulesets/LinearAlgebra/symmetric.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ end
function rrule(T::Type{<:LinearAlgebra.HermOrSym}, A::AbstractMatrix, uplo)
Ω = T(A, uplo)
function HermOrSym_pullback(ΔΩ)
return (NO_FIELDS, _symherm_back(T, ΔΩ, Ω.uplo), DoesNotExist())
return (NO_FIELDS, _symherm_back(typeof(Ω), ΔΩ, Ω.uplo), DoesNotExist())
end
return Ω, HermOrSym_pullback
end
Expand Down
2 changes: 2 additions & 0 deletions src/rulesets/LinearAlgebra/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,5 @@ function _eyesubx!(X::AbstractMatrix)
end

_extract_imag(x) = complex(0, imag(x))

_unionallof(::Type{T}) where {T} = T.name.wrapper
sethaxen marked this conversation as resolved.
Show resolved Hide resolved