-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fixes gradient, Jacobian, Hessian, and vjp tests #2
Changes from all commits
512fd2d
caa72e1
4a763e3
b6f4804
5ed5c4e
2991b8a
ddf2863
4269f83
959fa01
5102f01
6941f5c
c744f92
5c0a147
48af108
a9f1538
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,16 +31,32 @@ primalvalue(x::Tuple) = map(primalvalue, x) | |
primalvalue(x) = x | ||
|
||
function derivative(ab::AbstractBackend, f, xs::Number...) | ||
return getindex.(jacobian(lowest(ab), f, xs...), 1) | ||
der = getindex.(jacobian(lowest(ab), f, xs...), 1) | ||
if der isa Tuple | ||
return der | ||
else | ||
return (der,) | ||
end | ||
end | ||
|
||
function gradient(ab::AbstractBackend, f, xs...) | ||
return adjoint.(jacobian(lowest(ab), f, xs...)) | ||
return reshape.(adjoint.(jacobian(lowest(ab), f, xs...)),size.(xs)) | ||
end | ||
function jacobian(ab::AbstractBackend, f, xs...) end | ||
function hessian(ab::AbstractBackend, f, xs...) | ||
return jacobian(secondlowest(ab), (xs...,) -> begin | ||
gradient(lowest(ab), f, xs...) | ||
end, xs...) | ||
xss = collect((xs...,)) | ||
counter = 0 | ||
# gradient returns tuple of gradient values with respect to inputs x,y ∈ xs | ||
# Hessian is the Jacobian of the individual gradients, i.e., the tuple of matrices | ||
# defined by ∂x∂x f, ∂y∂y f, in the case of a scalar valued function `f`. | ||
hess = map((xs...,)) do x | ||
counter += 1 | ||
_f = _x->f(setindex!(deepcopy(xss),_x,counter)...) | ||
return jacobian(secondlowest(ab),(x,)-> begin | ||
return gradient(lowest(ab), _f, x) | ||
end, x)[1] | ||
end | ||
return hess | ||
end | ||
|
||
function value_and_derivative(ab::AbstractBackend, f, xs::Number...) | ||
|
@@ -49,11 +65,15 @@ function value_and_derivative(ab::AbstractBackend, f, xs::Number...) | |
end | ||
function value_and_gradient(ab::AbstractBackend, f, xs...) | ||
value, jacs = value_and_jacobian(lowest(ab), f, xs...) | ||
return value, adjoint.(jacs) | ||
return value, reshape.(adjoint.(jacs),size.(xs)) | ||
end | ||
function value_and_jacobian(ab::AbstractBackend, f, xs...) | ||
local value | ||
primalcalled = false | ||
if lowest(ab) isa AbstractFiniteDifference | ||
value = primalvalue(ab, nothing, f, xs) | ||
primalcalled = true | ||
end | ||
jacs = jacobian(lowest(ab), (_xs...,) -> begin | ||
v = f(_xs...) | ||
if !primalcalled | ||
|
@@ -62,11 +82,16 @@ function value_and_jacobian(ab::AbstractBackend, f, xs...) | |
end | ||
return v | ||
end, xs...) | ||
|
||
return value, jacs | ||
end | ||
function value_and_hessian(ab::AbstractBackend, f, xs...) | ||
local value | ||
primalcalled = false | ||
if ab isa AbstractFiniteDifference | ||
value = primalvalue(ab, nothing, f, xs) | ||
primalcalled = true | ||
end | ||
hess = jacobian(secondlowest(ab), (_xs...,) -> begin | ||
v, g = value_and_gradient(lowest(ab), f, _xs...) | ||
if !primalcalled | ||
|
@@ -145,6 +170,10 @@ function value_and_pushforward_function( | |
@assert ds isa Tuple && length(ds) == length(xs) | ||
local value | ||
primalcalled = false | ||
if ab isa AbstractFiniteDifference | ||
value = primalvalue(ab, nothing, f, xs) | ||
primalcalled = true | ||
end | ||
pf = pushforward_function(lowest(ab), (_xs...,) -> begin | ||
vs = f(_xs...) | ||
if !primalcalled | ||
|
@@ -199,6 +228,10 @@ function value_and_pullback_function( | |
return (ws) -> begin | ||
local value | ||
primalcalled = false | ||
if ab isa AbstractFiniteDifference | ||
value = primalvalue(ab, nothing, f, xs) | ||
primalcalled = true | ||
end | ||
if ws === nothing | ||
vs = f(xs...) | ||
if !primalcalled | ||
|
@@ -224,63 +257,169 @@ struct LazyDerivative{B, F, X} | |
f::F | ||
xs::X | ||
end | ||
|
||
function Base.:*(d::LazyDerivative, y) | ||
return derivative(d.ab, d.f, d.xs...) * y | ||
return derivative(d.backend, d.f, d.xs...) * y | ||
end | ||
|
||
function Base.:*(y, d::LazyDerivative) | ||
return y * derivative(d.ab, d.f, d.xs...) | ||
return y * derivative(d.backend, d.f, d.xs...) | ||
end | ||
|
||
function Base.:*(d::LazyDerivative, y::Union{Number,Tuple}) | ||
return derivative(d.backend, d.f, d.xs...) .* y | ||
end | ||
|
||
function Base.:*(y::Union{Number,Tuple}, d::LazyDerivative) | ||
return y .* derivative(d.backend, d.f, d.xs...) | ||
end | ||
|
||
function Base.:*(d::LazyDerivative, y::AbstractArray) | ||
return map((d)-> d*y, derivative(d.backend, d.f, d.xs...)) | ||
end | ||
|
||
function Base.:*(y::AbstractArray, d::LazyDerivative) | ||
return map((d)-> y*d, derivative(d.backend, d.f, d.xs...)) | ||
end | ||
|
||
|
||
struct LazyGradient{B, F, X} | ||
backend::B | ||
f::F | ||
xs::X | ||
end | ||
Base.:*(d::LazyGradient, y) = gradient(d.ab, d.f, d.xs...) * y | ||
Base.:*(y, d::LazyGradient) = y * gradient(d.ab, d.f, d.xs...) | ||
Base.:*(d::LazyGradient, y) = gradient(d.backend, d.f, d.xs...) * y | ||
Base.:*(y, d::LazyGradient) = y * gradient(d.backend, d.f, d.xs...) | ||
|
||
function Base.:*(d::LazyGradient, y::Union{Number,Tuple}) | ||
if d.xs isa Tuple | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wanted to support: There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I see. |
||
return gradient(d.backend, d.f, d.xs...) .* y | ||
else | ||
return gradient(d.backend, d.f, d.xs) .* y | ||
end | ||
end | ||
|
||
function Base.:*(y::Union{Number,Tuple}, d::LazyGradient) | ||
if d.xs isa Tuple | ||
return y .* gradient(d.backend, d.f, d.xs...) | ||
else | ||
return y .* gradient(d.backend, d.f, d.xs) | ||
end | ||
end | ||
|
||
|
||
struct LazyJacobian{B, F, X} | ||
backend::B | ||
f::F | ||
xs::X | ||
end | ||
|
||
function Base.:*(d::LazyJacobian, ys) | ||
return pushforward_function(d.ab, d.f, d.xs...)(ys) | ||
if !(ys isa Tuple) | ||
ys = (ys, ) | ||
end | ||
if d.xs isa Tuple | ||
vjp = pushforward_function(d.backend, d.f, d.xs...)(ys) | ||
else | ||
vjp = pushforward_function(d.backend, d.f, d.xs)(ys) | ||
end | ||
if vjp isa Tuple | ||
return vjp | ||
else | ||
return (vjp,) | ||
end | ||
end | ||
|
||
function Base.:*(ys, d::LazyJacobian) | ||
if ys isa Tuple | ||
ya = adjoint.(ys) | ||
else | ||
ya = adjoint(ys) | ||
end | ||
return pullback_function(d.ab, d.f, d.xs...)(ya) | ||
if d.xs isa Tuple | ||
return pullback_function(d.backend, d.f, d.xs...)(ya) | ||
else | ||
return pullback_function(d.backend, d.f, d.xs)(ya) | ||
end | ||
end | ||
|
||
function Base.:*(d::LazyJacobian, ys::Number) | ||
if d.xs isa Tuple | ||
return jacobian(d.backend, d.f, d.xs...) .* ys | ||
else | ||
return jacobian(d.backend, d.f, d.xs) .* ys | ||
end | ||
end | ||
|
||
function Base.:*(ys::Number, d::LazyJacobian) | ||
if d.xs isa Tuple | ||
return jacobian(d.backend, d.f, d.xs...) .* ys | ||
else | ||
return jacobian(d.backend, d.f, d.xs) .* ys | ||
end | ||
end | ||
|
||
|
||
struct LazyHessian{B, F, X} | ||
backend::B | ||
f::F | ||
xs::X | ||
end | ||
|
||
function Base.:*(d::LazyHessian, ys) | ||
return pushforward_function( | ||
secondlowest(d.ab), | ||
(xs...,) -> gradient(lowest(d.ab), d.f, xs...), | ||
d.xs..., | ||
)(ys) | ||
if !(ys isa Tuple) | ||
ys = (ys, ) | ||
end | ||
|
||
if d.xs isa Tuple | ||
return pushforward_function( | ||
secondlowest(d.backend), | ||
(xs...,) -> gradient(lowest(d.backend), d.f, xs...), d.xs...,)(ys) | ||
else | ||
return pushforward_function( | ||
secondlowest(d.backend), | ||
(xs,) -> gradient(lowest(d.backend), d.f, xs),d.xs,)(ys) | ||
end | ||
end | ||
|
||
function Base.:*(ys, d::LazyHessian) | ||
if ys isa Tuple | ||
ya = adjoint.(ys) | ||
else | ||
ya = adjoint(ys) | ||
end | ||
return pullback_function( | ||
secondlowest(d.ab), | ||
(xs...,) -> gradient(lowest(d.ab), d.f, xs...), | ||
d.xs..., | ||
)(ya) | ||
if d.xs isa Tuple | ||
return pullback_function( | ||
secondlowest(d.backend), | ||
(xs...,) -> gradient(lowest(d.backend), d.f, xs...), | ||
d.xs..., | ||
)(ya) | ||
else | ||
return pullback_function( | ||
secondlowest(d.backend), | ||
(xs,) -> gradient(lowest(d.backend), d.f, xs)[1], | ||
d.xs, | ||
)(ya) | ||
end | ||
end | ||
|
||
function Base.:*(d::LazyHessian, ys::Number) | ||
if d.xs isa Tuple | ||
return hessian(d.backend, d.f, d.xs...).*ys | ||
else | ||
return hessian(d.backend, d.f, d.xs).*ys | ||
end | ||
end | ||
|
||
function Base.:*(ys::Number, d::LazyHessian) | ||
if d.xs isa Tuple | ||
return ys.*hessian(d.backend, d.f, d.xs...) | ||
else | ||
return ys.*hessian(d.backend, d.f, d.xs) | ||
end | ||
end | ||
|
||
|
||
function lazyderivative(ab::AbstractBackend, f, xs::Number...) | ||
return LazyDerivative(ab, f, xs) | ||
end | ||
|
@@ -350,6 +489,20 @@ function define_pushforward_function_and_friends(fdef) | |
pff(cols) | ||
end | ||
end | ||
elseif eltype(identity_like) <: AbstractMatrix | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you please comment this part? It's not clear to me what's happening here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This was the fix I added for the computation of the Hessian. identity_like = ([1.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 1.0],)
identity_like[1] = [1.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 1.0]
cols = [1.0, 0.0, 0.0, 0.0, 0.0]
cols = [0.0, 1.0, 0.0, 0.0, 0.0]
cols = [0.0, 0.0, 1.0, 0.0, 0.0]
cols = [0.0, 0.0, 0.0, 1.0, 0.0]
cols = [0.0, 0.0, 0.0, 0.0, 1.0] so it was mainly to fix the input/output for the additional There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but I think you are right that this needs some additional care.. I think the current version will break down once we'd like to compute a tuple of Hessians. |
||
# needed for the computation of the Hessian and Jacobian | ||
ret = hcat.(mapslices(identity_like[1], dims=1) do cols | ||
# cols loop over basis states | ||
pf = pff((cols,)) | ||
if typeof(pf) <: AbstractVector | ||
# to make the hcat. work / get correct matrix-like, non-flat output dimension | ||
return (pf, ) | ||
else | ||
return pf | ||
end | ||
end ...) | ||
return ret isa Tuple ? ret : (ret,) | ||
|
||
else | ||
return pff(identity_like) | ||
end | ||
|
@@ -373,6 +526,13 @@ function define_pullback_function_and_friends(fdef) | |
value_and_pbf(cols)[2]' | ||
end | ||
end | ||
elseif eltype(identity_like) <: AbstractMatrix | ||
# needed for Hessian computation: | ||
# value is a (grad,). Then, identity_like is a (matrix,). | ||
# cols loops over columns of the matrix | ||
return vcat.(mapslices(identity_like[1], dims=1) do cols | ||
adjoint.(value_and_pbf((cols,))[2]) | ||
end ...) | ||
else | ||
return adjoint.(value_and_pbf(identity_like)[2]) | ||
end | ||
|
@@ -427,4 +587,4 @@ function zero_matrix_like(x) | |
throw("The function `zero_matrix_like` is not defined for the type $(typeof(x)).") | ||
end | ||
|
||
end | ||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we should support multiplication if the derivative returns a tuple. This part of the code can be simplified.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't we want to support functions like
fder(x, y) = exp(y) * x + y * log(x)
for the LazyOperators as well? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm I guess we could.