diff --git a/Project.toml b/Project.toml
index 8efed07..38eefd4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -12,7 +12,8 @@ julia = "1"
 
 [extras]
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "FiniteDifferences"]
+test = ["Test", "FiniteDifferences", "Random"]
diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index 6992955..15ba75b 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -31,16 +31,32 @@ primalvalue(x::Tuple) = map(primalvalue, x)
 primalvalue(x) = x
 
 function derivative(ab::AbstractBackend, f, xs::Number...)
-    return getindex.(jacobian(lowest(ab), f, xs...), 1)
+    der = getindex.(jacobian(lowest(ab), f, xs...), 1)
+    if der isa Tuple
+        return der
+    else
+        return (der,)
+    end
 end
+
 function gradient(ab::AbstractBackend, f, xs...)
-    return adjoint.(jacobian(lowest(ab), f, xs...))
+    return reshape.(adjoint.(jacobian(lowest(ab), f, xs...)), size.(xs))
 end
 function jacobian(ab::AbstractBackend, f, xs...) end
 function hessian(ab::AbstractBackend, f, xs...)
-    return jacobian(secondlowest(ab), (xs...,) -> begin
-        gradient(lowest(ab), f, xs...)
-    end, xs...)
+    xss = collect((xs...,))
+    counter = 0
+    # `gradient` returns a tuple of gradients, one for each input x, y ∈ xs.
+    # The Hessian is the Jacobian of each of these gradients, i.e., for a
+    # scalar-valued function `f`, the tuple of matrices ∂x∂x f and ∂y∂y f.
+    hess = map((xs...,)) do x
+        counter += 1
+        _f = _x -> f(setindex!(deepcopy(xss), _x, counter)...)
+        return jacobian(secondlowest(ab), (x,) -> begin
+            return gradient(lowest(ab), _f, x)
+        end, x)[1]
+    end
+    return hess
 end
 
 function value_and_derivative(ab::AbstractBackend, f, xs::Number...)
@@ -49,11 +65,15 @@ function value_and_derivative(ab::AbstractBackend, f, xs::Number...)
 end
 function value_and_gradient(ab::AbstractBackend, f, xs...)
     value, jacs = value_and_jacobian(lowest(ab), f, xs...)
-    return value, adjoint.(jacs)
+    return value, reshape.(adjoint.(jacs), size.(xs))
 end
 function value_and_jacobian(ab::AbstractBackend, f, xs...)
     local value
     primalcalled = false
+    if lowest(ab) isa AbstractFiniteDifference
+        value = primalvalue(ab, nothing, f, xs)
+        primalcalled = true
+    end
     jacs = jacobian(lowest(ab), (_xs...,) -> begin
         v = f(_xs...)
         if !primalcalled
@@ -62,11 +82,16 @@ function value_and_jacobian(ab::AbstractBackend, f, xs...)
         end
         return v
     end, xs...)
+
     return value, jacs
 end
 function value_and_hessian(ab::AbstractBackend, f, xs...)
     local value
     primalcalled = false
+    if ab isa AbstractFiniteDifference
+        value = primalvalue(ab, nothing, f, xs)
+        primalcalled = true
+    end
     hess = jacobian(secondlowest(ab), (_xs...,) -> begin
         v, g = value_and_gradient(lowest(ab), f, _xs...)
         if !primalcalled
@@ -145,6 +170,10 @@ function value_and_pushforward_function(
     @assert ds isa Tuple && length(ds) == length(xs)
     local value
     primalcalled = false
+    if ab isa AbstractFiniteDifference
+        value = primalvalue(ab, nothing, f, xs)
+        primalcalled = true
+    end
     pf = pushforward_function(lowest(ab), (_xs...,) -> begin
         vs = f(_xs...)
         if !primalcalled
@@ -199,6 +228,10 @@ function value_and_pullback_function(
     return (ws) -> begin
         local value
         primalcalled = false
+        if ab isa AbstractFiniteDifference
+            value = primalvalue(ab, nothing, f, xs)
+            primalcalled = true
+        end
         if ws === nothing
             vs = f(xs...)
             if !primalcalled
@@ -224,63 +257,169 @@ struct LazyDerivative{B, F, X}
     f::F
     xs::X
 end
+
 function Base.:*(d::LazyDerivative, y)
-    return derivative(d.ab, d.f, d.xs...) * y
+    return derivative(d.backend, d.f, d.xs...) * y
 end
+
 function Base.:*(y, d::LazyDerivative)
-    return y * derivative(d.ab, d.f, d.xs...)
+    return y * derivative(d.backend, d.f, d.xs...)
+end
+
+function Base.:*(d::LazyDerivative, y::Union{Number,Tuple})
+    return derivative(d.backend, d.f, d.xs...) .* y
+end
+
+function Base.:*(y::Union{Number,Tuple}, d::LazyDerivative)
+    return y .* derivative(d.backend, d.f, d.xs...)
 end
+function Base.:*(d::LazyDerivative, y::AbstractArray)
+    return map((d) -> d * y, derivative(d.backend, d.f, d.xs...))
+end
+
+function Base.:*(y::AbstractArray, d::LazyDerivative)
+    return map((d) -> y * d, derivative(d.backend, d.f, d.xs...))
+end
+
+
 struct LazyGradient{B, F, X}
     backend::B
     f::F
     xs::X
 end
-Base.:*(d::LazyGradient, y) = gradient(d.ab, d.f, d.xs...) * y
-Base.:*(y, d::LazyGradient) = y * gradient(d.ab, d.f, d.xs...)
+Base.:*(d::LazyGradient, y) = gradient(d.backend, d.f, d.xs...) * y
+Base.:*(y, d::LazyGradient) = y * gradient(d.backend, d.f, d.xs...)
+
+function Base.:*(d::LazyGradient, y::Union{Number,Tuple})
+    if d.xs isa Tuple
+        return gradient(d.backend, d.f, d.xs...) .* y
+    else
+        return gradient(d.backend, d.f, d.xs) .* y
+    end
+end
+
+function Base.:*(y::Union{Number,Tuple}, d::LazyGradient)
+    if d.xs isa Tuple
+        return y .* gradient(d.backend, d.f, d.xs...)
+    else
+        return y .* gradient(d.backend, d.f, d.xs)
+    end
+end
+
 struct LazyJacobian{B, F, X}
     backend::B
     f::F
     xs::X
 end
+
 function Base.:*(d::LazyJacobian, ys)
-    return pushforward_function(d.ab, d.f, d.xs...)(ys)
+    if !(ys isa Tuple)
+        ys = (ys,)
+    end
+    # this computes a Jacobian-vector product (pushforward)
+    if d.xs isa Tuple
+        jvp = pushforward_function(d.backend, d.f, d.xs...)(ys)
+    else
+        jvp = pushforward_function(d.backend, d.f, d.xs)(ys)
+    end
+    if jvp isa Tuple
+        return jvp
+    else
+        return (jvp,)
+    end
 end
+
 function Base.:*(ys, d::LazyJacobian)
     if ys isa Tuple
         ya = adjoint.(ys)
     else
         ya = adjoint(ys)
     end
-    return pullback_function(d.ab, d.f, d.xs...)(ya)
+    if d.xs isa Tuple
+        return pullback_function(d.backend, d.f, d.xs...)(ya)
+    else
+        return pullback_function(d.backend, d.f, d.xs)(ya)
+    end
+end
+
+function Base.:*(d::LazyJacobian, ys::Number)
+    if d.xs isa Tuple
+        return jacobian(d.backend, d.f, d.xs...) .* ys
+    else
+        return jacobian(d.backend, d.f, d.xs) .* ys
+    end
 end
+function Base.:*(ys::Number, d::LazyJacobian)
+    if d.xs isa Tuple
+        return jacobian(d.backend, d.f, d.xs...) .* ys
+    else
+        return jacobian(d.backend, d.f, d.xs) .* ys
+    end
+end
+
+
 struct LazyHessian{B, F, X}
     backend::B
     f::F
     xs::X
 end
+
 function Base.:*(d::LazyHessian, ys)
-    return pushforward_function(
-        secondlowest(d.ab),
-        (xs...,) -> gradient(lowest(d.ab), d.f, xs...),
-        d.xs...,
-    )(ys)
+    if !(ys isa Tuple)
+        ys = (ys,)
+    end
+
+    if d.xs isa Tuple
+        return pushforward_function(
+            secondlowest(d.backend),
+            (xs...,) -> gradient(lowest(d.backend), d.f, xs...), d.xs...,)(ys)
+    else
+        return pushforward_function(
+            secondlowest(d.backend),
+            (xs,) -> gradient(lowest(d.backend), d.f, xs), d.xs,)(ys)
+    end
 end
+
 function Base.:*(ys, d::LazyHessian)
     if ys isa Tuple
         ya = adjoint.(ys)
     else
         ya = adjoint(ys)
     end
-    return pullback_function(
-        secondlowest(d.ab),
-        (xs...,) -> gradient(lowest(d.ab), d.f, xs...),
-        d.xs...,
-    )(ya)
+    if d.xs isa Tuple
+        return pullback_function(
+            secondlowest(d.backend),
+            (xs...,) -> gradient(lowest(d.backend), d.f, xs...),
+            d.xs...,
+        )(ya)
+    else
+        return pullback_function(
+            secondlowest(d.backend),
+            (xs,) -> gradient(lowest(d.backend), d.f, xs)[1],
+            d.xs,
+        )(ya)
+    end
 end
+
+function Base.:*(d::LazyHessian, ys::Number)
+    if d.xs isa Tuple
+        return hessian(d.backend, d.f, d.xs...) .* ys
+    else
+        return hessian(d.backend, d.f, d.xs) .* ys
+    end
+end
+
+function Base.:*(ys::Number, d::LazyHessian)
+    if d.xs isa Tuple
+        return ys .* hessian(d.backend, d.f, d.xs...)
+    else
+        return ys .* hessian(d.backend, d.f, d.xs)
+    end
+end
+
+
 function lazyderivative(ab::AbstractBackend, f, xs::Number...)
     return LazyDerivative(ab, f, xs)
 end
@@ -350,6 +489,20 @@ function define_pushforward_function_and_friends(fdef)
                 pff(cols)
             end
         end
+    elseif eltype(identity_like) <: AbstractMatrix
+        # needed for the computation of the Hessian and the Jacobian
+        ret = hcat.(mapslices(identity_like[1], dims=1) do cols
+            # `cols` loops over the basis vectors
+            pf = pff((cols,))
+            if typeof(pf) <: AbstractVector
+                # wrap in a tuple so the broadcasted hcat produces a
+                # matrix-shaped (non-flat) output
+                return (pf,)
+            else
+                return pf
+            end
+        end ...)
+        return ret isa Tuple ? ret : (ret,)
+
     else
         return pff(identity_like)
     end
@@ -373,6 +526,13 @@ function define_pullback_function_and_friends(fdef)
                 value_and_pbf(cols)[2]'
             end
         end
+    elseif eltype(identity_like) <: AbstractMatrix
+        # needed for the Hessian computation:
+        # the value is a `(grad,)` tuple and `identity_like` is a `(matrix,)`
+        # tuple; `cols` loops over the columns of that matrix
+        return vcat.(mapslices(identity_like[1], dims=1) do cols
+            adjoint.(value_and_pbf((cols,))[2])
+        end ...)
     else
         return adjoint.(value_and_pbf(identity_like)[2])
     end
@@ -427,4 +587,4 @@ function zero_matrix_like(x)
     throw("The function `zero_matrix_like` is not defined for the type $(typeof(x)).")
 end
 
-end
\ No newline at end of file
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 324a265..23ce6dc 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,5 +1,7 @@
 using AbstractDifferentiation
 using Test, FiniteDifferences, LinearAlgebra
+using Random
+Random.seed!(1234)
 
 const FDM = FiniteDifferences
 
@@ -19,7 +21,9 @@ end
 FDMBackend2() = FDMBackend2(central_fdm(5, 1))
 const fdm_backend2 = FDMBackend2()
 AD.@primitive function pushforward_function(ab::FDMBackend2, f, xs...)
-    return (vs) -> jvp(ab.alg, f, tuple.(xs, vs)...)
+    return function (vs)
+        jvp(ab.alg, f, tuple.(xs, vs)...)
+    end
 end
 
 struct FDMBackend3{A} <: AD.AbstractFiniteDifference
@@ -30,16 +34,39 @@ const fdm_backend3 = FDMBackend3()
 AD.@primitive function pullback_function(ab::FDMBackend3, f, xs...)
     return function (vs)
         # Supports only single output
-        @assert length(vs) == 1
-        return j′vp(ab.alg, f, vs[1], xs...)
+        if vs isa AbstractVector
+            return j′vp(ab.alg, f, vs, xs...)
+        else
+            @assert length(vs) == 1
+            return j′vp(ab.alg, f, vs[1], xs...)
+        end
     end
 end
 
 fder(x, y) = exp(y) * x + y * log(x)
+dfderdx(x, y) = exp(y) + y / x
+dfderdy(x, y) = exp(y) * x + log(x)
+
 fgrad(x, y) = prod(x) + sum(y ./ (1:length(y)))
+dfgraddx(x, y) = prod(x) ./ x
+dfgraddy(x, y) = one(eltype(y)) ./ (1:length(y))
+dfgraddxdx(x, y) = prod(x) ./ (x * x') - Diagonal(diag(prod(x) ./ (x * x')))
+dfgraddydy(x, y) = zeros(length(y), length(y))
+
 function fjac(x, y)
     x + Bidiagonal(-ones(length(y)) * 3, ones(length(y) - 1) / 2, :U) * y
 end
+dfjacdx(x, y) = I(length(x))
+dfjacdy(x, y) = Bidiagonal(-ones(length(y)) * 3, ones(length(y) - 1) / 2, :U)
+
+# Jvp
+jxvp(x, y, v) = dfjacdx(x, y) * v
+jyvp(x, y, v) = dfjacdy(x, y) * v
+
+# vJp
+vJxp(x, y, v) = dfjacdx(x, y)' * v
+vJyp(x, y, v) = dfjacdy(x, y)' * v
 
 const xscalar = rand()
 const yscalar = rand()
@@ -47,6 +74,10 @@ const yscalar = rand()
 const xvec = rand(5)
 const yvec = rand(5)
 
+# to check that the input vectors are not mutated
+xvec2 = deepcopy(xvec)
+yvec2 = deepcopy(yvec)
+
 function test_fdm_derivatives(fdm_backend)
     der1 = AD.derivative(fdm_backend, fder, xscalar, yscalar)
     der2 = (
@@ -57,6 +88,15 @@
     valscalar, der3 = AD.value_and_derivative(fdm_backend, fder, xscalar, yscalar)
     @test valscalar == fder(xscalar, yscalar)
     @test der3 .- der1 == (0, 0)
+    der_exact = (dfderdx(xscalar, yscalar), dfderdy(xscalar, yscalar))
+    @test minimum(isapprox.(der_exact, der1, rtol=1e-10))
+    # test that a single input (i.e., no tuple) works
+    valscalara, dera = AD.value_and_derivative(fdm_backend, x -> fder(x, yscalar), xscalar)
+    valscalarb, derb = AD.value_and_derivative(fdm_backend, y -> fder(xscalar, y), yscalar)
+    @test valscalar == valscalara
+    @test valscalar == valscalarb
+    @test isapprox(dera[1], der1[1], rtol=1e-10)
+    @test isapprox(derb[1], der1[2], rtol=1e-10)
 end
 
 function test_fdm_gradients(fdm_backend)
@@ -66,6 +106,17 @@
     valscalar, grad3 = AD.value_and_gradient(fdm_backend, fgrad, xvec, yvec)
     @test valscalar == fgrad(xvec, yvec)
     @test norm.(grad3 .- grad1) == (0, 0)
+    grad_exact = (dfgraddx(xvec, yvec), dfgraddy(xvec, yvec))
+    @test minimum(isapprox.(grad_exact, grad1, rtol=1e-10))
+    @test xvec == xvec2
+    @test yvec == yvec2
+    # test that a single input (i.e., no tuple) works
+    valscalara, grada = AD.value_and_gradient(fdm_backend, x -> fgrad(x, yvec), xvec)
+    valscalarb, gradb = AD.value_and_gradient(fdm_backend, y -> fgrad(xvec, y), yvec)
+    @test valscalar == valscalara
+    @test valscalar == valscalarb
+    @test isapprox(grada[1], grad1[1], rtol=1e-10)
+    @test isapprox(gradb[1], grad1[2], rtol=1e-10)
 end
 
 function test_fdm_jacobians(fdm_backend)
@@ -75,9 +126,25 @@
     valvec, jac3 = AD.value_and_jacobian(fdm_backend, fjac, xvec, yvec)
     @test valvec == fjac(xvec, yvec)
     @test norm.(jac3 .- jac1) == (0, 0)
+    jac_exact = (dfjacdx(xvec, yvec), dfjacdy(xvec, yvec))
+    @test minimum(isapprox.(jac_exact, jac1, rtol=1e-10))
+    @test xvec == xvec2
+    @test yvec == yvec2
+    # test that a single input (i.e., no tuple) works
+    valveca, jaca = AD.value_and_jacobian(fdm_backend, x -> fjac(x, yvec), xvec)
+    valvecb, jacb = AD.value_and_jacobian(fdm_backend, y -> fjac(xvec, y), yvec)
+    @test valvec == valveca
+    @test valvec == valvecb
+    @test isapprox(jaca[1], jac1[1], rtol=1e-10)
+    @test isapprox(jacb[1], jac1[2], rtol=1e-10)
 end
 
 function test_fdm_hessians(fdm_backend)
+    H1 = AD.hessian(fdm_backend, fgrad, xvec, yvec)
+    @test dfgraddxdx(xvec, yvec) ≈ H1[1] atol=1e-10
+    @test dfgraddydy(xvec, yvec) ≈ H1[2] atol=1e-10
+
+    # test that a single input (i.e., no tuple) works
     fhess = x -> fgrad(x, yvec)
     hess1 = AD.hessian(fdm_backend, fhess, xvec)
     hess2 = FDM.jacobian(
@@ -99,19 +166,42 @@
     @test valscalar == fgrad(xvec, yvec)
     @test norm.(grad .- AD.gradient(fdm_backend, fhess, xvec)) == (0,)
     @test norm.(hess4 .- hess1) == (0,)
+    @test dfgraddxdx(xvec, yvec) ≈ hess1[1] atol=1e-10
+    @test xvec == xvec2
+    @test yvec == yvec2
+    fhess2 = x -> dfgraddx(x, yvec)
+    hess5 = AD.jacobian(fdm_backend, fhess2, xvec)
+    @test minimum(isapprox.(hess5, hess1, atol=1e-10))
 end
 
 function test_fdm_jvp(fdm_backend)
     v = (rand(length(xvec)), rand(length(yvec)))
-    pf1 = AD.pushforward_function(fdm_backend, fjac, xvec, yvec)(v)
+
+    if fdm_backend isa FDMBackend2 # needs an augmented version of v
+        identity_like = AD.identity_matrix_like(v)
+        vaug = map(identity_like) do identity_like_i
+            identity_like_i .* v
+        end
+
+        pf1 = map(v -> AD.pushforward_function(fdm_backend, fjac, xvec, yvec)(v), vaug)
+        ((valvec1, pf3x), (valvec2, pf3y)) = map(v -> AD.value_and_pushforward_function(fdm_backend, fjac, xvec, yvec)(v), vaug)
+    else
+        pf1 = AD.pushforward_function(fdm_backend, fjac, xvec, yvec)(v)
+        valvec, pf3 = AD.value_and_pushforward_function(fdm_backend, fjac, xvec, yvec)(v)
+        ((valvec1, pf3x), (valvec2, pf3y)) = ((valvec, pf3[1]), (valvec, pf3[2]))
+    end
     pf2 = (
         FDM.jvp(fdm_backend.alg, x -> fjac(x, yvec), (xvec, v[1])),
        FDM.jvp(fdm_backend.alg, y -> fjac(xvec, y), (yvec, v[2])),
     )
     @test norm.(pf1 .- pf2) == (0, 0)
-    valvec, pf3 = AD.value_and_pushforward_function(fdm_backend, fjac, xvec, yvec)(v)
-    @test valvec == fjac(xvec, yvec)
-    @test norm.(pf3 .- pf1) == (0, 0)
+
+    @test valvec1 == fjac(xvec, yvec)
+    @test valvec2 == fjac(xvec, yvec)
+    @test norm.((pf3x, pf3y) .- pf1) == (0, 0)
+    @test minimum(isapprox.(pf1, (jxvp(xvec, yvec, v[1]), jyvp(xvec, yvec, v[2])), atol=1e-10))
+    @test xvec == xvec2
+    @test yvec == yvec2
 end
 
 function test_fdm_j′vp(fdm_backend)
@@ -122,6 +212,218 @@
     valvec, pb3 = AD.value_and_pullback_function(fdm_backend, fjac, xvec, yvec)(w)
     @test valvec == fjac(xvec, yvec)
     @test norm.(pb3 .- pb1) == (0, 0)
+    @test minimum(isapprox.(pb1, (vJxp(xvec, yvec, w), vJyp(xvec, yvec, w)), atol=1e-10))
+    @test xvec == xvec2
+    @test yvec == yvec2
+end
+
+function test_fdm_lazy_derivatives(fdm_backend)
+    # single-input function
+    der1 = AD.derivative(fdm_backend, x -> fder(x, yscalar), xscalar)
+    der2 = (
+        fdm_backend.alg(x -> fder(x, yscalar), xscalar),
+        fdm_backend.alg(y -> fder(xscalar, y), yscalar),
+    )
+
+    lazyder = AD.LazyDerivative(fdm_backend, x -> fder(x, yscalar), xscalar)
+
+    # multiplication with scalar
+    @test der1[1] * yscalar == der2[1] * yscalar
+    @test lazyder * yscalar == der1 .* yscalar
+    @test lazyder * yscalar isa Tuple
+
+    @test yscalar * der1[1] == yscalar * der2[1]
+    @test yscalar * lazyder == yscalar .* der1
+    @test yscalar * lazyder isa Tuple
+
+    # multiplication with array
+    @test der1[1] * yvec == der2[1] * yvec
+    @test lazyder * yvec == (der1 .* yvec,)
+    @test lazyder * yvec isa Tuple
+
+    @test yvec * der1[1] == yvec * der2[1]
+    @test yvec * lazyder == (yvec .* der1,)
+    @test yvec * lazyder isa Tuple
+
+    # multiplication with tuple
+    @test lazyder * (yscalar,) == lazyder * yscalar
+    @test lazyder * (yvec,) == lazyder * yvec
+
+    @test (yscalar,) * lazyder == yscalar * lazyder
+    @test (yvec,) * lazyder == yvec * lazyder
+
+    # two-input function
+    der1 = AD.derivative(fdm_backend, fder, xscalar, yscalar)
+    der2 = (
+        fdm_backend.alg(x -> fder(x, yscalar), xscalar),
+        fdm_backend.alg(y -> fder(xscalar, y), yscalar),
+    )
+
+    lazyder = AD.LazyDerivative(fdm_backend, fder, (xscalar, yscalar))
+
+    # multiplication with scalar
+    @test der1 .* yscalar == der2 .* yscalar
+    @test lazyder * yscalar == der1 .* yscalar
+    @test lazyder * yscalar isa Tuple
+
+    @test yscalar .* der1 == yscalar .* der2
+    @test yscalar * lazyder == yscalar .* der1
+    @test yscalar * lazyder isa Tuple
+
+    # multiplication with array
+    @test (der1[1] * yvec, der1[2] * yvec) == (der2[1] * yvec, der2[2] * yvec)
+    @test lazyder * yvec == (der1[1] * yvec, der1[2] * yvec)
+    @test lazyder * yvec isa Tuple
+
+    @test (yvec * der1[1], yvec * der1[2]) == (yvec * der2[1], yvec * der2[2])
+    @test yvec * lazyder == (yvec * der1[1], yvec * der1[2])
+    @test yvec * lazyder isa Tuple
+
+    # multiplication with tuple
+    @test lazyder * (yscalar,) == lazyder * yscalar
+    @test lazyder * (yvec,) == lazyder * yvec
+
+    @test (yscalar,) * lazyder == yscalar * lazyder
+    @test (yvec,) * lazyder == yvec * lazyder
+end
+
+function test_fdm_lazy_gradients(fdm_backend)
+    # single-input function
+    grad1 = AD.gradient(fdm_backend, x -> fgrad(x, yvec), xvec)
+    grad2 = FDM.grad(fdm_backend.alg, x -> fgrad(x, yvec), xvec)
+    lazygrad = AD.LazyGradient(fdm_backend, x -> fgrad(x, yvec), xvec)
+
+    # multiplication with scalar
+    @test norm.(grad1 .* yscalar .- grad2 .* yscalar) == (0,)
+    @test norm.(lazygrad * yscalar .- grad1 .* yscalar) == (0,)
+    @test lazygrad * yscalar isa Tuple
+
+    @test norm.(yscalar .* grad1 .- yscalar .* grad2) == (0,)
+    @test norm.(yscalar * lazygrad .- yscalar .* grad1) == (0,)
+    @test yscalar * lazygrad isa Tuple
+
+    # multiplication with tuple
+    @test lazygrad * (yscalar,) == lazygrad * yscalar
+    @test (yscalar,) * lazygrad == yscalar * lazygrad
+
+    # two-input function
+    grad1 = AD.gradient(fdm_backend, fgrad, xvec, yvec)
+    grad2 = FDM.grad(fdm_backend.alg, fgrad, xvec, yvec)
+    lazygrad = AD.LazyGradient(fdm_backend, fgrad, (xvec, yvec))
+
+    # multiplication with scalar
+    @test norm.(grad1 .* yscalar .- grad2 .* yscalar) == (0, 0)
+    @test norm.(lazygrad * yscalar .- grad1 .* yscalar) == (0, 0)
+    @test lazygrad * yscalar isa Tuple
+
+    @test norm.(yscalar .* grad1 .- yscalar .* grad2) == (0, 0)
+    @test norm.(yscalar * lazygrad .- yscalar .* grad1) == (0, 0)
+    @test yscalar * lazygrad isa Tuple
+
+    # multiplication with tuple
+    @test lazygrad * (yscalar,) == lazygrad * yscalar
+    @test (yscalar,) * lazygrad == yscalar * lazygrad
+end
+
+function test_fdm_lazy_jacobians(fdm_backend)
+    # single-input function
+    jac1 = AD.jacobian(fdm_backend, x -> fjac(x, yvec), xvec)
+    jac2 = FDM.jacobian(fdm_backend.alg, x -> fjac(x, yvec), xvec)
+    lazyjac = AD.LazyJacobian(fdm_backend, x -> fjac(x, yvec), xvec)
+
+    # multiplication with scalar
+    @test norm.(jac1 .* yscalar .- jac2 .* yscalar) == (0,)
+    @test norm.(lazyjac * yscalar .- jac1 .* yscalar) == (0,)
+    @test lazyjac * yscalar isa Tuple
+
+    @test norm.(yscalar .* jac1 .- yscalar .* jac2) == (0,)
+    @test norm.(yscalar * lazyjac .- yscalar .* jac1) == (0,)
+    @test yscalar * lazyjac isa Tuple
+
+    w = adjoint(rand(length(fjac(xvec, yvec))))
+    v = (rand(length(xvec)), rand(length(xvec)))
+
+    # vjp
+    pb1 = FDM.j′vp(fdm_backend.alg, x -> fjac(x, yvec), w, xvec)
+    res = w * lazyjac
+    @test minimum(isapprox.(pb1, res, atol=1e-10))
+    @test res isa Tuple
+
+    # jvp
+    pf1 = (FDM.jvp(fdm_backend.alg, x -> fjac(x, yvec), (xvec, v[1])),)
+    res = lazyjac * v[1]
+    @test minimum(isapprox.(pf1, res, atol=1e-10))
+    @test res isa Tuple
+
+    # two-input function
+    jac1 = AD.jacobian(fdm_backend, fjac, xvec, yvec)
+    jac2 = FDM.jacobian(fdm_backend.alg, fjac, xvec, yvec)
+    lazyjac = AD.LazyJacobian(fdm_backend, fjac, (xvec, yvec))
+
+    # multiplication with scalar
+    @test norm.(jac1 .* yscalar .- jac2 .* yscalar) == (0, 0)
+    @test norm.(lazyjac * yscalar .- jac1 .* yscalar) == (0, 0)
+    @test lazyjac * yscalar isa Tuple
+
+    @test norm.(yscalar .* jac1 .- yscalar .* jac2) == (0, 0)
+    @test norm.(yscalar * lazyjac .- yscalar .* jac1) == (0, 0)
+    @test yscalar * lazyjac isa Tuple
+
+    # vjp
+    pb1 = FDM.j′vp(fdm_backend.alg, fjac, w, xvec, yvec)
+    res = w * lazyjac
+    @test minimum(isapprox.(pb1, res, atol=1e-10))
+    @test res isa Tuple
+
+    # jvp
+    pf1 = (
+        FDM.jvp(fdm_backend.alg, x -> fjac(x, yvec), (xvec, v[1])),
+        FDM.jvp(fdm_backend.alg, y -> fjac(xvec, y), (yvec, v[2])),
+    )
+
+    if fdm_backend isa FDMBackend2 # needs an augmented version of v
+        identity_like = AD.identity_matrix_like(v)
+        vaug = map(identity_like) do identity_like_i
+            identity_like_i .* v
+        end
+
+        res = map(v -> (lazyjac * v)[1], vaug)
+    else
+        res = lazyjac * v
+    end
+
+    @test minimum(isapprox.(pf1, res, atol=1e-10))
+    @test res isa Tuple
+end
+
+function test_fdm_lazy_hessians(fdm_backend)
+    # single-input function
+    fhess = x -> fgrad(x, yvec)
+    hess1 = (dfgraddxdx(xvec, yvec),)
+    lazyhess = AD.LazyHessian(fdm_backend, x -> fgrad(x, yvec), xvec)
+
+    # multiplication with scalar from the right
+    @test minimum(isapprox.(lazyhess * yscalar, hess1 .* yscalar, atol=1e-10))
+    @test lazyhess * yscalar isa Tuple
+
+    # multiplication with scalar from the left
+    @test minimum(isapprox.(yscalar * lazyhess, yscalar .* hess1, atol=1e-10))
+    @test yscalar * lazyhess isa Tuple
+
+    w = adjoint(rand(length(xvec)))
+    v = rand(length(xvec))
+
+    # Hvp
+    Hv = map(h -> h * v, hess1)
+    res = lazyhess * v
+    @test minimum(isapprox.(Hv, res, atol=1e-10))
+    @test res isa Tuple
+
+    # H′vp
+    wH = map(h -> h' * adjoint(w), hess1)
+    res = w * lazyhess
+    @test minimum(isapprox.(wH, res, atol=1e-10))
+    @test res isa Tuple
 end
 
 @testset "AbstractDifferentiation.jl" begin
@@ -139,29 +441,43 @@ end
         end
         @testset "Jacobian" begin
            test_fdm_jacobians(fdm_backend1)
            test_fdm_jacobians(fdm_backend2)
-            # Errors
            test_fdm_jacobians(fdm_backend3)
        end
        @testset "Hessian" begin
            # Works but super slow
            test_fdm_hessians(fdm_backend1)
-            # Errors
            test_fdm_hessians(fdm_backend2)
-            # Errors
            test_fdm_hessians(fdm_backend3)
        end
        @testset "jvp" begin
            test_fdm_jvp(fdm_backend1)
-            # Errors
            test_fdm_jvp(fdm_backend2)
-            # Errors
            test_fdm_jvp(fdm_backend3)
        end
        @testset "j′vp" begin
            test_fdm_j′vp(fdm_backend1)
            test_fdm_j′vp(fdm_backend2)
-            # Errors
            test_fdm_j′vp(fdm_backend3)
        end
+        @testset "Lazy Derivative" begin
+            test_fdm_lazy_derivatives(fdm_backend1)
+            test_fdm_lazy_derivatives(fdm_backend2)
+            test_fdm_lazy_derivatives(fdm_backend3)
+        end
+        @testset "Lazy Gradient" begin
+            test_fdm_lazy_gradients(fdm_backend1)
+            test_fdm_lazy_gradients(fdm_backend2)
+            test_fdm_lazy_gradients(fdm_backend3)
+        end
+        @testset "Lazy Jacobian" begin
+            test_fdm_lazy_jacobians(fdm_backend1)
+            test_fdm_lazy_jacobians(fdm_backend2)
+            test_fdm_lazy_jacobians(fdm_backend3)
+        end
+        @testset "Lazy Hessian" begin
+            test_fdm_lazy_hessians(fdm_backend1)
+            test_fdm_lazy_hessians(fdm_backend2)
+            test_fdm_lazy_hessians(fdm_backend3)
+        end
    end
end
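
For reviewers, a minimal sketch of how the API touched by this patch fits together end to end. It assumes a finite-difference backend wired up via `AD.@primitive jacobian`, mirroring how `test/runtests.jl` defines its first backend; the `FDMBackend` struct and the function `f` below are illustrative only and not part of the patch.

```julia
using AbstractDifferentiation, FiniteDifferences
const AD = AbstractDifferentiation

# Illustrative backend (assumption): declared the same way as the
# jacobian-primitive backend used throughout the test suite.
struct FDMBackend{A} <: AD.AbstractFiniteDifference
    alg::A
end
FDMBackend() = FDMBackend(central_fdm(5, 1))
AD.@primitive function jacobian(ab::FDMBackend, f, xs...)
    return FiniteDifferences.jacobian(ab.alg, f, xs...)
end

backend = FDMBackend()
f(x, y) = prod(x) + sum(y ./ (1:length(y)))
x, y = rand(5), rand(5)

# Eager API: one result per input, always returned as a tuple.
gx, gy = AD.gradient(backend, f, x, y)    # gradients reshaped like x and y
Hx, Hy = AD.hessian(backend, f, x, y)     # per-input blocks ∂x∂x f and ∂y∂y f
val, (Jx, Jy) = AD.value_and_jacobian(backend, f, x, y)

# Lazy wrappers defer all differentiation until a multiplication happens.
lazy_grad = AD.LazyGradient(backend, f, (x, y))
2.0 * lazy_grad                           # == 2.0 .* AD.gradient(backend, f, x, y)

lazy_hess = AD.LazyHessian(backend, z -> f(z, y), x)  # single input: no tuple
v = rand(5)
lazy_hess * v                             # Hessian-vector product (∂x∂x f) * v
```

The lazy types never materialize a full derivative unless the multiplication requires it: scalar multiplication falls back to the eager operator, while `lazy_hess * v` and `w * lazy_jac` route through the pushforward and pullback functions, which is what the new `Lazy ...` test sets exercise against `FiniteDifferences` reference values.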