diff --git a/Changelog.md b/Changelog.md index 464bb6ef39..449930cfae 100644 --- a/Changelog.md +++ b/Changelog.md @@ -9,10 +9,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -* Introduce `sub_kwargs` and `sub_stopping_criterion` for `trust_regions` as noticed in [#336](https://github.com/JuliaManifolds/Manopt.jl/discussions/336) +* Introduce `sub_kwargs` and `sub_stopping_criterion` for `trust_regions` as noticed in [#336](https:// ### Changed +* `WolfePowellLineSearch`, `ArmijoLineSearch` step sizes now allocate less +* `linesearch_backtrack!` is now available +* Quasi Newton Updates can work inplace of a direction vector as well. * Faster `safe_indices` in L-BFGS. ## [0.4.44] December 12, 2023 diff --git a/Project.toml b/Project.toml index eda73b98e0..857ceae1c2 100644 --- a/Project.toml +++ b/Project.toml @@ -45,7 +45,7 @@ JuMP = "1.15" LinearAlgebra = "1.6" LRUCache = "1.4" ManifoldDiff = "0.3.8" -Manifolds = "0.9" +Manifolds = "0.9.11" ManifoldsBase = "0.15" ManoptExamples = "0.1.4" Markdown = "1.6" diff --git a/src/plans/quasi_newton_plan.jl b/src/plans/quasi_newton_plan.jl index 08c82eee42..d224ccfe3d 100644 --- a/src/plans/quasi_newton_plan.jl +++ b/src/plans/quasi_newton_plan.jl @@ -287,33 +287,63 @@ InverseBroyden(φ::Float64) = InverseBroyden(φ, :constant) @doc raw""" QuasiNewtonMatrixDirectionUpdate <: AbstractQuasiNewtonDirectionUpdate -These [`AbstractQuasiNewtonDirectionUpdate`](@ref)s represent any quasi-Newton update rule, where the operator is stored as a matrix. A distinction is made between the update of the approximation of the Hessian, ``H_k \mapsto H_{k+1}``, and the update of the approximation of the Hessian inverse, ``B_k \mapsto B_{k+1}``. For the first case, the coordinates of the search direction ``η_k`` with respect to a basis ``\{b_i\}^{n}_{i=1}`` are determined by solving a linear system of equations, i.e. +The `QuasiNewtonMatrixDirectionUpdate` representa a quasi-Newton update rule, +where the operator is stored as a matrix. A distinction is made between the update of the +approximation of the Hessian, ``H_k \mapsto H_{k+1}``, and the update of the approximation +of the Hessian inverse, ``B_k \mapsto B_{k+1}``. +For the first case, the coordinates of the search direction ``η_k`` with respect to +a basis ``\{b_i\}^{n}_{i=1}`` are determined by solving a linear system of equations ```math -\text{Solve} \quad \hat{η_k} = - H_k \widehat{\operatorname{grad}f(x_k)} +\text{Solve} \quad \hat{η_k} = - H_k \widehat{\operatorname{grad}f(x_k)}, ``` -where ``H_k`` is the matrix representing the operator with respect to the basis ``\{b_i\}^{n}_{i=1}`` and ``\widehat{\operatorname{grad}f(x_k)}`` represents the coordinates of the gradient of the objective function ``f`` in ``x_k`` with respect to the basis ``\{b_i\}^{n}_{i=1}``. -If a method is chosen where Hessian inverse is approximated, the coordinates of the search direction ``η_k`` with respect to a basis ``\{b_i\}^{n}_{i=1}`` are obtained simply by matrix-vector multiplication, i.e. +where ``H_k`` is the matrix representing the operator with respect to the basis ``\{b_i\}^{n}_{i=1}`` +and ``\widehat{\operatorname{grad}f(x_k)}`` represents the coordinates of the gradient of +the objective function ``f`` in ``x_k`` with respect to the basis ``\{b_i\}^{n}_{i=1}``. 
+If a method is chosen where the Hessian inverse is approximated, the coordinates of the search +direction ``η_k`` with respect to a basis ``\{b_i\}^{n}_{i=1}`` are obtained simply by +matrix-vector multiplication ```math -\hat{η_k} = - B_k \widehat{\operatorname{grad}f(x_k)} +\hat{η_k} = - B_k \widehat{\operatorname{grad}f(x_k)}, ``` -where ``B_k`` is the matrix representing the operator with respect to the basis ``\{b_i\}^{n}_{i=1}`` and ``\widehat{\operatorname{grad}f(x_k)}`` as above. In the end, the search direction ``η_k`` is generated from the coordinates ``\hat{eta_k}`` and the vectors of the basis ``\{b_i\}^{n}_{i=1}`` in both variants. -The [`AbstractQuasiNewtonUpdateRule`](@ref) indicates which quasi-Newton update rule is used. In all of them, the Euclidean update formula is used to generate the matrix ``H_{k+1}`` and ``B_{k+1}``, and the basis ``\{b_i\}^{n}_{i=1}`` is transported into the upcoming tangent space ``T_{x_{k+1}} \mathcal{M}``, preferably with an isometric vector transport, or generated there. +where ``B_k`` is the matrix representing the operator with respect to the basis ``\{b_i\}^{n}_{i=1}`` +and ``\widehat{\operatorname{grad}f(x_k)}`` as above. In the end, the search direction ``η_k`` +is generated from the coordinates ``\hat{η_k}`` and the vectors of the basis ``\{b_i\}^{n}_{i=1}`` +in both variants. +The [`AbstractQuasiNewtonUpdateRule`](@ref) indicates which quasi-Newton update rule is used. +In all of them, the Euclidean update formula is used to generate the matrix ``H_{k+1}`` +and ``B_{k+1}``, and the basis ``\{b_i\}^{n}_{i=1}`` is transported into the upcoming tangent +space ``T_{x_{k+1}} \mathcal{M}``, preferably with an isometric vector transport, or generated there. + +# Provided functors + +* `(mp::AbstractManoptProblem, st::QuasiNewtonState) -> η` to compute the update direction +* `(η, mp::AbstractManoptProblem, st::QuasiNewtonState) -> η` to compute the update direction in place of `η` # Fields -* `update` – a [`AbstractQuasiNewtonUpdateRule`](@ref). -* `basis` – the basis. -* `matrix` – (`Matrix{Float64}(I, manifold_dimension(M), manifold_dimension(M))`) + +* `basis` an `AbstractBasis` to use in the tangent spaces +* `matrix` (`Matrix{Float64}(I, manifold_dimension(M), manifold_dimension(M))`) the matrix which represents the approximating operator. -* `scale` – (`true) indicates whether the initial matrix (= identity matrix) should be scaled before the first update. -* `vector_transport_method` – (`vector_transport_method`)an `AbstractVectorTransportMethod` +* `scale` (`true`) indicates whether the initial matrix (= identity matrix) should be scaled before the first update. +* `update` an [`AbstractQuasiNewtonUpdateRule`](@ref). +* `vector_transport_method` (`vector_transport_method`) an `AbstractVectorTransportMethod` # Constructor - QuasiNewtonMatrixDirectionUpdate(M::AbstractManifold, update, basis, matrix; - scale=true, vector_transport_method=default_vector_transport_method(M)) + QuasiNewtonMatrixDirectionUpdate( + M::AbstractManifold, + update, + basis::B=DefaultOrthonormalBasis(), + m=Matrix{Float64}(I, manifold_dimension(M), manifold_dimension(M)); + kwargs... + ) + +## Keyword Arguments + +* `scale`, `vector_transport_method` for the two fields above Generate the update rule with defaults from a manifold and the names corresponding to the fields above.
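For context, a minimal self-contained sketch of the two coordinate computations described above, solving with a Hessian approximation ``H_k`` versus multiplying with an inverse approximation ``B_k``, using the `ManifoldsBase` coordinate interface. The sphere, the point, the gradient stand-in, and the identity matrices are purely illustrative assumptions and are not part of this change:

```julia
using Manifolds, LinearAlgebra

M = Sphere(2)
p = [1.0, 0.0, 0.0]
X = [0.0, 0.2, -0.1]               # stand-in for grad f(p), tangent at p
B = DefaultOrthonormalBasis()
n = manifold_dimension(M)

# Hessian approximation H_k: solve a linear system for the coordinates of η_k
H = Matrix{Float64}(I, n, n)
η_H = get_vector(M, p, -(H \ get_coordinates(M, p, X, B)), B)

# Inverse-Hessian approximation B_k: a plain matrix-vector multiplication
Binv = Matrix{Float64}(I, n, n)
η_B = get_vector(M, p, -(Binv * get_coordinates(M, p, X, B)), B)

η_H ≈ η_B                          # both equal -X here, since both matrices are I
```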
@@ -362,43 +392,63 @@ function QuasiNewtonMatrixDirectionUpdate( basis, m, scale, update, vector_transport_method ) end +function (d::QuasiNewtonMatrixDirectionUpdate)(mp, st) + r = zero_vector(get_manifold(mp), get_iterate(st)) + return d(r, mp, st) +end function (d::QuasiNewtonMatrixDirectionUpdate{T})( - mp, st + r, mp, st ) where {T<:Union{InverseBFGS,InverseDFP,InverseSR1,InverseBroyden}} M = get_manifold(mp) p = get_iterate(st) X = get_gradient(st) - return get_vector(M, p, -d.matrix * get_coordinates(M, p, X, d.basis), d.basis) + get_vector!(M, r, p, -d.matrix * get_coordinates(M, p, X, d.basis), d.basis) + return r end function (d::QuasiNewtonMatrixDirectionUpdate{T})( - mp, st + r, mp, st ) where {T<:Union{BFGS,DFP,SR1,Broyden}} M = get_manifold(mp) p = get_iterate(st) X = get_gradient(st) - return get_vector(M, p, -d.matrix \ get_coordinates(M, p, X, d.basis), d.basis) + get_vector!(M, r, p, -d.matrix \ get_coordinates(M, p, X, d.basis), d.basis) + return r end @doc raw""" QuasiNewtonLimitedMemoryDirectionUpdate <: AbstractQuasiNewtonDirectionUpdate -This [`AbstractQuasiNewtonDirectionUpdate`](@ref) represents the limited-memory Riemannian BFGS update, where the approximating operator is represented by ``m`` stored pairs of tangent vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}`` in the ``k``-th iteration. -For the calculation of the search direction ``η_k``, the generalisation of the two-loop recursion is used (see [Huang, Gallican, Absil, SIAM J. Optim., 2015](@cite HuangGallivanAbsil:2015)), since it only requires inner products and linear combinations of tangent vectors in ``T_{x_k} \mathcal{M}``. For that the stored pairs of tangent vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}``, the gradient ``\operatorname{grad}f(x_k)`` of the objective function ``f`` in ``x_k`` and the positive definite self-adjoint operator +This [`AbstractQuasiNewtonDirectionUpdate`](@ref) represents the limited-memory Riemannian BFGS update, +where the approximating operator is represented by ``m`` stored pairs of tangent +vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}`` in the ``k``-th iteration. +For the calculation of the search direction ``η_k``, the generalisation of the two-loop recursion +is used (see [Huang, Gallican, Absil, SIAM J. Optim., 2015](@cite HuangGallivanAbsil:2015)), +since it only requires inner products and linear combinations of tangent vectors in ``T_{x_k} \mathcal{M}``. +For that the stored pairs of tangent vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}``, +the gradient ``\operatorname{grad}f(x_k)`` of the objective function ``f`` in ``x_k`` +and the positive definite self-adjoint operator ```math \mathcal{B}^{(0)}_k[⋅] = \frac{g_{x_k}(s_{k-1}, y_{k-1})}{g_{x_k}(y_{k-1}, y_{k-1})} \; \mathrm{id}_{T_{x_k} \mathcal{M}}[⋅] ``` -are used. The two-loop recursion can be understood as that the [`InverseBFGS`](@ref) update is executed ``m`` times in a row on ``\mathcal{B}^{(0)}_k[⋅]`` using the tangent vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}``, and in the same time the resulting operator ``\mathcal{B}^{LRBFGS}_k [⋅]`` is directly applied on ``\operatorname{grad}f(x_k)``. +are used. 
The two-loop recursion can be understood as that the [`InverseBFGS`](@ref) update +is executed ``m`` times in a row on ``\mathcal{B}^{(0)}_k[⋅]`` using the tangent vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}``, and in the same time the resulting operator ``\mathcal{B}^{LRBFGS}_k [⋅]`` is directly applied on ``\operatorname{grad}f(x_k)``. When updating there are two cases: if there is still free memory, i.e. ``k < m``, the previously stored vector pairs ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}`` have to be transported into the upcoming tangent space ``T_{x_{k+1}} \mathcal{M}``; if there is no free memory, the oldest pair ``\{ \widetilde{s}_{k−m}, \widetilde{y}_{k−m}\}`` has to be discarded and then all the remaining vector pairs ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m+1}^{k-1}`` are transported into the tangent space ``T_{x_{k+1}} \mathcal{M}``. After that we calculate and store ``s_k = \widetilde{s}_k = T^{S}_{x_k, α_k η_k}(α_k η_k)`` and ``y_k = \widetilde{y}_k``. This process ensures that new information about the objective function is always included and the old, probably no longer relevant, information is discarded. +# Provided functors + +* `(mp::AbstractManoptproblem, st::QuasiNewtonState) -> η` to compute the update direction +* `(η, mp::AbstractManoptproblem, st::QuasiNewtonState) -> η` to compute the update direction inplace of `η` + # Fields -* `memory_s` – the set of the stored (and transported) search directions times step size ``\{ \widetilde{s}_i\}_{i=k-m}^{k-1}``. -* `memory_y` – set of the stored gradient differences ``\{ \widetilde{y}_i\}_{i=k-m}^{k-1}``. -* `ξ` – a variable used in the two-loop recursion. -* `ρ` – a variable used in the two-loop recursion. -* `scale` – -* `vector_transport_method` – a `AbstractVectorTransportMethod` -* `message` – a string containing a potential warning that might have appeared + +* `memory_s` the set of the stored (and transported) search directions times step size ``\{ \widetilde{s}_i\}_{i=k-m}^{k-1}``. +* `memory_y` set of the stored gradient differences ``\{ \widetilde{y}_i\}_{i=k-m}^{k-1}``. +* `ξ` a variable used in the two-loop recursion. +* `ρ` a variable used in the two-loop recursion. +* `scale` initial scaling of the Hessian +* `vector_transport_method` a `AbstractVectorTransportMethod` +* `message` a string containing a potential warning that might have appeared # Constructor QuasiNewtonLimitedMemoryDirectionUpdate( @@ -409,7 +459,7 @@ When updating there are two cases: if there is still free memory, i.e. ``k < m`` initial_vector=zero_vector(M,x), scale=1.0 project=true - ) + ) # See also @@ -468,12 +518,19 @@ function status_summary(d::QuasiNewtonLimitedMemoryDirectionUpdate{T}) where {T} return s end function (d::QuasiNewtonLimitedMemoryDirectionUpdate{InverseBFGS})(mp, st) + r = zero_vector(get_manifold(mp), get_iterate(st)) + return d(r, mp, st) +end +function (d::QuasiNewtonLimitedMemoryDirectionUpdate{InverseBFGS})(r, mp, st) isempty(d.message) || (d.message = "") # reset message M = get_manifold(mp) p = get_iterate(st) - r = copy(M, p, get_gradient(st)) + copyto!(M, r, p, get_gradient(st)) m = length(d.memory_s) - m == 0 && return -r + if m == 0 + r .*= -1 + return r + end for i in m:-1:1 # what if we divide by zero here? Setting to zero ignores this in the next step # precompute in case inner is expensive @@ -540,6 +597,11 @@ If [`InverseBFGS`](@ref) or [`InverseBFGS`](@ref) is chosen as update, then the method follows the method of [Huang, Absil, Gallivan, SIAM J. 
Optim., 2018](@cite HuangAbsilGallivan:2018), taking into account that the corresponding step size is chosen. +# Provided functors + +* `(mp::AbstractManoptproblem, st::QuasiNewtonState) -> η` to compute the update direction +* `(η, mp::AbstractManoptproblem, st::QuasiNewtonState) -> η` to compute the update direction inplace of `η` + # Fields * `update` – an [`AbstractQuasiNewtonDirectionUpdate`](@ref) @@ -572,6 +634,7 @@ function QuasiNewtonCautiousDirectionUpdate( return QuasiNewtonCautiousDirectionUpdate{U}(update, θ) end (d::QuasiNewtonCautiousDirectionUpdate)(mp, st) = d.update(mp, st) +(d::QuasiNewtonCautiousDirectionUpdate)(r, mp, st) = d.update(r, mp, st) # access the inner vector transport method function get_update_vector_transport(u::AbstractQuasiNewtonDirectionUpdate) diff --git a/src/plans/stepsize.jl b/src/plans/stepsize.jl index 27d51d675f..30dd65e0d7 100644 --- a/src/plans/stepsize.jl +++ b/src/plans/stepsize.jl @@ -200,26 +200,29 @@ end @doc raw""" ArmijoLinesearch <: Linesearch -A functor representing Armijo line search including the last runs state, i.e. a -last step size. +A functor representing Armijo line search including the last runs state string the last stepsize. # Fields -* `initial_stepsize` – (`1.0`) and initial step size -* `retraction_method` – (`default_retraction_method(M)`) the retraction to use -* `contraction_factor` – (`0.95`) exponent for line search reduction -* `sufficient_decrease` – (`0.1`) gain within Armijo's rule -* `last_stepsize` – (`initialstepsize`) the last step size we start the search with -* `initial_guess` - (`(p,s,i,l) -> l`) based on a [`AbstractManoptProblem`](@ref) `p`, +* `initial_stepsize` (`1.0`) and initial step size +* `retraction_method` (`default_retraction_method(M)`) the retraction to use +* `contraction_factor` (`0.95`) exponent for line search reduction +* `sufficient_decrease` (`0.1`) gain within Armijo's rule +* `last_stepsize` (`initialstepsize`) the last step size we start the search with +* `initial_guess` (`(p,s,i,l) -> l`) based on a [`AbstractManoptProblem`](@ref) `p`, [`AbstractManoptSolverState`](@ref) `s` and a current iterate `i` and a last step size `l`, this returns an initial guess. The default uses the last obtained stepsize +as well as for internal use + +* `candidate_point` (`allocate_result(M, rand)`) to store an interims result + Furthermore the following fields act as safeguards -* `stop_when_stepsize_less` - (`0.0`) smallest stepsize when to stop (the last one before is taken) -* `stop_when_stepsize_exceeds - ([`max_stepsize`](@ref)`(M, p)`) – largest stepsize when to stop. -* `stop_increasing_at_step` - (`100`) last step to increase the stepsize (phase 1), -* `stop_decreasing_at_step` - (`1000`) last step size to decrease the stepsize (phase 2), +* `stop_when_stepsize_less` (`0.0`) smallest stepsize when to stop (the last one before is taken) +* `stop_when_stepsize_exceeds ([`max_stepsize`](@ref)`(M, p)`) – largest stepsize when to stop. +* `stop_increasing_at_step` (`100`) last step to increase the stepsize (phase 1), +* `stop_decreasing_at_step` (`1000`) last step size to decrease the stepsize (phase 2), Pass `:Messages` to a `debug=` to see `@info`s when these happen. @@ -229,50 +232,58 @@ Pass `:Messages` to a `debug=` to see `@info`s when these happen. with the Fields above as keyword arguments and the retraction is set to the default retraction on `M`. 
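As a usage sketch for the line search documented above, constructing an `ArmijoLinesearch` with a few of the listed keyword arguments and passing it to a solver could look as follows; the manifold, cost, and gradient are illustrative assumptions and not taken from this change:

```julia
using Manopt, Manifolds

# Illustrative toy problem: minimize the third coordinate on the sphere.
M = Sphere(2)
f(M, p) = p[3]
grad_f(M, p) = project(M, p, [0.0, 0.0, 1.0])

# Construct the line search with a few of the keyword arguments listed above.
armijo = ArmijoLinesearch(
    M;
    initial_stepsize=1.0,
    contraction_factor=0.9,
    sufficient_decrease=1e-4,
    stop_when_stepsize_less=1e-10,
)
q = gradient_descent(M, f, grad_f, [1.0, 0.0, 0.0]; stepsize=armijo)
```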
-The constructors return the functor to perform Armijo line search, where two interfaces are available: -* based on a tuple `(amp, ams, i)` of a [`AbstractManoptProblem`](@ref) `amp`, [`AbstractManoptSolverState`](@ref) `ams` - and a current iterate `i`. -* with `(M, x, F, gradFx[,η=-gradFx]) -> s` where `M`, a current - point `x` a function `F`, that maps from the manifold to the reals, - its gradient (a tangent vector) `gradFx```=\operatorname{grad}F(x)`` at `x` and an optional - search direction tangent vector `η=-gradFx` are the arguments. +The constructors return the functor to perform Armijo line search, where + + (a::ArmijoLinesearch)(amp::AbstractManoptProblem, ams::AbstractManoptSolverState, i) + +of a [`AbstractManoptProblem`](@ref) `amp`, [`AbstractManoptSolverState`](@ref) `ams` and a current iterate `i` +with keywords + +## Keyword Arguments + + * `candidate_point` (`allocate_result(M, rand)`) to pass memory for the candidate point + * `η` (`-get_gradient(mp, get_iterate(s));`) the search direction to use, + by default the steepest descent direction. """ -mutable struct ArmijoLinesearch{TRM<:AbstractRetractionMethod,F} <: Linesearch +mutable struct ArmijoLinesearch{TRM<:AbstractRetractionMethod,P,F} <: Linesearch + candidate_point::P + contraction_factor::Float64 + initial_guess::F initial_stepsize::Float64 + last_stepsize::Float64 + message::String retraction_method::TRM - contraction_factor::Float64 sufficient_decrease::Float64 - last_stepsize::Float64 stop_when_stepsize_less::Float64 stop_when_stepsize_exceeds::Float64 stop_increasing_at_step::Int stop_decreasing_at_step::Int - initial_guess::F - message::String function ArmijoLinesearch( M::AbstractManifold=DefaultManifold(); - initial_stepsize::Float64=1.0, - retraction_method::AbstractRetractionMethod=default_retraction_method(M), - contraction_factor::Float64=0.95, - sufficient_decrease::Float64=0.1, - stop_when_stepsize_less=0.0, - stop_when_stepsize_exceeds=max_stepsize(M), - stop_increasing_at_step=100, - stop_decreasing_at_step=1000, + candidate_point::P=allocate_result(M, rand), + contraction_factor::Real=0.95, + initial_stepsize::Real=1.0, initial_guess=armijo_initial_guess, - ) - return new{typeof(retraction_method),typeof(initial_guess)}( + retraction_method::TRM=default_retraction_method(M), + stop_when_stepsize_less::Real=0.0, + stop_when_stepsize_exceeds::Real=max_stepsize(M), + stop_increasing_at_step::Int=100, + stop_decreasing_at_step::Int=1000, + sufficient_decrease=0.1, + ) where {TRM,P} + return new{TRM,P,typeof(initial_guess)}( + candidate_point, + contraction_factor, + initial_guess, + initial_stepsize, initial_stepsize, + "", # initilize an empty message retraction_method, - contraction_factor, sufficient_decrease, - initial_stepsize, stop_when_stepsize_less, stop_when_stepsize_exceeds, stop_increasing_at_step, stop_decreasing_at_step, - initial_guess, - "", ) end end @@ -285,16 +296,17 @@ function (a::ArmijoLinesearch)( ) p = get_iterate(s) X = get_gradient!(mp, get_gradient(s), p) - (a.last_stepsize, a.message) = linesearch_backtrack( + (a.last_stepsize, a.message) = linesearch_backtrack!( get_manifold(mp), + a.candidate_point, (M, p) -> get_cost_function(get_objective(mp))(M, p), p, X, a.initial_guess(mp, s, i, a.last_stepsize), a.sufficient_decrease, a.contraction_factor, - a.retraction_method, η; + retraction_method=a.retraction_method, stop_when_stepsize_less=a.stop_when_stepsize_less / norm(get_manifold(mp), p, η), stop_when_stepsize_exceeds=a.stop_when_stepsize_exceeds / 
norm(get_manifold(mp), p, η), @@ -321,62 +333,81 @@ end get_message(a::ArmijoLinesearch) = a.message @doc raw""" - (s, msg) = linesearch_backtrack( - M, F, x, gradFx, s, decrease, contract, retr, η = -gradFx, f0 = F(x); - stop_when_stepsize_less=0.0, - stop_when_stepsize_exceeds=max_stepsize(M, p), - stop_increasing_at_step = 100, - stop_decreasing_at_step = 1000, - ) - -perform a linesearch for -* a manifold `M` -* a cost function `f`, -* an iterate `p` -* the gradient ``\operatorname{grad}F(x)`` -* an initial stepsize `s` usually called ``γ`` + (s, msg) = linesearch_backtrack(M, F, p, X, s, decrease, contract η = -X, f0 = f(p)) + (s, msg) = linesearch_backtrack!(M, q, F, p, X, s, decrease, contract η = -X, f0 = f(p)) + +perform a linesearch +* on manifold `M` +* for the cost function `f`, +* at the current point `p` +* with current gradient providedd in `X` +* an initial stepsize `s` * a sufficient `decrease` * a `contract`ion factor ``σ`` * a `retr`action, which defaults to the `default_retraction_method(M)` -* a search direction ``η = -\operatorname{grad}F(x)`` +* a search direction ``η = -X`` * an offset, ``f_0 = F(x)`` -And use the 4 keywords to limit the maximal increase and decrease steps as well as -a maximal stepsize (especially on non-Hadamard manifolds) and a minimal one. +the method can also be performed in-place of `q`, that is the resulting best point one reaches +with the step size `s` as second argument. + +## Keywords + +* `retraction_method` (`default_retraction_method(M)`) the retraction to use. +* `stop_when_stepsize_less (`0.0`) to avoid numerical underflow +* `stop_when_stepsize_exceeds` ([`max_stepsize`](@ref)`(M, p) / norm(M, p, η)`) to avoid leaving the injectivity radius on a manifold +* `stop_increasing_at_step` (`100`) stop the inicial increase of step size after these many steps +* `stop_decreasing_at_step` (`1000`) stop the decreasing search after these many steps + +These keywords are used as safeguards, where only the max stepsize is a very manifold specific one. # Return value A stepsize `s` and a message `msg` (in case any of the 4 criteria hit) """ function linesearch_backtrack( + M::AbstractManifold, f, p, X::T, s, decrease, contract, η::T=-X, f0=f(M, p); kwargs... +) where {T} + q = allocate(M, p) + return linesearch_backtrack!(M, q, f, p, X, s, decrease, contract, η, f0; kwargs...) +end + +""" + (s, msg) = linesearch_backtrack!(M, q, F, p, X, s, decrease, contract η = -X, f0 = f(p)) + +Perform a linesearch backtrack in-place of `q`. +For all details and options, see [`linesearch_backtrack`](@ref) +""" +function linesearch_backtrack!( M::AbstractManifold, + q, f::TF, p, - grad_f_at_p::T, + X::T, s, decrease, contract, - retr::AbstractRetractionMethod=default_retraction_method(M), - η::T=-grad_f_at_p, + η::T=-X, f0=f(M, p); + retraction_method::AbstractRetractionMethod=default_retraction_method(M), stop_when_stepsize_less=0.0, stop_when_stepsize_exceeds=max_stepsize(M, p) / norm(M, p, η), stop_increasing_at_step=100, stop_decreasing_at_step=1000, ) where {TF,T} msg = "" - p_new = retract(M, p, η, s, retr) - fNew = f(M, p_new) - search_dir_inner = real(inner(M, p, η, grad_f_at_p)) + retract!(M, q, p, η, s, retraction_method) + f_q = f(M, q) + search_dir_inner = real(inner(M, p, η, X)) if search_dir_inner >= 0 msg = "The search direction η might not be a descent direction, since ⟨η, grad_f(p)⟩ ≥ 0." 
end i = 0 - while fNew < f0 + decrease * s * search_dir_inner # increase + while f_q < f0 + decrease * s * search_dir_inner # increase i = i + 1 s = s / contract - retract!(M, p_new, p, η, s, retr) - fNew = f(M, p_new) + retract!(M, q, p, η, s, retraction_method) + f_q = f(M, q) if i == stop_increasing_at_step (length(msg) > 0) && (msg = "$msg\n") msg = "$(msg)Max increase steps ($(stop_increasing_at_step)) reached" @@ -390,11 +421,11 @@ function linesearch_backtrack( end end i = 0 - while fNew > f0 + decrease * s * search_dir_inner # decrease + while f_q > f0 + decrease * s * search_dir_inner # decrease i = i + 1 s = contract * s - retract!(M, p_new, p, η, s, retr) - fNew = f(M, p_new) + retract!(M, q, p, η, s, retraction_method) + f_q = f(M, q) if i == stop_decreasing_at_step (length(msg) > 0) && (msg = "$msg\n") msg = "$(msg)Max decrease steps ($(stop_decreasing_at_step)) reached" @@ -478,6 +509,10 @@ and ``γ`` is the sufficient decrease parameter ``∈(0,1)``. We can then find t * `sufficient_decrease` – (`1e-4`) sufficient decrease parameter contained in the interval (0,1) * `vector_transport_method` – (`ParallelTransport()`) the vector transport method to use +as well as for internal use + +* `candidate_point` (`allocate_result(M, rand)`) to store an interims result + Furthermore the following fields act as safeguards * `stop_when_stepsize_less - (`0.0`) smallest stepsize when to stop (the last one before is taken) @@ -492,6 +527,8 @@ Pass `:Messages` to a `debug=` to see `@info`s when these happen. NonmonotoneLinesearch() with the Fields above in their order as optional arguments (deprecated). +THis is deprecated, since both defaults above and the memory allocation for the candidate +would be for the default manifold. NonmonotoneLinesearch(M) @@ -505,43 +542,44 @@ mutable struct NonmonotoneLinesearch{ VTM<:AbstractVectorTransportMethod, T<:AbstractVector, TSSA<:StoreStateAction, + P, } <: Linesearch - retraction_method::TRM - vector_transport_method::VTM - stepsize_reduction::Float64 - sufficient_decrease::Float64 bb_min_stepsize::Float64 bb_max_stepsize::Float64 + candiate_point::P initial_stepsize::Float64 + message::String old_costs::T - strategy::Symbol - storage::TSSA - stop_when_stepsize_less::Float64 - stop_when_stepsize_exceeds::Float64 - stop_increasing_at_step::Int + retraction_method::TRM + stepsize_reduction::Float64 stop_decreasing_at_step::Int - message::String + stop_increasing_at_step::Int + stop_when_stepsize_exceeds::Float64 + stop_when_stepsize_less::Float64 + storage::TSSA + strategy::Symbol + sufficient_decrease::Float64 + vector_transport_method::VTM function NonmonotoneLinesearch( M::AbstractManifold=DefaultManifold(); - initial_stepsize::Float64=1.0, - retraction_method::AbstractRetractionMethod=default_retraction_method(M), - vector_transport_method::AbstractVectorTransportMethod=default_vector_transport_method( - M - ), - stepsize_reduction::Float64=0.5, - sufficient_decrease::Float64=1e-4, - memory_size::Int=10, bb_min_stepsize::Float64=1e-3, bb_max_stepsize::Float64=1e3, - strategy::Symbol=:direct, + candidate_point::P=allocate_result(M, rand), + initial_stepsize::Float64=1.0, + memory_size::Int=10, + retraction_method::TRM=default_retraction_method(M), + stepsize_reduction::Float64=0.5, + stop_when_stepsize_less::Float64=0.0, + stop_when_stepsize_exceeds::Float64=max_stepsize(M), + stop_increasing_at_step::Int=100, + stop_decreasing_at_step::Int=1000, storage::Union{Nothing,StoreStateAction}=StoreStateAction( M; store_fields=[:Iterate, :Gradient] ), - 
stop_when_stepsize_less::Float64=0.0, - stop_when_stepsize_exceeds::Float64=max_stepsize(M), - stop_increasing_at_step=100, - stop_decreasing_at_step=1000, - ) + strategy::Symbol=:direct, + sufficient_decrease::Float64=1e-4, + vector_transport_method::VTM=default_vector_transport_method(M), + ) where {TRM,VTM,P} if strategy ∉ [:direct, :inverse, :alternating] @warn string( "The strategy '", @@ -569,27 +607,23 @@ mutable struct NonmonotoneLinesearch{ if memory_size <= 0 throw(DomainError(memory_size, "The memory_size has to be greater than zero.")) end - return new{ - typeof(retraction_method), - typeof(vector_transport_method), - Vector{Float64}, - typeof(storage), - }( - retraction_method, - vector_transport_method, - stepsize_reduction, - sufficient_decrease, + return new{TRM,VTM,Vector{Float64},typeof(storage),P}( bb_min_stepsize, bb_max_stepsize, + candidate_point, initial_stepsize, + "", zeros(memory_size), - strategy, - storage, - stop_when_stepsize_less, - stop_when_stepsize_exceeds, - stop_increasing_at_step, + retraction_method, + stepsize_reduction, stop_decreasing_at_step, - "", + stop_increasing_at_step, + stop_when_stepsize_exceeds, + stop_when_stepsize_less, + storage, + strategy, + sufficient_decrease, + vector_transport_method, ) end end @@ -622,21 +656,20 @@ function (a::NonmonotoneLinesearch)( ) end function (a::NonmonotoneLinesearch)( - M::mT, x, F::TF, gradFx::T, η::T, old_x, old_gradient, iter::Int; kwargs... + M::mT, p, f::TF, X::T, η::T, old_p, old_X, iter::Int; kwargs... ) where {mT<:AbstractManifold,TF,T} #find the difference between the current and previous gradient after the previous gradient is transported to the current tangent space - grad_diff = - gradFx - vector_transport_to(M, old_x, old_gradient, x, a.vector_transport_method) + grad_diff = X - vector_transport_to(M, old_p, old_X, p, a.vector_transport_method) #transport the previous step into the tangent space of the current manifold point x_diff = -a.initial_stepsize * - vector_transport_to(M, old_x, old_gradient, x, a.vector_transport_method) + vector_transport_to(M, old_p, old_X, p, a.vector_transport_method) #compute the new Barzilai-Borwein step size - s1 = real(inner(M, x, x_diff, grad_diff)) - s2 = real(inner(M, x, grad_diff, grad_diff)) + s1 = real(inner(M, p, x_diff, grad_diff)) + s2 = real(inner(M, p, grad_diff, grad_diff)) s2 = s2 == 0 ? 
1.0 : s2 - s3 = real(inner(M, x, x_diff, x_diff)) + s3 = real(inner(M, p, x_diff, x_diff)) #indirect strategy if a.strategy == :inverse if s1 > 0 @@ -674,26 +707,27 @@ function (a::NonmonotoneLinesearch)( memory_size = length(a.old_costs) if iter <= memory_size - a.old_costs[iter] = F(M, x) + a.old_costs[iter] = f(M, p) else a.old_costs[1:(memory_size - 1)] = a.old_costs[2:memory_size] - a.old_costs[memory_size] = F(M, x) + a.old_costs[memory_size] = f(M, p) end #compute the new step size with the help of the Barzilai-Borwein step size - (a.initial_stepsize, a.message) = linesearch_backtrack( + (a.initial_stepsize, a.message) = linesearch_backtrack!( M, - F, - x, - gradFx, + a.candiate_point, + f, + p, + X, BarzilaiBorwein_stepsize, a.sufficient_decrease, a.stepsize_reduction, - a.retraction_method, η, maximum([a.old_costs[j] for j in 1:min(iter, memory_size)]); - stop_when_stepsize_less=a.stop_when_stepsize_less / norm(M, x, η), - stop_when_stepsize_exceeds=a.stop_when_stepsize_exceeds / norm(M, x, η), + retraction_method=a.retraction_method, + stop_when_stepsize_less=a.stop_when_stepsize_less / norm(M, p, η), + stop_when_stepsize_exceeds=a.stop_when_stepsize_exceeds / norm(M, p, η), stop_increasing_at_step=a.stop_increasing_at_step, stop_decreasing_at_step=a.stop_decreasing_at_step, ) @@ -737,40 +771,65 @@ There exist two constructors, where, when prodivind the manifold `M` as a first parameter, its default retraction and vector transport are the default. In this case the retraction and the vector transport are also keyword arguments for ease of use. The other constructor is kept for backward compatibility. -Note that the `linesearch_stopsize` to stop for too small stepsizes is only available in the +Note that the `stop_when_stepsize_less` to stop for too small stepsizes is only available in the new signature including `M`. - WolfePowellLinesearch( - M, - c1::Float64=10^(-4), - c2::Float64=0.999; - retraction_method = default_retraction_method(M), - vector_transport_method = default_vector_transport(M), - linesearch_stopsize = 0.0 - ) + WolfePowellLinesearch(M, c1::Float64=10^(-4), c2::Float64=0.999; kwargs... + +Generate a Wolfe-Powell linesearch + +## Keyword Arguments + +* `candidate_point` (`allocate_result(M, rand)`) memory for an internims candidate +* `candidate_tangent` (`allocate_result(M, zero_vector, candidate_point)`) memory for a gradient +* `candidate_direcntion` (`allocate_result(M, zero_vector, candidate_point)`) memory for a direction +* `max_stepsize` ([`max_stepsize`](@ref)`(M, p)`) – largest stepsize allowed here. 
+* `retraction_method` (`ExponentialRetraction()`) the retraction to use +* `stop_when_stepsize_less` (`0.0`) smallest stepsize when to stop (the last one before is taken) +* `vector_transport_method` (`ParallelTransport()`) the vector transport method to use """ mutable struct WolfePowellLinesearch{ - TRM<:AbstractRetractionMethod,VTM<:AbstractVectorTransportMethod + TRM<:AbstractRetractionMethod,VTM<:AbstractVectorTransportMethod,P,T } <: Linesearch - retraction_method::TRM - vector_transport_method::VTM c1::Float64 c2::Float64 + candidate_direction::T + candidate_point::P + candidate_tangent::T last_stepsize::Float64 - linesearch_stopsize::Float64 + max_stepsize::Float64 + retraction_method::TRM + stop_when_stepsize_less::Float64 + vector_transport_method::VTM function WolfePowellLinesearch( M::AbstractManifold=DefaultManifold(), c1::Float64=10^(-4), c2::Float64=0.999; - retraction_method::AbstractRetractionMethod=default_retraction_method(M), - vector_transport_method::AbstractVectorTransportMethod=default_vector_transport_method( - M - ), - linesearch_stopsize::Float64=0.0, - ) - return new{typeof(retraction_method),typeof(vector_transport_method)}( - retraction_method, vector_transport_method, c1, c2, 0.0, linesearch_stopsize + candidate_point::P=allocate_result(M, rand), + candidate_tangent::T=allocate_result(M, zero_vector, candidate_point), + candidate_direction::T=allocate_result(M, zero_vector, candidate_point), + max_stepsize::Real=max_stepsize(M, candidate_point), + retraction_method::TRM=default_retraction_method(M), + vector_transport_method::VTM=default_vector_transport_method(M), + linesearch_stopsize::Float64=0.0, # deprecated remove on next breaking change + stop_when_stepsize_less::Float64=linesearch_stopsize, # + ) where {TRM,VTM,P,T} + (linesearch_stopsize > 0.0) && Base.depwarn( + WolfePowellLinesearch, + "`linesearch_backtrack` is deprecated – use `stop_when_stepsize_less` instead´.", + ) + return new{TRM,VTM,P,T}( + c1, + c2, + candidate_direction, + candidate_point, + candidate_tangent, + 0.0, + max_stepsize, + retraction_method, + stop_when_stepsize_less, + vector_transport_method, ) end end @@ -781,65 +840,70 @@ function (a::WolfePowellLinesearch)( η=-get_gradient(mp, get_iterate(ams)); kwargs..., ) + # For readability extract a few variables M = get_manifold(mp) - cur_p = get_iterate(ams) - grad_norm = norm(M, cur_p, η) - max_step = max_stepsize(M, cur_p) - # max_step_increase is the upper limit for s_plus - max_step_increase = ifelse(isfinite(max_step), min(1e9, max_step / grad_norm), 1e9) - step = ifelse(isfinite(max_step), min(1.0, max_step / grad_norm), 1.0) + p = get_iterate(ams) + X = get_gradient(ams) + l = real(inner(M, p, η, X)) + grad_norm = norm(M, p, η) + max_step_increase = ifelse( + isfinite(a.max_stepsize), min(1e9, a.max_stepsize / grad_norm), 1e9 + ) + step = ifelse(isfinite(a.max_stepsize), min(1.0, a.max_stepsize / grad_norm), 1.0) s_plus = step s_minus = step - f0 = get_cost(mp, cur_p) - p_new = retract(M, cur_p, η, step, a.retraction_method) - fNew = get_cost(mp, p_new) - η_xNew = vector_transport_to(M, cur_p, η, p_new, a.vector_transport_method) - if fNew > f0 + a.c1 * step * real(inner(M, get_iterate(ams), η, get_gradient(ams))) - while ( - fNew > f0 + a.c1 * step * real(inner(M, get_iterate(ams), η, get_gradient(ams))) - ) && (s_minus > 10^(-9)) # decrease + f0 = get_cost(mp, p) + retract!(M, a.candidate_point, p, η, step, a.retraction_method) + fNew = get_cost(mp, a.candidate_point) + vector_transport_to!( + M, a.candidate_direction, p, 
η, a.candidate_point, a.vector_transport_method + ) + if fNew > f0 + a.c1 * step * l + while (fNew > f0 + a.c1 * step * l) && (s_minus > 10^(-9)) # decrease s_minus = s_minus * 0.5 step = s_minus - retract!(M, p_new, get_iterate(ams), η, step, a.retraction_method) - fNew = get_cost(mp, p_new) + retract!(M, a.candidate_point, p, η, step, a.retraction_method) + fNew = get_cost(mp, a.candidate_point) end s_plus = 2.0 * s_minus else vector_transport_to!( - M, η_xNew, get_iterate(ams), η, p_new, a.vector_transport_method + M, a.candidate_direction, p, η, a.candidate_point, a.vector_transport_method ) - if real(inner(M, p_new, get_gradient(mp, p_new), η_xNew)) < - a.c2 * real(inner(M, get_iterate(ams), η, get_gradient(ams))) - while fNew <= - f0 + - a.c1 * step * real(inner(M, get_iterate(ams), η, get_gradient(ams))) && - (s_plus < max_step_increase)# increase + get_gradient!(mp, a.candidate_tangent, a.candidate_point) + if real(inner(M, a.candidate_point, a.candidate_tangent, a.candidate_direction)) < + a.c2 * l + while fNew <= f0 + a.c1 * step * l && (s_plus < max_step_increase)# increase s_plus = s_plus * 2.0 step = s_plus - retract!(M, p_new, get_iterate(ams), η, step, a.retraction_method) - fNew = get_cost(mp, p_new) + retract!(M, a.candidate_point, p, η, step, a.retraction_method) + fNew = get_cost(mp, a.candidate_point) end s_minus = s_plus / 2.0 end end - retract!(M, p_new, get_iterate(ams), η, s_minus, a.retraction_method) - vector_transport_to!(M, η_xNew, get_iterate(ams), η, p_new, a.vector_transport_method) - while real(inner(M, p_new, get_gradient(mp, p_new), η_xNew)) < - a.c2 * real(inner(M, get_iterate(ams), η, get_gradient(ams))) + retract!(M, a.candidate_point, p, η, s_minus, a.retraction_method) + vector_transport_to!( + M, a.candidate_direction, p, η, a.candidate_point, a.vector_transport_method + ) + get_gradient!(mp, a.candidate_tangent, a.candidate_point) + while real(inner(M, a.candidate_point, a.candidate_tangent, a.candidate_direction)) < + a.c2 * l step = (s_minus + s_plus) / 2 - retract!(M, p_new, get_iterate(ams), η, step, a.retraction_method) - fNew = get_cost(mp, p_new) - if fNew <= f0 + a.c1 * step * real(inner(M, get_iterate(ams), η, get_gradient(ams))) + retract!(M, a.candidate_point, p, η, step, a.retraction_method) + fNew = get_cost(mp, a.candidate_point) + if fNew <= f0 + a.c1 * step * l s_minus = step else s_plus = step end - abs(s_plus - s_minus) <= a.linesearch_stopsize && break - retract!(M, p_new, get_iterate(ams), η, s_minus, a.retraction_method) + abs(s_plus - s_minus) <= a.stop_when_stepsize_less && break + retract!(M, a.candidate_point, p, η, s_minus, a.retraction_method) vector_transport_to!( - M, η_xNew, get_iterate(ams), η, p_new, a.vector_transport_method + M, a.candidate_direction, p, η, a.candidate_point, a.vector_transport_method ) + get_gradient!(mp, a.candidate_tangent, a.candidate_point) end step = s_minus a.last_stepsize = step @@ -1034,7 +1098,7 @@ as well as the internal fields # Constructor - AdaptiveWNGrad(M=DefaultManifold, grad_f=(M,p) -> zero_vector(M,rand(M)), p=rand(M); kwargs...) + AdaptiveWNGrad(M=DefaultManifold, grad_f=(M, p) -> zero_vector(M, rand(M)), p=rand(M); kwargs...) Where all above fields with defaults are keyword arguments. 
An additional keyword arguments diff --git a/src/plans/stopping_criterion.jl b/src/plans/stopping_criterion.jl index b2046d28df..1c29e54b36 100644 --- a/src/plans/stopping_criterion.jl +++ b/src/plans/stopping_criterion.jl @@ -667,7 +667,7 @@ end function (c::StopWhenAny)(p::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int) (i == 0) && (c.reason = "") # reset on init if any(subC -> subC(p, s, i), c.criteria) - c.reason = string([get_reason(subC) for subC in c.criteria]...) + c.reason = string((get_reason(subC) for subC in c.criteria)...) return true end return false diff --git a/src/solvers/alternating_gradient_descent.jl b/src/solvers/alternating_gradient_descent.jl index fcc1b3d69f..8a533be741 100644 --- a/src/solvers/alternating_gradient_descent.jl +++ b/src/solvers/alternating_gradient_descent.jl @@ -129,15 +129,16 @@ function (a::ArmijoLinesearch)( M = get_manifold(amp) X = zero_vector(M, agds.p) get_gradient!(amp, X[M, agds.order[agds.k]], agds.p, agds.order[agds.k]) - (a.last_stepsize, a.message) = linesearch_backtrack( + (a.last_stepsize, a.message) = linesearch_backtrack!( M, + a.candidate_point, (M, p) -> get_cost(amp, p), agds.p, X, a.last_stepsize, a.sufficient_decrease, - a.contraction_factor, - a.retraction_method, + a.contraction_factor; + retraction_method=a.retraction_method, ) return a.last_stepsize end diff --git a/src/solvers/quasi_Newton.jl b/src/solvers/quasi_Newton.jl index 6019b2d5de..d80783b105 100644 --- a/src/solvers/quasi_Newton.jl +++ b/src/solvers/quasi_Newton.jl @@ -5,13 +5,20 @@ These Quasi Newton [`AbstractManoptSolverState`](@ref) represent any quasi-Newto used with any update rule for the direction. # Fields -* `p` – the current iterate, a point on a manifold -* `X` – the current gradient -* `sk` – the current step -* `yk` the current gradient difference -* `direction_update` - an [`AbstractQuasiNewtonDirectionUpdate`](@ref) rule. -* `retraction_method` – an `AbstractRetractionMethod` -* `stop` – a [`StoppingCriterion`](@ref) + +* `p` the current iterate, a point on a manifold +* `X` the current gradient +* `sk` the current step +* `yk` the current gradient difference +* `direction_update` an [`AbstractQuasiNewtonDirectionUpdate`](@ref) rule. +* `retraction_method` an `AbstractRetractionMethod` +* `stop` a [`StoppingCriterion`](@ref) + +as well as for internal use + +* `p_old` the last iterate +* `η` the current update direction +* `X_old` the last gradient # Constructor @@ -43,6 +50,7 @@ mutable struct QuasiNewtonState{ } <: AbstractGradientSolverState p::P p_old::P + η::T X::T sk::T yk::T @@ -82,6 +90,7 @@ function QuasiNewtonState( return QuasiNewtonState{P,typeof(sk_init),D,SC,typeof(stepsize),RM,VTM}( p, copy(M, p), + copy(M, p, initial_vector), initial_vector, sk_init, copy(M, sk_init), @@ -150,6 +159,7 @@ Perform a quasi Newton iteration for `f` on the manifold `M` starting in the point `p`. The ``k``th iteration consists of + 1. Compute the search direction ``η_k = -\mathcal{B}_k [\operatorname{grad}f (p_k)]`` or solve ``\mathcal{H}_k [η_k] = -\operatorname{grad}f (p_k)]``. 2. Determine a suitable stepsize ``α_k`` along the curve ``\gamma(α) = R_{p_k}(α η_k)`` e.g. by using [`WolfePowellLinesearch`](@ref). 3. Compute `p_{k+1} = R_{p_k}(α_k η_k)``. @@ -157,37 +167,38 @@ The ``k``th iteration consists of 5. Compute the new approximate Hessian ``H_{k+1}`` or its inverse ``B_k``. # Input -* `M` – a manifold ``\mathcal{M}``. -* `f` – a cost function ``F : \mathcal{M} →ℝ`` to minimize. 
-* `grad_f`– the gradient ``\operatorname{grad}F : \mathcal{M} →T_x\mathcal M`` of ``F``. -* `p` – an initial value ``p ∈ \mathcal{M}``. + +* `M` a manifold ``\mathcal{M}``. +* `f` a cost function ``F : \mathcal{M} →ℝ`` to minimize. +* `grad_f` the gradient ``\operatorname{grad}F : \mathcal{M} →T_x\mathcal M`` of ``F``. +* `p` an initial value ``p ∈ \mathcal{M}``. # Optional -* `basis` – (`DefaultOrthonormalBasis()`) basis within the tangent space(s) to represent the - Hessian (inverse). -* `cautious_update` – (`false`) – whether or not to use + +* `basis` (`DefaultOrthonormalBasis()`) basis within the tangent space(s) + to represent the Hessian (inverse). +* `cautious_update` (`false`) – whether or not to use a [`QuasiNewtonCautiousDirectionUpdate`](@ref) -* `cautious_function` – (`(x) -> x*10^(-4)`) – a monotone increasing function that is zero +* `cautious_function` (`(x) -> x*10^(-4)`) – a monotone increasing function that is zero at 0 and strictly increasing at 0 for the cautious update. -* `direction_update` – ([`InverseBFGS`](@ref)`()`) the update rule to use. -* `evaluation` – ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by +* `direction_update` ([`InverseBFGS`](@ref)`()`) the update rule to use. +* `evaluation` ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by allocation (default) form `gradF(M, x)` or [`InplaceEvaluation`](@ref) in place, i.e. is of the form `gradF!(M, X, x)`. -* `initial_operator` – (`Matrix{Float64}(I,n,n)`) initial matrix to use die the +* `initial_operator` (`Matrix{Float64}(I,n,n)`) initial matrix to use die the approximation, where `n=manifold_dimension(M)`, see also `scale_initial_operator`. -* `memory_size` – (`20`) limited memory, number of ``s_k, y_k`` to store. Set to a negative +* `memory_size` (`20`) limited memory, number of ``s_k, y_k`` to store. Set to a negative value to use a full memory representation -* `retraction_method` – (`default_retraction_method(M, typeof(p))`) a retraction method to use, by default - the exponential map. -* `scale_initial_operator` - (`true`) scale initial operator with +* `retraction_method` (`default_retraction_method(M, typeof(p))`) a retraction method to use +* `scale_initial_operator` (`true`) scale initial operator with ``\frac{⟨s_k,y_k⟩_{p_k}}{\lVert y_k\rVert_{p_k}}`` in the computation -* `stabilize` – (`true`) stabilize the method numerically by projecting computed (Newton-) +* `stabilize` (`true`) stabilize the method numerically by projecting computed (Newton-) directions to the tangent space to reduce numerical errors -* `stepsize` – ([`WolfePowellLinesearch`](@ref)`(retraction_method, vector_transport_method)`) +* `stepsize` ([`WolfePowellLinesearch`](@ref)`(retraction_method, vector_transport_method)`) specify a [`Stepsize`](@ref). -* `stopping_criterion` - ([`StopAfterIteration`](@ref)`(max(1000, memory_size)) | `[`StopWhenGradientNormLess`](@ref)`(1e-6)`) +* `stopping_criterion` ([`StopAfterIteration`](@ref)`(max(1000, memory_size)) | `[`StopWhenGradientNormLess`](@ref)`(1e-6)`) specify a [`StoppingCriterion`](@ref) -* `vector_transport_method` – (`default_vector_transport_method(M, typeof(p))`) a vector transport to use. +* `vector_transport_method` (`default_vector_transport_method(M, typeof(p))`) a vector transport to use. # Output @@ -233,10 +244,10 @@ Perform a quasi Newton iteration for `F` on the manifold `M` starting in the point `x` using a retraction ``R`` and a vector transport ``T``. # Input -* `M` – a manifold ``\mathcal{M}``. 
-* `F` – a cost function ``F: \mathcal{M} →ℝ`` to minimize. -* `gradF`– the gradient ``\operatorname{grad}F : \mathcal{M} → T_x\mathcal M`` of ``F`` implemented as `gradF(M,p)`. -* `x` – an initial value ``x ∈ \mathcal{M}``. +* `M` a manifold ``\mathcal{M}``. +* `F` a cost function ``F: \mathcal{M} →ℝ`` to minimize. +* `gradF` the gradient ``\operatorname{grad}F : \mathcal{M} → T_x\mathcal M`` of ``F`` implemented as `gradF(M,p)`. +* `x` an initial value ``x ∈ \mathcal{M}``. For all optional parameters, see [`quasi_Newton`](@ref). """ @@ -333,16 +344,21 @@ end function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter) M = get_manifold(mp) get_gradient!(mp, qns.X, qns.p) - η = qns.direction_update(mp, qns) - α = qns.stepsize(mp, qns, iter, η) + qns.direction_update(qns.η, mp, qns) + α = qns.stepsize(mp, qns, iter, qns.η) copyto!(M, qns.p_old, get_iterate(qns)) - αη = α * η - retract!(M, qns.p, qns.p, η, α, qns.retraction_method) + retract!(M, qns.p, qns.p, qns.η, α, qns.retraction_method) + qns.η .*= α β = locking_condition_scale( - M, qns.direction_update, qns.p_old, αη, qns.p, qns.vector_transport_method + M, qns.direction_update, qns.p_old, qns.η, qns.p, qns.vector_transport_method ) vector_transport_to!( - M, qns.sk, qns.p_old, αη, qns.p, get_update_vector_transport(qns.direction_update) + M, + qns.sk, + qns.p_old, + qns.η, + qns.p, + get_update_vector_transport(qns.direction_update), ) vector_transport_to!( M, qns.X, qns.p_old, qns.X, qns.p, get_update_vector_transport(qns.direction_update) diff --git a/test/plans/test_stepsize.jl b/test/plans/test_stepsize.jl index 87ba49a81e..c4209881f2 100644 --- a/test/plans/test_stepsize.jl +++ b/test/plans/test_stepsize.jl @@ -36,7 +36,15 @@ using Manopt, Manifolds, Test ) @test startswith(s1[2], "Max decrease") s2 = Manopt.linesearch_backtrack( - M, f, p, grad_f(M, p), 1.0, 1.0, 0.5, ExponentialRetraction(), grad_f(M, p); + M, + f, + p, + grad_f(M, p), + 1.0, + 1.0, + 0.5, + grad_f(M, p); + retraction_method=ExponentialRetraction(), ) @test startswith(s2[2], "The search direction") s3 = Manopt.linesearch_backtrack( diff --git a/test/solvers/test_quasi_Newton.jl b/test/solvers/test_quasi_Newton.jl index a949a6fd72..97f0e96a22 100644 --- a/test/solvers/test_quasi_Newton.jl +++ b/test/solvers/test_quasi_Newton.jl @@ -37,10 +37,16 @@ using LinearAlgebra: I, eigvecs, tr, Diagonal stopping_criterion=StopWhenGradientNormLess(10^(-6)), return_state=true, ) + # Check newton update direction works also allocating + dmp = DefaultManoptProblem(M, ManifoldGradientObjective(f, grad_f)) + p_star = get_solver_result(lrbfgs_s) + D = zero_vector(M, p_star) + lrbfgs_s.direction_update(D, dmp, lrbfgs_s) + @test isapprox(M, p_star, D, lrbfgs_s.direction_update(dmp, lrbfgs_s)) + @test startswith( repr(lrbfgs_s), "# Solver state for `Manopt.jl`s Quasi Newton Method\n" ) - dmp = DefaultManoptProblem(M, ManifoldGradientObjective(f, grad_f)) @test get_last_stepsize(dmp, lrbfgs_s, lrbfgs_s.stepsize) > 0 @test Manopt.get_iterate(lrbfgs_s) == x_lrbfgs set_gradient!(lrbfgs_s, M, p, grad_f(M, p)) @@ -68,14 +74,21 @@ using LinearAlgebra: I, eigvecs, tr, Diagonal ) @test isapprox(M, x_lrbfgs_cached_2, x_lrbfgs; atol=1e-5) - x_clrbfgs = quasi_Newton( + clrbfgs_s = quasi_Newton( M, f, grad_f, p; cautious_update=true, stopping_criterion=StopWhenGradientNormLess(10^(-6)), + return_state=true, ) + # Test direction passthrough + x_clrbfgs = get_solver_result(clrbfgs_s) + D = zero_vector(M, x_clrbfgs) + clrbfgs_s.direction_update(D, dmp, clrbfgs_s) + @test 
isapprox(M, x_clrbfgs, D, clrbfgs_s.direction_update(dmp, clrbfgs_s)) + @test norm(x_clrbfgs - x_solution) ≈ 0 atol = 10.0^(-14) x_rbfgs_Huang = quasi_Newton( @@ -95,7 +108,7 @@ using LinearAlgebra: I, eigvecs, tr, Diagonal for T in [InverseBFGS(), BFGS(), InverseDFP(), DFP(), InverseSR1(), SR1()] for c in [true, false] - x_direction = quasi_Newton( + x_state = quasi_Newton( M, f, grad_f, @@ -104,7 +117,12 @@ using LinearAlgebra: I, eigvecs, tr, Diagonal cautious_update=c, memory_size=-1, stopping_criterion=StopWhenGradientNormLess(10^(-12)), + return_state=true, ) + x_direction = get_solver_result(x_state) + D = zero_vector(M, x_direction) + x_state.direction_update(D, dmp, x_state) + @test isapprox(M, x_direction, D, x_state.direction_update(dmp, x_state)) @test norm(x_direction - x_solution) ≈ 0 atol = 10.0^(-14) end end
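As a usage note mirroring the checks added in these tests, here is a short sketch of the new in-place direction-update call; the sphere, cost, and gradient are illustrative assumptions and not part of the test suite:

```julia
using Manopt, Manifolds

M = Sphere(2)
f(M, q) = q[3]
grad_f(M, q) = project(M, q, [0.0, 0.0, 1.0])

st = quasi_Newton(M, f, grad_f, [1.0, 0.0, 0.0]; return_state=true)
mp = DefaultManoptProblem(M, ManifoldGradientObjective(f, grad_f))
p_star = get_solver_result(st)

# New in this change: the direction update can write into a pre-allocated tangent vector
η = zero_vector(M, p_star)
st.direction_update(η, mp, st)
# and agrees with the allocating call
isapprox(M, p_star, η, st.direction_update(mp, st))
```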