From 2810598827507c0a57e1ecff89783121ce189673 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sat, 16 Dec 2023 10:03:08 +0100 Subject: [PATCH 01/18] Resolving #333 point 6, linesearch_backtrack! can now be used and is used in the previous places. --- src/plans/stepsize.jl | 265 +++++++++++--------- src/solvers/alternating_gradient_descent.jl | 7 +- test/plans/test_stepsize.jl | 2 +- 3 files changed, 155 insertions(+), 119 deletions(-) diff --git a/src/plans/stepsize.jl b/src/plans/stepsize.jl index 27d51d675f..58dc65099b 100644 --- a/src/plans/stepsize.jl +++ b/src/plans/stepsize.jl @@ -205,15 +205,18 @@ last step size. # Fields -* `initial_stepsize` – (`1.0`) and initial step size -* `retraction_method` – (`default_retraction_method(M)`) the retraction to use -* `contraction_factor` – (`0.95`) exponent for line search reduction -* `sufficient_decrease` – (`0.1`) gain within Armijo's rule -* `last_stepsize` – (`initialstepsize`) the last step size we start the search with -* `initial_guess` - (`(p,s,i,l) -> l`) based on a [`AbstractManoptProblem`](@ref) `p`, +* `initial_stepsize` (`1.0`) and initial step size +* `retraction_method` (`default_retraction_method(M)`) the retraction to use +* `contraction_factor` (`0.95`) exponent for line search reduction +* `sufficient_decrease` (`0.1`) gain within Armijo's rule +* `last_stepsize` (`initialstepsize`) the last step size we start the search with +* `initial_guess` (`(p,s,i,l) -> l`) based on a [`AbstractManoptProblem`](@ref) `p`, [`AbstractManoptSolverState`](@ref) `s` and a current iterate `i` and a last step size `l`, this returns an initial guess. The default uses the last obtained stepsize +as well as for internal use +* `candidate_point` (`rand(M)`) + Furthermore the following fields act as safeguards * `stop_when_stepsize_less` - (`0.0`) smallest stepsize when to stop (the last one before is taken) @@ -229,50 +232,54 @@ Pass `:Messages` to a `debug=` to see `@info`s when these happen. with the Fields above as keyword arguments and the retraction is set to the default retraction on `M`. -The constructors return the functor to perform Armijo line search, where two interfaces are available: -* based on a tuple `(amp, ams, i)` of a [`AbstractManoptProblem`](@ref) `amp`, [`AbstractManoptSolverState`](@ref) `ams` - and a current iterate `i`. -* with `(M, x, F, gradFx[,η=-gradFx]) -> s` where `M`, a current - point `x` a function `F`, that maps from the manifold to the reals, - its gradient (a tangent vector) `gradFx```=\operatorname{grad}F(x)`` at `x` and an optional - search direction tangent vector `η=-gradFx` are the arguments. +The constructors return the functor to perform Armijo line search, where + + (a::ArmijoLinesearch)(amp::AbstractManoptProblem, ams::AbstractManoptSolverState, i) + +of a [`AbstractManoptProblem`](@ref) `amp`, [`AbstractManoptSolverState`](@ref) `ams` and a current iterate `i` +with keywords + * `p` (`a.candidate_point`) to pass memory for the candidate, which in the end also stores the resulting point, + * `η` (`-get_gradient(mp, get_iterate(s));`) the search direction to use, by default the steepest descent direction. 
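For illustration (an editorial addition, not part of this patch), a minimal sketch of how the functor is typically constructed and handed to a solver; the manifold, cost, and gradient below are hypothetical stand-ins:

```julia
using Manopt, Manifolds, LinearAlgebra

M = Sphere(2)
A = Diagonal([1.0, 2.0, 3.0])
f(M, p) = p' * A * p                      # hypothetical Rayleigh-quotient-like cost
grad_f(M, p) = project(M, p, 2 * A * p)   # its Riemannian gradient on the sphere
p0 = [1.0, 1.0, 1.0] / sqrt(3)

ls = ArmijoLinesearch(M; contraction_factor=0.9, sufficient_decrease=0.05)
p_opt = gradient_descent(M, f, grad_f, p0; stepsize=ls)
```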
""" -mutable struct ArmijoLinesearch{TRM<:AbstractRetractionMethod,F} <: Linesearch +mutable struct ArmijoLinesearch{TRM<:AbstractRetractionMethod,P,F} <: Linesearch + candidate_point::P + contraction_factor::Float64 + initial_guess::F initial_stepsize::Float64 + last_stepsize::Float64 + message::String retraction_method::TRM - contraction_factor::Float64 sufficient_decrease::Float64 - last_stepsize::Float64 stop_when_stepsize_less::Float64 stop_when_stepsize_exceeds::Float64 stop_increasing_at_step::Int stop_decreasing_at_step::Int - initial_guess::F - message::String function ArmijoLinesearch( M::AbstractManifold=DefaultManifold(); - initial_stepsize::Float64=1.0, - retraction_method::AbstractRetractionMethod=default_retraction_method(M), - contraction_factor::Float64=0.95, - sufficient_decrease::Float64=0.1, + candidate_point::P=rand(M), + contraction_factor=0.95, + initial_stepsize=1.0, + initial_guess=armijo_initial_guess, + retraction_method::TRM=default_retraction_method(M), stop_when_stepsize_less=0.0, stop_when_stepsize_exceeds=max_stepsize(M), - stop_increasing_at_step=100, - stop_decreasing_at_step=1000, - initial_guess=armijo_initial_guess, - ) - return new{typeof(retraction_method),typeof(initial_guess)}( + stop_increasing_at_step::Int=100, + stop_decreasing_at_step::Int=1000, + sufficient_decrease=0.1, + ) where {TRM, P} + return new{TRM, P, typeof(initial_guess)}( + candidate_point, + contraction_factor, + initial_guess, + initial_stepsize, initial_stepsize, + "", # initilize an empty message retraction_method, - contraction_factor, sufficient_decrease, - initial_stepsize, stop_when_stepsize_less, stop_when_stepsize_exceeds, stop_increasing_at_step, stop_decreasing_at_step, - initial_guess, - "", ) end end @@ -280,21 +287,23 @@ function (a::ArmijoLinesearch)( mp::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int, + p=a.candidate_point, η=-get_gradient(mp, get_iterate(s)); kwargs..., ) p = get_iterate(s) X = get_gradient!(mp, get_gradient(s), p) - (a.last_stepsize, a.message) = linesearch_backtrack( + (a.last_stepsize, a.message) = linesearch_backtrack!( get_manifold(mp), + a.candidate_point, (M, p) -> get_cost_function(get_objective(mp))(M, p), p, X, a.initial_guess(mp, s, i, a.last_stepsize), a.sufficient_decrease, a.contraction_factor, - a.retraction_method, η; + retraction_method=a.retraction_method, stop_when_stepsize_less=a.stop_when_stepsize_less / norm(get_manifold(mp), p, η), stop_when_stepsize_exceeds=a.stop_when_stepsize_exceeds / norm(get_manifold(mp), p, η), @@ -321,28 +330,33 @@ end get_message(a::ArmijoLinesearch) = a.message @doc raw""" - (s, msg) = linesearch_backtrack( - M, F, x, gradFx, s, decrease, contract, retr, η = -gradFx, f0 = F(x); - stop_when_stepsize_less=0.0, - stop_when_stepsize_exceeds=max_stepsize(M, p), - stop_increasing_at_step = 100, - stop_decreasing_at_step = 1000, - ) - -perform a linesearch for -* a manifold `M` -* a cost function `f`, -* an iterate `p` -* the gradient ``\operatorname{grad}F(x)`` -* an initial stepsize `s` usually called ``γ`` + (s, msg) = linesearch_backtrack(M, F, p, X, s, decrease, contract η = -X, f0 = f(p)) + (s, msg) = linesearch_backtrack(M, q, F, p, X, s, decrease, contract η = -X, f0 = f(p)) + +perform a linesearch +* on manifold `M` +* for the cost function `f`, +* at the current point `p` +* with current gradient providedd in `X` +* an initial stepsize `s` * a sufficient `decrease` * a `contract`ion factor ``σ`` * a `retr`action, which defaults to the `default_retraction_method(M)` -* a search 
direction ``η = -\operatorname{grad}F(x)`` +* a search direction ``η = -X`` * an offset, ``f_0 = F(x)`` -And use the 4 keywords to limit the maximal increase and decrease steps as well as -a maximal stepsize (especially on non-Hadamard manifolds) and a minimal one. +the method can also be performed in-place of `q`, that is the resulting best point one reaches +with the step size `s` as second argument. + +## Keywords + +* `retraction_method` (`default_retraction_method(M)`) the retraction to use. +* `stop_when_stepsize_less (`0.0`) to avoid numerical underflow +* `stop_when_stepsize_exceeds` ([`max_stepsize`](@ref)`(M, p) / norm(M, p, η)`) to avoid leaving the injectivity radius on a manifold +* `stop_increasing_at_step` (`100`) stop the inicial increase of step size after these many steps +* `stop_decreasing_at_step` (`1000`) stop the decreasing search after these many steps + +These keywords are used as safeguards, where only the max stepsize is a very manifold specific one. # Return value @@ -350,33 +364,54 @@ A stepsize `s` and a message `msg` (in case any of the 4 criteria hit) """ function linesearch_backtrack( M::AbstractManifold, + f, + p, + X::T, + s, + decrease, + contract, + η::T=-X, + f0=f(M, p); kwargs... +) where {T} + q = copy(M, p) + return linesearch_backtrack!(M, q, f, p, X, s, decrease, contract, η, f0; kwargs...) +end + +""" + (s, msg) = linesearch_backtrack!(M, q, F, p, X, s, decrease, contract η = -X, f0 = f(p)) + +Perform a linesearch bachtrack in-place of `q`. For all details and options, see [`linesearch_backtrack`](@ref) +""" +function linesearch_backtrack!( + M::AbstractManifold, + q, f::TF, p, - grad_f_at_p::T, + X::T, s, decrease, contract, - retr::AbstractRetractionMethod=default_retraction_method(M), - η::T=-grad_f_at_p, + η::T=-X, f0=f(M, p); + retraction_method::AbstractRetractionMethod=default_retraction_method(M), stop_when_stepsize_less=0.0, stop_when_stepsize_exceeds=max_stepsize(M, p) / norm(M, p, η), stop_increasing_at_step=100, stop_decreasing_at_step=1000, ) where {TF,T} msg = "" - p_new = retract(M, p, η, s, retr) - fNew = f(M, p_new) - search_dir_inner = real(inner(M, p, η, grad_f_at_p)) + retract!(M, q, p, η, s, retraction_method) + f_q = f(M, q) + search_dir_inner = real(inner(M, p, η, X)) if search_dir_inner >= 0 msg = "The search direction η might not be a descent direction, since ⟨η, grad_f(p)⟩ ≥ 0." 
end i = 0 - while fNew < f0 + decrease * s * search_dir_inner # increase + while f_q < f0 + decrease * s * search_dir_inner # increase i = i + 1 s = s / contract - retract!(M, p_new, p, η, s, retr) - fNew = f(M, p_new) + retract!(M, q, p, η, s, retraction_method) + f_q = f(M, q) if i == stop_increasing_at_step (length(msg) > 0) && (msg = "$msg\n") msg = "$(msg)Max increase steps ($(stop_increasing_at_step)) reached" @@ -390,11 +425,11 @@ function linesearch_backtrack( end end i = 0 - while fNew > f0 + decrease * s * search_dir_inner # decrease + while f_q > f0 + decrease * s * search_dir_inner # decrease i = i + 1 s = contract * s - retract!(M, p_new, p, η, s, retr) - fNew = f(M, p_new) + retract!(M, q, p, η, s, retraction_method) + f_q = f(M, q) if i == stop_decreasing_at_step (length(msg) > 0) && (msg = "$msg\n") msg = "$(msg)Max decrease steps ($(stop_decreasing_at_step)) reached" @@ -505,43 +540,44 @@ mutable struct NonmonotoneLinesearch{ VTM<:AbstractVectorTransportMethod, T<:AbstractVector, TSSA<:StoreStateAction, + P, } <: Linesearch - retraction_method::TRM - vector_transport_method::VTM - stepsize_reduction::Float64 - sufficient_decrease::Float64 bb_min_stepsize::Float64 bb_max_stepsize::Float64 + candiate_point::P initial_stepsize::Float64 + message::String old_costs::T - strategy::Symbol - storage::TSSA - stop_when_stepsize_less::Float64 - stop_when_stepsize_exceeds::Float64 - stop_increasing_at_step::Int + retraction_method::TRM + stepsize_reduction::Float64 stop_decreasing_at_step::Int - message::String + stop_increasing_at_step::Int + stop_when_stepsize_exceeds::Float64 + stop_when_stepsize_less::Float64 + storage::TSSA + strategy::Symbol + sufficient_decrease::Float64 + vector_transport_method::VTM function NonmonotoneLinesearch( M::AbstractManifold=DefaultManifold(); - initial_stepsize::Float64=1.0, - retraction_method::AbstractRetractionMethod=default_retraction_method(M), - vector_transport_method::AbstractVectorTransportMethod=default_vector_transport_method( - M - ), - stepsize_reduction::Float64=0.5, - sufficient_decrease::Float64=1e-4, - memory_size::Int=10, bb_min_stepsize::Float64=1e-3, bb_max_stepsize::Float64=1e3, - strategy::Symbol=:direct, + candidate_point::P=rand(M), + initial_stepsize::Float64=1.0, + memory_size::Int=10, + retraction_method::TRM=default_retraction_method(M), + stepsize_reduction::Float64=0.5, + stop_when_stepsize_less::Float64=0.0, + stop_when_stepsize_exceeds::Float64=max_stepsize(M), + stop_increasing_at_step::Int=100, + stop_decreasing_at_step::Int=1000, storage::Union{Nothing,StoreStateAction}=StoreStateAction( M; store_fields=[:Iterate, :Gradient] ), - stop_when_stepsize_less::Float64=0.0, - stop_when_stepsize_exceeds::Float64=max_stepsize(M), - stop_increasing_at_step=100, - stop_decreasing_at_step=1000, - ) + strategy::Symbol=:direct, + sufficient_decrease::Float64=1e-4, + vector_transport_method::VTM=default_vector_transport_method(M), + ) where {TRM, VTM, P} if strategy ∉ [:direct, :inverse, :alternating] @warn string( "The strategy '", @@ -570,26 +606,24 @@ mutable struct NonmonotoneLinesearch{ throw(DomainError(memory_size, "The memory_size has to be greater than zero.")) end return new{ - typeof(retraction_method), - typeof(vector_transport_method), - Vector{Float64}, - typeof(storage), + TRM, VTM, Vector{Float64}, typeof(storage), P, }( - retraction_method, - vector_transport_method, - stepsize_reduction, - sufficient_decrease, bb_min_stepsize, bb_max_stepsize, + candidate_point, initial_stepsize, + "", zeros(memory_size), - 
strategy, - storage, - stop_when_stepsize_less, - stop_when_stepsize_exceeds, - stop_increasing_at_step, + retraction_method, + stepsize_reduction, stop_decreasing_at_step, - "", + stop_increasing_at_step, + stop_when_stepsize_exceeds, + stop_when_stepsize_less, + storage, + strategy, + sufficient_decrease, + vector_transport_method, ) end end @@ -622,21 +656,21 @@ function (a::NonmonotoneLinesearch)( ) end function (a::NonmonotoneLinesearch)( - M::mT, x, F::TF, gradFx::T, η::T, old_x, old_gradient, iter::Int; kwargs... + M::mT, p, f::TF, X::T, η::T, old_p, old_X, iter::Int; kwargs... ) where {mT<:AbstractManifold,TF,T} #find the difference between the current and previous gradient after the previous gradient is transported to the current tangent space grad_diff = - gradFx - vector_transport_to(M, old_x, old_gradient, x, a.vector_transport_method) + X - vector_transport_to(M, old_p, old_X, p, a.vector_transport_method) #transport the previous step into the tangent space of the current manifold point x_diff = -a.initial_stepsize * - vector_transport_to(M, old_x, old_gradient, x, a.vector_transport_method) + vector_transport_to(M, old_p, old_X, p, a.vector_transport_method) #compute the new Barzilai-Borwein step size - s1 = real(inner(M, x, x_diff, grad_diff)) - s2 = real(inner(M, x, grad_diff, grad_diff)) + s1 = real(inner(M, p, x_diff, grad_diff)) + s2 = real(inner(M, p, grad_diff, grad_diff)) s2 = s2 == 0 ? 1.0 : s2 - s3 = real(inner(M, x, x_diff, x_diff)) + s3 = real(inner(M, p, x_diff, x_diff)) #indirect strategy if a.strategy == :inverse if s1 > 0 @@ -674,26 +708,27 @@ function (a::NonmonotoneLinesearch)( memory_size = length(a.old_costs) if iter <= memory_size - a.old_costs[iter] = F(M, x) + a.old_costs[iter] = f(M, p) else a.old_costs[1:(memory_size - 1)] = a.old_costs[2:memory_size] - a.old_costs[memory_size] = F(M, x) + a.old_costs[memory_size] = f(M, p) end #compute the new step size with the help of the Barzilai-Borwein step size - (a.initial_stepsize, a.message) = linesearch_backtrack( + (a.initial_stepsize, a.message) = linesearch_backtrack!( M, - F, - x, - gradFx, + a.candiate_point, + f, + p, + X, BarzilaiBorwein_stepsize, a.sufficient_decrease, a.stepsize_reduction, - a.retraction_method, η, maximum([a.old_costs[j] for j in 1:min(iter, memory_size)]); - stop_when_stepsize_less=a.stop_when_stepsize_less / norm(M, x, η), - stop_when_stepsize_exceeds=a.stop_when_stepsize_exceeds / norm(M, x, η), + retraction_method=a.retraction_method, + stop_when_stepsize_less=a.stop_when_stepsize_less / norm(M, p, η), + stop_when_stepsize_exceeds=a.stop_when_stepsize_exceeds / norm(M, p, η), stop_increasing_at_step=a.stop_increasing_at_step, stop_decreasing_at_step=a.stop_decreasing_at_step, ) diff --git a/src/solvers/alternating_gradient_descent.jl b/src/solvers/alternating_gradient_descent.jl index fcc1b3d69f..8a533be741 100644 --- a/src/solvers/alternating_gradient_descent.jl +++ b/src/solvers/alternating_gradient_descent.jl @@ -129,15 +129,16 @@ function (a::ArmijoLinesearch)( M = get_manifold(amp) X = zero_vector(M, agds.p) get_gradient!(amp, X[M, agds.order[agds.k]], agds.p, agds.order[agds.k]) - (a.last_stepsize, a.message) = linesearch_backtrack( + (a.last_stepsize, a.message) = linesearch_backtrack!( M, + a.candidate_point, (M, p) -> get_cost(amp, p), agds.p, X, a.last_stepsize, a.sufficient_decrease, - a.contraction_factor, - a.retraction_method, + a.contraction_factor; + retraction_method=a.retraction_method, ) return a.last_stepsize end diff --git a/test/plans/test_stepsize.jl 
b/test/plans/test_stepsize.jl index 87ba49a81e..56437a4951 100644 --- a/test/plans/test_stepsize.jl +++ b/test/plans/test_stepsize.jl @@ -36,7 +36,7 @@ using Manopt, Manifolds, Test ) @test startswith(s1[2], "Max decrease") s2 = Manopt.linesearch_backtrack( - M, f, p, grad_f(M, p), 1.0, 1.0, 0.5, ExponentialRetraction(), grad_f(M, p); + M, f, p, grad_f(M, p), 1.0, 1.0, 0.5, grad_f(M, p); retraction_method=ExponentialRetraction(), ) @test startswith(s2[2], "The search direction") s3 = Manopt.linesearch_backtrack( From d3812fb7f50c372784f7e8e11b08009c52131118 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sat, 16 Dec 2023 11:02:39 +0100 Subject: [PATCH 02/18] Fix #333 point 1, 2, 5, and one further memory to use. --- src/plans/stepsize.jl | 165 ++++++++++++++++++++---------------- test/plans/test_stepsize.jl | 10 ++- 2 files changed, 101 insertions(+), 74 deletions(-) diff --git a/src/plans/stepsize.jl b/src/plans/stepsize.jl index 58dc65099b..734b1e9be2 100644 --- a/src/plans/stepsize.jl +++ b/src/plans/stepsize.jl @@ -266,8 +266,8 @@ mutable struct ArmijoLinesearch{TRM<:AbstractRetractionMethod,P,F} <: Linesearch stop_increasing_at_step::Int=100, stop_decreasing_at_step::Int=1000, sufficient_decrease=0.1, - ) where {TRM, P} - return new{TRM, P, typeof(initial_guess)}( + ) where {TRM,P} + return new{TRM,P,typeof(initial_guess)}( candidate_point, contraction_factor, initial_guess, @@ -363,15 +363,7 @@ These keywords are used as safeguards, where only the max stepsize is a very man A stepsize `s` and a message `msg` (in case any of the 4 criteria hit) """ function linesearch_backtrack( - M::AbstractManifold, - f, - p, - X::T, - s, - decrease, - contract, - η::T=-X, - f0=f(M, p); kwargs... + M::AbstractManifold, f, p, X::T, s, decrease, contract, η::T=-X, f0=f(M, p); kwargs... ) where {T} q = copy(M, p) return linesearch_backtrack!(M, q, f, p, X, s, decrease, contract, η, f0; kwargs...) @@ -577,7 +569,7 @@ mutable struct NonmonotoneLinesearch{ strategy::Symbol=:direct, sufficient_decrease::Float64=1e-4, vector_transport_method::VTM=default_vector_transport_method(M), - ) where {TRM, VTM, P} + ) where {TRM,VTM,P} if strategy ∉ [:direct, :inverse, :alternating] @warn string( "The strategy '", @@ -605,9 +597,7 @@ mutable struct NonmonotoneLinesearch{ if memory_size <= 0 throw(DomainError(memory_size, "The memory_size has to be greater than zero.")) end - return new{ - TRM, VTM, Vector{Float64}, typeof(storage), P, - }( + return new{TRM,VTM,Vector{Float64},typeof(storage),P}( bb_min_stepsize, bb_max_stepsize, candidate_point, @@ -659,8 +649,7 @@ function (a::NonmonotoneLinesearch)( M::mT, p, f::TF, X::T, η::T, old_p, old_X, iter::Int; kwargs... ) where {mT<:AbstractManifold,TF,T} #find the difference between the current and previous gradient after the previous gradient is transported to the current tangent space - grad_diff = - X - vector_transport_to(M, old_p, old_X, p, a.vector_transport_method) + grad_diff = X - vector_transport_to(M, old_p, old_X, p, a.vector_transport_method) #transport the previous step into the tangent space of the current manifold point x_diff = -a.initial_stepsize * @@ -772,40 +761,65 @@ There exist two constructors, where, when prodivind the manifold `M` as a first parameter, its default retraction and vector transport are the default. In this case the retraction and the vector transport are also keyword arguments for ease of use. The other constructor is kept for backward compatibility. 
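For illustration (an editorial addition, not part of the diff), a sketch of passing such a linesearch to a solver; the cost below is a hypothetical Rosenbrock-type test function on a Euclidean manifold:

```julia
using Manopt, Manifolds

M = Euclidean(2)
f(M, x) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2   # hypothetical test cost
grad_f(M, x) = [
    -2 * (1 - x[1]) - 400 * x[1] * (x[2] - x[1]^2),
    200 * (x[2] - x[1]^2),
]

wp = WolfePowellLinesearch(M, 1e-4, 0.999)
x_opt = quasi_Newton(M, f, grad_f, [0.0, 0.0]; stepsize=wp)
```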
-Note that the `linesearch_stopsize` to stop for too small stepsizes is only available in the +Note that the `stop_when_stepsize_less` to stop for too small stepsizes is only available in the new signature including `M`. - WolfePowellLinesearch( - M, - c1::Float64=10^(-4), - c2::Float64=0.999; - retraction_method = default_retraction_method(M), - vector_transport_method = default_vector_transport(M), - linesearch_stopsize = 0.0 - ) + WolfePowellLinesearch(M, c1::Float64=10^(-4), c2::Float64=0.999; kwargs... + +Generate a Wolfe-Powell linesearch + +## Keyword Arguments + +* `candidate_point` (`rand(M)`) memory for an internims candidate +* `candidate_tangent` (`zero_vector(M; vector_at=candidate_point)`) memory for a gradient +* `candidate_direcntion` (`zero_vector(M; vector_at=candidate_point)`) memory for a direction +* `max_stepsize` ([`max_stepsize`](@ref)`(M, p)`) – largest stepsize allowed here. +* `retraction_method` – (`ExponentialRetraction()`) the retraction to use +* `stop_when_stepsize_less` - (`0.0`) smallest stepsize when to stop (the last one before is taken) +* `vector_transport_method` – (`ParallelTransport()`) the vector transport method to use """ mutable struct WolfePowellLinesearch{ - TRM<:AbstractRetractionMethod,VTM<:AbstractVectorTransportMethod + TRM<:AbstractRetractionMethod,VTM<:AbstractVectorTransportMethod,P,T } <: Linesearch - retraction_method::TRM - vector_transport_method::VTM c1::Float64 c2::Float64 + candidate_direction::T + candidate_point::P + candidate_tangent::T last_stepsize::Float64 - linesearch_stopsize::Float64 + max_stepsize::Float64 + retraction_method::TRM + stop_when_stepsize_less::Float64 + vector_transport_method::VTM function WolfePowellLinesearch( M::AbstractManifold=DefaultManifold(), c1::Float64=10^(-4), c2::Float64=0.999; - retraction_method::AbstractRetractionMethod=default_retraction_method(M), - vector_transport_method::AbstractVectorTransportMethod=default_vector_transport_method( - M - ), - linesearch_stopsize::Float64=0.0, - ) - return new{typeof(retraction_method),typeof(vector_transport_method)}( - retraction_method, vector_transport_method, c1, c2, 0.0, linesearch_stopsize + candidate_point::P=rand(M), + candidate_tangent::T=zero_vector(M, candidate_point), + candidate_direction::T=zero_vector(M, candidate_point), + max_stepsize=max_stepsize(M, candidate_point), + retraction_method::TRM=default_retraction_method(M), + vector_transport_method::VTM=default_vector_transport_method(M), + linesearch_stopsize::Float64=0.0, # deprecated remove on next breaking change + stop_when_stepsize_less=linesearch_stopsize, # + ) where {TRM,VTM,P,T} + (linesearch_stopsize > 0.0) && Base.depwarn( + WolfePowellLinesearch, + "`linesearch_backtrack` is deprecated – use `stop_when_stepsize_less` instead´.", + ) + return new{TRM,VTM,P,T}( + c1, + c2, + candidate_direction, + candidate_point, + candidate_tangent, + 0.0, + max_stepsize, + retraction_method, + stop_when_stepsize_less, + vector_transport_method, ) end end @@ -816,65 +830,70 @@ function (a::WolfePowellLinesearch)( η=-get_gradient(mp, get_iterate(ams)); kwargs..., ) + # For readability extract a few variables M = get_manifold(mp) - cur_p = get_iterate(ams) - grad_norm = norm(M, cur_p, η) - max_step = max_stepsize(M, cur_p) - # max_step_increase is the upper limit for s_plus - max_step_increase = ifelse(isfinite(max_step), min(1e9, max_step / grad_norm), 1e9) - step = ifelse(isfinite(max_step), min(1.0, max_step / grad_norm), 1.0) + p = get_iterate(ams) + X = get_gradient(ams) + l = 
real(inner(M, p, η, X)) + grad_norm = norm(M, p, η) + max_step_increase = ifelse( + isfinite(a.max_stepsize), min(1e9, a.max_stepsize / grad_norm), 1e9 + ) + step = ifelse(isfinite(a.max_stepsize), min(1.0, a.max_stepsize / grad_norm), 1.0) s_plus = step s_minus = step - f0 = get_cost(mp, cur_p) - p_new = retract(M, cur_p, η, step, a.retraction_method) - fNew = get_cost(mp, p_new) - η_xNew = vector_transport_to(M, cur_p, η, p_new, a.vector_transport_method) - if fNew > f0 + a.c1 * step * real(inner(M, get_iterate(ams), η, get_gradient(ams))) - while ( - fNew > f0 + a.c1 * step * real(inner(M, get_iterate(ams), η, get_gradient(ams))) - ) && (s_minus > 10^(-9)) # decrease + f0 = get_cost(mp, p) + retract!(M, a.candidate_point, p, η, step, a.retraction_method) + fNew = get_cost(mp, a.candidate_point) + vector_transport_to!( + M, a.candidate_direction, p, η, a.candidate_point, a.vector_transport_method + ) + if fNew > f0 + a.c1 * step * l + while (fNew > f0 + a.c1 * step * l) && (s_minus > 10^(-9)) # decrease s_minus = s_minus * 0.5 step = s_minus - retract!(M, p_new, get_iterate(ams), η, step, a.retraction_method) - fNew = get_cost(mp, p_new) + retract!(M, a.candidate_point, p, η, step, a.retraction_method) + fNew = get_cost(mp, a.candidate_point) end s_plus = 2.0 * s_minus else vector_transport_to!( - M, η_xNew, get_iterate(ams), η, p_new, a.vector_transport_method + M, a.candidate_direction, p, η, a.candidate_point, a.vector_transport_method ) - if real(inner(M, p_new, get_gradient(mp, p_new), η_xNew)) < - a.c2 * real(inner(M, get_iterate(ams), η, get_gradient(ams))) - while fNew <= - f0 + - a.c1 * step * real(inner(M, get_iterate(ams), η, get_gradient(ams))) && - (s_plus < max_step_increase)# increase + get_gradient!(mp, a.candidate_tangent, a.candidate_point) + if real(inner(M, a.candidate_point, a.candidate_tangent, a.candidate_direction)) < + a.c2 * l + while fNew <= f0 + a.c1 * step * l && (s_plus < max_step_increase)# increase s_plus = s_plus * 2.0 step = s_plus - retract!(M, p_new, get_iterate(ams), η, step, a.retraction_method) - fNew = get_cost(mp, p_new) + retract!(M, a.candidate_point, p, η, step, a.retraction_method) + fNew = get_cost(mp, a.candidate_point) end s_minus = s_plus / 2.0 end end - retract!(M, p_new, get_iterate(ams), η, s_minus, a.retraction_method) - vector_transport_to!(M, η_xNew, get_iterate(ams), η, p_new, a.vector_transport_method) - while real(inner(M, p_new, get_gradient(mp, p_new), η_xNew)) < - a.c2 * real(inner(M, get_iterate(ams), η, get_gradient(ams))) + retract!(M, a.candidate_point, p, η, s_minus, a.retraction_method) + vector_transport_to!( + M, a.candidate_direction, p, η, a.candidate_point, a.vector_transport_method + ) + get_gradient!(mp, a.candidate_tangent, a.candidate_point) + while real(inner(M, a.candidate_point, a.candidate_tangent, a.candidate_direction)) < + a.c2 * l step = (s_minus + s_plus) / 2 - retract!(M, p_new, get_iterate(ams), η, step, a.retraction_method) - fNew = get_cost(mp, p_new) - if fNew <= f0 + a.c1 * step * real(inner(M, get_iterate(ams), η, get_gradient(ams))) + retract!(M, a.candidate_point, p, η, step, a.retraction_method) + fNew = get_cost(mp, a.candidate_point) + if fNew <= f0 + a.c1 * step * l s_minus = step else s_plus = step end - abs(s_plus - s_minus) <= a.linesearch_stopsize && break - retract!(M, p_new, get_iterate(ams), η, s_minus, a.retraction_method) + abs(s_plus - s_minus) <= a.stop_when_stepsize_less && break + retract!(M, a.candidate_point, p, η, s_minus, a.retraction_method) vector_transport_to!( - M, 
η_xNew, get_iterate(ams), η, p_new, a.vector_transport_method + M, a.candidate_direction, p, η, a.candidate_point, a.vector_transport_method ) + get_gradient!(mp, a.candidate_tangent, a.candidate_point) end step = s_minus a.last_stepsize = step diff --git a/test/plans/test_stepsize.jl b/test/plans/test_stepsize.jl index 56437a4951..c4209881f2 100644 --- a/test/plans/test_stepsize.jl +++ b/test/plans/test_stepsize.jl @@ -36,7 +36,15 @@ using Manopt, Manifolds, Test ) @test startswith(s1[2], "Max decrease") s2 = Manopt.linesearch_backtrack( - M, f, p, grad_f(M, p), 1.0, 1.0, 0.5, grad_f(M, p); retraction_method=ExponentialRetraction(), + M, + f, + p, + grad_f(M, p), + 1.0, + 1.0, + 0.5, + grad_f(M, p); + retraction_method=ExponentialRetraction(), ) @test startswith(s2[2], "The search direction") s3 = Manopt.linesearch_backtrack( From 55c5456f4d196fac0682e7bd9c527c56fdd7d6df Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sat, 16 Dec 2023 11:31:32 +0100 Subject: [PATCH 03/18] documentation formatting. --- src/plans/quasi_newton_plan.jl | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/plans/quasi_newton_plan.jl b/src/plans/quasi_newton_plan.jl index 2676dc88ee..7a6d4e42a0 100644 --- a/src/plans/quasi_newton_plan.jl +++ b/src/plans/quasi_newton_plan.jl @@ -381,24 +381,32 @@ end @doc raw""" QuasiNewtonLimitedMemoryDirectionUpdate <: AbstractQuasiNewtonDirectionUpdate -This [`AbstractQuasiNewtonDirectionUpdate`](@ref) represents the limited-memory Riemannian BFGS update, where the approximating operator is represented by ``m`` stored pairs of tangent vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}`` in the ``k``-th iteration. -For the calculation of the search direction ``η_k``, the generalisation of the two-loop recursion is used (see [Huang, Gallican, Absil, SIAM J. Optim., 2015](@cite HuangGallivanAbsil:2015)), since it only requires inner products and linear combinations of tangent vectors in ``T_{x_k} \mathcal{M}``. For that the stored pairs of tangent vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}``, the gradient ``\operatorname{grad}f(x_k)`` of the objective function ``f`` in ``x_k`` and the positive definite self-adjoint operator +This [`AbstractQuasiNewtonDirectionUpdate`](@ref) represents the limited-memory Riemannian BFGS update, +where the approximating operator is represented by ``m`` stored pairs of tangent vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}`` +in the ``k``-th iteration. +For the calculation of the search direction ``η_k``, the generalisation of the two-loop recursion +is used (see [Huang, Gallican, Absil, SIAM J. Optim., 2015](@cite HuangGallivanAbsil:2015)), +since it only requires inner products and linear combinations of tangent vectors in ``T_{x_k} \mathcal{M}``. +For that the stored pairs of tangent vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}``, +the gradient ``\operatorname{grad}f(x_k)`` of the objective function ``f`` in ``x_k`` +and the positive definite self-adjoint operator ```math \mathcal{B}^{(0)}_k[⋅] = \frac{g_{x_k}(s_{k-1}, y_{k-1})}{g_{x_k}(y_{k-1}, y_{k-1})} \; \mathrm{id}_{T_{x_k} \mathcal{M}}[⋅] ``` -are used. 
The two-loop recursion can be understood as that the [`InverseBFGS`](@ref) update is executed ``m`` times in a row on ``\mathcal{B}^{(0)}_k[⋅]`` using the tangent vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}``, and in the same time the resulting operator ``\mathcal{B}^{LRBFGS}_k [⋅]`` is directly applied on ``\operatorname{grad}f(x_k)``. +are used. The two-loop recursion can be understood as that the [`InverseBFGS`](@ref) update +is executed ``m`` times in a row on ``\mathcal{B}^{(0)}_k[⋅]`` using the tangent vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}``, and in the same time the resulting operator ``\mathcal{B}^{LRBFGS}_k [⋅]`` is directly applied on ``\operatorname{grad}f(x_k)``. When updating there are two cases: if there is still free memory, i.e. ``k < m``, the previously stored vector pairs ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}`` have to be transported into the upcoming tangent space ``T_{x_{k+1}} \mathcal{M}``; if there is no free memory, the oldest pair ``\{ \widetilde{s}_{k−m}, \widetilde{y}_{k−m}\}`` has to be discarded and then all the remaining vector pairs ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m+1}^{k-1}`` are transported into the tangent space ``T_{x_{k+1}} \mathcal{M}``. After that we calculate and store ``s_k = \widetilde{s}_k = T^{S}_{x_k, α_k η_k}(α_k η_k)`` and ``y_k = \widetilde{y}_k``. This process ensures that new information about the objective function is always included and the old, probably no longer relevant, information is discarded. # Fields -* `memory_s` – the set of the stored (and transported) search directions times step size ``\{ \widetilde{s}_i\}_{i=k-m}^{k-1}``. -* `memory_y` – set of the stored gradient differences ``\{ \widetilde{y}_i\}_{i=k-m}^{k-1}``. -* `ξ` – a variable used in the two-loop recursion. -* `ρ` – a variable used in the two-loop recursion. -* `scale` – -* `vector_transport_method` – a `AbstractVectorTransportMethod` -* `message` – a string containing a potential warning that might have appeared +* `memory_s` the set of the stored (and transported) search directions times step size ``\{ \widetilde{s}_i\}_{i=k-m}^{k-1}``. +* `memory_y` set of the stored gradient differences ``\{ \widetilde{y}_i\}_{i=k-m}^{k-1}``. +* `ξ` a variable used in the two-loop recursion. +* `ρ` a variable used in the two-loop recursion. +* `scale` initial scaling of the Hessian +* `vector_transport_method` a `AbstractVectorTransportMethod` +* `message` a string containing a potential warning that might have appeared # Constructor QuasiNewtonLimitedMemoryDirectionUpdate( From 5745c6d97b5f30c589f8971dccbfc58df55ccdb8 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sat, 16 Dec 2023 13:38:11 +0100 Subject: [PATCH 04/18] IN theory resolves #333 points 4 and 7, in practice there seems to be a side effect. 
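Editorial sketch (not part of the original commit message): after this change a direction update can be evaluated in-place as `update(η, mp, st)` in addition to the allocating `η = update(mp, st)`; constructing such an update only needs the manifold, for example:

```julia
using Manopt, Manifolds

M = Sphere(2)
# a full-matrix BFGS-type update; once a problem `mp` and a `QuasiNewtonState` `st`
# are set up, it can be evaluated as `η = d(mp, st)` or in-place as `d(η, mp, st)`
d = QuasiNewtonMatrixDirectionUpdate(M, InverseBFGS())
```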
--- src/plans/quasi_newton_plan.jl | 20 +++++++++++++++----- src/solvers/quasi_Newton.jl | 19 +++++++++++++------ 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/plans/quasi_newton_plan.jl b/src/plans/quasi_newton_plan.jl index 7a6d4e42a0..836fc9fa8f 100644 --- a/src/plans/quasi_newton_plan.jl +++ b/src/plans/quasi_newton_plan.jl @@ -362,21 +362,27 @@ function QuasiNewtonMatrixDirectionUpdate( basis, m, scale, update, vector_transport_method ) end +function (d::QuasiNewtonMatrixDirectionUpdate)(mp, st) + r = zero_vector(get_manifold(mp), get_iterate(st)) + return d(r, mp, st) +end function (d::QuasiNewtonMatrixDirectionUpdate{T})( - mp, st + r, mp, st ) where {T<:Union{InverseBFGS,InverseDFP,InverseSR1,InverseBroyden}} M = get_manifold(mp) p = get_iterate(st) X = get_gradient(st) - return get_vector(M, p, -d.matrix * get_coordinates(M, p, X, d.basis), d.basis) + get_vector!(M, r, p, -d.matrix * get_coordinates(M, p, X, d.basis), d.basis) + return r end function (d::QuasiNewtonMatrixDirectionUpdate{T})( - mp, st + r, mp, st ) where {T<:Union{BFGS,DFP,SR1,Broyden}} M = get_manifold(mp) p = get_iterate(st) X = get_gradient(st) - return get_vector(M, p, -d.matrix \ get_coordinates(M, p, X, d.basis), d.basis) + get_vector!(M, r, p, -d.matrix \ get_coordinates(M, p, X, d.basis), d.basis) + return r end @doc raw""" QuasiNewtonLimitedMemoryDirectionUpdate <: AbstractQuasiNewtonDirectionUpdate @@ -476,10 +482,14 @@ function status_summary(d::QuasiNewtonLimitedMemoryDirectionUpdate{T}) where {T} return s end function (d::QuasiNewtonLimitedMemoryDirectionUpdate{InverseBFGS})(mp, st) + r = zero_vector(get_manifold(mp), get_iterate(st)) + return d(r, mp, st) +end +function (d::QuasiNewtonLimitedMemoryDirectionUpdate{InverseBFGS})(r, mp, st) isempty(d.message) || (d.message = "") # reset message M = get_manifold(mp) p = get_iterate(st) - r = copy(M, p, get_gradient(st)) + copyto!(M, r, p, get_gradient(st)) m = length(d.memory_s) m == 0 && return -r for i in m:-1:1 diff --git a/src/solvers/quasi_Newton.jl b/src/solvers/quasi_Newton.jl index 6019b2d5de..b28a9b2a62 100644 --- a/src/solvers/quasi_Newton.jl +++ b/src/solvers/quasi_Newton.jl @@ -43,6 +43,7 @@ mutable struct QuasiNewtonState{ } <: AbstractGradientSolverState p::P p_old::P + η::T X::T sk::T yk::T @@ -82,6 +83,7 @@ function QuasiNewtonState( return QuasiNewtonState{P,typeof(sk_init),D,SC,typeof(stepsize),RM,VTM}( p, copy(M, p), + copy(M, p, initial_vector), initial_vector, sk_init, copy(M, sk_init), @@ -333,16 +335,21 @@ end function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter) M = get_manifold(mp) get_gradient!(mp, qns.X, qns.p) - η = qns.direction_update(mp, qns) - α = qns.stepsize(mp, qns, iter, η) + qns.direction_update(qns.η, mp, qns) + α = qns.stepsize(mp, qns, iter, qns.η) copyto!(M, qns.p_old, get_iterate(qns)) - αη = α * η - retract!(M, qns.p, qns.p, η, α, qns.retraction_method) + retract!(M, qns.p, qns.p, qns.η, α, qns.retraction_method) + qns.η .*= α β = locking_condition_scale( - M, qns.direction_update, qns.p_old, αη, qns.p, qns.vector_transport_method + M, qns.direction_update, qns.p_old, qns.η, qns.p, qns.vector_transport_method ) vector_transport_to!( - M, qns.sk, qns.p_old, αη, qns.p, get_update_vector_transport(qns.direction_update) + M, + qns.sk, + qns.p_old, + qns.η, + qns.p, + get_update_vector_transport(qns.direction_update), ) vector_transport_to!( M, qns.X, qns.p_old, qns.X, qns.p, get_update_vector_transport(qns.direction_update) From 1b30f2fd25097544423fc031b902acc2b9550010 
Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sat, 16 Dec 2023 13:48:16 +0100 Subject: [PATCH 05/18] =?UTF-8?q?first=20back=20to=20copying=20eta=20?= =?UTF-8?q?=E2=80=93=C2=A0then=20tests=20from=20before=20work=20again.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/solvers/quasi_Newton.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/solvers/quasi_Newton.jl b/src/solvers/quasi_Newton.jl index b28a9b2a62..0497f5ae75 100644 --- a/src/solvers/quasi_Newton.jl +++ b/src/solvers/quasi_Newton.jl @@ -335,7 +335,8 @@ end function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter) M = get_manifold(mp) get_gradient!(mp, qns.X, qns.p) - qns.direction_update(qns.η, mp, qns) + #qns.direction_update(qns.η, mp, qns) + qns.η = qns.direction_update(mp, qns) α = qns.stepsize(mp, qns, iter, qns.η) copyto!(M, qns.p_old, get_iterate(qns)) retract!(M, qns.p, qns.p, qns.η, α, qns.retraction_method) From ca7b1ef282e14b83dd89b316e62e305c074114cd Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sat, 16 Dec 2023 14:04:28 +0100 Subject: [PATCH 06/18] Start changelog entry. --- Changelog.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Changelog.md b/Changelog.md index 3425f1763d..96d28c1653 100644 --- a/Changelog.md +++ b/Changelog.md @@ -5,6 +5,14 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.4.45] unreleased + +## Changed + +* `WolfePowellLineSearch`, `ArmijoLineSearch` step sizes now allocate less +* `linesearch_backtrack!` is now available +* Quasi Newton Updates can work inplace of a direction vector as well. 
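Illustrative sketch (an editorial addition, not part of the changelog itself): the new in-place variant writes the trial point into preallocated memory; the cost and gradient below are hypothetical:

```julia
using Manopt, Manifolds

M = Sphere(2)
f(M, p) = p[1]                                   # hypothetical cost
grad_f(M, p) = project(M, p, [1.0, 0.0, 0.0])    # its Riemannian gradient
p = [0.0, 1.0, 0.0]
q = copy(M, p)                                   # preallocated candidate point

s, msg = Manopt.linesearch_backtrack!(M, q, f, p, grad_f(M, p), 1.0, 1e-4, 0.5)
```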
+ ## [0.4.44] December 12, 2023 Formally one could consider this version breaking, since a few functions From d0a6e39f419f9f4527b6cf28952f3ff8530f0cba Mon Sep 17 00:00:00 2001 From: Mateusz Baran Date: Sat, 16 Dec 2023 14:35:41 +0100 Subject: [PATCH 07/18] partial fix for Circle --- src/plans/stepsize.jl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/plans/stepsize.jl b/src/plans/stepsize.jl index 734b1e9be2..ecbde826fb 100644 --- a/src/plans/stepsize.jl +++ b/src/plans/stepsize.jl @@ -256,13 +256,13 @@ mutable struct ArmijoLinesearch{TRM<:AbstractRetractionMethod,P,F} <: Linesearch stop_decreasing_at_step::Int function ArmijoLinesearch( M::AbstractManifold=DefaultManifold(); - candidate_point::P=rand(M), - contraction_factor=0.95, - initial_stepsize=1.0, + candidate_point::P=allocate_result(M, rand), + contraction_factor::Real=0.95, + initial_stepsize::Real=1.0, initial_guess=armijo_initial_guess, retraction_method::TRM=default_retraction_method(M), - stop_when_stepsize_less=0.0, - stop_when_stepsize_exceeds=max_stepsize(M), + stop_when_stepsize_less::Real=0.0, + stop_when_stepsize_exceeds::Real=max_stepsize(M), stop_increasing_at_step::Int=100, stop_decreasing_at_step::Int=1000, sufficient_decrease=0.1, @@ -796,14 +796,14 @@ mutable struct WolfePowellLinesearch{ M::AbstractManifold=DefaultManifold(), c1::Float64=10^(-4), c2::Float64=0.999; - candidate_point::P=rand(M), - candidate_tangent::T=zero_vector(M, candidate_point), - candidate_direction::T=zero_vector(M, candidate_point), - max_stepsize=max_stepsize(M, candidate_point), + candidate_point::P=allocate_result(M, rand), + candidate_tangent::T=allocate_result(M, zero_vector, candidate_point), + candidate_direction::T=allocate_result(M, zero_vector, candidate_point), + max_stepsize::Real=max_stepsize(M, candidate_point), retraction_method::TRM=default_retraction_method(M), vector_transport_method::VTM=default_vector_transport_method(M), linesearch_stopsize::Float64=0.0, # deprecated remove on next breaking change - stop_when_stepsize_less=linesearch_stopsize, # + stop_when_stepsize_less::Float64=linesearch_stopsize, # ) where {TRM,VTM,P,T} (linesearch_stopsize > 0.0) && Base.depwarn( WolfePowellLinesearch, From bc795aeee736081d11b977af41dad73fdca0a105 Mon Sep 17 00:00:00 2001 From: Mateusz Baran Date: Sat, 16 Dec 2023 14:56:08 +0100 Subject: [PATCH 08/18] rand -> allocate_result --- src/plans/stepsize.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/plans/stepsize.jl b/src/plans/stepsize.jl index ecbde826fb..a1189a3aa1 100644 --- a/src/plans/stepsize.jl +++ b/src/plans/stepsize.jl @@ -215,7 +215,7 @@ last step size. this returns an initial guess. 
The default uses the last obtained stepsize as well as for internal use -* `candidate_point` (`rand(M)`) +* `candidate_point` (`allocate_result(M, rand)`) Furthermore the following fields act as safeguards @@ -554,7 +554,7 @@ mutable struct NonmonotoneLinesearch{ M::AbstractManifold=DefaultManifold(); bb_min_stepsize::Float64=1e-3, bb_max_stepsize::Float64=1e3, - candidate_point::P=rand(M), + candidate_point::P=allocate_result(M, rand), initial_stepsize::Float64=1.0, memory_size::Int=10, retraction_method::TRM=default_retraction_method(M), @@ -770,9 +770,9 @@ Generate a Wolfe-Powell linesearch ## Keyword Arguments -* `candidate_point` (`rand(M)`) memory for an internims candidate -* `candidate_tangent` (`zero_vector(M; vector_at=candidate_point)`) memory for a gradient -* `candidate_direcntion` (`zero_vector(M; vector_at=candidate_point)`) memory for a direction +* `candidate_point` (`allocate_result(M, rand)`) memory for an internims candidate +* `candidate_tangent` (`allocate_result(M, zero_vector, candidate_point)`) memory for a gradient +* `candidate_direcntion` (`allocate_result(M, zero_vector, candidate_point)`) memory for a direction * `max_stepsize` ([`max_stepsize`](@ref)`(M, p)`) – largest stepsize allowed here. * `retraction_method` – (`ExponentialRetraction()`) the retraction to use * `stop_when_stepsize_less` - (`0.0`) smallest stepsize when to stop (the last one before is taken) @@ -1088,7 +1088,7 @@ as well as the internal fields # Constructor - AdaptiveWNGrad(M=DefaultManifold, grad_f=(M,p) -> zero_vector(M,rand(M)), p=rand(M); kwargs...) + AdaptiveWNGrad(M=DefaultManifold, grad_f=(M, p) -> zero_vector(M, rand(M)), p=rand(M); kwargs...) Where all above fields with defaults are keyword arguments. An additional keyword arguments From 5175979355338fc1ed022ffbe741747fa5ae80c9 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sat, 16 Dec 2023 15:22:14 +0100 Subject: [PATCH 09/18] Fix a positional argument bug. --- src/plans/stepsize.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/plans/stepsize.jl b/src/plans/stepsize.jl index ecbde826fb..a12bf3774e 100644 --- a/src/plans/stepsize.jl +++ b/src/plans/stepsize.jl @@ -287,7 +287,6 @@ function (a::ArmijoLinesearch)( mp::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int, - p=a.candidate_point, η=-get_gradient(mp, get_iterate(s)); kwargs..., ) From 070b2b4c32f2f9e5f8e2f7f63fc1b7e30fdbb7a5 Mon Sep 17 00:00:00 2001 From: Mateusz Baran Date: Sat, 16 Dec 2023 15:24:20 +0100 Subject: [PATCH 10/18] fix typo --- src/plans/stepsize.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plans/stepsize.jl b/src/plans/stepsize.jl index a1189a3aa1..417f47a0b9 100644 --- a/src/plans/stepsize.jl +++ b/src/plans/stepsize.jl @@ -372,7 +372,7 @@ end """ (s, msg) = linesearch_backtrack!(M, q, F, p, X, s, decrease, contract η = -X, f0 = f(p)) -Perform a linesearch bachtrack in-place of `q`. For all details and options, see [`linesearch_backtrack`](@ref) +Perform a linesearch backtrack in-place of `q`. 
For all details and options, see [`linesearch_backtrack`](@ref) """ function linesearch_backtrack!( M::AbstractManifold, From 36cb062a384ed4be0d6b8feda1464ca1d223c1e3 Mon Sep 17 00:00:00 2001 From: Mateusz Baran Date: Sat, 16 Dec 2023 15:33:49 +0100 Subject: [PATCH 11/18] copy -> allocate --- src/plans/stepsize.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plans/stepsize.jl b/src/plans/stepsize.jl index dbefbd5a34..20d1f428c3 100644 --- a/src/plans/stepsize.jl +++ b/src/plans/stepsize.jl @@ -364,7 +364,7 @@ A stepsize `s` and a message `msg` (in case any of the 4 criteria hit) function linesearch_backtrack( M::AbstractManifold, f, p, X::T, s, decrease, contract, η::T=-X, f0=f(M, p); kwargs... ) where {T} - q = copy(M, p) + q = allocate(M, p) return linesearch_backtrack!(M, q, f, p, X, s, decrease, contract, η, f0; kwargs...) end From a6015be7a5c55dc42cf21c48a6f32a01d9eceb61 Mon Sep 17 00:00:00 2001 From: Mateusz Baran Date: Sat, 16 Dec 2023 16:27:20 +0100 Subject: [PATCH 12/18] fix qN direction update --- src/plans/quasi_newton_plan.jl | 6 +++++- src/solvers/quasi_Newton.jl | 3 +-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/plans/quasi_newton_plan.jl b/src/plans/quasi_newton_plan.jl index 325710144e..e0a1ca275b 100644 --- a/src/plans/quasi_newton_plan.jl +++ b/src/plans/quasi_newton_plan.jl @@ -491,7 +491,10 @@ function (d::QuasiNewtonLimitedMemoryDirectionUpdate{InverseBFGS})(r, mp, st) p = get_iterate(st) copyto!(M, r, p, get_gradient(st)) m = length(d.memory_s) - m == 0 && return -r + if m == 0 + r .*= -1 + return r + end for i in m:-1:1 # what if we divide by zero here? Setting to zero ignores this in the next step # precompute in case inner is expensive @@ -590,6 +593,7 @@ function QuasiNewtonCautiousDirectionUpdate( return QuasiNewtonCautiousDirectionUpdate{U}(update, θ) end (d::QuasiNewtonCautiousDirectionUpdate)(mp, st) = d.update(mp, st) +(d::QuasiNewtonCautiousDirectionUpdate)(r, mp, st) = d.update(r, mp, st) # access the inner vector transport method function get_update_vector_transport(u::AbstractQuasiNewtonDirectionUpdate) diff --git a/src/solvers/quasi_Newton.jl b/src/solvers/quasi_Newton.jl index 0497f5ae75..b28a9b2a62 100644 --- a/src/solvers/quasi_Newton.jl +++ b/src/solvers/quasi_Newton.jl @@ -335,8 +335,7 @@ end function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter) M = get_manifold(mp) get_gradient!(mp, qns.X, qns.p) - #qns.direction_update(qns.η, mp, qns) - qns.η = qns.direction_update(mp, qns) + qns.direction_update(qns.η, mp, qns) α = qns.stepsize(mp, qns, iter, qns.η) copyto!(M, qns.p_old, get_iterate(qns)) retract!(M, qns.p, qns.p, qns.η, α, qns.retraction_method) From 4340a51e2d0a36b0af6c93fc7b90436dce802749 Mon Sep 17 00:00:00 2001 From: Mateusz Baran Date: Sat, 16 Dec 2023 17:00:25 +0100 Subject: [PATCH 13/18] save one allocation --- src/plans/stopping_criterion.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plans/stopping_criterion.jl b/src/plans/stopping_criterion.jl index b2046d28df..1c29e54b36 100644 --- a/src/plans/stopping_criterion.jl +++ b/src/plans/stopping_criterion.jl @@ -667,7 +667,7 @@ end function (c::StopWhenAny)(p::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int) (i == 0) && (c.reason = "") # reset on init if any(subC -> subC(p, s, i), c.criteria) - c.reason = string([get_reason(subC) for subC in c.criteria]...) + c.reason = string((get_reason(subC) for subC in c.criteria)...) 
return true end return false From 417f95664b19eb6248e6385c1bf77ace589847a1 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sun, 17 Dec 2023 10:56:11 +0100 Subject: [PATCH 14/18] Adapt Documentation. --- src/plans/quasi_newton_plan.jl | 75 ++++++++++++++++++++++++++-------- src/plans/stepsize.jl | 67 +++++++++++++++++------------- src/solvers/quasi_Newton.jl | 69 +++++++++++++++++-------------- 3 files changed, 136 insertions(+), 75 deletions(-) diff --git a/src/plans/quasi_newton_plan.jl b/src/plans/quasi_newton_plan.jl index e0a1ca275b..d224ccfe3d 100644 --- a/src/plans/quasi_newton_plan.jl +++ b/src/plans/quasi_newton_plan.jl @@ -287,33 +287,63 @@ InverseBroyden(φ::Float64) = InverseBroyden(φ, :constant) @doc raw""" QuasiNewtonMatrixDirectionUpdate <: AbstractQuasiNewtonDirectionUpdate -These [`AbstractQuasiNewtonDirectionUpdate`](@ref)s represent any quasi-Newton update rule, where the operator is stored as a matrix. A distinction is made between the update of the approximation of the Hessian, ``H_k \mapsto H_{k+1}``, and the update of the approximation of the Hessian inverse, ``B_k \mapsto B_{k+1}``. For the first case, the coordinates of the search direction ``η_k`` with respect to a basis ``\{b_i\}^{n}_{i=1}`` are determined by solving a linear system of equations, i.e. +The `QuasiNewtonMatrixDirectionUpdate` representa a quasi-Newton update rule, +where the operator is stored as a matrix. A distinction is made between the update of the +approximation of the Hessian, ``H_k \mapsto H_{k+1}``, and the update of the approximation +of the Hessian inverse, ``B_k \mapsto B_{k+1}``. +For the first case, the coordinates of the search direction ``η_k`` with respect to +a basis ``\{b_i\}^{n}_{i=1}`` are determined by solving a linear system of equations ```math -\text{Solve} \quad \hat{η_k} = - H_k \widehat{\operatorname{grad}f(x_k)} +\text{Solve} \quad \hat{η_k} = - H_k \widehat{\operatorname{grad}f(x_k)}, ``` -where ``H_k`` is the matrix representing the operator with respect to the basis ``\{b_i\}^{n}_{i=1}`` and ``\widehat{\operatorname{grad}f(x_k)}`` represents the coordinates of the gradient of the objective function ``f`` in ``x_k`` with respect to the basis ``\{b_i\}^{n}_{i=1}``. -If a method is chosen where Hessian inverse is approximated, the coordinates of the search direction ``η_k`` with respect to a basis ``\{b_i\}^{n}_{i=1}`` are obtained simply by matrix-vector multiplication, i.e. +where ``H_k`` is the matrix representing the operator with respect to the basis ``\{b_i\}^{n}_{i=1}`` +and ``\widehat{\operatorname{grad}f(x_k)}`` represents the coordinates of the gradient of +the objective function ``f`` in ``x_k`` with respect to the basis ``\{b_i\}^{n}_{i=1}``. +If a method is chosen where Hessian inverse is approximated, the coordinates of the search +direction ``η_k`` with respect to a basis ``\{b_i\}^{n}_{i=1}`` are obtained simply by +matrix-vector multiplication ```math -\hat{η_k} = - B_k \widehat{\operatorname{grad}f(x_k)} +\hat{η_k} = - B_k \widehat{\operatorname{grad}f(x_k)}, ``` -where ``B_k`` is the matrix representing the operator with respect to the basis ``\{b_i\}^{n}_{i=1}`` and ``\widehat{\operatorname{grad}f(x_k)}`` as above. In the end, the search direction ``η_k`` is generated from the coordinates ``\hat{eta_k}`` and the vectors of the basis ``\{b_i\}^{n}_{i=1}`` in both variants. -The [`AbstractQuasiNewtonUpdateRule`](@ref) indicates which quasi-Newton update rule is used. 
In all of them, the Euclidean update formula is used to generate the matrix ``H_{k+1}`` and ``B_{k+1}``, and the basis ``\{b_i\}^{n}_{i=1}`` is transported into the upcoming tangent space ``T_{x_{k+1}} \mathcal{M}``, preferably with an isometric vector transport, or generated there. +where ``B_k`` is the matrix representing the operator with respect to the basis ``\{b_i\}^{n}_{i=1}`` +and ``\widehat{\operatorname{grad}f(x_k)}`` as above. In the end, the search direction ``η_k`` +is generated from the coordinates ``\hat{eta_k}`` and the vectors of the basis ``\{b_i\}^{n}_{i=1}`` +in both variants. +The [`AbstractQuasiNewtonUpdateRule`](@ref) indicates which quasi-Newton update rule is used. +In all of them, the Euclidean update formula is used to generate the matrix ``H_{k+1}`` +and ``B_{k+1}``, and the basis ``\{b_i\}^{n}_{i=1}`` is transported into the upcoming tangent +space ``T_{x_{k+1}} \mathcal{M}``, preferably with an isometric vector transport, or generated there. + +# Provided functors + +* `(mp::AbstractManoptproblem, st::QuasiNewtonState) -> η` to compute the update direction +* `(η, mp::AbstractManoptproblem, st::QuasiNewtonState) -> η` to compute the update direction inplace of `η` # Fields -* `update` – a [`AbstractQuasiNewtonUpdateRule`](@ref). -* `basis` – the basis. -* `matrix` – (`Matrix{Float64}(I, manifold_dimension(M), manifold_dimension(M))`) + +* `basis` an `AbstractBasis` to use in the tangent spaces +* `matrix` (`Matrix{Float64}(I, manifold_dimension(M), manifold_dimension(M))`) the matrix which represents the approximating operator. -* `scale` – (`true) indicates whether the initial matrix (= identity matrix) should be scaled before the first update. -* `vector_transport_method` – (`vector_transport_method`)an `AbstractVectorTransportMethod` +* `scale` (`true) indicates whether the initial matrix (= identity matrix) should be scaled before the first update. +* `update` a [`AbstractQuasiNewtonUpdateRule`](@ref). +* `vector_transport_method` (`vector_transport_method`)an `AbstractVectorTransportMethod` # Constructor - QuasiNewtonMatrixDirectionUpdate(M::AbstractManifold, update, basis, matrix; - scale=true, vector_transport_method=default_vector_transport_method(M)) + QuasiNewtonMatrixDirectionUpdate( + M::AbstractManifold, + update, + basis::B=DefaultOrthonormalBasis(), + m=Matrix{Float64}(I, manifold_dimension(M), manifold_dimension(M)); + kwargs... + ) + +## Keyword Arguments + +* `scale`, `vector_transport_method` for the two fields above Generate the Update rule with defaults from a manifold and the names corresponding to the fields above. @@ -388,8 +418,8 @@ end QuasiNewtonLimitedMemoryDirectionUpdate <: AbstractQuasiNewtonDirectionUpdate This [`AbstractQuasiNewtonDirectionUpdate`](@ref) represents the limited-memory Riemannian BFGS update, -where the approximating operator is represented by ``m`` stored pairs of tangent vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}`` -in the ``k``-th iteration. +where the approximating operator is represented by ``m`` stored pairs of tangent +vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}`` in the ``k``-th iteration. For the calculation of the search direction ``η_k``, the generalisation of the two-loop recursion is used (see [Huang, Gallican, Absil, SIAM J. Optim., 2015](@cite HuangGallivanAbsil:2015)), since it only requires inner products and linear combinations of tangent vectors in ``T_{x_k} \mathcal{M}``. @@ -405,7 +435,13 @@ are used. 
The two-loop recursion can be understood as that the [`InverseBFGS`](@ is executed ``m`` times in a row on ``\mathcal{B}^{(0)}_k[⋅]`` using the tangent vectors ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}``, and in the same time the resulting operator ``\mathcal{B}^{LRBFGS}_k [⋅]`` is directly applied on ``\operatorname{grad}f(x_k)``. When updating there are two cases: if there is still free memory, i.e. ``k < m``, the previously stored vector pairs ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m}^{k-1}`` have to be transported into the upcoming tangent space ``T_{x_{k+1}} \mathcal{M}``; if there is no free memory, the oldest pair ``\{ \widetilde{s}_{k−m}, \widetilde{y}_{k−m}\}`` has to be discarded and then all the remaining vector pairs ``\{ \widetilde{s}_i, \widetilde{y}_i\}_{i=k-m+1}^{k-1}`` are transported into the tangent space ``T_{x_{k+1}} \mathcal{M}``. After that we calculate and store ``s_k = \widetilde{s}_k = T^{S}_{x_k, α_k η_k}(α_k η_k)`` and ``y_k = \widetilde{y}_k``. This process ensures that new information about the objective function is always included and the old, probably no longer relevant, information is discarded. +# Provided functors + +* `(mp::AbstractManoptproblem, st::QuasiNewtonState) -> η` to compute the update direction +* `(η, mp::AbstractManoptproblem, st::QuasiNewtonState) -> η` to compute the update direction inplace of `η` + # Fields + * `memory_s` the set of the stored (and transported) search directions times step size ``\{ \widetilde{s}_i\}_{i=k-m}^{k-1}``. * `memory_y` set of the stored gradient differences ``\{ \widetilde{y}_i\}_{i=k-m}^{k-1}``. * `ξ` a variable used in the two-loop recursion. @@ -423,7 +459,7 @@ When updating there are two cases: if there is still free memory, i.e. ``k < m`` initial_vector=zero_vector(M,x), scale=1.0 project=true - ) + ) # See also @@ -561,6 +597,11 @@ If [`InverseBFGS`](@ref) or [`InverseBFGS`](@ref) is chosen as update, then the method follows the method of [Huang, Absil, Gallivan, SIAM J. Optim., 2018](@cite HuangAbsilGallivan:2018), taking into account that the corresponding step size is chosen. +# Provided functors + +* `(mp::AbstractManoptproblem, st::QuasiNewtonState) -> η` to compute the update direction +* `(η, mp::AbstractManoptproblem, st::QuasiNewtonState) -> η` to compute the update direction inplace of `η` + # Fields * `update` – an [`AbstractQuasiNewtonDirectionUpdate`](@ref) diff --git a/src/plans/stepsize.jl b/src/plans/stepsize.jl index 20d1f428c3..30dd65e0d7 100644 --- a/src/plans/stepsize.jl +++ b/src/plans/stepsize.jl @@ -200,29 +200,29 @@ end @doc raw""" ArmijoLinesearch <: Linesearch -A functor representing Armijo line search including the last runs state, i.e. a -last step size. +A functor representing Armijo line search including the last runs state string the last stepsize. 
# Fields -* `initial_stepsize` (`1.0`) and initial step size -* `retraction_method` (`default_retraction_method(M)`) the retraction to use -* `contraction_factor` (`0.95`) exponent for line search reduction -* `sufficient_decrease` (`0.1`) gain within Armijo's rule -* `last_stepsize` (`initialstepsize`) the last step size we start the search with -* `initial_guess` (`(p,s,i,l) -> l`) based on a [`AbstractManoptProblem`](@ref) `p`, +* `initial_stepsize` (`1.0`) and initial step size +* `retraction_method` (`default_retraction_method(M)`) the retraction to use +* `contraction_factor` (`0.95`) exponent for line search reduction +* `sufficient_decrease` (`0.1`) gain within Armijo's rule +* `last_stepsize` (`initialstepsize`) the last step size we start the search with +* `initial_guess` (`(p,s,i,l) -> l`) based on a [`AbstractManoptProblem`](@ref) `p`, [`AbstractManoptSolverState`](@ref) `s` and a current iterate `i` and a last step size `l`, this returns an initial guess. The default uses the last obtained stepsize as well as for internal use -* `candidate_point` (`allocate_result(M, rand)`) + +* `candidate_point` (`allocate_result(M, rand)`) to store an interims result Furthermore the following fields act as safeguards -* `stop_when_stepsize_less` - (`0.0`) smallest stepsize when to stop (the last one before is taken) -* `stop_when_stepsize_exceeds - ([`max_stepsize`](@ref)`(M, p)`) – largest stepsize when to stop. -* `stop_increasing_at_step` - (`100`) last step to increase the stepsize (phase 1), -* `stop_decreasing_at_step` - (`1000`) last step size to decrease the stepsize (phase 2), +* `stop_when_stepsize_less` (`0.0`) smallest stepsize when to stop (the last one before is taken) +* `stop_when_stepsize_exceeds ([`max_stepsize`](@ref)`(M, p)`) – largest stepsize when to stop. +* `stop_increasing_at_step` (`100`) last step to increase the stepsize (phase 1), +* `stop_decreasing_at_step` (`1000`) last step size to decrease the stepsize (phase 2), Pass `:Messages` to a `debug=` to see `@info`s when these happen. @@ -238,8 +238,12 @@ The constructors return the functor to perform Armijo line search, where of a [`AbstractManoptProblem`](@ref) `amp`, [`AbstractManoptSolverState`](@ref) `ams` and a current iterate `i` with keywords - * `p` (`a.candidate_point`) to pass memory for the candidate, which in the end also stores the resulting point, - * `η` (`-get_gradient(mp, get_iterate(s));`) the search direction to use, by default the steepest descent direction. + +## Keyword Arguments + + * `candidate_point` (`allocate_result(M, rand)`) to pass memory for the candidate point + * `η` (`-get_gradient(mp, get_iterate(s));`) the search direction to use, + by default the steepest descent direction. """ mutable struct ArmijoLinesearch{TRM<:AbstractRetractionMethod,P,F} <: Linesearch candidate_point::P @@ -330,7 +334,7 @@ get_message(a::ArmijoLinesearch) = a.message @doc raw""" (s, msg) = linesearch_backtrack(M, F, p, X, s, decrease, contract η = -X, f0 = f(p)) - (s, msg) = linesearch_backtrack(M, q, F, p, X, s, decrease, contract η = -X, f0 = f(p)) + (s, msg) = linesearch_backtrack!(M, q, F, p, X, s, decrease, contract η = -X, f0 = f(p)) perform a linesearch * on manifold `M` @@ -349,11 +353,11 @@ with the step size `s` as second argument. ## Keywords -* `retraction_method` (`default_retraction_method(M)`) the retraction to use. -* `stop_when_stepsize_less (`0.0`) to avoid numerical underflow +* `retraction_method` (`default_retraction_method(M)`) the retraction to use. 
+* `stop_when_stepsize_less (`0.0`) to avoid numerical underflow * `stop_when_stepsize_exceeds` ([`max_stepsize`](@ref)`(M, p) / norm(M, p, η)`) to avoid leaving the injectivity radius on a manifold -* `stop_increasing_at_step` (`100`) stop the inicial increase of step size after these many steps -* `stop_decreasing_at_step` (`1000`) stop the decreasing search after these many steps +* `stop_increasing_at_step` (`100`) stop the inicial increase of step size after these many steps +* `stop_decreasing_at_step` (`1000`) stop the decreasing search after these many steps These keywords are used as safeguards, where only the max stepsize is a very manifold specific one. @@ -371,7 +375,8 @@ end """ (s, msg) = linesearch_backtrack!(M, q, F, p, X, s, decrease, contract η = -X, f0 = f(p)) -Perform a linesearch backtrack in-place of `q`. For all details and options, see [`linesearch_backtrack`](@ref) +Perform a linesearch backtrack in-place of `q`. +For all details and options, see [`linesearch_backtrack`](@ref) """ function linesearch_backtrack!( M::AbstractManifold, @@ -504,6 +509,10 @@ and ``γ`` is the sufficient decrease parameter ``∈(0,1)``. We can then find t * `sufficient_decrease` – (`1e-4`) sufficient decrease parameter contained in the interval (0,1) * `vector_transport_method` – (`ParallelTransport()`) the vector transport method to use +as well as for internal use + +* `candidate_point` (`allocate_result(M, rand)`) to store an interims result + Furthermore the following fields act as safeguards * `stop_when_stepsize_less - (`0.0`) smallest stepsize when to stop (the last one before is taken) @@ -518,6 +527,8 @@ Pass `:Messages` to a `debug=` to see `@info`s when these happen. NonmonotoneLinesearch() with the Fields above in their order as optional arguments (deprecated). +THis is deprecated, since both defaults above and the memory allocation for the candidate +would be for the default manifold. NonmonotoneLinesearch(M) @@ -769,13 +780,13 @@ Generate a Wolfe-Powell linesearch ## Keyword Arguments -* `candidate_point` (`allocate_result(M, rand)`) memory for an internims candidate -* `candidate_tangent` (`allocate_result(M, zero_vector, candidate_point)`) memory for a gradient -* `candidate_direcntion` (`allocate_result(M, zero_vector, candidate_point)`) memory for a direction -* `max_stepsize` ([`max_stepsize`](@ref)`(M, p)`) – largest stepsize allowed here. -* `retraction_method` – (`ExponentialRetraction()`) the retraction to use -* `stop_when_stepsize_less` - (`0.0`) smallest stepsize when to stop (the last one before is taken) -* `vector_transport_method` – (`ParallelTransport()`) the vector transport method to use +* `candidate_point` (`allocate_result(M, rand)`) memory for an internims candidate +* `candidate_tangent` (`allocate_result(M, zero_vector, candidate_point)`) memory for a gradient +* `candidate_direcntion` (`allocate_result(M, zero_vector, candidate_point)`) memory for a direction +* `max_stepsize` ([`max_stepsize`](@ref)`(M, p)`) – largest stepsize allowed here. 
+* `retraction_method` (`ExponentialRetraction()`) the retraction to use +* `stop_when_stepsize_less` (`0.0`) smallest stepsize when to stop (the last one before is taken) +* `vector_transport_method` (`ParallelTransport()`) the vector transport method to use """ mutable struct WolfePowellLinesearch{ TRM<:AbstractRetractionMethod,VTM<:AbstractVectorTransportMethod,P,T diff --git a/src/solvers/quasi_Newton.jl b/src/solvers/quasi_Newton.jl index b28a9b2a62..d80783b105 100644 --- a/src/solvers/quasi_Newton.jl +++ b/src/solvers/quasi_Newton.jl @@ -5,13 +5,20 @@ These Quasi Newton [`AbstractManoptSolverState`](@ref) represent any quasi-Newto used with any update rule for the direction. # Fields -* `p` – the current iterate, a point on a manifold -* `X` – the current gradient -* `sk` – the current step -* `yk` the current gradient difference -* `direction_update` - an [`AbstractQuasiNewtonDirectionUpdate`](@ref) rule. -* `retraction_method` – an `AbstractRetractionMethod` -* `stop` – a [`StoppingCriterion`](@ref) + +* `p` the current iterate, a point on a manifold +* `X` the current gradient +* `sk` the current step +* `yk` the current gradient difference +* `direction_update` an [`AbstractQuasiNewtonDirectionUpdate`](@ref) rule. +* `retraction_method` an `AbstractRetractionMethod` +* `stop` a [`StoppingCriterion`](@ref) + +as well as for internal use + +* `p_old` the last iterate +* `η` the current update direction +* `X_old` the last gradient # Constructor @@ -152,6 +159,7 @@ Perform a quasi Newton iteration for `f` on the manifold `M` starting in the point `p`. The ``k``th iteration consists of + 1. Compute the search direction ``η_k = -\mathcal{B}_k [\operatorname{grad}f (p_k)]`` or solve ``\mathcal{H}_k [η_k] = -\operatorname{grad}f (p_k)]``. 2. Determine a suitable stepsize ``α_k`` along the curve ``\gamma(α) = R_{p_k}(α η_k)`` e.g. by using [`WolfePowellLinesearch`](@ref). 3. Compute `p_{k+1} = R_{p_k}(α_k η_k)``. @@ -159,37 +167,38 @@ The ``k``th iteration consists of 5. Compute the new approximate Hessian ``H_{k+1}`` or its inverse ``B_k``. # Input -* `M` – a manifold ``\mathcal{M}``. -* `f` – a cost function ``F : \mathcal{M} →ℝ`` to minimize. -* `grad_f`– the gradient ``\operatorname{grad}F : \mathcal{M} →T_x\mathcal M`` of ``F``. -* `p` – an initial value ``p ∈ \mathcal{M}``. + +* `M` a manifold ``\mathcal{M}``. +* `f` a cost function ``F : \mathcal{M} →ℝ`` to minimize. +* `grad_f` the gradient ``\operatorname{grad}F : \mathcal{M} →T_x\mathcal M`` of ``F``. +* `p` an initial value ``p ∈ \mathcal{M}``. # Optional -* `basis` – (`DefaultOrthonormalBasis()`) basis within the tangent space(s) to represent the - Hessian (inverse). -* `cautious_update` – (`false`) – whether or not to use + +* `basis` (`DefaultOrthonormalBasis()`) basis within the tangent space(s) + to represent the Hessian (inverse). +* `cautious_update` (`false`) – whether or not to use a [`QuasiNewtonCautiousDirectionUpdate`](@ref) -* `cautious_function` – (`(x) -> x*10^(-4)`) – a monotone increasing function that is zero +* `cautious_function` (`(x) -> x*10^(-4)`) – a monotone increasing function that is zero at 0 and strictly increasing at 0 for the cautious update. -* `direction_update` – ([`InverseBFGS`](@ref)`()`) the update rule to use. -* `evaluation` – ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by +* `direction_update` ([`InverseBFGS`](@ref)`()`) the update rule to use. 
+* `evaluation` ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by allocation (default) form `gradF(M, x)` or [`InplaceEvaluation`](@ref) in place, i.e. is of the form `gradF!(M, X, x)`. -* `initial_operator` – (`Matrix{Float64}(I,n,n)`) initial matrix to use die the +* `initial_operator` (`Matrix{Float64}(I,n,n)`) initial matrix to use die the approximation, where `n=manifold_dimension(M)`, see also `scale_initial_operator`. -* `memory_size` – (`20`) limited memory, number of ``s_k, y_k`` to store. Set to a negative +* `memory_size` (`20`) limited memory, number of ``s_k, y_k`` to store. Set to a negative value to use a full memory representation -* `retraction_method` – (`default_retraction_method(M, typeof(p))`) a retraction method to use, by default - the exponential map. -* `scale_initial_operator` - (`true`) scale initial operator with +* `retraction_method` (`default_retraction_method(M, typeof(p))`) a retraction method to use +* `scale_initial_operator` (`true`) scale initial operator with ``\frac{⟨s_k,y_k⟩_{p_k}}{\lVert y_k\rVert_{p_k}}`` in the computation -* `stabilize` – (`true`) stabilize the method numerically by projecting computed (Newton-) +* `stabilize` (`true`) stabilize the method numerically by projecting computed (Newton-) directions to the tangent space to reduce numerical errors -* `stepsize` – ([`WolfePowellLinesearch`](@ref)`(retraction_method, vector_transport_method)`) +* `stepsize` ([`WolfePowellLinesearch`](@ref)`(retraction_method, vector_transport_method)`) specify a [`Stepsize`](@ref). -* `stopping_criterion` - ([`StopAfterIteration`](@ref)`(max(1000, memory_size)) | `[`StopWhenGradientNormLess`](@ref)`(1e-6)`) +* `stopping_criterion` ([`StopAfterIteration`](@ref)`(max(1000, memory_size)) | `[`StopWhenGradientNormLess`](@ref)`(1e-6)`) specify a [`StoppingCriterion`](@ref) -* `vector_transport_method` – (`default_vector_transport_method(M, typeof(p))`) a vector transport to use. +* `vector_transport_method` (`default_vector_transport_method(M, typeof(p))`) a vector transport to use. # Output @@ -235,10 +244,10 @@ Perform a quasi Newton iteration for `F` on the manifold `M` starting in the point `x` using a retraction ``R`` and a vector transport ``T``. # Input -* `M` – a manifold ``\mathcal{M}``. -* `F` – a cost function ``F: \mathcal{M} →ℝ`` to minimize. -* `gradF`– the gradient ``\operatorname{grad}F : \mathcal{M} → T_x\mathcal M`` of ``F`` implemented as `gradF(M,p)`. -* `x` – an initial value ``x ∈ \mathcal{M}``. +* `M` a manifold ``\mathcal{M}``. +* `F` a cost function ``F: \mathcal{M} →ℝ`` to minimize. +* `gradF` the gradient ``\operatorname{grad}F : \mathcal{M} → T_x\mathcal M`` of ``F`` implemented as `gradF(M,p)`. +* `x` an initial value ``x ∈ \mathcal{M}``. For all optional parameters, see [`quasi_Newton`](@ref). """ From a92e8a73ec04f2194e26e7bb5e509239a65ddcd1 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Mon, 18 Dec 2023 09:55:19 +0100 Subject: [PATCH 15/18] Work on Test Coverage. 
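The tests added below exercise both functors a quasi-Newton direction update provides,
the allocating variant `(mp, st) -> η` and the in-place variant `(η, mp, st) -> η`
documented earlier in this series. A minimal sketch of the pattern they follow is given
here; the manifold, cost, gradient, and start point are placeholders chosen for
illustration and are not taken from the test file:

    using Manopt, Manifolds
    M = Sphere(2)
    f(M, p) = -p[1]                                 # placeholder cost
    grad_f(M, p) = project(M, p, [-1.0, 0.0, 0.0])  # its Riemannian gradient
    p0 = 1 / sqrt(3) .* [1.0, 1.0, 1.0]
    # run the limited-memory quasi-Newton solver and keep its state
    st = quasi_Newton(M, f, grad_f, p0; return_state=true)
    dmp = DefaultManoptProblem(M, ManifoldGradientObjective(f, grad_f))
    p_star = get_solver_result(st)
    # allocating call of the direction update functor
    η1 = st.direction_update(dmp, st)
    # in-place call of the direction update functor
    η2 = zero_vector(M, p_star)
    st.direction_update(η2, dmp, st)
    # both variants should produce the same tangent vector at p_star
    isapprox(M, p_star, η2, η1)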
--- test/solvers/test_quasi_Newton.jl | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/test/solvers/test_quasi_Newton.jl b/test/solvers/test_quasi_Newton.jl index a949a6fd72..97f0e96a22 100644 --- a/test/solvers/test_quasi_Newton.jl +++ b/test/solvers/test_quasi_Newton.jl @@ -37,10 +37,16 @@ using LinearAlgebra: I, eigvecs, tr, Diagonal stopping_criterion=StopWhenGradientNormLess(10^(-6)), return_state=true, ) + # Check newton update direction works also allocating + dmp = DefaultManoptProblem(M, ManifoldGradientObjective(f, grad_f)) + p_star = get_solver_result(lrbfgs_s) + D = zero_vector(M, p_star) + lrbfgs_s.direction_update(D, dmp, lrbfgs_s) + @test isapprox(M, p_star, D, lrbfgs_s.direction_update(dmp, lrbfgs_s)) + @test startswith( repr(lrbfgs_s), "# Solver state for `Manopt.jl`s Quasi Newton Method\n" ) - dmp = DefaultManoptProblem(M, ManifoldGradientObjective(f, grad_f)) @test get_last_stepsize(dmp, lrbfgs_s, lrbfgs_s.stepsize) > 0 @test Manopt.get_iterate(lrbfgs_s) == x_lrbfgs set_gradient!(lrbfgs_s, M, p, grad_f(M, p)) @@ -68,14 +74,21 @@ using LinearAlgebra: I, eigvecs, tr, Diagonal ) @test isapprox(M, x_lrbfgs_cached_2, x_lrbfgs; atol=1e-5) - x_clrbfgs = quasi_Newton( + clrbfgs_s = quasi_Newton( M, f, grad_f, p; cautious_update=true, stopping_criterion=StopWhenGradientNormLess(10^(-6)), + return_state=true, ) + # Test direction passthrough + x_clrbfgs = get_solver_result(clrbfgs_s) + D = zero_vector(M, x_clrbfgs) + clrbfgs_s.direction_update(D, dmp, clrbfgs_s) + @test isapprox(M, x_clrbfgs, D, clrbfgs_s.direction_update(dmp, clrbfgs_s)) + @test norm(x_clrbfgs - x_solution) ≈ 0 atol = 10.0^(-14) x_rbfgs_Huang = quasi_Newton( @@ -95,7 +108,7 @@ using LinearAlgebra: I, eigvecs, tr, Diagonal for T in [InverseBFGS(), BFGS(), InverseDFP(), DFP(), InverseSR1(), SR1()] for c in [true, false] - x_direction = quasi_Newton( + x_state = quasi_Newton( M, f, grad_f, @@ -104,7 +117,12 @@ using LinearAlgebra: I, eigvecs, tr, Diagonal cautious_update=c, memory_size=-1, stopping_criterion=StopWhenGradientNormLess(10^(-12)), + return_state=true, ) + x_direction = get_solver_result(x_state) + D = zero_vector(M, x_direction) + x_state.direction_update(D, dmp, x_state) + @test isapprox(M, x_direction, D, x_state.direction_update(dmp, x_state)) @test norm(x_direction - x_solution) ≈ 0 atol = 10.0^(-14) end end From 42d22b4f9ef23cf5138fe978e8868a13b050ecd9 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Mon, 25 Dec 2023 19:23:23 +0100 Subject: [PATCH 16/18] Fix deps. --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index eda73b98e0..8d0738e3a1 100644 --- a/Project.toml +++ b/Project.toml @@ -45,7 +45,7 @@ JuMP = "1.15" LinearAlgebra = "1.6" LRUCache = "1.4" ManifoldDiff = "0.3.8" -Manifolds = "0.9" +Manifolds = "0.9.9" ManifoldsBase = "0.15" ManoptExamples = "0.1.4" Markdown = "1.6" From c683eba2fea6ca16d1b93fd865d9d5e00f597a4e Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Wed, 27 Dec 2023 15:34:21 +0100 Subject: [PATCH 17/18] Bump dependencies. 
--- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 8d0738e3a1..f0074007f9 100644 --- a/Project.toml +++ b/Project.toml @@ -45,7 +45,7 @@ JuMP = "1.15" LinearAlgebra = "1.6" LRUCache = "1.4" ManifoldDiff = "0.3.8" -Manifolds = "0.9.9" +Manifolds = "0.9.10" ManifoldsBase = "0.15" ManoptExamples = "0.1.4" Markdown = "1.6" From 525d4f723643f84bb888d434adff859b41cf4fd4 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Thu, 28 Dec 2023 06:34:08 +0100 Subject: [PATCH 18/18] bump deps. --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f0074007f9..857ceae1c2 100644 --- a/Project.toml +++ b/Project.toml @@ -45,7 +45,7 @@ JuMP = "1.15" LinearAlgebra = "1.6" LRUCache = "1.4" ManifoldDiff = "0.3.8" -Manifolds = "0.9.10" +Manifolds = "0.9.11" ManifoldsBase = "0.15" ManoptExamples = "0.1.4" Markdown = "1.6"
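Closing note, not part of any patch above: as a quick illustration of how the pieces
touched in this series interact, the reworked `ArmijoLinesearch` (now backtracking in
place via `linesearch_backtrack!`) can be handed to `quasi_Newton` through the keywords
documented in `src/solvers/quasi_Newton.jl`. This is a hedged sketch only; the manifold,
matrix, cost, and gradient are assumptions made up for the example:

    using Manopt, Manifolds, LinearAlgebra

    M = Sphere(2)
    A = Diagonal([2.0, 1.0, 0.5])               # illustrative symmetric matrix
    f(M, p) = p' * A * p                        # Rayleigh-quotient-like cost
    grad_f(M, p) = project(M, p, 2 .* (A * p))  # Riemannian gradient via projection
    p0 = 1 / sqrt(3) .* [1.0, 1.0, 1.0]

    # limited-memory variant with Armijo backtracking as the step size rule
    p1 = quasi_Newton(
        M, f, grad_f, p0;
        memory_size=10,
        stepsize=ArmijoLinesearch(M; contraction_factor=0.9, sufficient_decrease=0.1),
        stopping_criterion=StopAfterIteration(1000) | StopWhenGradientNormLess(1e-8),
    )

    # full-matrix variant with a cautious update, using the keywords documented above
    p2 = quasi_Newton(
        M, f, grad_f, p0;
        memory_size=-1,
        cautious_update=true,
        stopping_criterion=StopWhenGradientNormLess(1e-8),
    )

Both calls return the resulting point; pass `return_state=true` instead to inspect the
full solver state, as the tests in this series do.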