diff --git a/Changelog.md b/Changelog.md
index cde24b251f..464bb6ef39 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [0.4.45] unreleased
 
+### Added
+
+* Introduce `sub_kwargs` and `sub_stopping_criterion` for `trust_regions` as discussed in [#336](https://github.com/JuliaManifolds/Manopt.jl/discussions/336)
+
 ### Changed
 
 * Faster `safe_indices` in L-BFGS.
diff --git a/src/solvers/FrankWolfe.jl b/src/solvers/FrankWolfe.jl
index 45cb721797..c41f07854b 100644
--- a/src/solvers/FrankWolfe.jl
+++ b/src/solvers/FrankWolfe.jl
@@ -180,7 +180,7 @@ the [`AbstractManifoldGradientObjective`](@ref) `gradient_objective` directly.
 For points 2 and 3 the `sub_state` has to be set to the corresponding [`AbstractEvaluationType`](@ref),
 [`AllocatingEvaluation`](@ref) and [`InplaceEvaluation`](@ref), respectively
 * `sub_state` - (`evaluation` if `sub_problem` is a function, a decorated [`GradientDescentState`](@ref) otherwise)
   for a function, the evaluation is inherited from the Frank-Wolfe `evaluation` keyword.
-* `sub_kwargs` - (`[]`) – keyword arguments to decorate the `sub_state` default state in case the sub_problem is not a function
+* `sub_kwargs` - (`(;)`) keyword arguments to decorate the `sub_state` default state in case the `sub_problem` is not a function
 
 All other keyword arguments are passed to [`decorate_state!`](@ref) for decorators or
 [`decorate_objective!`](@ref), respectively.
@@ -259,7 +259,7 @@ function Frank_Wolfe_method!(
         StopWhenChangeLess(1.0e-8),
     sub_cost=FrankWolfeCost(p, initial_vector),
     sub_grad=FrankWolfeGradient(p, initial_vector),
-    sub_kwargs=[],
+    sub_kwargs=(;),
     sub_objective=ManifoldGradientObjective(sub_cost, sub_grad),
     sub_problem=DefaultManoptProblem(
         M,
@@ -278,6 +278,7 @@ function Frank_Wolfe_method!(
             stepsize=default_stepsize(
                 M, GradientDescentState; retraction_method=retraction_method
             ),
+            sub_kwargs...,
         );
         objective_type=objective_type,
         sub_kwargs...,
diff --git a/src/solvers/adaptive_regularization_with_cubics.jl b/src/solvers/adaptive_regularization_with_cubics.jl
index 2a7f75f1ec..ea25c750ac 100644
--- a/src/solvers/adaptive_regularization_with_cubics.jl
+++ b/src/solvers/adaptive_regularization_with_cubics.jl
@@ -391,15 +391,19 @@ function adaptive_regularization_with_cubics!(
     γ1::R=0.1,
     γ2::R=2.0,
     θ::R=0.5,
-    sub_kwargs=[],
+    sub_kwargs=(;),
     sub_stopping_criterion::StoppingCriterion=StopAfterIteration(maxIterLanczos) |
                                               StopWhenFirstOrderProgress(θ),
-    sub_state::Union{<:AbstractManoptSolverState,<:AbstractEvaluationType}=LanczosState(
-        TangentSpace(M, copy(M, p));
-        maxIterLanczos=maxIterLanczos,
-        σ=σ,
-        θ=θ,
-        stopping_criterion=sub_stopping_criterion,
+    sub_state::Union{<:AbstractManoptSolverState,<:AbstractEvaluationType}=decorate_state!(
+        LanczosState(
+            TangentSpace(M, copy(M, p));
+            maxIterLanczos=maxIterLanczos,
+            σ=σ,
+            θ=θ,
+            stopping_criterion=sub_stopping_criterion,
+            sub_kwargs...,
+        );
+        sub_kwargs...,
     ),
     sub_objective=nothing,
     sub_problem=nothing,
@@ -414,7 +418,9 @@ function adaptive_regularization_with_cubics!(
 ) where {T,R,O<:Union{ManifoldHessianObjective,AbstractDecoratedManifoldObjective}}
     dmho = decorate_objective!(M, mho; objective_type=objective_type, kwargs...)
     if isnothing(sub_objective)
-        sub_objective = AdaptiveRagularizationWithCubicsModelObjective(dmho, σ)
+        sub_objective = decorate_objective!(
+            M, AdaptiveRagularizationWithCubicsModelObjective(dmho, σ); sub_kwargs...
+        )
     end
     if isnothing(sub_problem)
         sub_problem = DefaultManoptProblem(TangentSpace(M, copy(M, p)), sub_objective)
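The recurring `sub_kwargs=[]` → `sub_kwargs=(;)` change above swaps an untyped `Vector{Any}` for an empty `NamedTuple`, the idiomatic empty keyword container in Julia. A minimal, self-contained sketch of the splatting mechanics (plain Julia; `inner` is a made-up stand-in for a sub solver constructor, not a Manopt function):

```julia
# A stand-in for a sub solver state constructor that takes keyword arguments.
inner(; stopping_criterion=:default, kwargs...) = (stopping_criterion, keys(kwargs))

sub_kwargs = (; stopping_criterion=:custom, debug=true)  # user-supplied keywords
inner(; sub_kwargs...)   # -> (:custom, (:debug,)): entries arrive as keywords

empty_kwargs = (;)       # the new default: an empty NamedTuple
inner(; empty_kwargs...) # -> (:default, ()): splatting it is a no-op
```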
diff --git a/src/solvers/augmented_Lagrangian_method.jl b/src/solvers/augmented_Lagrangian_method.jl
index 5a6bcda465..d96da13990 100644
--- a/src/solvers/augmented_Lagrangian_method.jl
+++ b/src/solvers/augmented_Lagrangian_method.jl
@@ -84,6 +84,7 @@ mutable struct AugmentedLagrangianMethodState{
         stopping_criterion::SC=StopAfterIteration(300) | (
             StopWhenSmallerOrEqual(:ϵ, ϵ_min) & StopWhenChangeLess(1e-10)
         ),
+        kwargs...,
     ) where {
         P,
         Pr<:AbstractManoptProblem,
@@ -330,7 +331,7 @@ function augmented_Lagrangian_method!(
     objective_type=:Riemannian,
     sub_cost=AugmentedLagrangianCost(cmo, ρ, μ, λ),
     sub_grad=AugmentedLagrangianGrad(cmo, ρ, μ, λ),
-    sub_kwargs=[],
+    sub_kwargs=(;),
     sub_stopping_criterion=StopAfterIteration(300) |
                            StopWhenGradientNormLess(ϵ) |
                            StopWhenStepsizeLess(1e-8),
@@ -344,6 +345,7 @@ function augmented_Lagrangian_method!(
             ),
             stopping_criterion=sub_stopping_criterion,
             stepsize=default_stepsize(M, QuasiNewtonState),
+            sub_kwargs...,
         );
         sub_kwargs...,
     ),
diff --git a/src/solvers/difference-of-convex-proximal-point.jl b/src/solvers/difference-of-convex-proximal-point.jl
index 4f82789ccb..3f8fa6a445 100644
--- a/src/solvers/difference-of-convex-proximal-point.jl
+++ b/src/solvers/difference-of-convex-proximal-point.jl
@@ -191,7 +191,7 @@ difference_of_convex_proximal_point(M, grad_h, p0; g=g, grad_g=grad_g)
   This is generated by default when `grad_g` is provided. You can specify your own
   by overwriting this keyword.
 * `sub_hess` – (a finite difference approximation by default) specify a Hessian of the subproblem,
   which the default solver, see `sub_state`, needs
-* `sub_kwargs` – (`[]`) pass keyword arguments to the `sub_state`, in form of
+* `sub_kwargs` – (`(;)`) pass keyword arguments to the `sub_state`, in the form of
   a `Dict(:kwname=>value)`, unless you set the `sub_state` directly.
 * `sub_objective` – (a gradient or hessian objective based on the last 3 keywords)
   provide the objective used within `sub_problem` (if that is not specified by the user)
@@ -331,7 +331,7 @@ function difference_of_convex_proximal_point!(
         ProximalDCGrad(grad_g, copy(M, p), λ(1); evaluation=evaluation)
     end,
     sub_hess=ApproxHessianFiniteDifference(M, copy(M, p), sub_grad; evaluation=evaluation),
-    sub_kwargs=[],
+    sub_kwargs=(;),
     sub_stopping_criterion=StopAfterIteration(300) | StopWhenGradientNormLess(1e-8),
     sub_objective=if isnothing(sub_cost) || isnothing(sub_grad)
         nothing
@@ -368,7 +368,7 @@ function difference_of_convex_proximal_point!(
         decorate_state!(
             if isnothing(sub_hess)
                 GradientDescentState(
-                    M, copy(M, p); stopping_criterion=sub_stopping_criterion
+                    M, copy(M, p); stopping_criterion=sub_stopping_criterion, sub_kwargs...
                 )
             else
                 TrustRegionsState(
@@ -378,7 +378,7 @@ function difference_of_convex_proximal_point!(
                         TangentSpace(M, copy(M, p)),
                         TrustRegionModelObjective(sub_objective),
                     ),
-                    TruncatedConjugateGradientState(TangentSpace(M, p)),
+                    TruncatedConjugateGradientState(TangentSpace(M, p); sub_kwargs...),
                 )
             end;
             sub_kwargs...,
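The trailing `kwargs...` added to the `AugmentedLagrangianMethodState` constructor above (and, further down, to `TruncatedConjugateGradientState`) is what makes a single shared `sub_kwargs` tuple safe to splat into constructors that only understand part of it: unknown keywords are absorbed instead of throwing a `MethodError`. An illustrative stand-alone sketch (the names are invented, not Manopt internals):

```julia
struct DemoState
    stop::Symbol
end

# The trailing `kwargs...` silently absorbs keywords this constructor does not
# use, mirroring the pattern added to the solver state constructors here.
DemoState(; stopping_criterion::Symbol=:default, kwargs...) = DemoState(stopping_criterion)

sub_kwargs = (; stopping_criterion=:tight, trust_region_radius=1.0)
DemoState(; sub_kwargs...)  # works although `trust_region_radius` is unused here
```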
diff --git a/src/solvers/difference_of_convex_algorithm.jl b/src/solvers/difference_of_convex_algorithm.jl
index fbc01be1de..1362ef9b35 100644
--- a/src/solvers/difference_of_convex_algorithm.jl
+++ b/src/solvers/difference_of_convex_algorithm.jl
@@ -166,7 +166,7 @@ difference_of_convex_algorithm(M, f, g, grad_h, p; grad_g=grad_g)
   This is generated by default when `grad_g` is provided. You can specify your own
   by overwriting this keyword.
 * `sub_hess` – (a finite difference approximation by default) specify a Hessian of the subproblem,
   which the default solver, see `sub_state`, needs
-* `sub_kwargs` - (`[]`) pass keyword arguments to the `sub_state`, in form of
+* `sub_kwargs` - (`(;)`) pass keyword arguments to the `sub_state`, in the form of
   a `Dict(:kwname=>value)`, unless you set the `sub_state` directly.
 * `sub_objective` - (a gradient or hessian objective based on the last 3 keywords)
   provide the objective used within `sub_problem` (if that is not specified by the user)
@@ -301,7 +301,7 @@ function difference_of_convex_algorithm!(
         )
     end,
     sub_hess=ApproxHessianFiniteDifference(M, copy(M, p), sub_grad; evaluation=evaluation),
-    sub_kwargs=[],
+    sub_kwargs=(;),
     sub_stopping_criterion=StopAfterIteration(300) | StopWhenGradientNormLess(1e-8),
     sub_objective=if isnothing(sub_cost) || isnothing(sub_grad)
         nothing
@@ -333,11 +333,15 @@ function difference_of_convex_algorithm!(
         decorate_state!(
             if isnothing(sub_hess)
                 GradientDescentState(
-                    M, copy(M, p); stopping_criterion=sub_stopping_criterion
+                    M, copy(M, p); stopping_criterion=sub_stopping_criterion, sub_kwargs...
                 )
             else
                 TrustRegionsState(
-                    M, copy(M, p), sub_objective; stopping_criterion=sub_stopping_criterion
+                    M,
+                    copy(M, p),
+                    sub_objective;
+                    stopping_criterion=sub_stopping_criterion,
+                    sub_kwargs...,
                 )
             end;
             sub_kwargs...,
diff --git a/src/solvers/exact_penalty_method.jl b/src/solvers/exact_penalty_method.jl
index ce0062f7e2..000a96a8b2 100644
--- a/src/solvers/exact_penalty_method.jl
+++ b/src/solvers/exact_penalty_method.jl
@@ -301,7 +301,7 @@ function exact_penalty_method!(
     smoothing=LogarithmicSumOfExponentials(),
     sub_cost=ExactPenaltyCost(cmo, ρ, u; smoothing=smoothing),
     sub_grad=ExactPenaltyGrad(cmo, ρ, u; smoothing=smoothing),
-    sub_kwargs=[],
+    sub_kwargs=(;),
     sub_problem::AbstractManoptProblem=DefaultManoptProblem(
         M,
         decorate_objective!(
@@ -324,6 +324,7 @@ function exact_penalty_method!(
             ),
             stopping_criterion=sub_stopping_criterion,
             stepsize=default_stepsize(M, QuasiNewtonState),
+            sub_kwargs...,
         );
         sub_kwargs...,
     ),
diff --git a/src/solvers/truncated_conjugate_gradient_descent.jl b/src/solvers/truncated_conjugate_gradient_descent.jl
index e00cff29b5..c0e769f521 100644
--- a/src/solvers/truncated_conjugate_gradient_descent.jl
+++ b/src/solvers/truncated_conjugate_gradient_descent.jl
@@ -71,6 +71,7 @@ mutable struct TruncatedConjugateGradientState{T,R<:Real,SC<:StoppingCriterion,P
             StopWhenTrustRegionIsExceeded() |
             StopWhenCurvatureIsNegative() |
             StopWhenModelIncreased(),
+        kwargs...,
     ) where {T,R<:Real,F}
         tcgs = new{T,R,typeof(stopping_criterion),F}()
         tcgs.stop = stopping_criterion
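Several hunks above splat `sub_kwargs` twice: once into the inner state constructor and once into `decorate_state!`. A hedged sketch of what the second splat achieves, using `decorate_state!`'s `debug` keyword (the concrete state and debug entries here are only an example):

```julia
using Manopt, Manifolds

M = Sphere(2)
st = GradientDescentState(M, rand(M))

sub_kwargs = (; debug=[:Iteration, :Cost, "\n"])
# Splatting the tuple decorates the plain state, here with debug printing,
# exactly as the solvers above do internally for their sub states.
dst = decorate_state!(st; sub_kwargs...)
```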
diff --git a/src/solvers/trust_regions.jl b/src/solvers/trust_regions.jl
index fd1a4ff665..b6daf2006b 100644
--- a/src/solvers/trust_regions.jl
+++ b/src/solvers/trust_regions.jl
@@ -277,37 +277,42 @@ by default the [`truncated_conjugate_gradient_descent`](@ref) is used.
 * `Hess_f` – (optional), the hessian ``\operatorname{Hess}F(x): T_x\mathcal M → T_x\mathcal M``,
   ``X ↦ \operatorname{Hess}F(x)[X] = ∇_ξ\operatorname{grad}f(x)``
 * `p` – (`rand(M)`) an initial value ``x ∈ \mathcal M``
 
-# Optional
+# Keyword Arguments
+
+* `acceptance_rate` – Accept/reject threshold: if ρ (the performance ratio for the
+  iterate) is at least the acceptance rate ρ', the candidate is accepted.
+  This value should be between ``0`` and ``\frac{1}{4}``
+  (formerly this was called `ρ_prime`, which will be removed in the next breaking change)
+* `augmentation_threshold` – (`0.75`) trust-region augmentation threshold: if ρ is above this threshold
+  and we have a solution on the trust region boundary with negative curvature, we extend (augment) the radius
+* `augmentation_factor` – (`2.0`) trust-region augmentation factor
 * `evaluation` – ([`AllocatingEvaluation`](@ref)) specify whether the gradient and hessian work by
   allocation (default) or [`InplaceEvaluation`](@ref) in place
+* `κ` – (`0.1`) the linear convergence target rate of the tCG method
+  [`truncated_conjugate_gradient_descent`](@ref), and is used in a stopping criterion therein
 * `max_trust_region_radius` – the maximum trust-region radius
 * `preconditioner` – a preconditioner (a symmetric, positive definite operator
   that should approximate the inverse of the Hessian)
-* `randomize` – set to true if the trust-region solve is to be initiated with a
-  random tangent vector and no preconditioner will be used.
 * `project!` – (`copyto!`) specify a projection operation for tangent vectors
   within the subsolver for numerical stability. This means we require a function
   `(M, Y, p, X) -> ...` working in place of `Y`.
+* `randomize` – set to true if the trust-region solve is to be initiated with a
+  random tangent vector and no preconditioner will be used.
+* `ρ_regularization` – (`1e3`) regularize the performance evaluation ``ρ``
+  to avoid numerical inaccuracies.
+* `reduction_factor` – (`0.25`) trust-region reduction factor
+* `reduction_threshold` – (`0.1`) trust-region reduction threshold: if ρ is below this threshold,
+  the trust region radius is reduced by `reduction_factor`.
 * `retraction` – (`default_retraction_method(M, typeof(p))`) a retraction to use
 * `stopping_criterion` – ([`StopAfterIteration`](@ref)`(1000) | `[`StopWhenGradientNormLess`](@ref)`(1e-6)`)
   a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop.
-* `trust_region_radius` – the initial trust-region radius
-* `acceptance_rate` – Accept/reject threshold: if ρ (the performance ratio for the
-  iterate) is at least the acceptance rate ρ', the candidate is accepted.
-  This value should be between ``0`` and ``\frac{1}{4}``
-  (formerly this was called `ρ_prime, which will be removed on the next breaking change)
-* `ρ_regularization` – (`1e3`) regularize the performance evaluation ``ρ``
-  to avoid numerical inaccuracies.
+* `sub_kwargs` – (`(;)`) keyword arguments passed to the subsolver state and used to decorate it, for example with debug output.
+* `sub_stopping_criterion` – a stopping criterion for the subsolver; defaults to the standard stopping criterion of [`truncated_conjugate_gradient_descent`](@ref).
+* `sub_problem` – ([`DefaultManoptProblem`](@ref)`(TangentSpace(M, p), sub_objective)`) the problem for the subsolver
+* `sub_state` – (a decorated [`TruncatedConjugateGradientState`](@ref), using `sub_stopping_criterion` as its stopping criterion) the state for the subsolver. See also `sub_kwargs`.
 * `θ` – (`1.0`) 1+θ is the superlinear convergence target rate of the tCG-method
   [`truncated_conjugate_gradient_descent`](@ref), and is used in a stopping criterion therein
-* `κ` – (`0.1`) the linear convergence target rate of the tCG method
-  [`truncated_conjugate_gradient_descent`](@ref), and is used in a stopping crierion therein
-* `reduction_threshold` – (`0.1`) trust-region reduction threshold: if ρ is below this threshold,
-  the trust region radius is reduced by `reduction_factor`.
-* `reduction_factor` – (`0.25`) trust-region reduction factor
-* `augmentation_threshold` – (`0.75`) trust-region augmentation threshold: if ρ is above this threshold,
-  we have a solution on the trust region boundary and negative curvature, we extend (augment) the radius
-* `augmentation_factor` – (`2.0`) trust-region augmentation factor
+* `trust_region_radius` – the initial trust-region radius
 
 For the case that no hessian is provided, the Hessian is computed using finite difference, see
 [`ApproxHessianFiniteDifference`](@ref).
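For orientation, an illustrative-only sketch of the radius update that the `reduction_*` and `augmentation_*` keywords above describe; this condenses the documented semantics and is not the actual Manopt implementation (in particular the boundary/negative-curvature check is reduced to a Boolean flag):

```julia
# Δ: current radius, ρ: performance ratio of the candidate step.
function update_radius(Δ, ρ, on_boundary_with_negative_curvature::Bool;
    reduction_threshold=0.1, reduction_factor=0.25,
    augmentation_threshold=0.75, augmentation_factor=2.0,
    max_trust_region_radius=Inf,
)
    ρ < reduction_threshold && return reduction_factor * Δ  # shrink the region
    (ρ > augmentation_threshold && on_boundary_with_negative_curvature) &&
        return min(augmentation_factor * Δ, max_trust_region_radius)  # extend it
    return Δ  # keep the radius otherwise
end
```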
@@ -493,16 +498,29 @@ function trust_regions!(
     reduction_factor::R=0.25,
     augmentation_threshold::R=0.75,
     augmentation_factor::R=2.0,
-    sub_objective=TrustRegionModelObjective(mho),
+    sub_kwargs=(;),
+    sub_objective=decorate_objective!(M, TrustRegionModelObjective(mho); sub_kwargs...),
     sub_problem=DefaultManoptProblem(TangentSpace(M, p), sub_objective),
-    sub_state::Union{AbstractHessianSolverState,AbstractEvaluationType}=TruncatedConjugateGradientState(
-        TangentSpace(M, copy(M, p)),
-        zero_vector(M, p);
-        θ=θ,
-        κ=κ,
-        trust_region_radius,
-        randomize=randomize,
-        (project!)=project!,
+    sub_stopping_criterion::StoppingCriterion=StopAfterIteration(manifold_dimension(M)) |
+                                              StopWhenResidualIsReducedByFactorOrPower(;
+                                                  κ=κ, θ=θ
+                                              ) |
+                                              StopWhenTrustRegionIsExceeded() |
+                                              StopWhenCurvatureIsNegative() |
+                                              StopWhenModelIncreased(),
+    sub_state::AbstractManoptSolverState=decorate_state!(
+        TruncatedConjugateGradientState(
+            TangentSpace(M, copy(M, p)),
+            zero_vector(M, p);
+            θ=θ,
+            κ=κ,
+            trust_region_radius,
+            randomize=randomize,
+            (project!)=project!,
+            sub_kwargs...,
+            stopping_criterion=sub_stopping_criterion,
+        );
+        sub_kwargs...,
     ),
     kwargs..., #collect rest
 ) where {Proj,O<:Union{ManifoldHessianObjective,AbstractDecoratedManifoldObjective},R}
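Finally, a hedged end-to-end sketch of the feature this PR introduces. The cost is a Rayleigh quotient on the sphere; `A`, `f`, `grad_f`, and `Hess_f` are example data, not part of the PR, and the high-level `trust_regions(M, f, grad_f, Hess_f, p; ...)` call follows the solver's documented signature:

```julia
using Manopt, Manifolds, LinearAlgebra

M = Sphere(2)
A = Symmetric(randn(3, 3))
f(M, p) = p' * A * p                                   # Rayleigh quotient
grad_f(M, p) = 2 * (A * p - f(M, p) * p)               # Riemannian gradient
Hess_f(M, p, X) = 2 * (A * X - (p' * (A * X)) * p - f(M, p) * X)

q = trust_regions(
    M, f, grad_f, Hess_f, rand(M);
    # new: decorate the inner tCG state, for example with debug output
    sub_kwargs=(; debug=[:Iteration, :Cost, "\n"]),
    # new: override the default tCG stopping criterion
    sub_stopping_criterion=StopAfterIteration(20) | StopWhenModelIncreased(),
)
```

The minimizer `q` approximates an eigenvector of `A` for its smallest eigenvalue, so the effect of the two new keywords can be checked directly against the printed iterations.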