From 494e1511cb8c61789c9d38bcb919e8b2468bfb62 Mon Sep 17 00:00:00 2001 From: Stefan Kroboth Date: Sun, 12 Jun 2022 17:03:42 +0200 Subject: [PATCH] Cleanup and improve documentation of trust region method, added doc and unit tests Added documentation, doc and unit tests for TrustRegion, CauchyPoint, Dogleg and Steihaug. Whenever possible, users can provide initial gradients, hessians and parameter vectors. Any setter methods were renamed to `with_`. --- argmin/examples/sr1_trustregion.rs | 2 +- argmin/examples/trustregion_nd.rs | 7 +- .../src/solver/quasinewton/sr1_trustregion.rs | 2 +- argmin/src/solver/trustregion/cauchypoint.rs | 91 +++++- argmin/src/solver/trustregion/dogleg.rs | 111 ++++++- argmin/src/solver/trustregion/mod.rs | 88 +++++- argmin/src/solver/trustregion/steihaug.rs | 234 +++++++++++++- .../solver/trustregion/trustregion_method.rs | 293 +++++++++++++++--- 8 files changed, 738 insertions(+), 90 deletions(-) diff --git a/argmin/examples/sr1_trustregion.rs b/argmin/examples/sr1_trustregion.rs index db13cddb4..9cf0d5f12 100644 --- a/argmin/examples/sr1_trustregion.rs +++ b/argmin/examples/sr1_trustregion.rs @@ -57,7 +57,7 @@ fn run() -> Result<(), Error> { // let init_hessian: Array2 = Array2::eye(8); // Set up the subproblem - let subproblem = Steihaug::new().max_iters(20); + let subproblem = Steihaug::new().with_max_iters(20); // let subproblem = CauchyPoint::new(); // let subproblem = Dogleg::new(); diff --git a/argmin/examples/trustregion_nd.rs b/argmin/examples/trustregion_nd.rs index df94528e9..575dff301 100644 --- a/argmin/examples/trustregion_nd.rs +++ b/argmin/examples/trustregion_nd.rs @@ -25,6 +25,7 @@ impl CostFunction for Rosenbrock { Ok(rosenbrock_2d(&p.to_vec(), self.a, self.b)) } } + impl Gradient for Rosenbrock { type Param = Array1; type Gradient = Array1; @@ -59,9 +60,9 @@ fn run() -> Result<(), Error> { let init_param: Array1 = Array1::from(vec![-1.2, 1.0]); // Set up the subproblem - // let subproblem = Steihaug::new().max_iters(2); - // let subproblem = CauchyPoint::new(); - let subproblem = Dogleg::new(); + // let subproblem = Steihaug::new().with_max_iters(2); + let subproblem = CauchyPoint::new(); + // let subproblem = Dogleg::new(); // Set up solver let solver = TrustRegion::new(subproblem); diff --git a/argmin/src/solver/quasinewton/sr1_trustregion.rs b/argmin/src/solver/quasinewton/sr1_trustregion.rs index f26294090..4b226df3b 100644 --- a/argmin/src/solver/quasinewton/sr1_trustregion.rs +++ b/argmin/src/solver/quasinewton/sr1_trustregion.rs @@ -53,7 +53,7 @@ where /// ``` /// # use argmin::solver::quasinewton::SR1TrustRegion; /// # let subproblem = (); - /// let subproblem = argmin::solver::trustregion::Steihaug::new().max_iters(20); + /// let subproblem = argmin::solver::trustregion::Steihaug::new().with_max_iters(20); /// # // The next line defines the type of `subproblem`. This is done here hidden in order to /// # // not litter the docs. When all of this is fed into an Executor, the compiler will /// # // figure out the types. diff --git a/argmin/src/solver/trustregion/cauchypoint.rs b/argmin/src/solver/trustregion/cauchypoint.rs index 13f728814..d508d7f3e 100644 --- a/argmin/src/solver/trustregion/cauchypoint.rs +++ b/argmin/src/solver/trustregion/cauchypoint.rs @@ -5,11 +5,6 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. -//! # References: -//! -//! \[0\] Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization. -//! Springer. 
ISBN 0-387-30303-0. - use crate::core::{ ArgminFloat, Error, Gradient, Hessian, IterState, Problem, Solver, State, TerminationReason, TrustRegionRadius, KV, @@ -19,12 +14,14 @@ use argmin_math::{ArgminMul, ArgminNorm, ArgminWeightedDot}; use serde::{Deserialize, Serialize}; use std::fmt::Debug; +/// # Cauchy point method +/// /// The Cauchy point is the minimum of the quadratic approximation of the cost function within the /// trust region along the direction given by the first derivative. /// -/// # References: +/// ## Reference /// -/// \[0\] Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization. +/// Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization. /// Springer. ISBN 0-387-30303-0. #[derive(Clone, Debug, Copy, PartialEq, PartialOrd, Default)] #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] @@ -37,7 +34,14 @@ impl CauchyPoint where F: ArgminFloat, { - /// Constructor + /// Construct a new instance of [`CauchyPoint`] + /// + /// # Example + /// + /// ``` + /// # use argmin::solver::trustregion::CauchyPoint; + /// let cp: CauchyPoint = CauchyPoint::new(); + /// ``` pub fn new() -> Self { CauchyPoint { radius: F::nan() } } @@ -57,18 +61,28 @@ where problem: &mut Problem, mut state: IterState, ) -> Result<(IterState, Option), Error> { - let param = state.take_param().unwrap(); + let param = state.take_param().ok_or_else(argmin_error_closure!( + NotInitialized, + concat!( + "`CauchyPoint` requires an initial parameter vector. ", + "Please provide an initial guess via `Executor`s `configure` method." + ) + ))?; + let grad = state .take_grad() .map(Result::Ok) .unwrap_or_else(|| problem.gradient(¶m))?; + let grad_norm = grad.norm(); + let hessian = state .take_hessian() .map(Result::Ok) .unwrap_or_else(|| problem.hessian(¶m))?; let wdp = grad.weighted_dot(&hessian, &grad); + let tau: F = if wdp <= float!(0.0) { float!(1.0) } else { @@ -76,10 +90,11 @@ where }; let new_param = grad.mul(&(-tau * self.radius / grad_norm)); - Ok((state.param(new_param).grad(grad).hessian(hessian), None)) + Ok((state.param(new_param), None)) } fn terminate(&mut self, state: &IterState) -> TerminationReason { + // Not an iterative algorithm if state.get_iter() >= 1 { TerminationReason::MaxItersReached } else { @@ -92,6 +107,17 @@ impl TrustRegionRadius for CauchyPoint where F: ArgminFloat, { + /// Set current radius. + /// + /// Needed by [`TrustRegion`](`crate::solver::trustregion::TrustRegion`). 
+ /// + /// # Example + /// + /// ``` + /// use argmin::solver::trustregion::{CauchyPoint, TrustRegionRadius}; + /// let mut cp: CauchyPoint = CauchyPoint::new(); + /// cp.set_radius(0.8); + /// ``` fn set_radius(&mut self, radius: F) { self.radius = radius; } @@ -100,7 +126,52 @@ where #[cfg(test)] mod tests { use super::*; + use crate::core::{test_utils::TestProblem, ArgminError}; use crate::test_trait_impl; + use approx::assert_relative_eq; test_trait_impl!(cauchypoint, CauchyPoint); + + #[test] + fn test_new() { + let cp: CauchyPoint = CauchyPoint::new(); + + let CauchyPoint { radius } = cp; + + assert_eq!(radius.to_ne_bytes(), f64::NAN.to_ne_bytes()); + } + + #[test] + fn test_next_iter() { + let param: Vec = vec![-1.0, 1.0]; + + let mut cp: CauchyPoint = CauchyPoint::new(); + cp.set_radius(1.0); + + // Forgot to initialize the parameter vector + let state: IterState, Vec, (), Vec>, f64> = IterState::new(); + let problem = TestProblem::new(); + let res = cp.next_iter(&mut Problem::new(problem), state); + assert_error!( + res, + ArgminError, + concat!( + "Not initialized: \"`CauchyPoint` requires an initial parameter vector. Please ", + "provide an initial guess via `Executor`s `configure` method.\"" + ) + ); + + // All good. + let state: IterState, Vec, (), Vec>, f64> = + IterState::new().param(param); + let problem = TestProblem::new(); + let (mut state_out, kv) = cp.next_iter(&mut Problem::new(problem), state).unwrap(); + + assert!(kv.is_none()); + + let s_param = state_out.take_param().unwrap(); + + assert_relative_eq!(s_param[0], 1.0f64 / 2.0f64.sqrt(), epsilon = f64::EPSILON); + assert_relative_eq!(s_param[1], -1.0f64 / 2.0f64.sqrt(), epsilon = f64::EPSILON); + } } diff --git a/argmin/src/solver/trustregion/dogleg.rs b/argmin/src/solver/trustregion/dogleg.rs index e22f8f24c..cba17bab6 100644 --- a/argmin/src/solver/trustregion/dogleg.rs +++ b/argmin/src/solver/trustregion/dogleg.rs @@ -5,11 +5,6 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. -//! # References: -//! -//! \[0\] Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization. -//! Springer. ISBN 0-387-30303-0. - use crate::core::{ ArgminFloat, Error, Gradient, Hessian, IterState, Problem, Solver, State, TerminationReason, TrustRegionRadius, KV, @@ -20,13 +15,15 @@ use argmin_math::{ #[cfg(feature = "serde1")] use serde::{Deserialize, Serialize}; +/// # Dogleg method +/// /// The Dogleg method computes the intersection of the trust region boundary with a path given by /// the unconstraind minimum along the steepest descent direction and the optimum of the quadratic /// approximation of the cost function at the current point. /// -/// # References: +/// ## Reference /// -/// \[0\] Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization. +/// Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization. /// Springer. ISBN 0-387-30303-0. 
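+///
+/// ## Example
+///
+/// A minimal sketch of plugging `Dogleg` into the trust region method, mirroring the
+/// bundled `trustregion_nd` example (the `f64` float type is assumed here):
+///
+/// ```
+/// use argmin::solver::trustregion::{Dogleg, TrustRegion};
+///
+/// // `Dogleg` computes the step within the trust region ...
+/// let subproblem: Dogleg<f64> = Dogleg::new();
+///
+/// // ... while `TrustRegion` manages the radius between iterations.
+/// let solver: TrustRegion<_, f64> = TrustRegion::new(subproblem);
+/// ```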
#[derive(Clone, Debug, Copy, PartialEq, PartialOrd, Default)] #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] @@ -39,7 +36,14 @@ impl Dogleg where F: ArgminFloat, { - /// Constructor + /// Construct a new instance of [`Dogleg`] + /// + /// # Example + /// + /// ``` + /// # use argmin::solver::trustregion::Dogleg; + /// let dl: Dogleg = Dogleg::new(); + /// ``` pub fn new() -> Self { Dogleg { radius: F::nan() } } @@ -64,15 +68,24 @@ where problem: &mut Problem, mut state: IterState, ) -> Result<(IterState, Option), Error> { - let param = state.take_param().unwrap(); + let param = state.take_param().ok_or_else(argmin_error_closure!( + NotInitialized, + concat!( + "`Dogleg` requires an initial parameter vector. ", + "Please provide an initial guess via `Executor`s `configure` method." + ) + ))?; + let g = state .take_grad() .map(Result::Ok) .unwrap_or_else(|| problem.gradient(¶m))?; + let h = state .take_hessian() .map(Result::Ok) .unwrap_or_else(|| problem.hessian(¶m))?; + let pstar; // pb = -H^-1g @@ -131,6 +144,17 @@ where } impl TrustRegionRadius for Dogleg { + /// Set current radius. + /// + /// Needed by [`TrustRegion`](`crate::solver::trustregion::TrustRegion`). + /// + /// # Example + /// + /// ``` + /// use argmin::solver::trustregion::{Dogleg, TrustRegionRadius}; + /// let mut dl: Dogleg = Dogleg::new(); + /// dl.set_radius(0.8); + /// ``` fn set_radius(&mut self, radius: F) { self.radius = radius; } @@ -139,7 +163,76 @@ impl TrustRegionRadius for Dogleg { #[cfg(test)] mod tests { use super::*; + #[cfg(feature = "_ndarrayl")] + use crate::core::ArgminError; use crate::test_trait_impl; test_trait_impl!(dogleg, Dogleg); + + #[test] + fn test_new() { + let dl: Dogleg = Dogleg::new(); + + let Dogleg { radius } = dl; + + assert_eq!(radius.to_ne_bytes(), f64::NAN.to_ne_bytes()); + } + + #[cfg(feature = "_ndarrayl")] + #[test] + fn test_next_iter() { + use approx::assert_relative_eq; + use ndarray::{Array, Array1, Array2}; + + struct TestProblem {} + + impl Gradient for TestProblem { + type Param = Array1; + type Gradient = Array1; + + fn gradient(&self, _p: &Self::Param) -> Result { + Ok(Array1::from_vec(vec![0.5, 2.0])) + } + } + + impl Hessian for TestProblem { + type Param = Array1; + type Hessian = Array2; + + fn hessian(&self, _p: &Self::Param) -> Result { + Ok(Array::from_shape_vec((2, 2), vec![1f64, 2.0, 3.0, 4.0])?) + } + } + + let param: Array1 = Array1::from_vec(vec![-1.0, 1.0]); + + let mut dl: Dogleg = Dogleg::new(); + dl.set_radius(1.0); + + // Forgot to initialize the parameter vector + let state: IterState, Array1, (), Array2, f64> = IterState::new(); + let problem = TestProblem {}; + let res = dl.next_iter(&mut Problem::new(problem), state); + assert_error!( + res, + ArgminError, + concat!( + "Not initialized: \"`Dogleg` requires an initial parameter vector. Please ", + "provide an initial guess via `Executor`s `configure` method.\"" + ) + ); + + // All good. 
+ let state: IterState, Array1, (), Array2, f64> = + IterState::new().param(param); + let problem = TestProblem {}; + let (mut state_out, kv) = dl.next_iter(&mut Problem::new(problem), state).unwrap(); + + assert!(kv.is_none()); + + let s_param = state_out.take_param().unwrap(); + + assert_relative_eq!(s_param[0], -0.9730617585026131, epsilon = f64::EPSILON); + assert_relative_eq!(s_param[1], 0.2305446033629983, epsilon = f64::EPSILON); + } } diff --git a/argmin/src/solver/trustregion/mod.rs b/argmin/src/solver/trustregion/mod.rs index febc7c60e..9b52c0845 100644 --- a/argmin/src/solver/trustregion/mod.rs +++ b/argmin/src/solver/trustregion/mod.rs @@ -5,26 +5,78 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. -//! Trust region methods +//! # Trust region method +//! +//! The trust region method approximates the cost function within a certain region around the +//! current point in parameter space. Depending on the quality of this approximation, the region is +//! either expanded or contracted. +//! +//! For more details see [`TrustRegion`]. +//! +//! ## Reference +//! +//! Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization. +//! Springer. ISBN 0-387-30303-0. /// Cauchy Point -pub mod cauchypoint; +mod cauchypoint; /// Dogleg method -pub mod dogleg; +mod dogleg; /// Steihaug method -pub mod steihaug; +mod steihaug; /// Trust region solver -pub mod trustregion_method; +mod trustregion_method; pub use self::cauchypoint::*; pub use self::dogleg::*; pub use self::steihaug::*; pub use self::trustregion_method::*; -/// Defines a common interface to methods which calculate approximate steps for trust region -/// methods. +/// An interface methods which calculate approximate steps for trust region methods must implement. 
+/// +/// # Example +/// +/// ``` +/// use argmin::solver::trustregion::TrustRegionRadius; +/// +/// struct MySubProblem { +/// radius: F +/// } +/// +/// impl TrustRegionRadius for MySubProblem { +/// fn set_radius(&mut self, radius: F) { +/// self.radius = radius +/// } +/// } +/// ``` pub trait TrustRegionRadius { - /// Set the initial step length + /// Set the initial radius + /// + /// # Example + /// + /// ``` + /// use argmin::solver::trustregion::TrustRegionRadius; + /// # use argmin::core::ArgminFloat; + /// + /// # struct MySubProblem { + /// # radius: F + /// # } + /// # + /// # impl MySubProblem { + /// # pub fn new() -> Self { + /// # MySubProblem { radius: F::from_f64(1.0f64).unwrap() } + /// # } + /// # } + /// # + /// # impl TrustRegionRadius for MySubProblem { + /// # fn set_radius(&mut self, radius: F) { + /// # self.radius = radius + /// # } + /// # } + /// let mut subproblem = MySubProblem::new(); + /// + /// subproblem.set_radius(0.8); + /// ``` fn set_radius(&mut self, radius: F); } @@ -32,3 +84,23 @@ pub trait TrustRegionRadius { pub fn reduction_ratio(fxk: F, fxkpk: F, mk0: F, mkpk: F) -> F { (fxk - fxkpk) / (mk0 - mkpk) } + +#[cfg(test)] +mod tests { + use super::*; + use approx::assert_relative_eq; + + #[test] + fn test_reduction_ration() { + let fxk = 10.0f64; + let fxkpk = 6.0; + let mk0 = 12.0; + let mkpk = 10.0; + + assert_relative_eq!( + reduction_ratio(fxk, fxkpk, mk0, mkpk), + 2.0f64, + epsilon = std::f64::EPSILON + ); + } +} diff --git a/argmin/src/solver/trustregion/steihaug.rs b/argmin/src/solver/trustregion/steihaug.rs index a87c1823f..2fa2c1ebf 100644 --- a/argmin/src/solver/trustregion/steihaug.rs +++ b/argmin/src/solver/trustregion/steihaug.rs @@ -5,11 +5,6 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. -//! # References: -//! -//! \[0\] Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization. -//! Springer. ISBN 0-387-30303-0. - use crate::core::{ ArgminFloat, Error, IterState, Problem, SerializeAlias, Solver, State, TerminationReason, TrustRegionRadius, KV, @@ -18,12 +13,14 @@ use argmin_math::{ArgminAdd, ArgminDot, ArgminMul, ArgminNorm, ArgminWeightedDot #[cfg(feature = "serde1")] use serde::{Deserialize, Serialize}; +/// # Steihaug method +/// /// The Steihaug method is a conjugate gradients based approach for finding an approximate solution /// to the second order approximation of the cost function within the trust region. /// -/// # References: +/// ## Reference /// -/// \[0\] Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization. +/// Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization. /// Springer. ISBN 0-387-30303-0. #[derive(Clone, Default)] #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] @@ -51,7 +48,14 @@ where P: ArgminMul + ArgminDot + ArgminAdd, F: ArgminFloat, { - /// Constructor + /// Construct a new instance of [`Steihaug`] + /// + /// # Example + /// + /// ``` + /// # use argmin::solver::trustregion::Steihaug; + /// let sh: Steihaug, f64> = Steihaug::new(); + /// ``` pub fn new() -> Self { Steihaug { radius: F::nan(), @@ -66,20 +70,47 @@ where } /// Set epsilon - pub fn epsilon(mut self, epsilon: F) -> Result { + /// + /// The algorithm stops when the residual is smaller than `epsilon`. + /// + /// Must be larger than 0 and defaults to 10^-10. 
+ /// + /// # Example + /// + /// ``` + /// # use argmin::solver::trustregion::Steihaug; + /// # use argmin::core::Error; + /// # fn main() -> Result<(), Error> { + /// let sh: Steihaug, f64> = Steihaug::new().with_epsilon(10e-9)?; + /// # Ok(()) + /// # } + /// ``` + pub fn with_epsilon(mut self, epsilon: F) -> Result { if epsilon <= float!(0.0) { return Err(argmin_error!( InvalidParameter, - "Steihaug: epsilon must be > 0.0." + "`Steihaug`: epsilon must be > 0.0." )); } self.epsilon = epsilon; Ok(self) } - /// set maximum number of iterations + /// Set maximum number of iterations + /// + /// The algorithm stops after `iter` iterations. + /// + /// Defaults to `u64::MAX`. + /// + /// # Example + /// + /// ``` + /// # use argmin::solver::trustregion::Steihaug; + /// # use argmin::core::Error; + /// let sh: Steihaug, f64> = Steihaug::new().with_max_iters(100); + /// ``` #[must_use] - pub fn max_iters(mut self, iters: u64) -> Self { + pub fn with_max_iters(mut self, iters: u64) -> Self { self.max_iters = iters; self } @@ -164,7 +195,26 @@ where _problem: &mut Problem, state: IterState, ) -> Result<(IterState, Option), Error> { - let r = state.get_grad().unwrap().clone(); + let r = state + .get_grad() + .ok_or_else(argmin_error_closure!( + NotInitialized, + concat!( + "`Steihaug` requires an initial gradient. ", + "Please provide an initial gradient via `Executor`s `configure` method." + ) + ))? + .clone(); + + if state.get_hessian().is_none() { + return Err(argmin_error!( + NotInitialized, + concat!( + "`Steihaug` requires an initial Hessian. ", + "Please provide an initial Hessian via `Executor`s `configure` method." + ) + )); + } self.r_0_norm = r.norm(); self.rtr = r.dot(&r); @@ -182,8 +232,16 @@ where _problem: &mut Problem, mut state: IterState, ) -> Result<(IterState, Option), Error> { - let grad = state.take_grad().unwrap(); - let h = state.take_hessian().unwrap(); + let grad = state.take_grad().ok_or_else(argmin_error_closure!( + PotentialBug, + "`Steihaug`: Gradient in state not set." + ))?; + + let h = state.take_hessian().ok_or_else(argmin_error_closure!( + PotentialBug, + "`Steihaug`: Hessian in state not set." + ))?; + let d = self.d.as_ref().unwrap(); let dhd = d.weighted_dot(&h, d); @@ -246,7 +304,18 @@ where } } -impl TrustRegionRadius for Steihaug { +impl TrustRegionRadius for Steihaug { + /// Set current radius. + /// + /// Needed by [`TrustRegion`](`crate::solver::trustregion::TrustRegion`). 
+ /// + /// # Example + /// + /// ``` + /// use argmin::solver::trustregion::{Steihaug, TrustRegionRadius}; + /// let mut sh: Steihaug, f64> = Steihaug::new(); + /// sh.set_radius(0.8); + /// ``` fn set_radius(&mut self, radius: F) { self.radius = radius; } @@ -256,7 +325,140 @@ impl TrustRegionRadius for Steihau mod tests { use super::*; use crate::core::test_utils::TestProblem; + use crate::core::ArgminError; use crate::test_trait_impl; + use approx::assert_relative_eq; test_trait_impl!(steihaug, Steihaug); + + #[test] + fn test_new() { + let sh: Steihaug, f64> = Steihaug::new(); + + let Steihaug { + radius, + epsilon, + p, + r, + rtr, + r_0_norm, + d, + max_iters, + } = sh; + + assert_eq!(radius.to_ne_bytes(), f64::NAN.to_ne_bytes()); + assert_eq!(epsilon.to_ne_bytes(), 10e-10f64.to_ne_bytes()); + assert!(p.is_none()); + assert!(r.is_none()); + assert_eq!(rtr.to_ne_bytes(), f64::NAN.to_ne_bytes()); + assert_eq!(r_0_norm.to_ne_bytes(), f64::NAN.to_ne_bytes()); + assert!(d.is_none()); + assert_eq!(max_iters, u64::MAX); + } + + #[test] + fn test_with_tolerance() { + for tolerance in [f64::EPSILON, 1e-10, 1e-12, 1e-6, 1.0, 10.0, 100.0] { + let sh: Steihaug, f64> = Steihaug::new().with_epsilon(tolerance).unwrap(); + assert_eq!(sh.epsilon.to_ne_bytes(), tolerance.to_ne_bytes()); + } + + for tolerance in [-f64::EPSILON, 0.0, -1.0] { + let res: Result, f64>, _> = Steihaug::new().with_epsilon(tolerance); + assert_error!( + res, + ArgminError, + "Invalid parameter: \"`Steihaug`: epsilon must be > 0.0.\"" + ); + } + } + + #[test] + fn test_max_iters() { + let sh: Steihaug, f64> = Steihaug::new(); + + let Steihaug { max_iters, .. } = sh; + + assert_eq!(max_iters, u64::MAX); + + for iters in [1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144] { + let sh: Steihaug, f64> = Steihaug::new().with_max_iters(iters); + + let Steihaug { max_iters, .. } = sh; + + assert_eq!(max_iters, iters); + } + } + + #[test] + fn test_init() { + let grad: Vec = vec![1.0, 2.0]; + let hessian: Vec> = vec![vec![4.0, 3.0], vec![2.0, 1.0]]; + + let mut sh: Steihaug, f64> = Steihaug::new(); + sh.set_radius(1.0); + + // Forgot to initialize gradient + let state: IterState, Vec, (), Vec>, f64> = IterState::new(); + let problem = TestProblem::new(); + let res = sh.init(&mut Problem::new(problem), state); + assert_error!( + res, + ArgminError, + concat!( + "Not initialized: \"`Steihaug` requires an initial gradient. Please ", + "provide an initial gradient via `Executor`s `configure` method.\"" + ) + ); + + // Forgot to initialize Hessian + let state: IterState, Vec, (), Vec>, f64> = + IterState::new().grad(grad.clone()); + let problem = TestProblem::new(); + let res = sh.init(&mut Problem::new(problem), state); + assert_error!( + res, + ArgminError, + concat!( + "Not initialized: \"`Steihaug` requires an initial Hessian. Please ", + "provide an initial Hessian via `Executor`s `configure` method.\"" + ) + ); + + // All good. 
+ let state: IterState, Vec, (), Vec>, f64> = + IterState::new().grad(grad.clone()).hessian(hessian); + let problem = TestProblem::new(); + let (mut state_out, kv) = sh.init(&mut Problem::new(problem), state).unwrap(); + + assert!(kv.is_none()); + + let s_param = state_out.take_param().unwrap(); + + assert_relative_eq!(s_param[0], 0.0f64.sqrt(), epsilon = f64::EPSILON); + assert_relative_eq!(s_param[1], 0.0f64.sqrt(), epsilon = f64::EPSILON); + + let Steihaug { + radius, + epsilon, + p, + r, + rtr, + r_0_norm, + d, + max_iters, + } = sh; + + assert_eq!(radius.to_ne_bytes(), 1.0f64.to_ne_bytes()); + assert_eq!(epsilon.to_ne_bytes(), 10e-10f64.to_ne_bytes()); + assert_relative_eq!(p.as_ref().unwrap()[0], 0.0f64, epsilon = f64::EPSILON); + assert_relative_eq!(p.as_ref().unwrap()[1], 0.0f64, epsilon = f64::EPSILON); + assert_relative_eq!(r.as_ref().unwrap()[0], grad[0], epsilon = f64::EPSILON); + assert_relative_eq!(r.as_ref().unwrap()[1], grad[1], epsilon = f64::EPSILON); + assert_eq!(rtr.to_ne_bytes(), 5.0f64.to_ne_bytes()); + assert_eq!(r_0_norm.to_ne_bytes(), (5.0f64).sqrt().to_ne_bytes()); + assert_relative_eq!(d.as_ref().unwrap()[0], -grad[0], epsilon = f64::EPSILON); + assert_relative_eq!(d.as_ref().unwrap()[1], -grad[1], epsilon = f64::EPSILON); + assert_eq!(max_iters, u64::MAX); + } } diff --git a/argmin/src/solver/trustregion/trustregion_method.rs b/argmin/src/solver/trustregion/trustregion_method.rs index 6b00e83fd..92034d841 100644 --- a/argmin/src/solver/trustregion/trustregion_method.rs +++ b/argmin/src/solver/trustregion/trustregion_method.rs @@ -5,11 +5,6 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. -//! # References: -//! -//! \[0\] Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization. -//! Springer. ISBN 0-387-30303-0. - use crate::core::{ ArgminFloat, CostFunction, DeserializeOwnedAlias, Error, Executor, Gradient, Hessian, IterState, OptimizationResult, Problem, SerializeAlias, Solver, TerminationReason, @@ -20,34 +15,33 @@ use argmin_math::{ArgminAdd, ArgminDot, ArgminNorm, ArgminWeightedDot}; #[cfg(feature = "serde1")] use serde::{Deserialize, Serialize}; +/// # Trust region method +/// /// The trust region method approximates the cost function within a certain region around the /// current point in parameter space. Depending on the quality of this approximation, the region is /// either expanded or contracted. /// -/// The calculation of the actual step length and direction is done by one of the following -/// methods: +/// The calculation of the actual step length and direction is performed by a method which +/// implements [`TrustRegionRadius`](`crate::solver::trustregion::TrustRegionRadius`), such as: /// -/// * [Cauchy point](../cauchypoint/struct.CauchyPoint.html) -/// * [Dogleg method](../dogleg/struct.Dogleg.html) -/// * [Steihaug method](../steihaug/struct.Steihaug.html) +/// * [Cauchy point](`crate::solver::trustregion::CauchyPoint`) +/// * [Dogleg method](`crate::solver::trustregion::Dogleg`) +/// * [Steihaug method](`crate::solver::trustregion::Steihaug`) /// -/// This subproblem can be set via `set_subproblem(...)`. If this is not provided, it will default -/// to the Steihaug method. +/// ## Reference /// -/// # References: -/// -/// \[0\] Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization. +/// Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization. /// Springer. ISBN 0-387-30303-0. 
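+///
+/// ## Example
+///
+/// A minimal sketch of configuring the solver via the `with_` builder methods introduced
+/// here (a `CauchyPoint` subproblem is used for brevity; any implementor of
+/// [`TrustRegionRadius`](`crate::solver::trustregion::TrustRegionRadius`) works):
+///
+/// ```
+/// # use argmin::core::Error;
+/// # fn main() -> Result<(), Error> {
+/// use argmin::solver::trustregion::{CauchyPoint, TrustRegion};
+///
+/// let subproblem: CauchyPoint<f64> = CauchyPoint::new();
+///
+/// // Each setter validates its argument and therefore returns a `Result`.
+/// let solver: TrustRegion<_, f64> = TrustRegion::new(subproblem)
+///     .with_radius(0.5)?
+///     .with_max_radius(10.0)?
+///     .with_eta(0.1)?;
+/// # Ok(())
+/// # }
+/// ```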
#[derive(Clone)] #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] pub struct TrustRegion { /// Radius radius: F, - /// Maximum Radius + /// Maximum radius max_radius: F, /// eta \in [0, 1/4) eta: F, - /// subproblem + /// subproblem (must implement [`crate::solver::trustregion::TrustRegionRadius`]) subproblem: R, /// f(xk) fxk: F, @@ -59,7 +53,15 @@ impl TrustRegion where F: ArgminFloat, { - /// Constructor + /// Construct a new instance of [`TrustRegion`] + /// + /// # Example + /// + /// ``` + /// use argmin::solver::trustregion::{CauchyPoint, TrustRegion}; + /// let cp: CauchyPoint = CauchyPoint::new(); + /// let tr: TrustRegion<_, f64> = TrustRegion::new(cp); + /// ``` pub fn new(subproblem: R) -> Self { TrustRegion { radius: float!(1.0), @@ -71,26 +73,78 @@ where } } - /// set radius - #[must_use] - pub fn radius(mut self, radius: F) -> Self { + /// Set radius + /// + /// Defaults to `1.0`. + /// + /// # Example + /// + /// ``` + /// # use argmin::solver::trustregion::{TrustRegion, CauchyPoint}; + /// # use argmin::core::Error; + /// # fn main() -> Result<(), Error> { + /// let cp: CauchyPoint = CauchyPoint::new(); + /// let tr: TrustRegion<_, f64> = TrustRegion::new(cp).with_radius(0.8)?; + /// # Ok(()) + /// # } + /// ``` + pub fn with_radius(mut self, radius: F) -> Result { + if radius <= float!(0.0) { + return Err(argmin_error!( + InvalidParameter, + "`TrustRegion`: radius must be > 0." + )); + } self.radius = radius; - self + Ok(self) } /// Set maximum radius - #[must_use] - pub fn max_radius(mut self, max_radius: F) -> Self { + /// + /// Defaults to `100.0`. + /// + /// # Example + /// + /// ``` + /// # use argmin::solver::trustregion::{TrustRegion, CauchyPoint}; + /// # use argmin::core::Error; + /// # fn main() -> Result<(), Error> { + /// let cp: CauchyPoint = CauchyPoint::new(); + /// let tr: TrustRegion<_, f64> = TrustRegion::new(cp).with_max_radius(1000.0)?; + /// # Ok(()) + /// # } + /// ``` + pub fn with_max_radius(mut self, max_radius: F) -> Result { + if max_radius <= float!(0.0) { + return Err(argmin_error!( + InvalidParameter, + "`TrustRegion`: maximum radius must be > 0." + )); + } self.max_radius = max_radius; - self + Ok(self) } /// Set eta - pub fn eta(mut self, eta: F) -> Result { + /// + /// Must lie in `[0, 1/4)` and defaults to `0.125`. + /// + /// # Example + /// + /// ``` + /// # use argmin::solver::trustregion::{TrustRegion, CauchyPoint}; + /// # use argmin::core::Error; + /// # fn main() -> Result<(), Error> { + /// let cp: CauchyPoint = CauchyPoint::new(); + /// let tr: TrustRegion<_, f64> = TrustRegion::new(cp).with_eta(0.2)?; + /// # Ok(()) + /// # } + /// ``` + pub fn with_eta(mut self, eta: F) -> Result { if eta >= float!(0.25) || eta < float!(0.0) { return Err(argmin_error!( InvalidParameter, - "TrustRegion: eta must be in [0, 1/4)." + "`TrustRegion`: eta must be in [0, 1/4)." )); } self.eta = eta; @@ -122,10 +176,31 @@ where problem: &mut Problem, mut state: IterState, ) -> Result<(IterState, Option), Error> { - let param = state.take_param().unwrap(); - let grad = problem.gradient(¶m)?; - let hessian = problem.hessian(¶m)?; - self.fxk = problem.cost(¶m)?; + let param = state.take_param().ok_or_else(argmin_error_closure!( + NotInitialized, + concat!( + "`TrustRegion` requires an initial parameter vector. ", + "Please provide an initial guess via `Executor`s `configure` method." 
+ ) + ))?; + + let grad = state + .take_grad() + .map(Result::Ok) + .unwrap_or_else(|| problem.gradient(¶m))?; + + let hessian = state + .take_hessian() + .map(Result::Ok) + .unwrap_or_else(|| problem.hessian(¶m))?; + + let cost = state.get_cost(); + self.fxk = if cost.is_infinite() && cost.is_sign_positive() { + problem.cost(¶m)? + } else { + cost + }; + self.mk0 = self.fxk; Ok(( state @@ -142,15 +217,20 @@ where problem: &mut Problem, mut state: IterState, ) -> Result<(IterState, Option), Error> { - let param = state.take_param().unwrap(); - let grad = state - .take_grad() - .map(Result::Ok) - .unwrap_or_else(|| problem.gradient(¶m))?; - let hessian = state - .take_hessian() - .map(Result::Ok) - .unwrap_or_else(|| problem.hessian(¶m))?; + let param = state.take_param().ok_or_else(argmin_error_closure!( + PotentialBug, + "`TrustRegion`: Parameter vector in state not set." + ))?; + + let grad = state.take_grad().ok_or_else(argmin_error_closure!( + PotentialBug, + "`TrustRegion`: Gradient in state not set." + ))?; + + let hessian = state.take_hessian().ok_or_else(argmin_error_closure!( + PotentialBug, + "`TrustRegion`: Hessian in state not set." + ))?; self.subproblem.set_radius(self.radius); @@ -182,6 +262,7 @@ where let pk_norm = pk.norm(); let cur_radius = self.radius; + self.radius = if rho < float!(0.25) { float!(0.25) * pk_norm } else if rho > float!(0.75) && (pk_norm - self.radius).abs() <= float!(10.0) * F::epsilon() @@ -214,7 +295,6 @@ where } fn terminate(&mut self, _state: &IterState) -> TerminationReason { - // todo TerminationReason::NotTerminated } } @@ -223,8 +303,137 @@ where mod tests { use super::*; use crate::core::test_utils::TestProblem; - use crate::solver::trustregion::steihaug::Steihaug; + use crate::core::{ArgminError, State}; + use crate::solver::trustregion::{CauchyPoint, Steihaug}; use crate::test_trait_impl; test_trait_impl!(trustregion, TrustRegion, f64>); + + #[test] + fn test_new() { + let cp: CauchyPoint = CauchyPoint::new(); + let tr: TrustRegion<_, f64> = TrustRegion::new(cp); + + let TrustRegion { + radius, + max_radius, + eta, + subproblem: _, + fxk, + mk0, + } = tr; + + assert_eq!(radius.to_ne_bytes(), 1.0f64.to_ne_bytes()); + assert_eq!(max_radius.to_ne_bytes(), 100.0f64.to_ne_bytes()); + assert_eq!(eta.to_ne_bytes(), 0.125f64.to_ne_bytes()); + assert_eq!(fxk.to_ne_bytes(), f64::NAN.to_ne_bytes()); + assert_eq!(mk0.to_ne_bytes(), f64::NAN.to_ne_bytes()); + } + + #[test] + fn test_with_radius() { + // correct parameters + for radius in [std::f64::EPSILON, 1e-2, 1.0, 2.0, 10.0, 100.0] { + let cp: CauchyPoint = CauchyPoint::new(); + let tr: TrustRegion<_, f64> = TrustRegion::new(cp); + let res = tr.with_radius(radius); + assert!(res.is_ok()); + + let nm = res.unwrap(); + assert_eq!(nm.radius.to_ne_bytes(), radius.to_ne_bytes()); + } + + // incorrect parameters + for radius in [0.0, -f64::EPSILON, -1.0, -100.0, -42.0] { + let cp: CauchyPoint = CauchyPoint::new(); + let tr: TrustRegion<_, f64> = TrustRegion::new(cp); + let res = tr.with_radius(radius); + assert_error!( + res, + ArgminError, + "Invalid parameter: \"`TrustRegion`: radius must be > 0.\"" + ); + } + } + + #[test] + fn test_with_eta() { + // correct parameters + for eta in [ + 0.0, + std::f64::EPSILON, + 1e-2, + 0.125, + 0.25 - std::f64::EPSILON, + ] { + let cp: CauchyPoint = CauchyPoint::new(); + let tr: TrustRegion<_, f64> = TrustRegion::new(cp); + let res = tr.with_eta(eta); + assert!(res.is_ok()); + + let nm = res.unwrap(); + assert_eq!(nm.eta.to_ne_bytes(), eta.to_ne_bytes()); + } + + // 
incorrect parameters + for eta in [-f64::EPSILON, -1.0, -100.0, -42.0, 0.25, 1.0] { + let cp: CauchyPoint = CauchyPoint::new(); + let tr: TrustRegion<_, f64> = TrustRegion::new(cp); + let res = tr.with_eta(eta); + assert_error!( + res, + ArgminError, + "Invalid parameter: \"`TrustRegion`: eta must be in [0, 1/4).\"" + ); + } + } + + #[test] + fn test_init() { + let param: Vec = vec![1.0, 2.0]; + + let cp: CauchyPoint = CauchyPoint::new(); + let mut tr: TrustRegion<_, f64> = TrustRegion::new(cp); + + // Forgot to initialize parameter vector + let state: IterState, Vec, (), Vec>, f64> = IterState::new(); + let problem = TestProblem::new(); + let res = tr.init(&mut Problem::new(problem), state); + assert_error!( + res, + ArgminError, + concat!( + "Not initialized: \"`TrustRegion` requires an initial parameter vector. Please ", + "provide an initial guess via `Executor`s `configure` method.\"" + ) + ); + + // All good. + let state: IterState, Vec, (), Vec>, f64> = + IterState::new().param(param.clone()); + let problem = TestProblem::new(); + let (mut state_out, kv) = tr.init(&mut Problem::new(problem), state).unwrap(); + + assert!(kv.is_none()); + + let s_param = state_out.take_param().unwrap(); + + assert_eq!(s_param[0].to_ne_bytes(), param[0].to_ne_bytes()); + assert_eq!(s_param[1].to_ne_bytes(), param[1].to_ne_bytes()); + + let TrustRegion { + radius, + max_radius, + eta, + subproblem: _, + fxk, + mk0, + } = tr; + + assert_eq!(radius.to_ne_bytes(), 1.0f64.to_ne_bytes()); + assert_eq!(max_radius.to_ne_bytes(), 100.0f64.to_ne_bytes()); + assert_eq!(eta.to_ne_bytes(), 0.125f64.to_ne_bytes()); + assert_eq!(fxk.to_ne_bytes(), 1.0f64.sqrt().to_ne_bytes()); + assert_eq!(mk0.to_ne_bytes(), 1.0f64.to_ne_bytes()); + } }
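For readers trying out the renamed `with_` builders, a compile-level sketch of the intended end-to-end flow follows. The `Paraboloid` toy problem and the `Executor::new(..).configure(..)` call chain are illustrative assumptions based on the API referenced in the error messages above, not part of this patch:

```
use argmin::core::{CostFunction, Error, Executor, Gradient, Hessian};
use argmin::solver::trustregion::{Steihaug, TrustRegion};

// Hypothetical toy problem: f(x) = x1^2 + x2^2.
struct Paraboloid {}

impl CostFunction for Paraboloid {
    type Param = Vec<f64>;
    type Output = f64;

    fn cost(&self, p: &Self::Param) -> Result<Self::Output, Error> {
        Ok(p.iter().map(|x| x * x).sum())
    }
}

impl Gradient for Paraboloid {
    type Param = Vec<f64>;
    type Gradient = Vec<f64>;

    fn gradient(&self, p: &Self::Param) -> Result<Self::Gradient, Error> {
        Ok(p.iter().map(|x| 2.0 * x).collect())
    }
}

impl Hessian for Paraboloid {
    type Param = Vec<f64>;
    type Hessian = Vec<Vec<f64>>;

    fn hessian(&self, _p: &Self::Param) -> Result<Self::Hessian, Error> {
        // Constant Hessian of the paraboloid: 2 * identity.
        Ok(vec![vec![2.0, 0.0], vec![0.0, 2.0]])
    }
}

fn main() -> Result<(), Error> {
    // Subproblem and solver, using the `with_` setters from this patch.
    let subproblem: Steihaug<Vec<f64>, f64> = Steihaug::new().with_max_iters(20);
    let solver = TrustRegion::new(subproblem);

    // Assumed `Executor` flow: the initial guess is provided via `configure`,
    // as the new error messages suggest.
    let res = Executor::new(Paraboloid {}, solver)
        .configure(|state| state.param(vec![1.5, -0.5]).max_iters(30))
        .run()?;

    println!("{}", res);
    Ok(())
}
```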