Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup and improve documentation of trust region method, added doc and unit tests #226

Merged
merged 1 commit into from
Jun 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion argmin/examples/sr1_trustregion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ fn run() -> Result<(), Error> {
// let init_hessian: Array2<f64> = Array2::eye(8);

// Set up the subproblem
let subproblem = Steihaug::new().max_iters(20);
let subproblem = Steihaug::new().with_max_iters(20);
// let subproblem = CauchyPoint::new();
// let subproblem = Dogleg::new();

Expand Down
7 changes: 4 additions & 3 deletions argmin/examples/trustregion_nd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ impl CostFunction for Rosenbrock {
Ok(rosenbrock_2d(&p.to_vec(), self.a, self.b))
}
}

impl Gradient for Rosenbrock {
type Param = Array1<f64>;
type Gradient = Array1<f64>;
Expand Down Expand Up @@ -59,9 +60,9 @@ fn run() -> Result<(), Error> {
let init_param: Array1<f64> = Array1::from(vec![-1.2, 1.0]);

// Set up the subproblem
// let subproblem = Steihaug::new().max_iters(2);
// let subproblem = CauchyPoint::new();
let subproblem = Dogleg::new();
// let subproblem = Steihaug::new().with_max_iters(2);
let subproblem = CauchyPoint::new();
// let subproblem = Dogleg::new();

// Set up solver
let solver = TrustRegion::new(subproblem);
Expand Down
2 changes: 1 addition & 1 deletion argmin/src/solver/quasinewton/sr1_trustregion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ where
/// ```
/// # use argmin::solver::quasinewton::SR1TrustRegion;
/// # let subproblem = ();
/// let subproblem = argmin::solver::trustregion::Steihaug::new().max_iters(20);
/// let subproblem = argmin::solver::trustregion::Steihaug::new().with_max_iters(20);
/// # // The next line defines the type of `subproblem`. This is done here hidden in order to
/// # // not litter the docs. When all of this is fed into an Executor, the compiler will
/// # // figure out the types.
Expand Down
91 changes: 81 additions & 10 deletions argmin/src/solver/trustregion/cauchypoint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,6 @@
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

//! # References:
//!
//! \[0\] Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization.
//! Springer. ISBN 0-387-30303-0.

use crate::core::{
ArgminFloat, Error, Gradient, Hessian, IterState, Problem, Solver, State, TerminationReason,
TrustRegionRadius, KV,
Expand All @@ -19,12 +14,14 @@ use argmin_math::{ArgminMul, ArgminNorm, ArgminWeightedDot};
use serde::{Deserialize, Serialize};
use std::fmt::Debug;

/// # Cauchy point method
///
/// The Cauchy point is the minimum of the quadratic approximation of the cost function within the
/// trust region along the direction given by the first derivative.
///
/// # References:
/// ## Reference
///
/// \[0\] Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization.
/// Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization.
/// Springer. ISBN 0-387-30303-0.
#[derive(Clone, Debug, Copy, PartialEq, PartialOrd, Default)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
Expand All @@ -37,7 +34,14 @@ impl<F> CauchyPoint<F>
where
F: ArgminFloat,
{
/// Constructor
/// Construct a new instance of [`CauchyPoint`]
///
/// # Example
///
/// ```
/// # use argmin::solver::trustregion::CauchyPoint;
/// let cp: CauchyPoint<f64> = CauchyPoint::new();
/// ```
pub fn new() -> Self {
CauchyPoint { radius: F::nan() }
}
Expand All @@ -57,29 +61,40 @@ where
problem: &mut Problem<O>,
mut state: IterState<P, G, (), H, F>,
) -> Result<(IterState<P, G, (), H, F>, Option<KV>), Error> {
let param = state.take_param().unwrap();
let param = state.take_param().ok_or_else(argmin_error_closure!(
NotInitialized,
concat!(
"`CauchyPoint` requires an initial parameter vector. ",
"Please provide an initial guess via `Executor`s `configure` method."
)
))?;

let grad = state
.take_grad()
.map(Result::Ok)
.unwrap_or_else(|| problem.gradient(&param))?;

let grad_norm = grad.norm();

let hessian = state
.take_hessian()
.map(Result::Ok)
.unwrap_or_else(|| problem.hessian(&param))?;

let wdp = grad.weighted_dot(&hessian, &grad);

let tau: F = if wdp <= float!(0.0) {
float!(1.0)
} else {
float!(1.0).min(grad_norm.powi(3) / (self.radius * wdp))
};

let new_param = grad.mul(&(-tau * self.radius / grad_norm));
Ok((state.param(new_param).grad(grad).hessian(hessian), None))
Ok((state.param(new_param), None))
}

fn terminate(&mut self, state: &IterState<P, G, (), H, F>) -> TerminationReason {
// Not an iterative algorithm
if state.get_iter() >= 1 {
TerminationReason::MaxItersReached
} else {
Expand All @@ -92,6 +107,17 @@ impl<F> TrustRegionRadius<F> for CauchyPoint<F>
where
F: ArgminFloat,
{
/// Update the trust region radius used when computing the next step.
///
/// Invoked by [`TrustRegion`](`crate::solver::trustregion::TrustRegion`) on
/// every outer iteration.
///
/// # Example
///
/// ```
/// use argmin::solver::trustregion::{CauchyPoint, TrustRegionRadius};
/// let mut cp: CauchyPoint<f64> = CauchyPoint::new();
/// cp.set_radius(0.8);
/// ```
fn set_radius(&mut self, radius: F) {
    self.radius = radius;
}
Expand All @@ -100,7 +126,52 @@ where
#[cfg(test)]
mod tests {
use super::*;
use crate::core::{test_utils::TestProblem, ArgminError};
use crate::test_trait_impl;
use approx::assert_relative_eq;

test_trait_impl!(cauchypoint, CauchyPoint<f64>);

#[test]
fn test_new() {
    // A freshly constructed `CauchyPoint` must have a NaN radius.
    // Compare bit patterns, since NaN != NaN under `==`.
    let CauchyPoint { radius } = CauchyPoint::<f64>::new();
    assert_eq!(radius.to_ne_bytes(), f64::NAN.to_ne_bytes());
}

#[test]
fn test_next_iter() {
    let mut cp: CauchyPoint<f64> = CauchyPoint::new();
    cp.set_radius(1.0);

    // Calling `next_iter` without an initial parameter vector must fail
    // with a descriptive `NotInitialized` error.
    let state: IterState<Vec<f64>, Vec<f64>, (), Vec<Vec<f64>>, f64> = IterState::new();
    assert_error!(
        cp.next_iter(&mut Problem::new(TestProblem::new()), state),
        ArgminError,
        concat!(
            "Not initialized: \"`CauchyPoint` requires an initial parameter vector. Please ",
            "provide an initial guess via `Executor`s `configure` method.\""
        )
    );

    // With an initial parameter vector the step is computed and no KV
    // metadata is attached.
    let state: IterState<Vec<f64>, Vec<f64>, (), Vec<Vec<f64>>, f64> =
        IterState::new().param(vec![-1.0, 1.0]);
    let (mut next_state, kv) = cp
        .next_iter(&mut Problem::new(TestProblem::new()), state)
        .unwrap();

    assert!(kv.is_none());

    let step = next_state.take_param().unwrap();

    assert_relative_eq!(step[0], 1.0f64 / 2.0f64.sqrt(), epsilon = f64::EPSILON);
    assert_relative_eq!(step[1], -1.0f64 / 2.0f64.sqrt(), epsilon = f64::EPSILON);
}
}
111 changes: 102 additions & 9 deletions argmin/src/solver/trustregion/dogleg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,6 @@
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

//! # References:
//!
//! \[0\] Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization.
//! Springer. ISBN 0-387-30303-0.

use crate::core::{
ArgminFloat, Error, Gradient, Hessian, IterState, Problem, Solver, State, TerminationReason,
TrustRegionRadius, KV,
Expand All @@ -20,13 +15,15 @@ use argmin_math::{
#[cfg(feature = "serde1")]
use serde::{Deserialize, Serialize};

/// # Dogleg method
///
/// The Dogleg method computes the intersection of the trust region boundary with a path given by
the unconstrained minimum along the steepest descent direction and the optimum of the quadratic
/// approximation of the cost function at the current point.
///
/// # References:
/// ## Reference
///
/// \[0\] Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization.
/// Jorge Nocedal and Stephen J. Wright (2006). Numerical Optimization.
/// Springer. ISBN 0-387-30303-0.
#[derive(Clone, Debug, Copy, PartialEq, PartialOrd, Default)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
Expand All @@ -39,7 +36,14 @@ impl<F> Dogleg<F>
where
F: ArgminFloat,
{
/// Constructor
/// Construct a new instance of [`Dogleg`]
///
/// # Example
///
/// ```
/// # use argmin::solver::trustregion::Dogleg;
/// let dl: Dogleg<f64> = Dogleg::new();
/// ```
pub fn new() -> Self {
Dogleg { radius: F::nan() }
}
Expand All @@ -64,15 +68,24 @@ where
problem: &mut Problem<O>,
mut state: IterState<P, P, (), H, F>,
) -> Result<(IterState<P, P, (), H, F>, Option<KV>), Error> {
let param = state.take_param().unwrap();
let param = state.take_param().ok_or_else(argmin_error_closure!(
NotInitialized,
concat!(
"`Dogleg` requires an initial parameter vector. ",
"Please provide an initial guess via `Executor`s `configure` method."
)
))?;

let g = state
.take_grad()
.map(Result::Ok)
.unwrap_or_else(|| problem.gradient(&param))?;

let h = state
.take_hessian()
.map(Result::Ok)
.unwrap_or_else(|| problem.hessian(&param))?;

let pstar;

// pb = -H^-1g
Expand Down Expand Up @@ -131,6 +144,17 @@ where
}

impl<F: ArgminFloat> TrustRegionRadius<F> for Dogleg<F> {
/// Update the trust region radius used when computing the next step.
///
/// Invoked by [`TrustRegion`](`crate::solver::trustregion::TrustRegion`) on
/// every outer iteration.
///
/// # Example
///
/// ```
/// use argmin::solver::trustregion::{Dogleg, TrustRegionRadius};
/// let mut dl: Dogleg<f64> = Dogleg::new();
/// dl.set_radius(0.8);
/// ```
fn set_radius(&mut self, radius: F) {
    self.radius = radius;
}
Expand All @@ -139,7 +163,76 @@ impl<F: ArgminFloat> TrustRegionRadius<F> for Dogleg<F> {
#[cfg(test)]
mod tests {
use super::*;
#[cfg(feature = "_ndarrayl")]
use crate::core::ArgminError;
use crate::test_trait_impl;

test_trait_impl!(dogleg, Dogleg<f64>);

#[test]
fn test_new() {
    // A freshly constructed `Dogleg` must have a NaN radius.
    // Compare bit patterns, since NaN != NaN under `==`.
    let Dogleg { radius } = Dogleg::<f64>::new();
    assert_eq!(radius.to_ne_bytes(), f64::NAN.to_ne_bytes());
}

#[cfg(feature = "_ndarrayl")]
#[test]
fn test_next_iter() {
    use approx::assert_relative_eq;
    use ndarray::{Array, Array1, Array2};

    // Minimal fixture returning a fixed gradient and Hessian so the Dogleg
    // step can be compared against precomputed values.
    struct Fixture {}

    impl Gradient for Fixture {
        type Param = Array1<f64>;
        type Gradient = Array1<f64>;

        fn gradient(&self, _p: &Self::Param) -> Result<Self::Gradient, Error> {
            Ok(Array1::from_vec(vec![0.5, 2.0]))
        }
    }

    impl Hessian for Fixture {
        type Param = Array1<f64>;
        type Hessian = Array2<f64>;

        fn hessian(&self, _p: &Self::Param) -> Result<Self::Hessian, Error> {
            Ok(Array::from_shape_vec((2, 2), vec![1f64, 2.0, 3.0, 4.0])?)
        }
    }

    let mut dl: Dogleg<f64> = Dogleg::new();
    dl.set_radius(1.0);

    // Calling `next_iter` without an initial parameter vector must fail
    // with a descriptive `NotInitialized` error.
    let state: IterState<Array1<f64>, Array1<f64>, (), Array2<f64>, f64> = IterState::new();
    assert_error!(
        dl.next_iter(&mut Problem::new(Fixture {}), state),
        ArgminError,
        concat!(
            "Not initialized: \"`Dogleg` requires an initial parameter vector. Please ",
            "provide an initial guess via `Executor`s `configure` method.\""
        )
    );

    // With an initial parameter vector the step matches the precomputed
    // solution and no KV metadata is attached.
    let state: IterState<Array1<f64>, Array1<f64>, (), Array2<f64>, f64> =
        IterState::new().param(Array1::from_vec(vec![-1.0, 1.0]));
    let (mut next_state, kv) = dl.next_iter(&mut Problem::new(Fixture {}), state).unwrap();

    assert!(kv.is_none());

    let step = next_state.take_param().unwrap();

    assert_relative_eq!(step[0], -0.9730617585026131, epsilon = f64::EPSILON);
    assert_relative_eq!(step[1], 0.2305446033629983, epsilon = f64::EPSILON);
}
}
Loading