diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 19587dfb..3c68ac66 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -11,11 +11,14 @@ in development
 * Added space mapping methods to cashocs. The space mapping methods can utilize parallelism via MPI.
 
+* Added polynomial-based models for computing trial stepsizes in an extended Armijo rule. This method will become the default line search in the future.
+
+* Implemented a wrapper for cashocs-convert, so that it can also be used from inside Python. Simply call cashocs.convert(inputfile).
+
 * cashocs print calls now flush the output buffer, which helps when sys.stdout is a file
 
 * cashocs' loggers are now not colored anymore, which makes reading the log easier if one logs to a file
 
-* implemented a wrapper for cashocs-convert, so that this can be used from inside python too. Simply call cashocs.convert(inputfile).
 
 * BFGS methods can now be used in a restarted fashion, if desired
 
@@ -24,6 +27,16 @@ in development
   * Section AlgoLBFGS
 
     * ``bfgs_periodic_restart`` is an integer parameter. If this is 0 (the default), no restarting is done. If this is >0, then the BFGS method is restarted after as many iterations, as given in the parameter
+
+  * Section LineSearch is a completely new section in which the line searches can be configured.
+
+    * ``method`` is a string parameter, which can take the values ``armijo`` (the previous line search, which remains the default) and ``polynomial`` (the new polynomial models)
+
+    * ``polynomial_model`` is a string parameter which can be either ``quadratic`` or ``cubic``. If this is ``quadratic``, three values (current function value, directional derivative, and trial function value) are used to generate a quadratic model of the one-dimensional cost functional. If this is ``cubic``, a cubic model is generated based on the last two guesses for the stepsize. These models are minimized exactly to obtain a new trial stepsize, and a safeguarding procedure is applied so that the steps remain feasible
+
+    * ``factor_high`` is one of the safeguarding parameters: the upper bound of the search interval for the stepsize (this is multiplied with the previous stepsize)
+
+    * ``factor_low`` is the other safeguarding parameter: the lower bound of the search interval for the stepsize (this is multiplied with the previous stepsize)
 
 1.8.0 (July 6, 2022)
 --------------------
diff --git a/cashocs/_optimization/line_search/__init__.py b/cashocs/_optimization/line_search/__init__.py
index 395b14a7..b2923e3f 100644
--- a/cashocs/_optimization/line_search/__init__.py
+++ b/cashocs/_optimization/line_search/__init__.py
@@ -19,5 +19,8 @@
 
 from cashocs._optimization.line_search.armijo_line_search import ArmijoLineSearch
 from cashocs._optimization.line_search.line_search import LineSearch
+from cashocs._optimization.line_search.polynomial_line_search import (
+    PolynomialLineSearch,
+)
 
-__all__ = ["ArmijoLineSearch", "LineSearch"]
+__all__ = ["ArmijoLineSearch", "LineSearch", "PolynomialLineSearch"]
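For reference, the new configuration section described in the CHANGELOG entry above can be used with a config file snippet like the following (a minimal sketch; apart from ``method``, which is switched to the new ``polynomial`` line search, the values shown are the defaults added in cashocs/io/config.py below) ::

    [LineSearch]
    method = polynomial
    polynomial_model = cubic
    factor_high = 0.5
    factor_low = 0.1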
diff --git a/cashocs/_optimization/line_search/polynomial_line_search.py b/cashocs/_optimization/line_search/polynomial_line_search.py
new file mode 100644
index 00000000..62768b2c
--- /dev/null
+++ b/cashocs/_optimization/line_search/polynomial_line_search.py
@@ -0,0 +1,330 @@
+# Copyright (C) 2020-2022 Sebastian Blauth
+#
+# This file is part of cashocs.
+#
+# cashocs is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# cashocs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with cashocs. If not, see <https://www.gnu.org/licenses/>.
+
+"""Module for a line search based on polynomial models."""
+
+
+from __future__ import annotations
+
+import collections
+from typing import Deque, List
+
+import fenics
+import numpy as np
+from typing_extensions import TYPE_CHECKING
+
+from cashocs import _exceptions
+from cashocs import _loggers
+from cashocs._optimization.line_search import line_search
+
+if TYPE_CHECKING:
+    from cashocs import types
+    from cashocs._optimization import optimization_algorithms
+
+
+class PolynomialLineSearch(line_search.LineSearch):
+    """Implementation of a line search based on polynomial interpolation models."""
+
+    def __init__(
+        self,
+        optimization_problem: types.OptimizationProblem,
+    ) -> None:
+        """Initializes self.
+
+        Args:
+            optimization_problem: The corresponding optimization problem.
+
+        """
+        super().__init__(optimization_problem)
+
+        self.epsilon_armijo: float = self.config.getfloat(
+            "OptimizationRoutine", "epsilon_armijo"
+        )
+        self.beta_armijo: float = self.config.getfloat(
+            "OptimizationRoutine", "beta_armijo"
+        )
+        self.armijo_stepsize_initial = self.stepsize
+        self.search_direction_inf = 1.0
+        self.decrease_measure_w_o_step = 1.0
+
+        self.factor_low = self.config.getfloat("LineSearch", "factor_low")
+        self.factor_high = self.config.getfloat("LineSearch", "factor_high")
+        self.polynomial_model = self.config.get(
+            "LineSearch", "polynomial_model"
+        ).casefold()
+        self.f_vals: Deque[float] = collections.deque()
+        self.alpha_vals: Deque[float] = collections.deque()
+
+    def _check_for_nonconvergence(
+        self, solver: optimization_algorithms.OptimizationAlgorithm
+    ) -> bool:
+        """Checks whether the line search failed to converge.
+
+        Args:
+            solver: The optimization algorithm, which uses the line search.
+
+        Returns:
+            A boolean, which is True if a termination / cancellation criterion is
+            satisfied.
+
+        """
+        if solver.iteration >= solver.maximum_iterations:
+            solver.remeshing_its = True
+            return True
+
+        if self.stepsize * self.search_direction_inf <= 1e-8:
+            _loggers.error("Stepsize too small.")
+            solver.line_search_broken = True
+            return True
+        elif (
+            not self.is_newton_like
+            and not self.is_newton
+            and self.stepsize / self.armijo_stepsize_initial <= 1e-8
+        ):
+            _loggers.error("Stepsize too small.")
+            solver.line_search_broken = True
+            return True
+
+        return False
+
+    def search(
+        self,
+        solver: optimization_algorithms.OptimizationAlgorithm,
+        search_direction: List[fenics.Function],
+        has_curvature_info: bool,
+    ) -> None:
+        """Performs the line search.
+
+        Args:
+            solver: The optimization algorithm.
+            search_direction: The current search direction.
+            has_curvature_info: A flag which indicates whether the direction is
+                (presumably) scaled.
+ + """ + self.search_direction_inf = np.max( + [ + search_direction[i].vector().norm("linf") + for i in range(len(self.gradient)) + ] + ) + + if has_curvature_info: + self.stepsize = 1.0 + + num_decreases = ( + self.optimization_variable_abstractions.compute_a_priori_decreases( + search_direction, self.stepsize + ) + ) + self.stepsize /= pow(self.beta_armijo, num_decreases) + + if self.safeguard_stepsize and solver.iteration == 0: + search_direction_norm = np.sqrt( + self.form_handler.scalar_product(search_direction, search_direction) + ) + self.stepsize = float( + np.minimum(self.stepsize, 100.0 / (1.0 + search_direction_norm)) + ) + + self.f_vals.clear() + self.alpha_vals.clear() + while True: + if self._check_for_nonconvergence(solver): + return None + + if self.is_shape_problem: + self.decrease_measure_w_o_step = ( + self.optimization_variable_abstractions.compute_decrease_measure( + search_direction + ) + ) + self.stepsize = ( + self.optimization_variable_abstractions.update_optimization_variables( + search_direction, self.stepsize, self.beta_armijo + ) + ) + self.alpha_vals.append(self.stepsize) + + current_function_value = solver.objective_value + + self.state_problem.has_solution = False + objective_step = self.cost_functional.evaluate() + self.f_vals.append(objective_step) + + decrease_measure = self._compute_decrease_measure(search_direction) + + if ( + objective_step + < current_function_value + self.epsilon_armijo * decrease_measure + ): + if self.optimization_variable_abstractions.requires_remeshing(): + solver.requires_remeshing = True + return None + + if solver.iteration == 0: + self.armijo_stepsize_initial = self.stepsize + break + + else: + self.stepsize = self._compute_polynomial_stepsize( + current_function_value, + decrease_measure, + self.f_vals, + self.alpha_vals, + ) + self.optimization_variable_abstractions.revert_variable_update() + + solver.stepsize = self.stepsize + + if not has_curvature_info: + self.stepsize /= self.factor_high + + return None + + def _compute_polynomial_stepsize( + self, + f_current: float, + decrease_measure: float, + f_vals: Deque[float], + alpha_vals: Deque[float], + ) -> float: + """Computes a stepsize based on polynomial models. + + Args: + f_current: Current function value + decrease_measure: Current directional derivative in descent direction + f_vals: History of trial function values + alpha_vals: History of trial stepsizes + + Returns: + A new stepsize based on polynomial interpolation. + + """ + if len(f_vals) == 1: + alpha = self._quadratic_stepsize_model( + f_current, decrease_measure, f_vals, alpha_vals + ) + elif len(f_vals) == 2: + alpha = self._cubic_stepsize_model( + f_current, decrease_measure, f_vals, alpha_vals + ) + else: + raise _exceptions.CashocsException("This code should not be reached.") + + if alpha < self.factor_low * alpha_vals[-1]: + stepsize = self.factor_low * alpha_vals[-1] + elif alpha > self.factor_high * alpha_vals[-1]: + stepsize = self.factor_high * alpha_vals[-1] + else: + stepsize = alpha + + if (self.polynomial_model == "quadratic" and len(f_vals) == 1) or ( + self.polynomial_model == "cubic" and len(f_vals) == 2 + ): + f_vals.popleft() + alpha_vals.popleft() + + return float(stepsize) + + def _quadratic_stepsize_model( + self, + f_current: float, + decrease_measure: float, + f_vals: Deque[float], + alpha_vals: Deque[float], + ) -> float: + """Computes a trial stepsize based on a quadratic model. 
+
+        Args:
+            f_current: Current function value
+            decrease_measure: Current directional derivative in descent direction
+            f_vals: History of trial function values
+            alpha_vals: History of trial stepsizes
+
+        Returns:
+            A new trial stepsize based on quadratic interpolation
+
+        """
+        # Exact minimizer of the quadratic model m(alpha) with m(0) = f_current,
+        # m'(0) = decrease_measure, and m(alpha_vals[0]) = f_vals[0].
+        stepsize = -(decrease_measure * self.stepsize**2) / (
+            2.0 * (f_vals[0] - f_current - decrease_measure * alpha_vals[0])
+        )
+        return stepsize
+
+    def _cubic_stepsize_model(
+        self,
+        f_current: float,
+        decrease_measure: float,
+        f_vals: Deque[float],
+        alpha_vals: Deque[float],
+    ) -> float:
+        """Computes a trial stepsize based on a cubic model.
+
+        Args:
+            f_current: Current function value
+            decrease_measure: Current directional derivative in descent direction
+            f_vals: History of trial function values
+            alpha_vals: History of trial stepsizes
+
+        Returns:
+            A new trial stepsize based on cubic interpolation
+
+        """
+        # Coefficients a and b of the cubic model
+        # m(alpha) = a alpha^3 + b alpha^2 + decrease_measure alpha + f_current,
+        # obtained by interpolating the two most recent trial stepsizes.
+        coeffs = (
+            1
+            / (
+                alpha_vals[0] ** 2
+                * alpha_vals[1] ** 2
+                * (alpha_vals[1] - alpha_vals[0])
+            )
+            * np.array(
+                [
+                    [alpha_vals[0] ** 2, -alpha_vals[1] ** 2],
+                    [-alpha_vals[0] ** 3, alpha_vals[1] ** 3],
+                ]
+            )
+            @ np.array(
+                [
+                    f_vals[1] - f_current - decrease_measure * alpha_vals[1],
+                    f_vals[0] - f_current - decrease_measure * alpha_vals[0],
+                ]
+            )
+        )
+        a, b = coeffs
+        # Local minimizer of the cubic model.
+        stepsize = (-b + np.sqrt(b**2 - 3 * a * decrease_measure)) / (3 * a)
+        return float(stepsize)
+
+    def _compute_decrease_measure(
+        self, search_direction: List[fenics.Function]
+    ) -> float:
+        """Computes the decrease measure for the Armijo test.
+
+        Args:
+            search_direction: The current search direction.
+
+        Returns:
+            The computed decrease measure.
+
+        """
+        if self.is_control_problem:
+            return self.optimization_variable_abstractions.compute_decrease_measure(
+                search_direction
+            )
+        elif self.is_shape_problem:
+            return self.decrease_measure_w_o_step * self.stepsize
+        else:
+            return float("inf")
diff --git a/cashocs/_optimization/optimal_control/optimal_control_problem.py b/cashocs/_optimization/optimal_control/optimal_control_problem.py
index f41f2aa9..a8b31a61 100755
--- a/cashocs/_optimization/optimal_control/optimal_control_problem.py
+++ b/cashocs/_optimization/optimal_control/optimal_control_problem.py
@@ -423,7 +423,12 @@ def solve(
         self.optimization_variable_abstractions = (
             optimal_control.ControlVariableAbstractions(self)
         )
-        self.line_search = line_search.ArmijoLineSearch(self)
+
+        line_search_type = self.config.get("LineSearch", "method").casefold()
+        if line_search_type == "armijo":
+            self.line_search = line_search.ArmijoLineSearch(self)
+        elif line_search_type == "polynomial":
+            self.line_search = line_search.PolynomialLineSearch(self)
 
         if self.algorithm.casefold() == "newton":
             self.form_handler.compute_newton_forms()
diff --git a/cashocs/_optimization/shape_optimization/shape_optimization_problem.py b/cashocs/_optimization/shape_optimization/shape_optimization_problem.py
index 3ce248a9..8fe78ab7 100755
--- a/cashocs/_optimization/shape_optimization/shape_optimization_problem.py
+++ b/cashocs/_optimization/shape_optimization/shape_optimization_problem.py
@@ -488,7 +488,12 @@ def solve(
         self.optimization_variable_abstractions = (
             shape_variable_abstractions.ShapeVariableAbstractions(self)
         )
-        self.line_search = line_search.ArmijoLineSearch(self)
+
+        line_search_type = self.config.get("LineSearch", "method").casefold()
+        if line_search_type == "armijo":
+            self.line_search = line_search.ArmijoLineSearch(self)
+        elif line_search_type == "polynomial":
+            self.line_search = line_search.PolynomialLineSearch(self)
 
         # TODO: Do not pass the line search (unnecessary)
         if self.algorithm.casefold() == "gradient_descent":
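With the dispatch above in place, the new line search can be selected from user code by adjusting the configuration before solving. A minimal sketch (the file name ``config.ini`` is a placeholder, and this assumes the config object behaves like a ConfigParser, so that ``set`` is available) ::

    import cashocs

    cfg = cashocs.load_config("config.ini")  # placeholder file name
    cfg.set("LineSearch", "method", "polynomial")
    cfg.set("LineSearch", "polynomial_model", "quadratic")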
diff --git a/cashocs/io/config.py b/cashocs/io/config.py
index 148745d8..9e9caab6 100644
--- a/cashocs/io/config.py
+++ b/cashocs/io/config.py
@@ -230,6 +230,25 @@ def __init__(self, config_file: Optional[str] = None) -> None:
                 "type": "bool",
             },
         },
+        "LineSearch": {
+            "method": {
+                "type": "str",
+                "possible_options": ["armijo", "polynomial"],
+            },
+            "polynomial_model": {
+                "type": "str",
+                "possible_options": ["cubic", "quadratic"],
+            },
+            "factor_low": {
+                "type": "float",
+                "attributes": ["less_than_one", "positive"],
+            },
+            "factor_high": {
+                "type": "float",
+                "attributes": ["less_than_one", "positive"],
+                "larger_than": ("LineSearch", "factor_low"),
+            },
+        },
         "AlgoLBFGS": {
             "bfgs_memory_size": {
                 "type": "int",
@@ -537,6 +556,12 @@ def __init__(self, config_file: Optional[str] = None) -> None:
 gradient_tol = 1e-9
 gradient_method = direct
 
+[LineSearch]
+method = armijo
+polynomial_model = cubic
+factor_high = 0.5
+factor_low = 0.1
+
 [ShapeGradient]
 lambda_lame = 0.0
 damping_factor = 0.0
diff --git a/docs/source/demos/optimal_control/doc_config.rst b/docs/source/demos/optimal_control/doc_config.rst
index 62dec281..f7cfaf21 100755
--- a/docs/source/demos/optimal_control/doc_config.rst
+++ b/docs/source/demos/optimal_control/doc_config.rst
@@ -261,6 +261,30 @@
 The following sections describe parameters that belong to the certain solution algorithms.
 
+.. _config_ocp_linesearch:
+
+Section LineSearch
+------------------
+
+In this section, parameters regarding the line search can be specified. The type of the line search can be chosen via the parameter ::
+
+    method = armijo
+
+Possible options are ``armijo``, which performs a simple backtracking line search based on the Armijo rule with fixed steps (think of halving the stepsize in each iteration), and ``polynomial``, which uses polynomial models of the cost functional restricted to the line to generate "better" guesses for the stepsize. The default is ``armijo``. However, this will change in the future, and users are encouraged to try the new polynomial line search models.
+
+The next parameter, ``polynomial_model``, specifies which type of polynomial is used to generate new trial stepsizes. It is set via ::
+
+    polynomial_model = cubic
+
+The parameter can either be ``quadratic`` or ``cubic``. If this is ``quadratic``, a quadratic interpolation polynomial along the search direction is generated and minimized analytically to generate a new trial stepsize. Here, only the current function value, the directional derivative of the cost functional in the direction of the search direction, and the most recent trial stepsize are used to generate the polynomial. If ``polynomial_model`` is set to ``cubic``, the last two trial stepsizes (when available) are used in addition to the current cost functional value and the directional derivative to generate a cubic model of the one-dimensional cost functional, which is then minimized to compute a new trial stepsize.
+
+For the polynomial models, we also have a safeguarding procedure, which ensures that trial stepsizes cannot be chosen too large or too small, and which can be configured with the following two parameters. The trial stepsizes generated by the polynomial models are projected to the interval :math:`[\beta_{low} \alpha, \beta_{high} \alpha]`, where :math:`\alpha` is the previous trial stepsize and :math:`\beta_{low}, \beta_{high}` are factors which can be set via the parameters ``factor_low`` and ``factor_high``. In the config file, this can look as follows ::
+
+    factor_high = 0.5
+    factor_low = 0.1
+
+and the values specified here are also the default values for these parameters.
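For reference, the quadratic model that is minimized analytically can be written out explicitly (a sketch matching ``_quadratic_stepsize_model`` in polynomial_line_search.py; here, :math:`\varphi(\alpha)` denotes the cost functional restricted to the search line, so that :math:`\varphi(0)` is the current function value, :math:`\varphi'(0)` the directional derivative, and :math:`\alpha_0` the most recent trial stepsize):

.. math::

    \alpha^* = - \frac{\varphi'(0) \alpha_0^2}{2 \left( \varphi(\alpha_0) - \varphi(0) - \varphi'(0) \alpha_0 \right)},

which is subsequently projected to the safeguarding interval :math:`[\beta_{low} \alpha_0, \beta_{high} \alpha_0]`.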
+
 .. _config_ocp_algolbfgs:
 
 Section AlgoLBFGS
@@ -573,6 +597,28 @@ in the following.
     - if ``True``, the optimization algorithm does not raise an exception if it did not converge
 
+
+[LineSearch]
+************
+
+.. list-table::
+    :header-rows: 1
+
+    * - Parameter
+      - Default value
+      - Remarks
+    * - method
+      - ``armijo``
+      - ``armijo`` is a simple backtracking line search, whereas ``polynomial`` uses polynomial models to compute trial stepsizes.
+    * - polynomial_model
+      - ``cubic``
+      - Specifies whether a ``cubic`` or ``quadratic`` model is used for computing trial stepsizes
+    * - factor_high
+      - ``0.5``
+      - Safeguard for the stepsize, upper bound
+    * - factor_low
+      - ``0.1``
+      - Safeguard for the stepsize, lower bound
 
 [AlgoLBFGS]
 ***********
diff --git a/docs/source/demos/shape_optimization/doc_config.rst b/docs/source/demos/shape_optimization/doc_config.rst
index 52f886af..7e0eedbe 100755
--- a/docs/source/demos/shape_optimization/doc_config.rst
+++ b/docs/source/demos/shape_optimization/doc_config.rst
@@ -278,6 +278,30 @@ raises an exception and terminates. This is set via ::
 
 and is set to ``False`` by default.
 
+.. _config_shape_linesearch:
+
+Section LineSearch
+------------------
+
+In this section, parameters regarding the line search can be specified. The type of the line search can be chosen via the parameter ::
+
+    method = armijo
+
+Possible options are ``armijo``, which performs a simple backtracking line search based on the Armijo rule with fixed steps (think of halving the stepsize in each iteration), and ``polynomial``, which uses polynomial models of the cost functional restricted to the line to generate "better" guesses for the stepsize. The default is ``armijo``. However, this will change in the future, and users are encouraged to try the new polynomial line search models.
+
+The next parameter, ``polynomial_model``, specifies which type of polynomial is used to generate new trial stepsizes. It is set via ::
+
+    polynomial_model = cubic
+
+The parameter can either be ``quadratic`` or ``cubic``. If this is ``quadratic``, a quadratic interpolation polynomial along the search direction is generated and minimized analytically to generate a new trial stepsize. Here, only the current function value, the directional derivative of the cost functional in the direction of the search direction, and the most recent trial stepsize are used to generate the polynomial. If ``polynomial_model`` is set to ``cubic``, the last two trial stepsizes (when available) are used in addition to the current cost functional value and the directional derivative to generate a cubic model of the one-dimensional cost functional, which is then minimized to compute a new trial stepsize.
+
+For the polynomial models, we also have a safeguarding procedure, which ensures that trial stepsizes cannot be chosen too large or too small, and which can be configured with the following two parameters. The trial stepsizes generated by the polynomial models are projected to the interval :math:`[\beta_{low} \alpha, \beta_{high} \alpha]`, where :math:`\alpha` is the previous trial stepsize and :math:`\beta_{low}, \beta_{high}` are factors which can be set via the parameters ``factor_low`` and ``factor_high``. In the config file, this can look as follows ::
+
+    factor_high = 0.5
+    factor_low = 0.1
+
+and the values specified here are also the default values for these parameters.
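Analogously, the cubic model used for ``polynomial_model = cubic`` can be written out (a sketch matching ``_cubic_stepsize_model`` in polynomial_line_search.py; :math:`\alpha_0, \alpha_1` denote the last two trial stepsizes and :math:`\varphi` is again the cost functional restricted to the search line). The model :math:`m(\alpha) = a \alpha^3 + b \alpha^2 + \varphi'(0) \alpha + \varphi(0)` interpolates both trial points, with coefficients

.. math::

    \begin{pmatrix} a \\ b \end{pmatrix}
    = \frac{1}{\alpha_0^2 \alpha_1^2 (\alpha_1 - \alpha_0)}
    \begin{pmatrix} \alpha_0^2 & -\alpha_1^2 \\ -\alpha_0^3 & \alpha_1^3 \end{pmatrix}
    \begin{pmatrix} \varphi(\alpha_1) - \varphi(0) - \varphi'(0) \alpha_1 \\ \varphi(\alpha_0) - \varphi(0) - \varphi'(0) \alpha_0 \end{pmatrix},

and its local minimizer

.. math::

    \alpha^* = \frac{-b + \sqrt{b^2 - 3 a \varphi'(0)}}{3 a}

is again projected to the safeguarding interval before it is used as the next trial stepsize.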
+
 .. _config_shape_algolbfgs:
 
 Section AlgoLBFGS
@@ -1001,7 +1025,30 @@ in the following.
     - if ``True``, the optimization algorithm does not raise an exception if it did not converge
 
+
+[LineSearch]
+************
+
+.. list-table::
+    :header-rows: 1
+
+    * - Parameter
+      - Default value
+      - Remarks
+    * - method
+      - ``armijo``
+      - ``armijo`` is a simple backtracking line search, whereas ``polynomial`` uses polynomial models to compute trial stepsizes.
+    * - polynomial_model
+      - ``cubic``
+      - Specifies whether a ``cubic`` or ``quadratic`` model is used for computing trial stepsizes
+    * - factor_high
+      - ``0.5``
+      - Safeguard for the stepsize, upper bound
+    * - factor_low
+      - ``0.1``
+      - Safeguard for the stepsize, lower bound
+
 [AlgoLBFGS]
 ***********
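Finally, as a self-contained illustration of the quadratic model and the safeguarding described in the documentation above, the following standalone Python sketch re-implements both formulas outside of cashocs (all function names are hypothetical; only the formulas are taken from polynomial_line_search.py) ::

    def quadratic_trial_stepsize(f0, df0, f_trial, alpha0):
        # Exact minimizer of m(a) = f0 + df0 * a + c * a**2, where c is chosen
        # such that m(alpha0) = f_trial (cf. _quadratic_stepsize_model).
        return -(df0 * alpha0**2) / (2.0 * (f_trial - f0 - df0 * alpha0))

    def safeguard(alpha, alpha_prev, factor_low=0.1, factor_high=0.5):
        # Project the trial stepsize onto [factor_low, factor_high] * alpha_prev
        # (cf. the clamping in _compute_polynomial_stepsize).
        return min(max(alpha, factor_low * alpha_prev), factor_high * alpha_prev)

    # Toy example: f(a) = (a - 2)**2, so f(0) = 4 and f'(0) = -4.
    alpha = quadratic_trial_stepsize(f0=4.0, df0=-4.0, f_trial=1.0, alpha0=1.0)
    print(alpha)                  # 2.0 -- the exact minimizer of f
    print(safeguard(alpha, 1.0))  # 0.5 -- clamped to factor_high * alpha_prev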