Improve global bootstrap optimizer configuration (#2080)

lballabio · Oct 3, 2024 · e366a83 · e366a83
2 parents 460a948 + 0e4c3df
commit e366a83
Show file tree

Hide file tree

Showing 6 changed files with 89 additions and 54 deletions.
diff --git a/ql/math/optimization/endcriteria.cpp b/ql/math/optimization/endcriteria.cpp
@@ -158,23 +158,31 @@ namespace QuantLib {
         return gradientNormEpsilon_;
     }
 
+    bool EndCriteria::succeeded(EndCriteria::Type ecType) {
+        return ecType == StationaryPoint ||
+               ecType == StationaryFunctionValue ||
+               ecType == StationaryFunctionAccuracy;
+    }
+
     std::ostream& operator<<(std::ostream& out, EndCriteria::Type ec) {
         switch (ec) {
-        case QuantLib::EndCriteria::None:
+          case EndCriteria::None:
             return out << "None";
-        case QuantLib::EndCriteria::MaxIterations:
+          case EndCriteria::MaxIterations:
             return out << "MaxIterations";
-        case QuantLib::EndCriteria::StationaryPoint:
+          case EndCriteria::StationaryPoint:
             return out << "StationaryPoint";
-        case QuantLib::EndCriteria::StationaryFunctionValue:
+          case EndCriteria::StationaryFunctionValue:
             return out << "StationaryFunctionValue";
-        case QuantLib::EndCriteria::StationaryFunctionAccuracy:
+          case EndCriteria::StationaryFunctionAccuracy:
             return out << "StationaryFunctionAccuracy";
-        case QuantLib::EndCriteria::ZeroGradientNorm:
+          case EndCriteria::ZeroGradientNorm:
             return out << "ZeroGradientNorm";
-        case QuantLib::EndCriteria::Unknown:
+          case EndCriteria::FunctionEpsilonTooSmall:
+            return out << "FunctionEpsilonTooSmall";
+          case EndCriteria::Unknown:
             return out << "Unknown";
-        default:
+          default:
             QL_FAIL("unknown EndCriteria::Type (" << Integer(ec) << ")");
         }
     }

diff --git a/ql/math/optimization/endcriteria.hpp b/ql/math/optimization/endcriteria.hpp
@@ -45,6 +45,7 @@ namespace QuantLib {
                    StationaryFunctionValue,
                    StationaryFunctionAccuracy,
                    ZeroGradientNorm,
+                   FunctionEpsilonTooSmall,
                    Unknown};
 
         //! Initialization constructor
@@ -95,6 +96,7 @@ namespace QuantLib {
         /*! Test if the gradient norm value is below gradientNormEpsilon */
         bool checkZeroGradientNorm(Real gNorm, EndCriteria::Type& ecType) const;
 
+        static bool succeeded(EndCriteria::Type ecType);
       protected:
         //! Maximum number of iterations
         Size maxIterations_;

diff --git a/ql/math/optimization/levenbergmarquardt.cpp b/ql/math/optimization/levenbergmarquardt.cpp
@@ -33,13 +33,8 @@ namespace QuantLib {
     : epsfcn_(epsfcn), xtol_(xtol), gtol_(gtol),
       useCostFunctionsJacobian_(useCostFunctionsJacobian) {}
 
-    Integer LevenbergMarquardt::getInfo() const {
-        return info_;
-    }
-
     EndCriteria::Type LevenbergMarquardt::minimize(Problem& P,
                                                    const EndCriteria& endCriteria) {
-        EndCriteria::Type ecType = EndCriteria::None;
         P.reset();
         Array x_ = P.currentValue();
         currentProblem_ = &P;
@@ -55,10 +50,14 @@ namespace QuantLib {
         std::unique_ptr<Real[]> fvec(new Real[m]);
         std::unique_ptr<Real[]> diag(new Real[n]);
         int mode = 1;
-        Real factor = 1;
+        // magic number recommended by the documentation
+        Real factor = 100;
+        // lmdif() evaluates cost function n+1 times for each iteration (technically, 2n+1
+        // times if useCostFunctionsJacobian is true, but lmdif() doesn't account for that)
+        int maxfev = endCriteria.maxIterations() * (n + 1);
         int nprint = 0;
         int info = 0;
-        int nfev =0;
+        int nfev = 0;
         std::unique_ptr<Real[]> fjac(new Real[m*n]);
         int ldfjac = m;
         std::unique_ptr<int[]> ipvt(new int[n]);
@@ -76,8 +75,7 @@ namespace QuantLib {
                    "negative f tolerance");
         QL_REQUIRE(xtol_ >= 0.0, "negative x tolerance");
         QL_REQUIRE(gtol_ >= 0.0, "negative g tolerance");
-        QL_REQUIRE(endCriteria.maxIterations() > 0,
-                   "null number of evaluations");
+        QL_REQUIRE(maxfev > 0, "null number of evaluations");
 
         // call lmdif to minimize the sum of the squares of m functions
         // in n variables by the Levenberg-Marquardt algorithm.
@@ -95,30 +93,42 @@ namespace QuantLib {
                        endCriteria.functionEpsilon(),
                        xtol_,
                        gtol_,
-                       endCriteria.maxIterations(),
+                       maxfev,
                        epsfcn_,
                        diag.get(), mode, factor,
                        nprint, &info, &nfev, fjac.get(),
                        ldfjac, ipvt.get(), qtf.get(),
                        wa1.get(), wa2.get(), wa3.get(), wa4.get(),
                        lmdifCostFunction,
                        lmdifJacFunction);
+        // for the time being
         info_ = info;
         // check requirements & endCriteria evaluation
         QL_REQUIRE(info != 0, "MINPACK: improper input parameters");
-        //QL_REQUIRE(info != 6, "MINPACK: ftol is too small. no further "
-        //                               "reduction in the sum of squares "
-        //                               "is possible.");
-        if (info != 6) ecType = QuantLib::EndCriteria::StationaryFunctionValue;
-        //QL_REQUIRE(info != 5, "MINPACK: number of calls to fcn has "
-        //                               "reached or exceeded maxfev.");
-        endCriteria.checkMaxIterations(nfev, ecType);
         QL_REQUIRE(info != 7, "MINPACK: xtol is too small. no further "
                                        "improvement in the approximate "
                                        "solution x is possible.");
         QL_REQUIRE(info != 8, "MINPACK: gtol is too small. fvec is "
                                        "orthogonal to the columns of the "
                                        "jacobian to machine precision.");
+
+        EndCriteria::Type ecType = EndCriteria::None;
+        switch (info) {
+          case 1:
+          case 2:
+          case 3:
+          case 4:
+            // 2 and 3 should be StationaryPoint, 4 a new gradient-related value,
+            // but we keep StationaryFunctionValue for backwards compatibility.
+            ecType = EndCriteria::StationaryFunctionValue;
+            break;
+          case 5:
+            ecType = EndCriteria::MaxIterations;
+            break;
+          case 6:
+            ecType = EndCriteria::FunctionEpsilonTooSmall;
+            break;
+        }
         // set problem
         std::copy(xx.get(), xx.get()+n, x_.begin());
         P.setCurrentValue(x_);

diff --git a/ql/math/optimization/levenbergmarquardt.hpp b/ql/math/optimization/levenbergmarquardt.hpp
@@ -40,7 +40,7 @@ namespace QuantLib {
         of the problem is used instead. Note that
         the default implementation of the jacobian
         in CostFunction uses a central difference
-        (oder 2, but requiring more function
+        (order 2, but requiring more function
         evaluations) compared to the forward
         difference implemented here (order 1).
 
@@ -53,10 +53,8 @@ namespace QuantLib {
                            Real gtol = 1.0e-8,
                            bool useCostFunctionsJacobian = false);
         EndCriteria::Type minimize(Problem& P,
-                                   const EndCriteria& endCriteria //= EndCriteria()
-                                   ) override;
-        //      = EndCriteria(400, 1.0e-8, 1.0e-8)
-        virtual Integer getInfo() const;
+                                   const EndCriteria& endCriteria) override;
+
         void fcn(int m,
                  int n,
                  Real* x,
@@ -68,13 +66,18 @@ namespace QuantLib {
                  Real* fjac,
                  int* iflag);
 
+        /*! \deprecated Don't use this method; inspect the result of minimize instead.
+                        Deprecated in version 1.36.
+        */
+        [[deprecated("Don't use this method; inspect the result of minimize instead")]]
+        virtual Integer getInfo() const { return info_; }
       private:
         Problem* currentProblem_;
         Array initCostValues_;
         Matrix initJacobian_;
-        mutable Integer info_ = 0;
+        mutable Integer info_ = 0; // remove together with getInfo
         const Real epsfcn_, xtol_, gtol_;
-        bool useCostFunctionsJacobian_;
+        const bool useCostFunctionsJacobian_;
     };
 
 }

diff --git a/ql/termstructures/globalbootstrap.hpp b/ql/termstructures/globalbootstrap.hpp
@@ -41,7 +41,9 @@ template <class Curve> class GlobalBootstrap {
     typedef typename Curve::interpolator_type Interpolator; // Linear, LogLinear, ...
 
   public:
-    GlobalBootstrap(Real accuracy = Null<Real>());
+    GlobalBootstrap(Real accuracy = Null<Real>(),
+                    ext::shared_ptr<OptimizationMethod> optimizer = nullptr,
+                    ext::shared_ptr<EndCriteria> endCriteria = nullptr);
     /*! The set of (alive) additional dates is added to the interpolation grid. The set of additional dates must only
       depend on the current global evaluation date.  The additionalErrors functor must yield at least as many values
       such that
@@ -61,14 +63,18 @@ template <class Curve> class GlobalBootstrap {
     GlobalBootstrap(std::vector<ext::shared_ptr<typename Traits::helper> > additionalHelpers,
                     std::function<std::vector<Date>()> additionalDates,
                     std::function<Array()> additionalErrors,
-                    Real accuracy = Null<Real>());
+                    Real accuracy = Null<Real>(),
+                    ext::shared_ptr<OptimizationMethod> optimizer = nullptr,
+                    ext::shared_ptr<EndCriteria> endCriteria = nullptr);
     void setup(Curve *ts);
     void calculate() const;
 
   private:
     void initialize() const;
     Curve *ts_;
     Real accuracy_;
+    ext::shared_ptr<OptimizationMethod> optimizer_;
+    ext::shared_ptr<EndCriteria> endCriteria_;
     mutable std::vector<ext::shared_ptr<typename Traits::helper> > additionalHelpers_;
     std::function<std::vector<Date>()> additionalDates_;
     std::function<Array()> additionalErrors_;
@@ -80,15 +86,23 @@ template <class Curve> class GlobalBootstrap {
 // template definitions
 
 template <class Curve>
-GlobalBootstrap<Curve>::GlobalBootstrap(Real accuracy) : ts_(0), accuracy_(accuracy) {}
+GlobalBootstrap<Curve>::GlobalBootstrap(
+    Real accuracy,
+    ext::shared_ptr<OptimizationMethod> optimizer,
+    ext::shared_ptr<EndCriteria> endCriteria)
+: ts_(nullptr), accuracy_(accuracy), optimizer_(std::move(optimizer)),
+  endCriteria_(std::move(endCriteria)) {}
 
 template <class Curve>
 GlobalBootstrap<Curve>::GlobalBootstrap(
     std::vector<ext::shared_ptr<typename Traits::helper> > additionalHelpers,
     std::function<std::vector<Date>()> additionalDates,
     std::function<Array()> additionalErrors,
-    Real accuracy)
-: ts_(nullptr), accuracy_(accuracy), additionalHelpers_(std::move(additionalHelpers)),
+    Real accuracy,
+    ext::shared_ptr<OptimizationMethod> optimizer,
+    ext::shared_ptr<EndCriteria> endCriteria)
+: ts_(nullptr), accuracy_(accuracy), optimizer_(std::move(optimizer)),
+  endCriteria_(std::move(endCriteria)), additionalHelpers_(std::move(additionalHelpers)),
   additionalDates_(std::move(additionalDates)), additionalErrors_(std::move(additionalErrors)) {}
 
 template <class Curve> void GlobalBootstrap<Curve>::setup(Curve *ts) {
@@ -98,6 +112,15 @@ template <class Curve> void GlobalBootstrap<Curve>::setup(Curve *ts) {
     for (Size j = 0; j < additionalHelpers_.size(); ++j)
         ts_->registerWithObservables(additionalHelpers_[j]);
 
+    // setup optimizer and EndCriteria
+    Real accuracy = accuracy_ != Null<Real>() ? accuracy_ : ts_->accuracy_;
+    if (!optimizer_) {
+        optimizer_ = ext::make_shared<LevenbergMarquardt>(accuracy, accuracy, accuracy);
+    }
+    if (!endCriteria_) {
+        endCriteria_ = ext::make_shared<EndCriteria>(1000, 10, accuracy, accuracy, accuracy);
+    }
+
     // do not initialize yet: instruments could be invalid here
     // but valid later when bootstrapping is actually required
 }
@@ -213,13 +236,6 @@ template <class Curve> void GlobalBootstrap<Curve>::calculate() const {
         helper->setTermStructure(const_cast<Curve *>(ts_));
     }
 
-    Real accuracy = accuracy_ != Null<Real>() ? accuracy_ : ts_->accuracy_;
-
-    // setup optimizer and EndCriteria
-    Real optEps = accuracy;
-    LevenbergMarquardt optimizer(optEps, optEps, optEps); // FIXME hardcoded tolerances
-    EndCriteria ec(1000, 10, optEps, optEps, optEps);      // FIXME hardcoded values here as well
-
     // setup interpolation
     if (!validCurve_) {
         ts_->interpolation_ =
@@ -302,14 +318,11 @@ template <class Curve> void GlobalBootstrap<Curve>::calculate() const {
     Problem problem(cost, noConstraint, guess);
 
     // run optimization
-    optimizer.minimize(problem, ec);
-
-    // evaluate target function on best value found to ensure that data_ contains the optimal value
-    Real finalTargetError = cost.value(problem.currentValue());
+    EndCriteria::Type endType = optimizer_->minimize(problem, *endCriteria_);
 
-    // check final error
-    QL_REQUIRE(finalTargetError <= accuracy,
-               "global bootstrap failed, error is " << finalTargetError << ", accuracy is " << accuracy);
+    // check the end criteria
+    QL_REQUIRE(EndCriteria::succeeded(endType),
+               "global bootstrap failed to minimize to required accuracy: " << endType);
 
     // set valid flag
     validCurve_ = true;

diff --git a/ql/termstructures/localbootstrap.hpp b/ql/termstructures/localbootstrap.hpp
@@ -240,9 +240,8 @@ namespace QuantLib {
             EndCriteria::Type endType = solver.minimize(toSolve, endCriteria);
 
             // check the end criteria
-            QL_REQUIRE(endType == EndCriteria::StationaryFunctionAccuracy ||
-                       endType == EndCriteria::StationaryFunctionValue,
-                       "Unable to strip yieldcurve to required accuracy " );
+            QL_REQUIRE(EndCriteria::succeeded(endType),
+                       "Unable to strip yieldcurve to required accuracy: " << endType);
             ++iInst;
         } while ( iInst < nInsts );
         validCurve_ = true;