diff --git a/doc/roots/cubic_roots.qbk b/doc/roots/cubic_roots.qbk new file mode 100644 index 0000000000..89ae69fa61 --- /dev/null +++ b/doc/roots/cubic_roots.qbk @@ -0,0 +1,110 @@ +[/ +Copyright (c) 2021 Nick Thompson +Use, modification and distribution are subject to the +Boost Software License, Version 1.0. (See accompanying file +LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + +[section:cubic_roots Roots of Cubic Polynomials] + +[heading Synopsis] + +``` +#include + +namespace boost::math::tools { + +// Solves ax³ + bx² + cx + d = 0. +std::array cubic_roots(Real a, Real b, Real c, Real d); + +// Computes the empirical residual p(r) (first element) and expected residual ε|rṗ(r)| (second element) for a root. +// Recall that for a numerically computed root r of a function p, |p(r)| ≤ ε|rṗ(r)|. +template +std::array cubic_root_residual(Real a, Real b, Real c, Real d, Real root); +} +``` + +[heading Background] + +The `cubic_roots` function extracts all real roots of a cubic polynomial ax³ + bx² + cx + d. +The result is a `std::array`, which has length three, irrespective of whether there are 3 real roots. +There is always 1 real root, and hence (barring overflow or other exceptional circumstances), the first element of the +`std::array` is always populated. +If there is only one real root of the polynomial, then the second and third elements are set to `nans`. +The roots are returned in nondecreasing order. + +Be careful with double roots. +First, if you have a real double root, it is numerically indistinguishable from a complex conjugate pair of roots, +where the complex part is tiny. +Second, the condition number of rootfinding is infinite at a double root, +so even changes as subtle as different instruction generation can change the outcome. +We have some heuristics in place to detect double roots, but these should be regarded with suspicion. + + +[heading Example] + +``` +#include +#include +#include + +using boost::math::tools::cubic_roots; +using boost::math::tools::cubic_root_residual; + +template +std::string print_roots(std::array const & roots) { + std::ostringstream out; + out << "{" << roots[0] << ", " << roots[1] << ", " << roots[2] << "}"; + return out.str(); +} + +int main() { + // Solves x³ - 6x² + 11x - 6 = (x-1)(x-2)(x-3). + auto roots = cubic_roots(1.0, -6.0, 11.0, -6.0); + std::cout << "The roots of x³ - 6x² + 11x - 6 are " << print_roots(roots) << ".\n"; + + // Double root; YMMV: + // (x+1)²(x-2) = x³ - 3x - 2: + roots = cubic_roots(1.0, 0.0, -3.0, -2.0); + std::cout << "The roots of x³ - 3x - 2 are " << print_roots(roots) << ".\n"; + + // Single root: (x-i)(x+i)(x-3) = x³ - 3x² + x - 3: + roots = cubic_roots(1.0, -3.0, 1.0, -3.0); + std::cout << "The real roots of x³ - 3x² + x - 3 are " << print_roots(roots) << ".\n"; + + // I don't know the roots of x³ + 6.28x² + 2.3x + 3.6; + // how can I see if they've been computed correctly? + roots = cubic_roots(1.0, 6.28, 2.3, 3.6); + std::cout << "The real root of x³ +6.28x² + 2.3x + 3.6 is " << roots[0] << ".\n"; + auto res = cubic_root_residual(1.0, 6.28, 2.3, 3.6, roots[0]); + std::cout << "The residual is " << res[0] << ", and the expected residual is " << res[1] << ". "; + if (abs(res[0]) <= res[1]) { + std::cout << "This is an expected accuracy.\n"; + } else { + std::cerr << "The residual is unexpectedly large.\n"; + } +} +``` + +This prints: + +``` +The roots of x³ - 6x² + 11x - 6 are {1, 2, 3}. +The roots of x³ - 3x - 2 are {-1, -1, 2}. +The real roots of x³ - 3x² + x - 3 are {3, nan, nan}. +The real root of x³ +6.28x² + 2.3x + 3.6 is -5.99656. +The residual is -1.56586e-14, and the expected residual is 4.64155e-14. This is an expected accuracy. +``` + +[heading Performance and Accuracy] + +On an Intel laptop chip running at 2700 MHz, we observe 3 roots taking ~90ns to compute. +If the polynomial only possesses a single real root, it takes ~50ns. +See `reporting/performance/cubic_roots_performance.cpp`. + +The forward error cannot be effectively bounded. +However, for an approximate root x returned by this routine, the residuals should be constrained by |p(x)| ≤ ε|xṗ(x)|. + + +[endsect] +[/section:cubic_roots] diff --git a/doc/roots/roots_overview.qbk b/doc/roots/roots_overview.qbk index 5f1490d668..770432af5d 100644 --- a/doc/roots/roots_overview.qbk +++ b/doc/roots/roots_overview.qbk @@ -17,6 +17,7 @@ There are several fully-worked __root_finding_examples, including: [include roots_without_derivatives.qbk] [include roots.qbk] +[include cubic_roots.qbk] [include root_finding_examples.qbk] [include minima.qbk] [include root_comparison.qbk] diff --git a/include/boost/math/tools/cubic_roots.hpp b/include/boost/math/tools/cubic_roots.hpp index 2736fc4ba1..e3070dbebf 100644 --- a/include/boost/math/tools/cubic_roots.hpp +++ b/include/boost/math/tools/cubic_roots.hpp @@ -1,4 +1,4 @@ -// (C) Copyright Nick Thompson 2019. +// (C) Copyright Nick Thompson 2021. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -30,7 +30,7 @@ std::array cubic_roots(Real a, Real b, Real c, Real d) { std::numeric_limits::quiet_NaN(), std::numeric_limits::quiet_NaN()}; if (a == 0) { - // bx^2 + cx + d = 0: + // bx² + cx + d = 0: if (b == 0) { // cx + d = 0: if (c == 0) { @@ -65,7 +65,7 @@ std::array cubic_roots(Real a, Real b, Real c, Real d) { Real Q = (p*p - 3*q)/9; Real R = (2*p*p*p - 9*p*q + 27*r)/54; if (R*R < Q*Q*Q) { - //std::cout << "In the R^2 < Q^3 branch.\n"; + //std::cout << "In the R² < Q³ branch.\n"; Real rtQ = sqrt(Q); Real theta = acos(R/(Q*rtQ))/3; Real st = sin(theta); @@ -73,45 +73,33 @@ std::array cubic_roots(Real a, Real b, Real c, Real d) { roots[0] = -2*rtQ*ct - p/3; roots[1] = -rtQ*(-ct + sqrt(Real(3))*st) - p/3; roots[2] = rtQ*(ct + sqrt(Real(3))*st) - p/3; - // This formula is not super accurate. - // Do a cleanup iteration. - for (auto & r : roots) { - // Horner's method. - // Here I'll take John Gustaffson's opinion that the fma is a *distinct* operation from a*x +b: - // Make sure to compile these fmas into a single instruction! - Real f = fma(a, r, b); - f = fma(f,r,c); - f = fma(f,r,d); - Real df = fma(3*a, r, 2*b); - df = fma(df, r, c); - if (df != 0) { - // No standard library feature for fused-divide add! - r -= f/df; - } + } else { + // In Numerical Recipes, Chapter 5, Section 6, it is claimed that we only have one real root + // if R² >= Q³. But this isn't true; we can even see this from equation 5.6.18. + // The condition for having three real roots is that A = B. + // It *is* the case that if we're in this branch, and we have 3 real roots, two are a double root. + // Take (x+1)²(x-2) = x³ - 3x -2 as an example. This clearly has a double root at x = -1, + // and it gets sent into this branch. + Real arg = R*R - Q*Q*Q; + Real A = -detail::sgn(R)*cbrt(abs(R) + sqrt(arg)); + Real B = 0; + if (A != 0) { + B = Q/A; + } + roots[0] = A + B - p/3; + // Yes, we're comparing floats for equality: + // Any perturbation pushes the roots into the complex plane; out of the bailiwick of this routine. + if (A == B || arg == 0) { + roots[1] = -A - p/3; + roots[2] = -A - p/3; } - std::sort(roots.begin(), roots.end()); - return roots; - } - // In Numerical Recipes, Chapter 5, Section 6, it is claimed that we only have one real root - // if R^2 >= Q^3. But this isn't true; we can even see this from equation 5.6.18. - // The condition for having three real roots is that A = B. - // It *is* the case that if we're in this branch, and we have 3 real roots, two are a double root. - // Take (x+1)^2(x-2) = x^3 - 3x -2 as an example. This clearly has a double root at x = -1, - // and it gets sent into this branch. - Real arg = R*R - Q*Q*Q; - Real A = -detail::sgn(R)*cbrt(abs(R) + sqrt(arg)); - Real B = 0; - if (A != 0) { - B = Q/A; - } - roots[0] = A + B - p/3; - // Yes, we're comparing floats for equality: - // Any perturbation pushes the roots into the complex plane; out of the bailiwick of this routine. - if (A == B || arg == 0) { - roots[1] = -A - p/3; - roots[2] = -A - p/3; } + // Root polishing: for (auto & r : roots) { + // Horner's method. + // Here I'll take John Gustaffson's opinion that the fma is a *distinct* operation from a*x +b: + // Make sure to compile these fmas into a single instruction and not a function call! + // (I'm looking at you Windows.) Real f = fma(a, r, b); f = fma(f,r,c); f = fma(f,r,d); @@ -126,5 +114,25 @@ std::array cubic_roots(Real a, Real b, Real c, Real d) { return roots; } +// Computes the empirical residual p(r) (first element) and expected residual ε|rṗ(r)| (second element) for a root. +// Recall that for a numerically computed root r of a function p, |p(r)| ≤ ε|rṗ(r)|. +template +std::array cubic_root_residual(Real a, Real b, Real c, Real d, Real root) { + using std::fma; + using std::abs; + std::array out; + Real residual = fma(a, root, b); + residual = fma(residual,root,c); + residual = fma(residual,root,d); + + out[0] = residual; + + Real expected_residual = fma(3*a, root, 2*b); + expected_residual = fma(expected_residual, root, c); + expected_residual = abs(root*expected_residual)*std::numeric_limits::epsilon(); + out[1] = expected_residual; + return out; +} + } #endif diff --git a/reporting/performance/cubic_roots_performance.cpp b/reporting/performance/cubic_roots_performance.cpp index 5eb9a993f6..3550677dda 100644 --- a/reporting/performance/cubic_roots_performance.cpp +++ b/reporting/performance/cubic_roots_performance.cpp @@ -16,8 +16,9 @@ template void CubicRoots(benchmark::State& state) { std::random_device rd; - //auto seed = rd(); - uint32_t seed = 416683252; + auto seed = rd(); + // This seed generates 3 real roots: + //uint32_t seed = 416683252; std::mt19937_64 mt(seed); std::uniform_real_distribution unif(-10, 10); @@ -30,12 +31,10 @@ void CubicRoots(benchmark::State& state) auto roots = cubic_roots(a,b,c,d); benchmark::DoNotOptimize(roots[0]); } - std::cout << "Just solved " << a << "x^3 + " << b << "x^2 + " << c << "x + " << d << "\n"; - std::cout << "This was generated by seed " << seed << "\n"; } -//BENCHMARK_TEMPLATE(CubicRoots, float); +BENCHMARK_TEMPLATE(CubicRoots, float); BENCHMARK_TEMPLATE(CubicRoots, double); -//BENCHMARK_TEMPLATE(CubicRoots, long double); +BENCHMARK_TEMPLATE(CubicRoots, long double); BENCHMARK_MAIN(); diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index dc6682abaf..bc1859fbbb 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -990,6 +990,7 @@ test-suite misc : [ run signal_statistics_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run anderson_darling_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run ljung_box_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] + [ run cubic_roots_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run test_t_test.cpp : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ] [ run test_z_test.cpp : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ] [ run bivariate_statistics_test.cpp : : : [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ] diff --git a/test/cubic_roots_test.cpp b/test/cubic_roots_test.cpp index b87dbeb3d8..48e0d4d2c7 100644 --- a/test/cubic_roots_test.cpp +++ b/test/cubic_roots_test.cpp @@ -98,13 +98,13 @@ void test_zero_coefficients() auto roots = cubic_roots(a, b, c, d); // I could check the condition number here, but this is fine right? - if(!CHECK_ULP_CLOSE(r[0], roots[0], 3)) { + if(!CHECK_ULP_CLOSE(r[0], roots[0], 25)) { std::cerr << " Polynomial x^3 + " << b << "x^2 + " << c << "x + " << d << " has roots {"; std::cerr << r[0] << ", " << r[1] << ", " << r[2] << "}, but the computed roots are {"; std::cerr << roots[0] << ", " << roots[1] << ", " << roots[2] << "}\n"; } - CHECK_ULP_CLOSE(r[1], roots[1], 3); - CHECK_ULP_CLOSE(r[2], roots[2], 3); + CHECK_ULP_CLOSE(r[1], roots[1], 25); + CHECK_ULP_CLOSE(r[2], roots[2], 25); } }