boostorg · mborland · Aug 19, 2024 · Aug 19, 2024 · Aug 19, 2024 · Aug 19, 2024
diff --git a/doc/math.qbk b/doc/math.qbk
@@ -313,6 +313,7 @@ and use the function's name as the link text.]
 [def __sqrt1pm1 [link math_toolkit.powers.sqrt1pm1 sqrt1pm1]]
 [def __rsqrt [link math_toolkit.powers.rsqrt rsqrt]]
 [def __powm1 [link math_toolkit.powers.powm1 powm1]]
+[def __pow1p [link math_toolkit.powers.pow1p pow1p]]
 [def __hypot [link math_toolkit.powers.hypot hypot]]
 [def __pow [link math_toolkit.powers.ct_pow pow]]
 

diff --git a/doc/sf/pow1p.qbk b/doc/sf/pow1p.qbk
@@ -0,0 +1,34 @@
+[/
+  Copyright Matt Borland 2024
+  Distributed under the Boost Software License, Version 1.0.
+  (See accompanying file LICENSE_1_0.txt or copy at
+  http://www.boost.org/LICENSE_1_0.txt).
+]
+
+[section:pow1p pow1p]
+
+[h4 Synopsis]
+
+    #include <boost/math/special_functions/pow1p.hpp>
+
+    namespace boost {
+    namespace math {
+
+    template <typename T1, typename T2, typename Policy>
+    BOOST_MATH_GPU_ENABLED tools::promote_args_t<T1, T2>
+    pow1p(const T1 x, const T2 y, const Policy& pol)
+
+    template <typename T1, typename T2>
+    BOOST_MATH_GPU_ENABLED tools::promote_args_t<T1, T2>
+    pow1p(const T1 x, const T2 y)
+
+    }} // namespaces
+
+
+The function `pow1p` computes (1 + x)[super y ] where x and y are real numbers. 
+This function is particularly useful for scenarios where adding 1 to x before raising it to the power of y is required. 
+It provides a more numerically stable and efficient way to perform this computation, especially for small values of x.
+When x is very small, directly computing (1 + x)[super y ] using standard arithmetic operations can lead to a loss of precision due to floating-point arithmetic limitations. 
+The pow1p function helps mitigate this issue by internally handling such cases more accurately.
+
+[endsect]
diff --git a/include/boost/math/special_functions.hpp b/include/boost/math/special_functions.hpp
@@ -79,6 +79,7 @@
 #include <boost/math/special_functions/lambert_w.hpp>
 #include <boost/math/special_functions/gegenbauer.hpp>
 #include <boost/math/special_functions/jacobi.hpp>
+#include <boost/math/special_functions/pow1p.hpp>
 #ifndef BOOST_MATH_NO_EXCEPTIONS
 #include <boost/math/special_functions/legendre_stieltjes.hpp>
 #endif

diff --git a/include/boost/math/special_functions/pow1p.hpp b/include/boost/math/special_functions/pow1p.hpp
@@ -0,0 +1,269 @@
+//  (C) Copyright Matt Borland 2024.
+//  (C) Copyrigh Fancidev 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_MATH_SF_POW1P_HPP
+#define BOOST_MATH_SF_POW1P_HPP
+
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
+#include <boost/math/tools/promotion.hpp>
+#include <boost/math/tools/is_detected.hpp>
+#include <boost/math/tools/precision.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/sign.hpp>
+#include <boost/math/policies/error_handling.hpp>
+
+// For cuda we would rather use builtin nextafter than unsupported boost::math::nextafter
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+#  include <boost/math/special_functions/next.hpp>
+#  include <boost/math/special_functions/math_fwd.hpp>
+#  include <utility>
+#endif // BOOST_MATH_ENABLE_CUDA
+
+namespace boost {
+namespace math {
+
+namespace detail {
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+
+template <typename T>
+using fma_t = decltype(fma(std::declval<T>(), std::declval<T>(), std::declval<T>()));
+
+template <typename T>
+constexpr bool is_fma_detected_v = boost::math::tools::is_detected<fma_t, T>::value;
+
+template <typename T, boost::math::enable_if_t<is_fma_detected_v<T>, bool> = true>
+BOOST_MATH_FORCEINLINE BOOST_MATH_GPU_ENABLED T local_fma(const T x, const T y, const T z)
+{
+    using std::fma;
+    return fma(x, y, z);
+}
+
+template <typename T, boost::math::enable_if_t<!is_fma_detected_v<T>, bool> = true>
+BOOST_MATH_FORCEINLINE BOOST_MATH_GPU_ENABLED T local_fma(const T x, const T y, const T z)
+{
+    return x * y + z;
+}
+
+#else
+
+template <typename T>
+BOOST_MATH_FORCEINLINE BOOST_MATH_GPU_ENABLED T local_fma(const T x, const T y, const T z)
+{
+    return ::fma(x, y, z);
+}
+
+template <>
+BOOST_MATH_FORCEINLINE BOOST_MATH_GPU_ENABLED float local_fma(const float x, const float y, const float z)
+{
+    return ::fmaf(x, y, z);
+}
+
+#endif // BOOST_MATH_HAS_GPU_SUPPORT
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED T pow1p_imp(const T x, const T y, const Policy& pol)
+{
+    BOOST_MATH_STD_USING
+    constexpr auto function = "boost::math::pow1p<%1%>(%1%, %1%)";
+
+    // The special values follow the spec of the pow() function defined in
+    // IEEE-754, Section 9.2.1.  The order of the `if` statements matters.
+    if (abs(y) == T(0)) 
+    { 
+        // pow(x, +/-0)
+        return T(1);
+    }
+
+    if (x == T(-1))
+    { 
+        // pow(+/-0, y)
+        if ((boost::math::isfinite)(y) && y < 0) 
+        {
+            return boost::math::policies::raise_domain_error<T>(function, "Division by 0", x, pol);
+        }
+
+        // Gets correct special handling
+        return pow(T(0), y);
+    }
+
+    if (x == T(-2) && (boost::math::isinf)(y)) 
+    { 
+        // pow(-1, +/-inf)
+        return T(1);
+    }
+
+    if (x == T(0))
+    { 
+        // pow(+1, y)
+        return T(1);
+    }
+
+    if ((boost::math::isinf)(y))
+    {
+        if ((boost::math::signbit)(y) == 1)
+        {
+            // pow(x, -inf)
+            return T(0);
+        }
+        else
+        {
+            // pow(x, +inf)
+            return y;
+        }
+    }
+
+    if ((boost::math::isinf)(x)) 
+    { 
+        // pow(+/-inf, y)
+        return pow(x, y);
+    }
+
+    // Up to this point, (1+x) = {+/-0, +/-1, +/-inf} have been handled, and
+    // and y = {+/-0, +/-inf} have been handled.  Next, we handle `nan` and
+    // y = {+/-1}.
+    if ((boost::math::isnan)(x)) 
+    {
+        return x;
+    }
+    else if ((boost::math::isnan)(y))
+    {
+        return y;
+    }
+
+    if (y == T(1)) 
+    {
+        return T(1) + x;
+    }
+    if (y == T(-1)) 
+    {
+        return T(1) / (T(1) + x); // guaranteed no overflow
+    }
+
+    // Handle (1+x) < 0
+    if (x < -1) 
+    {
+        // TODO(fancidev): maybe use (1+x)^y == [(1+x)^2]^(y/2)
+        return pow(1+x, y);
+    }
+
+    // Now x, y are finite and not equal to 0 or +/-1, and x > -1.
+    // To compute (1+x)^y, write (1+x) == (s+t) where s is equal to (1+x)
+    // rounded toward 1, and t is the (exact) rounding error.
+    T s, t;
+    if (x < 0) 
+    {
+        s = T(1) + x;
+        t = x - (s - T(1));
+        if (t > 0) 
+        {
+            #ifdef BOOST_MATH_HAS_GPU_SUPPORT
+            s = ::nextafter(s, T(1));
+            #else
+            s = boost::math::nextafter(s, T(1));
+            #endif
+
+            t = x - (s - T(1));
+        }
+    } 
+    else if (x < 1) 
+    {
+        s = T(1) + x;
+        t = x - (s - T(1));
+        if (t < 0) 
+        {
+            #ifdef BOOST_MATH_HAS_GPU_SUPPORT
+            s = ::nextafter(s, T(0));
+            #else
+            s = boost::math::nextafter(s, T(0));
+            #endif
+
+            t = x - (s - T(1));
+        }
+    } 
+    else
+    {
+        s = x + T(1);
+        t = T(1) - (s - x);
+        if (t < 0) 
+        {
+            #ifdef BOOST_MATH_HAS_GPU_SUPPORT
+            s = ::nextafter(s, T(0));
+            #else
+            s = boost::math::nextafter(s, T(0));
+            #endif
+
+            t = T(1) - (s - x);
+        }
+    }
+
+    // Because x > -1 and s is rounded toward 1, s is guaranteed to be > 0.
+    // Write (1+x)^y == (s+t)^y == (s^y)*((1+t/s)^y) == term1*term2.
+    // It can be shown that either both terms <= 1 or both >= 1; so
+    // if the first term over/underflows, then the result over/underflows.
+    // And of course, term2 == 1 if t == 0.
+    T term1 = pow(s, y);
+    if (t == T(0) || term1 == T(0) || (boost::math::isinf)(term1))
+    {
+        return term1;
+    }
+
+    // (1+t/s)^y == exp(y*log(1+t/s)).  The relative error of the result is
+    // equal to the absolute error of the exponent (plus the relative error
+    // of 'exp').  Therefore, when the exponent is small, it is accurately
+    // evaluated to machine epsilon using T arithmetic.  In addition, since
+    // t/s <= epsilon, log(1+t/s) is well approximated by t/s to first order.
+    T u = t / s;
+    T w = y * u;
+    if (abs(w) <= T(0.5)) 
+    {
+        T term2 = exp(w);
+        return term1 * term2;
+    }
+
+    // Now y*log(1+t/s) is large, and its relative error is "magnified" by
+    // the exponent.  To reduce the error, we use double-T arithmetic, and
+    // expand log(1+t/s) to second order.
+
+    // (u + uu) ~= t/s.
+    T r1 = local_fma(-u, s, t);
+    T uu = r1 / s;
+
+    // (u + vv) ~= log(1+(u+uu)) ~= log(1+t/s).
+    T vv = local_fma(T(-0.5)*u, u, uu);
+
+    // (w + ww) ~= y*(u+vv) ~= y*log(1+t/s).
+    T r2 = local_fma(y, u, -w);
+    T ww = local_fma(y, vv, r2);
+
+    // TODO: maybe ww is small enough such that exp(ww) ~= 1+ww.
+    T term2 = exp(w) * exp(ww);
+    return term1 * term2;
+}
+
+} // Namespace detail
+
+template <typename T1, typename T2, typename Policy>
+BOOST_MATH_GPU_ENABLED tools::promote_args_t<T1, T2>
+pow1p(const T1 x, const T2 y, const Policy& pol)
+{
+    using result_type = tools::promote_args_t<T1, T2>;
+    return detail::pow1p_imp(static_cast<result_type>(x), static_cast<result_type>(y), pol);
+}
+
+template <typename T1, typename T2>
+BOOST_MATH_GPU_ENABLED tools::promote_args_t<T1, T2>
+pow1p(const T1 x, const T2 y)
+{
+    using result_type = tools::promote_args_t<T1, T2>;
+    return detail::pow1p_imp(static_cast<result_type>(x), static_cast<result_type>(y), policies::policy<>());
+}
+
+} // Namespace math
+} // Namespace boost
+
+#endif // BOOST_MATH_SF_POW1P_HPP
diff --git a/test/Jamfile.v2 b/test/Jamfile.v2
@@ -128,6 +128,7 @@ test-suite special_fun :
 
    [ run hypot_test.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
    [ run pow_test.cpp ../../test/build//boost_unit_test_framework ]
+   [ run test_pow1p.cpp ]
    [ run logaddexp_test.cpp ../../test/build//boost_unit_test_framework ]
    [ run logsumexp_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_variadic_templates ] ]
    [ run ccmath_sqrt_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]

diff --git a/test/cuda_jamfile b/test/cuda_jamfile
@@ -171,3 +171,5 @@ run test_trigamma_float.cu ;
 
 run test_trunc_double.cu ;
 run test_trunc_float.cu ;
+run test_pow1p_double.cu ;
+run test_pow1p_float.cu ;
diff --git a/test/nvrtc_jamfile b/test/nvrtc_jamfile
@@ -168,3 +168,4 @@ run test_trigamma_nvrtc_double.cpp ;
 run test_trigamma_nvrtc_float.cpp ;
 
 run test_trunc_nvrtc_double.cpp ;
+run test_pow1p_nvrtc_double.cpp ;
diff --git a/test/sycl_jamfile b/test/sycl_jamfile
@@ -38,3 +38,4 @@ run test_digamma_simple.cpp ;
 run test_trigamma.cpp ;
 run test_erf.cpp ;
 run test_gamma.cpp ;
+run test_pow1p.cpp ;
Original file line number	Diff line number	Diff line change
Expand Up		@@ -168,3 +168,4 @@ run test_trigamma_nvrtc_double.cpp ;
		run test_trigamma_nvrtc_float.cpp ;

		run test_trunc_nvrtc_double.cpp ;
		run test_pow1p_nvrtc_double.cpp ;