Skip to content

Commit

Permalink
GH-100425: Improve accuracy of builtin sum() for float inputs (GH-100426
Browse files Browse the repository at this point in the history
)
  • Loading branch information
rhettinger authored Dec 23, 2022
1 parent 1ecfd1e commit 5d84966
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 8 deletions.
4 changes: 4 additions & 0 deletions Doc/library/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1733,6 +1733,10 @@ are always available. They are listed here in alphabetical order.
.. versionchanged:: 3.8
The *start* parameter can be specified as a keyword argument.

.. versionchanged:: 3.12 Summation of floats switched to an algorithm
that gives higher accuracy on most builds.


.. class:: super()
super(type, object_or_type=None)

Expand Down
7 changes: 1 addition & 6 deletions Doc/library/math.rst
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,7 @@ Number-theoretic and representation functions
.. function:: fsum(iterable)

Return an accurate floating point sum of values in the iterable. Avoids
loss of precision by tracking multiple intermediate partial sums:

>>> sum([.1, .1, .1, .1, .1, .1, .1, .1, .1, .1])
0.9999999999999999
>>> fsum([.1, .1, .1, .1, .1, .1, .1, .1, .1, .1])
1.0
loss of precision by tracking multiple intermediate partial sums.

The algorithm's accuracy depends on IEEE-754 arithmetic guarantees and the
typical case where the rounding mode is half-even. On some non-Windows
Expand Down
2 changes: 1 addition & 1 deletion Doc/tutorial/floatingpoint.rst
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ added onto a running total. That can make a difference in overall accuracy
so that the errors do not accumulate to the point where they affect the
final total:

>>> sum([0.1] * 10) == 1.0
>>> 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 == 1.0
False
>>> math.fsum([0.1] * 10) == 1.0
True
Expand Down
18 changes: 18 additions & 0 deletions Lib/test/test_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import gc
import io
import locale
import math
import os
import pickle
import platform
Expand All @@ -31,13 +32,20 @@
from test.support.os_helper import (EnvironmentVarGuard, TESTFN, unlink)
from test.support.script_helper import assert_python_ok
from test.support.warnings_helper import check_warnings
from test.support import requires_IEEE_754
from unittest.mock import MagicMock, patch
try:
import pty, signal
except ImportError:
pty = signal = None


# Detect evidence of double-rounding: sum() does not always
# get improved accuracy on machines that suffer from double rounding.
x, y = 1e16, 2.9999 # use temporary values to defeat peephole optimizer
HAVE_DOUBLE_ROUNDING = (x + y == 1e16 + 4)


class Squares:

def __init__(self, max):
Expand Down Expand Up @@ -1617,6 +1625,8 @@ def test_sum(self):
self.assertEqual(repr(sum([-0.0])), '0.0')
self.assertEqual(repr(sum([-0.0], -0.0)), '-0.0')
self.assertEqual(repr(sum([], -0.0)), '-0.0')
self.assertTrue(math.isinf(sum([float("inf"), float("inf")])))
self.assertTrue(math.isinf(sum([1e308, 1e308])))

self.assertRaises(TypeError, sum)
self.assertRaises(TypeError, sum, 42)
Expand All @@ -1641,6 +1651,14 @@ def __getitem__(self, index):
sum(([x] for x in range(10)), empty)
self.assertEqual(empty, [])

@requires_IEEE_754
@unittest.skipIf(HAVE_DOUBLE_ROUNDING,
"sum accuracy not guaranteed on machines with double rounding")
@support.cpython_only # Other implementations may choose a different algorithm
def test_sum_accuracy(self):
self.assertEqual(sum([0.1] * 10), 1.0)
self.assertEqual(sum([1.0, 10E100, 1.0, -10E100]), 2.0)

def test_type(self):
self.assertEqual(type(''), type('123'))
self.assertNotEqual(type(''), type(()))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve the accuracy of ``sum()`` with compensated summation.
21 changes: 20 additions & 1 deletion Python/bltinmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -2532,17 +2532,33 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start)

if (PyFloat_CheckExact(result)) {
double f_result = PyFloat_AS_DOUBLE(result);
double c = 0.0;
Py_SETREF(result, NULL);
while(result == NULL) {
item = PyIter_Next(iter);
if (item == NULL) {
Py_DECREF(iter);
if (PyErr_Occurred())
return NULL;
/* Avoid losing the sign on a negative result,
and don't let adding the compensation convert
an infinite or overflowed sum to a NaN. */
if (c && Py_IS_FINITE(c)) {
f_result += c;
}
return PyFloat_FromDouble(f_result);
}
if (PyFloat_CheckExact(item)) {
f_result += PyFloat_AS_DOUBLE(item);
// Improved Kahan–Babuška algorithm by Arnold Neumaier
// https://www.mat.univie.ac.at/~neum/scan/01.pdf
double x = PyFloat_AS_DOUBLE(item);
double t = f_result + x;
if (fabs(f_result) >= fabs(x)) {
c += (f_result - t) + x;
} else {
c += (x - t) + f_result;
}
f_result = t;
_Py_DECREF_SPECIALIZED(item, _PyFloat_ExactDealloc);
continue;
}
Expand All @@ -2556,6 +2572,9 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start)
continue;
}
}
if (c && Py_IS_FINITE(c)) {
f_result += c;
}
result = PyFloat_FromDouble(f_result);
if (result == NULL) {
Py_DECREF(item);
Expand Down

0 comments on commit 5d84966

Please sign in to comment.