Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-43574: Dont overallocate list literals #24954

58 changes: 58 additions & 0 deletions Lib/test/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from test import list_tests
from test.support import cpython_only
import pickle
import struct
import unittest

class ListTest(list_tests.CommonTest):
Expand Down Expand Up @@ -196,6 +197,63 @@ def test_preallocation(self):
self.assertEqual(iter_size, sys.getsizeof(list([0] * 10)))
self.assertEqual(iter_size, sys.getsizeof(list(range(10))))

@cpython_only
def test_overallocation(self):
# bpo-33234: Don't overallocate when initialized from known lengths
# bpo-38373: Allows list over-allocation to be zero for some lengths
# bpo-43574: Don't overallocate for list-literals
sizeof = sys.getsizeof

# First handle empty list and empty list-literal cases. Should have no
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make this set of initial assertions into its own test method.

# overallocation, including init from iterable of unknown length.
self.assertEqual(sizeof([]), sizeof(list()))
self.assertEqual(sizeof([]), sizeof(list(tuple())))
self.assertEqual(sizeof([]), sizeof(list(x for x in [])))

# Must use actual list-literals to test the overallocation behavior of
# compiled list-literals as well as those initialized from them.
test_literals = [
[1],
[1,2],
[1,2,3], # Literals of length > 2 are special-cased in compile
[1,2,3,4],
[1,2,3,4,5,6,7],
[1,2,3,4,5,6,7,8], # bpo-38373: Length 8 init won't over-alloc
[1,2,3,4,5,6,7,8,9],
]

overalloc_amts = []
for literal in test_literals:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no action required: meta: this is where I really wish we had an absltest.paramtereize or pytest.mark.parameterize available in the stdlib. it'd run all of these as unique tests with appropriate names instead of a loop that prevents others from running upon first failure.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think unittest.subTest() displays parameters on the failing subtests and keeps running through each even on failure.

# Direct check that list-literals do not over-allocate, by
# calculating the total size of used pointers.
total_ptr_size = len(literal) * struct.calcsize('P')
self.assertEqual(sizeof(literal), sizeof([]) + total_ptr_size)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a msg=repr(literal) arg to all of the asserts in this loop to aid debugging directly from a test failure log.


# Ensure that both list literals, and lists made from an iterable
# of known size, use the same amount of allocation.
self.assertEqual(sizeof(literal), sizeof(list(literal)))
self.assertEqual(sizeof(literal), sizeof(list(tuple(literal))))

# By contrast, confirm that non-empty lists initialized from an
# iterable where the length is unknown at the time of
# initialization, can be overallocated.
iterated_list = list(x for x in literal)
overalloc_amts.append(sizeof(iterated_list) - sizeof(literal))
self.assertGreaterEqual(sizeof(iterated_list), sizeof(literal))

# bpo-38373: initialized or grown lists are not always over-allocated.
# Confirm that over-allocation occurs at least some of the time.
self.assertEqual(True, any(x>0 for x in overalloc_amts))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add msg=repr(overalloc_amts) to aid debugging if this fails.


# Empty lists should overallocate on initial append/insert (unlike
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turn this bit into an additional test method.

# list-literals)
l1 = []
l1.append(1)
self.assertGreater(sizeof(l1), sizeof([1]))
l2 = []
l2.insert(0, 1)
self.assertGreater(sizeof(l2), sizeof([1]))

def test_count_index_remove_crashes(self):
# bpo-38610: The count(), index(), and remove() methods were not
# holding strong references to list elements while calling
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
``list`` objects don't overallocate when starting empty and then extended, or
when set to be empty. This effectively restores previous ``list`` memory
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

quantify what "previous" means. 3.8 and earlier I believe.

behavior for lists initialized from literals.
44 changes: 26 additions & 18 deletions Objects/listobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,25 +58,33 @@ list_resize(PyListObject *self, Py_ssize_t newsize)
return 0;
}

/* This over-allocates proportional to the list size, making room
* for additional growth. The over-allocation is mild, but is
* enough to give linear-time amortized behavior over a long
* sequence of appends() in the presence of a poorly-performing
* system realloc().
* Add padding to make the allocated size multiple of 4.
* The growth pattern is: 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, ...
* Note: new_allocated won't overflow because the largest possible value
* is PY_SSIZE_T_MAX * (9 / 8) + 6 which always fits in a size_t.
*/
new_allocated = ((size_t)newsize + (newsize >> 3) + 6) & ~(size_t)3;
/* Do not overallocate if the new size is closer to overallocated size
* than to the old size.
*/
if (newsize - Py_SIZE(self) > (Py_ssize_t)(new_allocated - newsize))
new_allocated = ((size_t)newsize + 3) & ~(size_t)3;
if (newsize == 0 || (Py_SIZE(self) == 0 && newsize > 1)) {
/* Don't overallocate empty lists that are extended by more than 1
* element. This helps ensure that list-literals aren't
* over-allocated, but still allows it for empty-list append/insert.
*/
new_allocated = newsize;
}
else {
/* This over-allocates proportional to the list size, making room
* for additional growth. The over-allocation is mild, but is
* enough to give linear-time amortized behavior over a long
* sequence of appends() in the presence of a poorly-performing
* system realloc().
* Add padding to make the allocated size multiple of 4.
* The growth pattern is: 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, ...
* Note: new_allocated won't overflow because the largest possible value
* is PY_SSIZE_T_MAX * (9 / 8) + 6 which always fits in a size_t.
*/
new_allocated = ((size_t)newsize + (newsize >> 3) + 6) & ~(size_t)3;
/* Do not overallocate if the new size is closer to overallocated size
* than to the old size.
*/
if (newsize - Py_SIZE(self) > (Py_ssize_t)(new_allocated - newsize)) {
new_allocated = ((size_t)newsize + 3) & ~(size_t)3;
}
}

if (newsize == 0)
new_allocated = 0;
num_allocated_bytes = new_allocated * sizeof(PyObject *);
items = (PyObject **)PyMem_Realloc(self->ob_item, num_allocated_bytes);
if (items == NULL) {
Expand Down