diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-12-01-19-02-21.gh-issue-105967.Puq5Cn.rst b/Misc/NEWS.d/next/Core and Builtins/2023-12-01-19-02-21.gh-issue-105967.Puq5Cn.rst new file mode 100644 index 00000000000000..c69511218e3e16 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-12-01-19-02-21.gh-issue-105967.Puq5Cn.rst @@ -0,0 +1,4 @@ +Workaround a bug in Apple's macOS platform zlib library where +:func:`zlib.crc32` and :func:`binascii.crc32` could produce incorrect results +on multi-gigabyte inputs. Including when using :mod:`zipfile` on zips +containing large data. diff --git a/Modules/binascii.c b/Modules/binascii.c index 4ecff4793be9a0..0614edf4bc03aa 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -780,12 +780,20 @@ binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc) Py_BEGIN_ALLOW_THREADS /* Avoid truncation of length for very large buffers. crc32() takes - length as an unsigned int, which may be narrower than Py_ssize_t. */ - while ((size_t)len > UINT_MAX) { - crc = crc32(crc, buf, UINT_MAX); - buf += (size_t) UINT_MAX; - len -= (size_t) UINT_MAX; + length as an unsigned int, which may be narrower than Py_ssize_t. + We further limit size due to bugs in Apple's macOS zlib. + See https://github.com/python/cpython/issues/105967 + */ +#define ZLIB_CRC_CHUNK_SIZE 0x40000000 +#if ZLIB_CRC_CHUNK_SIZE > INT_MAX +# error "unsupported less than 32-bit platform?" +#endif + while ((size_t)len > ZLIB_CRC_CHUNK_SIZE) { + crc = crc32(crc, buf, ZLIB_CRC_CHUNK_SIZE); + buf += (size_t) ZLIB_CRC_CHUNK_SIZE; + len -= (size_t) ZLIB_CRC_CHUNK_SIZE; } +#undef ZLIB_CRC_CHUNK_SIZE crc = crc32(crc, buf, (unsigned int)len); Py_END_ALLOW_THREADS } else { diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index b67844a67c315c..f94c57e4c89c4f 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -1890,12 +1890,20 @@ zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value) Py_BEGIN_ALLOW_THREADS /* Avoid truncation of length for very large buffers. crc32() takes - length as an unsigned int, which may be narrower than Py_ssize_t. */ - while ((size_t)len > UINT_MAX) { - value = crc32(value, buf, UINT_MAX); - buf += (size_t) UINT_MAX; - len -= (size_t) UINT_MAX; + length as an unsigned int, which may be narrower than Py_ssize_t. + We further limit size due to bugs in Apple's macOS zlib. + See https://github.com/python/cpython/issues/105967. + */ +#define ZLIB_CRC_CHUNK_SIZE 0x40000000 +#if ZLIB_CRC_CHUNK_SIZE > INT_MAX +# error "unsupported less than 32-bit platform?" +#endif + while ((size_t)len > ZLIB_CRC_CHUNK_SIZE) { + value = crc32(value, buf, ZLIB_CRC_CHUNK_SIZE); + buf += (size_t) ZLIB_CRC_CHUNK_SIZE; + len -= (size_t) ZLIB_CRC_CHUNK_SIZE; } +#undef ZLIB_CRC_CHUNK_SIZE value = crc32(value, buf, (unsigned int)len); Py_END_ALLOW_THREADS } else {