From be4acc7040c6a94f23d78630504699acdfa43830 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Mon, 4 Dec 2023 21:20:41 +0100 Subject: [PATCH] [3.11] gh-105967: Work around a macOS bug, limit zlib C library crc32 API calls to 1gig (GH-112615) (#112725) gh-105967: Work around a macOS bug, limit zlib C library crc32 API calls to 1gig (GH-112615) Work around a macOS bug, limit zlib crc32 calls to 1GiB. Without this, `zlib.crc32` and `binascii.crc32` could produce incorrect results on multi-gigabyte inputs depending on the macOS version's Apple supplied zlib implementation. (cherry picked from commit 4eddb4c9d9452482c9af7fa9eec223d12b5a9f33) Co-authored-by: Gregory P. Smith --- ...3-12-01-19-02-21.gh-issue-105967.Puq5Cn.rst | 4 ++++ Modules/binascii.c | 18 +++++++++++++----- Modules/zlibmodule.c | 18 +++++++++++++----- 3 files changed, 30 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-12-01-19-02-21.gh-issue-105967.Puq5Cn.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-12-01-19-02-21.gh-issue-105967.Puq5Cn.rst b/Misc/NEWS.d/next/Core and Builtins/2023-12-01-19-02-21.gh-issue-105967.Puq5Cn.rst new file mode 100644 index 00000000000000..c69511218e3e16 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-12-01-19-02-21.gh-issue-105967.Puq5Cn.rst @@ -0,0 +1,4 @@ +Workaround a bug in Apple's macOS platform zlib library where +:func:`zlib.crc32` and :func:`binascii.crc32` could produce incorrect results +on multi-gigabyte inputs. Including when using :mod:`zipfile` on zips +containing large data. diff --git a/Modules/binascii.c b/Modules/binascii.c index afe49885491714..de3c2d889593fb 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -780,12 +780,20 @@ binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc) Py_BEGIN_ALLOW_THREADS /* Avoid truncation of length for very large buffers. crc32() takes - length as an unsigned int, which may be narrower than Py_ssize_t. */ - while ((size_t)len > UINT_MAX) { - crc = crc32(crc, buf, UINT_MAX); - buf += (size_t) UINT_MAX; - len -= (size_t) UINT_MAX; + length as an unsigned int, which may be narrower than Py_ssize_t. + We further limit size due to bugs in Apple's macOS zlib. + See https://github.com/python/cpython/issues/105967 + */ +#define ZLIB_CRC_CHUNK_SIZE 0x40000000 +#if ZLIB_CRC_CHUNK_SIZE > INT_MAX +# error "unsupported less than 32-bit platform?" +#endif + while ((size_t)len > ZLIB_CRC_CHUNK_SIZE) { + crc = crc32(crc, buf, ZLIB_CRC_CHUNK_SIZE); + buf += (size_t) ZLIB_CRC_CHUNK_SIZE; + len -= (size_t) ZLIB_CRC_CHUNK_SIZE; } +#undef ZLIB_CRC_CHUNK_SIZE crc = crc32(crc, buf, (unsigned int)len); Py_END_ALLOW_THREADS } else { diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index 2fc39a3bd56201..f11210d0e78fc4 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -1444,12 +1444,20 @@ zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value) Py_BEGIN_ALLOW_THREADS /* Avoid truncation of length for very large buffers. crc32() takes - length as an unsigned int, which may be narrower than Py_ssize_t. */ - while ((size_t)len > UINT_MAX) { - value = crc32(value, buf, UINT_MAX); - buf += (size_t) UINT_MAX; - len -= (size_t) UINT_MAX; + length as an unsigned int, which may be narrower than Py_ssize_t. + We further limit size due to bugs in Apple's macOS zlib. + See https://github.com/python/cpython/issues/105967. + */ +#define ZLIB_CRC_CHUNK_SIZE 0x40000000 +#if ZLIB_CRC_CHUNK_SIZE > INT_MAX +# error "unsupported less than 32-bit platform?" +#endif + while ((size_t)len > ZLIB_CRC_CHUNK_SIZE) { + value = crc32(value, buf, ZLIB_CRC_CHUNK_SIZE); + buf += (size_t) ZLIB_CRC_CHUNK_SIZE; + len -= (size_t) ZLIB_CRC_CHUNK_SIZE; } +#undef ZLIB_CRC_CHUNK_SIZE value = crc32(value, buf, (unsigned int)len); Py_END_ALLOW_THREADS } else {