From c66b577d9f7a11ffab57985fd6fb22e9dfd4f245 Mon Sep 17 00:00:00 2001 From: Jeffrey Kintscher <49998481+websurfer5@users.noreply.github.com> Date: Wed, 27 Dec 2023 08:23:42 -0800 Subject: [PATCH 01/71] bpo-26791: Update shutil.move() to provide the same symlink move behavior as the mv shell when moving a symlink into a directory that is the target of the symlink (GH-21759) --- Lib/shutil.py | 2 +- Lib/test/test_shutil.py | 29 +++++++++++++++++++ .../2020-08-06-14-43-55.bpo-26791.KxoEfO.rst | 4 +++ 3 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2020-08-06-14-43-55.bpo-26791.KxoEfO.rst diff --git a/Lib/shutil.py b/Lib/shutil.py index c40f6ddae39a17..acc9419be4dfca 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -885,7 +885,7 @@ def move(src, dst, copy_function=copy2): sys.audit("shutil.move", src, dst) real_dst = dst if os.path.isdir(dst): - if _samefile(src, dst): + if _samefile(src, dst) and not os.path.islink(src): # We might be on a case insensitive filesystem, # perform the rename anyway. os.rename(src, dst) diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index cc5459aa08fe33..8edd75e9907ec0 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -2688,6 +2688,35 @@ def test_move_dir_caseinsensitive(self): finally: os.rmdir(dst_dir) + # bpo-26791: Check that a symlink to a directory can + # be moved into that directory. + @mock_rename + def _test_move_symlink_to_dir_into_dir(self, dst): + src = os.path.join(self.src_dir, 'linktodir') + dst_link = os.path.join(self.dst_dir, 'linktodir') + os.symlink(self.dst_dir, src, target_is_directory=True) + shutil.move(src, dst) + self.assertTrue(os.path.islink(dst_link)) + self.assertTrue(os.path.samefile(self.dst_dir, dst_link)) + self.assertFalse(os.path.exists(src)) + + # Repeat the move operation with the destination + # symlink already in place (should raise shutil.Error). 
+ os.symlink(self.dst_dir, src, target_is_directory=True) + with self.assertRaises(shutil.Error): + shutil.move(src, dst) + self.assertTrue(os.path.samefile(self.dst_dir, dst_link)) + self.assertTrue(os.path.exists(src)) + + @os_helper.skip_unless_symlink + def test_move_symlink_to_dir_into_dir(self): + self._test_move_symlink_to_dir_into_dir(self.dst_dir) + + @os_helper.skip_unless_symlink + def test_move_symlink_to_dir_into_symlink_to_dir(self): + dst = os.path.join(self.src_dir, 'otherlinktodir') + os.symlink(self.dst_dir, dst, target_is_directory=True) + self._test_move_symlink_to_dir_into_dir(dst) @os_helper.skip_unless_dac_override @unittest.skipUnless(hasattr(os, 'lchflags') diff --git a/Misc/NEWS.d/next/Library/2020-08-06-14-43-55.bpo-26791.KxoEfO.rst b/Misc/NEWS.d/next/Library/2020-08-06-14-43-55.bpo-26791.KxoEfO.rst new file mode 100644 index 00000000000000..c6f8dcb6f9269c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-08-06-14-43-55.bpo-26791.KxoEfO.rst @@ -0,0 +1,4 @@ +:func:`shutil.move` now moves a symlink into a directory when that +directory is the target of the symlink. This provides the same behavior as +the mv shell command. The previous behavior raised an exception. Patch by +Jeffrey Kintscher. From 0651936ae2bc6999f488f8c519b8d07a06a11557 Mon Sep 17 00:00:00 2001 From: Stanley <46876382+slateny@users.noreply.github.com> Date: Wed, 27 Dec 2023 09:16:36 -0800 Subject: [PATCH 02/71] gh-67641: Clarify documentation on bytes vs text with non-seeking tarfile stream (GH-31610) --- Doc/library/tarfile.rst | 10 ++++++---- Lib/tarfile.py | 9 +++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index f4e83d64bb1580..7ba29d4a40dedb 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -116,10 +116,12 @@ Some facts and figures: ``'filemode|[compression]'``. :func:`tarfile.open` will return a :class:`TarFile` object that processes its data as a stream of blocks. 
No random seeking will be done on the file. If given, *fileobj* may be any object that has a - :meth:`~io.TextIOBase.read` or :meth:`~io.TextIOBase.write` method (depending on the *mode*). *bufsize* - specifies the blocksize and defaults to ``20 * 512`` bytes. Use this variant - in combination with e.g. ``sys.stdin``, a socket :term:`file object` or a tape - device. However, such a :class:`TarFile` object is limited in that it does + :meth:`~io.RawIOBase.read` or :meth:`~io.RawIOBase.write` method + (depending on the *mode*) that works with bytes. + *bufsize* specifies the blocksize and defaults to ``20 * 512`` bytes. + Use this variant in combination with e.g. ``sys.stdin.buffer``, a socket + :term:`file object` or a tape device. + However, such a :class:`TarFile` object is limited in that it does not allow random access, see :ref:`tar-examples`. The currently possible modes: diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 5ada0ad626bda8..20e0394507f5db 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -330,10 +330,11 @@ def write(self, s): class _Stream: """Class that serves as an adapter between TarFile and a stream-like object. The stream-like object only - needs to have a read() or write() method and is accessed - blockwise. Use of gzip or bzip2 compression is possible. - A stream-like object could be for example: sys.stdin, - sys.stdout, a socket, a tape device etc. + needs to have a read() or write() method that works with bytes, + and the method is accessed blockwise. + Use of gzip or bzip2 compression is possible. + A stream-like object could be for example: sys.stdin.buffer, + sys.stdout.buffer, a socket, a tape device etc. _Stream is intended to be used only internally. 
""" From 6c98fce33a4c2d6671978f6286377af0d6e22182 Mon Sep 17 00:00:00 2001 From: Terry Jan Reedy Date: Wed, 27 Dec 2023 15:51:49 -0500 Subject: [PATCH 03/71] gh-57795: Add news to idlelib/News3.txt (#113522) --- Lib/idlelib/News3.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/idlelib/News3.txt b/Lib/idlelib/News3.txt index 308865d968814c..f38cc96eceb766 100644 --- a/Lib/idlelib/News3.txt +++ b/Lib/idlelib/News3.txt @@ -4,7 +4,11 @@ Released on 2024-10-xx ========================= +gh-57795: Enter selected text into the Find box when opening +a Replace dialog. Patch by Roger Serwy and Zackery Spytz. + gh-113269: Fix test_editor hang on macOS Catalina. +Patch by Terry Reedy. gh-112939: Fix processing unsaved files when quitting IDLE on macOS. Patch by Ronald Oussoren and Christopher Chavez. From 87295b4068762f9cbdfcae5fed5ff54aadd3cb62 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 27 Dec 2023 22:43:19 +0100 Subject: [PATCH 04/71] gh-113317: Rework Argument Clinic cpp.py error handling (#113525) Rework error handling in the C preprocessor helper. Instead of monkey- patching the cpp.Monitor.fail() method from within clinic.py, rewrite cpp.py to use a subclass of the ClinicError exception. As a side-effect, ClinicError is moved into Tools/clinic/libclinic/errors.py. Yak-shaving in preparation for putting cpp.py into libclinic. 
--- Lib/test/test_clinic.py | 4 ++-- Tools/clinic/clinic.py | 23 +---------------------- Tools/clinic/cpp.py | 21 +++++++++------------ Tools/clinic/libclinic/__init__.py | 6 ++++++ Tools/clinic/libclinic/errors.py | 26 ++++++++++++++++++++++++++ 5 files changed, 44 insertions(+), 36 deletions(-) create mode 100644 Tools/clinic/libclinic/errors.py diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index 21f56fe0195e69..3d6816d73d45bc 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -22,7 +22,7 @@ def _make_clinic(*, filename='clinic_tests'): - clang = clinic.CLanguage(None) + clang = clinic.CLanguage(filename) c = clinic.Clinic(clang, filename=filename, limited_capi=False) c.block_parser = clinic.BlockParser('', clang) return c @@ -3920,7 +3920,7 @@ def test_Function_and_Parameter_reprs(self): self.assertEqual(repr(parameter), "") def test_Monitor_repr(self): - monitor = clinic.cpp.Monitor() + monitor = clinic.cpp.Monitor("test.c") self.assertRegex(repr(monitor), r"") monitor.line_number = 42 diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index f004bec3cce8f6..82efff56eda756 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -53,6 +53,7 @@ # Local imports. 
import libclinic +from libclinic import ClinicError # TODO: @@ -94,27 +95,6 @@ def __repr__(self) -> str: TemplateDict = dict[str, str] -@dc.dataclass -class ClinicError(Exception): - message: str - _: dc.KW_ONLY - lineno: int | None = None - filename: str | None = None - - def __post_init__(self) -> None: - super().__init__(self.message) - - def report(self, *, warn_only: bool = False) -> str: - msg = "Warning" if warn_only else "Error" - if self.filename is not None: - msg += f" in file {self.filename!r}" - if self.lineno is not None: - msg += f" on line {self.lineno}" - msg += ":\n" - msg += f"{self.message}\n" - return msg - - @overload def warn_or_fail( *args: object, @@ -669,7 +649,6 @@ class CLanguage(Language): def __init__(self, filename: str) -> None: super().__init__(filename) self.cpp = cpp.Monitor(filename) - self.cpp.fail = fail # type: ignore[method-assign] def parse_line(self, line: str) -> None: self.cpp.writeline(line) diff --git a/Tools/clinic/cpp.py b/Tools/clinic/cpp.py index 16eee6fc399491..659099056cd46c 100644 --- a/Tools/clinic/cpp.py +++ b/Tools/clinic/cpp.py @@ -3,6 +3,8 @@ import sys from typing import NoReturn +from libclinic.errors import ParseError + TokenAndCondition = tuple[str, str] TokenStack = list[TokenAndCondition] @@ -32,7 +34,7 @@ class Monitor: Anyway this implementation seems to work well enough for the CPython sources. 
""" - filename: str | None = None + filename: str _: dc.KW_ONLY verbose: bool = False @@ -59,14 +61,8 @@ def condition(self) -> str: """ return " && ".join(condition for token, condition in self.stack) - def fail(self, *a: object) -> NoReturn: - if self.filename: - filename = " " + self.filename - else: - filename = '' - print("Error at" + filename, "line", self.line_number, ":") - print(" ", ' '.join(str(x) for x in a)) - sys.exit(-1) + def fail(self, msg: str) -> NoReturn: + raise ParseError(msg, filename=self.filename, lineno=self.line_number) def writeline(self, line: str) -> None: self.line_number += 1 @@ -74,7 +70,7 @@ def writeline(self, line: str) -> None: def pop_stack() -> TokenAndCondition: if not self.stack: - self.fail("#" + token + " without matching #if / #ifdef / #ifndef!") + self.fail(f"#{token} without matching #if / #ifdef / #ifndef!") return self.stack.pop() if self.continuation: @@ -145,7 +141,7 @@ def pop_stack() -> TokenAndCondition: if token in {'if', 'ifdef', 'ifndef', 'elif'}: if not condition: - self.fail("Invalid format for #" + token + " line: no argument!") + self.fail(f"Invalid format for #{token} line: no argument!") if token in {'if', 'elif'}: if not is_a_simple_defined(condition): condition = "(" + condition + ")" @@ -155,7 +151,8 @@ def pop_stack() -> TokenAndCondition: else: fields = condition.split() if len(fields) != 1: - self.fail("Invalid format for #" + token + " line: should be exactly one argument!") + self.fail(f"Invalid format for #{token} line: " + "should be exactly one argument!") symbol = fields[0] condition = 'defined(' + symbol + ')' if token == 'ifndef': diff --git a/Tools/clinic/libclinic/__init__.py b/Tools/clinic/libclinic/__init__.py index 0c3c6840901a42..d4e7a0c5cf7b76 100644 --- a/Tools/clinic/libclinic/__init__.py +++ b/Tools/clinic/libclinic/__init__.py @@ -1,5 +1,8 @@ from typing import Final +from .errors import ( + ClinicError, +) from .formatting import ( SIG_END_MARKER, c_repr, @@ -15,6 +18,9 @@ 
__all__ = [ + # Error handling + "ClinicError", + # Formatting helpers "SIG_END_MARKER", "c_repr", diff --git a/Tools/clinic/libclinic/errors.py b/Tools/clinic/libclinic/errors.py new file mode 100644 index 00000000000000..afb21b02386fe7 --- /dev/null +++ b/Tools/clinic/libclinic/errors.py @@ -0,0 +1,26 @@ +import dataclasses as dc + + +@dc.dataclass +class ClinicError(Exception): + message: str + _: dc.KW_ONLY + lineno: int | None = None + filename: str | None = None + + def __post_init__(self) -> None: + super().__init__(self.message) + + def report(self, *, warn_only: bool = False) -> str: + msg = "Warning" if warn_only else "Error" + if self.filename is not None: + msg += f" in file {self.filename!r}" + if self.lineno is not None: + msg += f" on line {self.lineno}" + msg += ":\n" + msg += f"{self.message}\n" + return msg + + +class ParseError(ClinicError): + pass From 7ab9efdd6a2fb21cddca1ccd70175f1ac6bd9168 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 28 Dec 2023 00:20:57 +0100 Subject: [PATCH 05/71] gh-113299: Move cpp.py into libclinic (#113526) --- Lib/test/test_clinic.py | 2 +- Tools/clinic/clinic.py | 4 ++-- Tools/clinic/{ => libclinic}/cpp.py | 5 ++++- 3 files changed, 7 insertions(+), 4 deletions(-) rename Tools/clinic/{ => libclinic}/cpp.py (99%) diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index 3d6816d73d45bc..7323bdd801f4be 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -3920,7 +3920,7 @@ def test_Function_and_Parameter_reprs(self): self.assertEqual(repr(parameter), "") def test_Monitor_repr(self): - monitor = clinic.cpp.Monitor("test.c") + monitor = libclinic.cpp.Monitor("test.c") self.assertRegex(repr(monitor), r"") monitor.line_number = 42 diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index 82efff56eda756..f6f95580f1a177 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -13,7 +13,6 @@ import collections import contextlib import copy -import cpp import 
dataclasses as dc import enum import functools @@ -53,6 +52,7 @@ # Local imports. import libclinic +import libclinic.cpp from libclinic import ClinicError @@ -648,7 +648,7 @@ class CLanguage(Language): def __init__(self, filename: str) -> None: super().__init__(filename) - self.cpp = cpp.Monitor(filename) + self.cpp = libclinic.cpp.Monitor(filename) def parse_line(self, line: str) -> None: self.cpp.writeline(line) diff --git a/Tools/clinic/cpp.py b/Tools/clinic/libclinic/cpp.py similarity index 99% rename from Tools/clinic/cpp.py rename to Tools/clinic/libclinic/cpp.py index 659099056cd46c..e115d65a88e1b6 100644 --- a/Tools/clinic/cpp.py +++ b/Tools/clinic/libclinic/cpp.py @@ -3,7 +3,10 @@ import sys from typing import NoReturn -from libclinic.errors import ParseError +from .errors import ParseError + + +__all__ = ["Monitor"] TokenAndCondition = tuple[str, str] From bfee2f77e16f01a718c1044564ee624f1f2bc328 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 28 Dec 2023 17:31:19 +0900 Subject: [PATCH 06/71] gh-73427: deprecate `_enablelegacywindowsfsencoding` (#107729) --- Doc/library/sys.rst | 8 ++++++++ Doc/whatsnew/3.13.rst | 4 ++++ .../Windows/2023-08-08-01-42-14.gh-issue-73427.WOpiNt.rst | 2 ++ Python/sysmodule.c | 7 +++++++ 4 files changed, 21 insertions(+) create mode 100644 Misc/NEWS.d/next/Windows/2023-08-08-01-42-14.gh-issue-73427.WOpiNt.rst diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index aaf79205d44282..2426c37ccb1e0f 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -1744,9 +1744,17 @@ always available. .. availability:: Windows. + .. note:: + Changing the filesystem encoding after Python startup is risky because + the old fsencoding or paths encoded by the old fsencoding may be cached + somewhere. Use :envvar:`PYTHONLEGACYWINDOWSFSENCODING` instead. + .. versionadded:: 3.6 See :pep:`529` for more details. + .. deprecated-removed:: 3.13 3.16 + Use :envvar:`PYTHONLEGACYWINDOWSFSENCODING` instead. + .. 
data:: stdin stdout stderr diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 4b02ecddd63b27..888ebd0402d0e7 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -474,6 +474,10 @@ Deprecated security and functionality bugs. This includes removal of the ``--cgi`` flag to the ``python -m http.server`` command line in 3.15. +* :mod:`sys`: :func:`sys._enablelegacywindowsfsencoding` function. + Replace it with :envvar:`PYTHONLEGACYWINDOWSFSENCODING` environment variable. + (Contributed by Inada Naoki in :gh:`73427`.) + * :mod:`traceback`: * The field *exc_type* of :class:`traceback.TracebackException` is diff --git a/Misc/NEWS.d/next/Windows/2023-08-08-01-42-14.gh-issue-73427.WOpiNt.rst b/Misc/NEWS.d/next/Windows/2023-08-08-01-42-14.gh-issue-73427.WOpiNt.rst new file mode 100644 index 00000000000000..830c4c54838e80 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2023-08-08-01-42-14.gh-issue-73427.WOpiNt.rst @@ -0,0 +1,2 @@ +Deprecate :func:`sys._enablelegacywindowsfsencoding`. Use +:envvar:`PYTHONLEGACYWINDOWSFSENCODING` instead. Patch by Inada Naoki. diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 57dc4a1226ce75..c2de4ecdc8ce0f 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1715,6 +1715,13 @@ static PyObject * sys__enablelegacywindowsfsencoding_impl(PyObject *module) /*[clinic end generated code: output=f5c3855b45e24fe9 input=2bfa931a20704492]*/ { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "sys._enablelegacywindowsfsencoding() is deprecated and will be " + "removed in Python 3.16. 
Use PYTHONLEGACYWINDOWSFSENCODING " + "instead.", 1)) + { + return NULL; + } if (_PyUnicode_EnableLegacyWindowsFSEncoding() < 0) { return NULL; } From cc13eabc7ce08accf49656e258ba500f74a1dae8 Mon Sep 17 00:00:00 2001 From: Ronald Oussoren Date: Thu, 28 Dec 2023 09:42:05 +0100 Subject: [PATCH 07/71] gh-110459: Make sure --with-openssl-rpath works on macOS (#113441) * gh-110459: Make sure --with-openssl-rpath works on macOS On macOS the `-rpath` linker flag is spelled differently than on other platforms. --- .../macOS/2023-12-23-22-41-07.gh-issue-110459.NaMBJy.rst | 2 ++ configure | 7 ++++++- configure.ac | 7 ++++++- 3 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/macOS/2023-12-23-22-41-07.gh-issue-110459.NaMBJy.rst diff --git a/Misc/NEWS.d/next/macOS/2023-12-23-22-41-07.gh-issue-110459.NaMBJy.rst b/Misc/NEWS.d/next/macOS/2023-12-23-22-41-07.gh-issue-110459.NaMBJy.rst new file mode 100644 index 00000000000000..44ffd857785f0d --- /dev/null +++ b/Misc/NEWS.d/next/macOS/2023-12-23-22-41-07.gh-issue-110459.NaMBJy.rst @@ -0,0 +1,2 @@ +Running ``configure ... --with-openssl-rpath=X/Y/Z`` no longer fails to detect +OpenSSL on macOS.
diff --git a/configure b/configure index 7e50abc29d0c1a..6d65d3abc1811b 100755 --- a/configure +++ b/configure @@ -27478,7 +27478,12 @@ then : else $as_nop - rpath_arg="-Wl,-rpath=" + if test "$ac_sys_system" = "Darwin" + then + rpath_arg="-Wl,-rpath," + else + rpath_arg="-Wl,-rpath=" + fi fi diff --git a/configure.ac b/configure.ac index e064848af9ed1b..bfdabc4474e5eb 100644 --- a/configure.ac +++ b/configure.ac @@ -6808,7 +6808,12 @@ AX_CHECK_OPENSSL([have_openssl=yes],[have_openssl=no]) AS_VAR_IF([GNULD], [yes], [ rpath_arg="-Wl,--enable-new-dtags,-rpath=" ], [ - rpath_arg="-Wl,-rpath=" + if test "$ac_sys_system" = "Darwin" + then + rpath_arg="-Wl,-rpath," + else + rpath_arg="-Wl,-rpath=" + fi ]) AC_MSG_CHECKING([for --with-openssl-rpath]) From f1676867b52f8b6c7f70bf32e2a53f7edd6700a7 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Thu, 28 Dec 2023 12:12:21 +0300 Subject: [PATCH 08/71] gh-103092: Make `_elementtree` module importable in sub-interpreters (#113434) Enable imports of _elementtree module in sub-interpreters --- Modules/_elementtree.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 5bf67870767698..b574c96d3f9625 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -4430,9 +4430,7 @@ module_exec(PyObject *m) static struct PyModuleDef_Slot elementtree_slots[] = { {Py_mod_exec, module_exec}, - // XXX gh-103092: fix isolation. - {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, - //{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {0, NULL}, }; From f108468970bf4e70910862476900f924fb701399 Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Thu, 28 Dec 2023 02:47:44 -0800 Subject: [PATCH 09/71] bpo-11102: Make configure enable major(), makedev(), and minor() on HP-UX (GH-19856) Always include <sys/types.h> before <sys/sysmacros.h>.
Co-authored-by: Serhiy Storchaka --- .../next/Build/2020-05-01-23-44-31.bpo-11102.Fw9zeS.rst | 2 ++ Modules/posixmodule.c | 9 +++++---- configure | 1 + configure.ac | 1 + 4 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2020-05-01-23-44-31.bpo-11102.Fw9zeS.rst diff --git a/Misc/NEWS.d/next/Build/2020-05-01-23-44-31.bpo-11102.Fw9zeS.rst b/Misc/NEWS.d/next/Build/2020-05-01-23-44-31.bpo-11102.Fw9zeS.rst new file mode 100644 index 00000000000000..6477538edf5550 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2020-05-01-23-44-31.bpo-11102.Fw9zeS.rst @@ -0,0 +1,2 @@ +The :func:`os.major`, :func:`os.makedev`, and :func:`os.minor` functions are +now available on HP-UX v3. diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index c635fd4d993d57..f4a18536e8f1e1 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -236,15 +236,16 @@ corresponding Unix manual entries for more information on calls."); # include #endif +#ifdef HAVE_SYS_TYPES_H +/* Should be included before on HP-UX v3 */ +# include +#endif /* HAVE_SYS_TYPES_H */ + #ifdef HAVE_SYS_SYSMACROS_H /* GNU C Library: major(), minor(), makedev() */ # include #endif -#ifdef HAVE_SYS_TYPES_H -# include -#endif /* HAVE_SYS_TYPES_H */ - #ifdef HAVE_SYS_STAT_H # include #endif /* HAVE_SYS_STAT_H */ diff --git a/configure b/configure index 6d65d3abc1811b..3322b7a682dd25 100755 --- a/configure +++ b/configure @@ -21805,6 +21805,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext #if defined(MAJOR_IN_MKDEV) #include #elif defined(MAJOR_IN_SYSMACROS) +#include #include #else #include diff --git a/configure.ac b/configure.ac index bfdabc4474e5eb..13a6d746763d62 100644 --- a/configure.ac +++ b/configure.ac @@ -5102,6 +5102,7 @@ AC_LINK_IFELSE([AC_LANG_PROGRAM([[ #if defined(MAJOR_IN_MKDEV) #include #elif defined(MAJOR_IN_SYSMACROS) +#include #include #else #include From fba324154e65b752e42aa59dea287d639935565f Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Thu, 28 Dec 2023 
21:58:06 +0300 Subject: [PATCH 10/71] gh-113543: Make sure that `MacOSXOSAScript` sends `webbrowser.open` audit event (#113544) --- Lib/webbrowser.py | 1 + .../next/Library/2023-12-28-14-36-20.gh-issue-113543.2iWkOR.rst | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-12-28-14-36-20.gh-issue-113543.2iWkOR.rst diff --git a/Lib/webbrowser.py b/Lib/webbrowser.py index 6f9c6a6de177e6..636e8ca459d109 100755 --- a/Lib/webbrowser.py +++ b/Lib/webbrowser.py @@ -574,6 +574,7 @@ def __init__(self, name='default'): super().__init__(name) def open(self, url, new=0, autoraise=True): + sys.audit("webbrowser.open", url) if self.name == 'default': script = 'open location "%s"' % url.replace('"', '%22') # opens in default browser else: diff --git a/Misc/NEWS.d/next/Library/2023-12-28-14-36-20.gh-issue-113543.2iWkOR.rst b/Misc/NEWS.d/next/Library/2023-12-28-14-36-20.gh-issue-113543.2iWkOR.rst new file mode 100644 index 00000000000000..5bf557bedd0204 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-28-14-36-20.gh-issue-113543.2iWkOR.rst @@ -0,0 +1,2 @@ +Make sure that ``webbrowser.MacOSXOSAScript`` sends ``webbrowser.open`` +audit event. From 8e5d70f4b6bc1d0321f4290f8a2d350706bce8b7 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Thu, 28 Dec 2023 21:29:12 +0200 Subject: [PATCH 11/71] gh-101100: Fix Sphinx warnings in library/random.rst (#112981) Co-authored-by: Alex Waygood --- Doc/library/random.rst | 59 +++++++++++++++++++++++++++++++----------- Doc/tools/.nitignore | 1 - 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/Doc/library/random.rst b/Doc/library/random.rst index feaf260caf3568..d0ced2416c9578 100644 --- a/Doc/library/random.rst +++ b/Doc/library/random.rst @@ -34,10 +34,8 @@ instance of the :class:`random.Random` class. You can instantiate your own instances of :class:`Random` to get generators that don't share state. 
Class :class:`Random` can also be subclassed if you want to use a different -basic generator of your own devising: in that case, override the :meth:`~Random.random`, -:meth:`~Random.seed`, :meth:`~Random.getstate`, and :meth:`~Random.setstate` methods. -Optionally, a new generator can supply a :meth:`~Random.getrandbits` method --- this -allows :meth:`randrange` to produce selections over an arbitrarily large range. +basic generator of your own devising: see the documentation on that class for +more details. The :mod:`random` module also provides the :class:`SystemRandom` class which uses the system function :func:`os.urandom` to generate random numbers @@ -88,7 +86,7 @@ Bookkeeping functions .. versionchanged:: 3.11 The *seed* must be one of the following types: - *NoneType*, :class:`int`, :class:`float`, :class:`str`, + ``None``, :class:`int`, :class:`float`, :class:`str`, :class:`bytes`, or :class:`bytearray`. .. function:: getstate() @@ -412,6 +410,37 @@ Alternative Generator ``None``, :class:`int`, :class:`float`, :class:`str`, :class:`bytes`, or :class:`bytearray`. + Subclasses of :class:`!Random` should override the following methods if they + wish to make use of a different basic generator: + + .. method:: Random.seed(a=None, version=2) + + Override this method in subclasses to customise the :meth:`~random.seed` + behaviour of :class:`!Random` instances. + + .. method:: Random.getstate() + + Override this method in subclasses to customise the :meth:`~random.getstate` + behaviour of :class:`!Random` instances. + + .. method:: Random.setstate(state) + + Override this method in subclasses to customise the :meth:`~random.setstate` + behaviour of :class:`!Random` instances. + + .. method:: Random.random() + + Override this method in subclasses to customise the :meth:`~random.random` + behaviour of :class:`!Random` instances. + + Optionally, a custom generator subclass can also supply the following method: + + .. 
method:: Random.getrandbits(k) + + Override this method in subclasses to customise the + :meth:`~random.getrandbits` behaviour of :class:`!Random` instances. + + .. class:: SystemRandom([seed]) Class that uses the :func:`os.urandom` function for generating random numbers @@ -445,30 +474,30 @@ Examples Basic examples:: - >>> random() # Random float: 0.0 <= x < 1.0 + >>> random() # Random float: 0.0 <= x < 1.0 0.37444887175646646 - >>> uniform(2.5, 10.0) # Random float: 2.5 <= x <= 10.0 + >>> uniform(2.5, 10.0) # Random float: 2.5 <= x <= 10.0 3.1800146073117523 - >>> expovariate(1 / 5) # Interval between arrivals averaging 5 seconds + >>> expovariate(1 / 5) # Interval between arrivals averaging 5 seconds 5.148957571865031 - >>> randrange(10) # Integer from 0 to 9 inclusive + >>> randrange(10) # Integer from 0 to 9 inclusive 7 - >>> randrange(0, 101, 2) # Even integer from 0 to 100 inclusive + >>> randrange(0, 101, 2) # Even integer from 0 to 100 inclusive 26 - >>> choice(['win', 'lose', 'draw']) # Single random element from a sequence + >>> choice(['win', 'lose', 'draw']) # Single random element from a sequence 'draw' >>> deck = 'ace two three four'.split() - >>> shuffle(deck) # Shuffle a list + >>> shuffle(deck) # Shuffle a list >>> deck ['four', 'two', 'ace', 'three'] - >>> sample([10, 20, 30, 40, 50], k=4) # Four samples without replacement + >>> sample([10, 20, 30, 40, 50], k=4) # Four samples without replacement [40, 10, 50, 30] Simulations:: @@ -572,14 +601,14 @@ Simulation of arrival times and service deliveries for a multiserver queue:: including simulation, sampling, shuffling, and cross-validation. `Economics Simulation - `_ + `_ a simulation of a marketplace by `Peter Norvig `_ that shows effective use of many of the tools and distributions provided by this module (gauss, uniform, sample, betavariate, choice, triangular, and randrange). 
`A Concrete Introduction to Probability (using Python) - `_ + `_ a tutorial by `Peter Norvig `_ covering the basics of probability theory, how to write simulations, and how to perform data analysis using Python. diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 9953f2ea9ed4d5..ab6baf819de97a 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -71,7 +71,6 @@ Doc/library/profile.rst Doc/library/pyclbr.rst Doc/library/pydoc.rst Doc/library/pyexpat.rst -Doc/library/random.rst Doc/library/readline.rst Doc/library/resource.rst Doc/library/select.rst From db1c88223986efe3076eb3b229a8b6db59bae284 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Thu, 28 Dec 2023 14:36:20 -0500 Subject: [PATCH 12/71] Doc/library/os.rst: `os.waitid` absent on MacOS (#104558) * Doc/library/os.rst: `os.waitid` absent on MacOS Co-authored-by: AN Long --- Doc/library/os.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 6b6e62a683ab18..2af61f2960cc63 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -4986,6 +4986,9 @@ written in Python, such as a mail server's external command delivery program. .. availability:: Unix, not Emscripten, not WASI. + .. note:: + This function is not available on macOS. + .. versionadded:: 3.3 From b664d9159964f0609d50dabd02f71af0227d8718 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 28 Dec 2023 22:23:01 +0000 Subject: [PATCH 13/71] GH-113225: Speed up `pathlib._abc.PathBase.glob()` (#113556) `PathBase._scandir()` is implemented using `iterdir()`, so we can use its results directly, rather than passing them through `_make_child_relpath()`. 
--- Lib/pathlib/__init__.py | 4 ++++ Lib/pathlib/_abc.py | 13 ++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index ab87b49d0277f3..2b4193c400a099 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -299,6 +299,10 @@ def iterdir(self): def _scandir(self): return os.scandir(self) + def _make_child_entry(self, entry): + # Transform an entry yielded from _scandir() into a path object. + return self._make_child_relpath(entry.name) + def absolute(self): """Return an absolute version of this path No normalization or symlink resolution is performed. diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index efe56ec565c162..f75b20a1d5f1e5 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -87,9 +87,8 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match): continue except OSError: continue - name = entry.name - if match(name): - yield parent_path._make_child_relpath(name) + if match(entry.name): + yield parent_path._make_child_entry(entry) def _select_recursive(parent_paths, dir_only, follow_symlinks): @@ -112,12 +111,12 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks): for entry in entries: try: if entry.is_dir(follow_symlinks=follow_symlinks): - paths.append(path._make_child_relpath(entry.name)) + paths.append(path._make_child_entry(entry)) continue except OSError: pass if not dir_only: - yield path._make_child_relpath(entry.name) + yield path._make_child_entry(entry) def _select_unique(paths): @@ -788,6 +787,10 @@ def _scandir(self): from contextlib import nullcontext return nullcontext(self.iterdir()) + def _make_child_entry(self, entry): + # Transform an entry yielded from _scandir() into a path object. 
+ return entry + def _make_child_relpath(self, name): path_str = str(self) tail = self._tail From 6ca0e6754eedf4c9cf48794fa6c27281668b8d7c Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 28 Dec 2023 22:44:29 +0000 Subject: [PATCH 14/71] GH-113528: Remove a couple of expensive pathlib ABC tests (#113534) Run expensive tests for walking and globbing from `test_pathlib` but not `test_pathlib_abc`. The ABCs are not as tightly optimised as the classes in top-level `pathlib`, and so these tests are taking rather a long time on some buildbots. Coverage of the main `pathlib` classes should suffice. --- Lib/test/test_pathlib/test_pathlib.py | 43 +++++++++++++++++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 42 ---------------------- 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index db5f3b2634be97..8f95c804f80e69 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -15,6 +15,7 @@ from test.support import import_helper from test.support import is_emscripten, is_wasi +from test.support import set_recursion_limit from test.support import os_helper from test.support.os_helper import TESTFN, FakePath from test.test_pathlib import test_pathlib_abc @@ -1660,6 +1661,48 @@ def test_walk_many_open_files(self): self.assertEqual(next(it), expected) path = path / 'd' + def test_walk_above_recursion_limit(self): + recursion_limit = 40 + # directory_depth > recursion_limit + directory_depth = recursion_limit + 10 + base = self.cls(self.base, 'deep') + path = base.joinpath(*(['d'] * directory_depth)) + path.mkdir(parents=True) + + with set_recursion_limit(recursion_limit): + list(base.walk()) + list(base.walk(top_down=False)) + + def test_glob_many_open_files(self): + depth = 30 + P = self.cls + p = base = P(self.base) / 'deep' + p.mkdir() + for _ in range(depth): + p /= 'd' + p.mkdir() + pattern = '/'.join(['*'] * depth) + iters = 
[base.glob(pattern) for j in range(100)] + for it in iters: + self.assertEqual(next(it), p) + iters = [base.rglob('d') for j in range(100)] + p = base + for i in range(depth): + p = p / 'd' + for it in iters: + self.assertEqual(next(it), p) + + def test_glob_above_recursion_limit(self): + recursion_limit = 50 + # directory_depth > recursion_limit + directory_depth = recursion_limit + 10 + base = self.cls(self.base, 'deep') + path = base.joinpath(*(['d'] * directory_depth)) + path.mkdir(parents=True) + + with set_recursion_limit(recursion_limit): + list(base.glob('**/')) + @only_posix class PosixPathTest(PathTest, PurePosixPathTest): diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 568a3183b40b8d..e4a4e81e547cd1 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -8,7 +8,6 @@ from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase import posixpath -from test.support import set_recursion_limit from test.support.os_helper import TESTFN @@ -1224,25 +1223,6 @@ def test_rglob_symlink_loop(self): } self.assertEqual(given, {p / x for x in expect}) - def test_glob_many_open_files(self): - depth = 30 - P = self.cls - p = base = P(self.base) / 'deep' - p.mkdir() - for _ in range(depth): - p /= 'd' - p.mkdir() - pattern = '/'.join(['*'] * depth) - iters = [base.glob(pattern) for j in range(100)] - for it in iters: - self.assertEqual(next(it), p) - iters = [base.rglob('d') for j in range(100)] - p = base - for i in range(depth): - p = p / 'd' - for it in iters: - self.assertEqual(next(it), p) - def test_glob_dotdot(self): # ".." is not special in globs. 
P = self.cls @@ -1286,17 +1266,6 @@ def test_glob_long_symlink(self): bad_link.symlink_to("bad" * 200) self.assertEqual(sorted(base.glob('**/*')), [bad_link]) - def test_glob_above_recursion_limit(self): - recursion_limit = 50 - # directory_depth > recursion_limit - directory_depth = recursion_limit + 10 - base = self.cls(self.base, 'deep') - path = base.joinpath(*(['d'] * directory_depth)) - path.mkdir(parents=True) - - with set_recursion_limit(recursion_limit): - list(base.glob('**/')) - def test_glob_recursive_no_trailing_slash(self): P = self.cls p = P(self.base) @@ -1825,17 +1794,6 @@ def test_walk_symlink_location(self): else: self.fail("symlink not found") - def test_walk_above_recursion_limit(self): - recursion_limit = 40 - # directory_depth > recursion_limit - directory_depth = recursion_limit + 10 - base = self.cls(self.base, 'deep') - path = base.joinpath(*(['d'] * directory_depth)) - path.mkdir(parents=True) - - with set_recursion_limit(recursion_limit): - list(base.walk()) - list(base.walk(top_down=False)) class DummyPathWithSymlinks(DummyPath): def readlink(self): From cf34b7704be4c97d0479c04df0d9cd8fe210e5f4 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Fri, 29 Dec 2023 16:13:46 +0300 Subject: [PATCH 15/71] gh-103092: Make ``pyexpat`` module importable in sub-interpreters (#113555) --- Modules/pyexpat.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 9d95309dbb7aa6..ec44892d101e44 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -2062,9 +2062,7 @@ pyexpat_free(void *module) static PyModuleDef_Slot pyexpat_slots[] = { {Py_mod_exec, pyexpat_exec}, - // XXX gh-103092: fix isolation. 
- {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, - //{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {0, NULL} }; From f46987b8281148503568516c29a4a04a75aaba8d Mon Sep 17 00:00:00 2001 From: Ankit Kumar Pandey <93041495+itsankitkp@users.noreply.github.com> Date: Fri, 29 Dec 2023 23:25:17 +0530 Subject: [PATCH 16/71] gh-103708: Make directory layout in sysconfig implementation configurable (#103709) --- Lib/site.py | 16 +++- Lib/sysconfig/__init__.py | 74 ++++++++++--------- ...-04-23-11-08-02.gh-issue-103708.Y17C7p.rst | 1 + 3 files changed, 52 insertions(+), 39 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-04-23-11-08-02.gh-issue-103708.Y17C7p.rst diff --git a/Lib/site.py b/Lib/site.py index 2517b7e5f1d22a..6f5738b02cb23b 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -260,6 +260,10 @@ def check_enableusersite(): # # See https://bugs.python.org/issue29585 +# Copy of sysconfig._get_implementation() +def _get_implementation(): + return 'Python' + # Copy of sysconfig._getuserbase() def _getuserbase(): env_base = os.environ.get("PYTHONUSERBASE", None) @@ -275,7 +279,7 @@ def joinuser(*args): if os.name == "nt": base = os.environ.get("APPDATA") or "~" - return joinuser(base, "Python") + return joinuser(base, _get_implementation()) if sys.platform == "darwin" and sys._framework: return joinuser("~", "Library", sys._framework, @@ -288,12 +292,14 @@ def joinuser(*args): def _get_path(userbase): version = sys.version_info + implementation = _get_implementation() + implementation_lower = implementation.lower() if os.name == 'nt': ver_nodot = sys.winver.replace('.', '') - return f'{userbase}\\Python{ver_nodot}\\site-packages' + return f'{userbase}\\{implementation}{ver_nodot}\\site-packages' if sys.platform == 'darwin' and sys._framework: - return f'{userbase}/lib/python/site-packages' + return 
f'{userbase}/lib/{implementation_lower}/site-packages' return f'{userbase}/lib/python{version[0]}.{version[1]}/site-packages' @@ -361,6 +367,8 @@ def getsitepackages(prefixes=None): continue seen.add(prefix) + implementation = _get_implementation().lower() + ver = sys.version_info if os.sep == '/': libdirs = [sys.platlibdir] if sys.platlibdir != "lib": @@ -368,7 +376,7 @@ def getsitepackages(prefixes=None): for libdir in libdirs: path = os.path.join(prefix, libdir, - "python%d.%d" % sys.version_info[:2], + f"{implementation}{ver[0]}.{ver[1]}", "site-packages") sitepackages.append(path) else: diff --git a/Lib/sysconfig/__init__.py b/Lib/sysconfig/__init__.py index deb438c705f3a0..07ab27c7fb0c35 100644 --- a/Lib/sysconfig/__init__.py +++ b/Lib/sysconfig/__init__.py @@ -26,24 +26,24 @@ _INSTALL_SCHEMES = { 'posix_prefix': { - 'stdlib': '{installed_base}/{platlibdir}/python{py_version_short}', - 'platstdlib': '{platbase}/{platlibdir}/python{py_version_short}', - 'purelib': '{base}/lib/python{py_version_short}/site-packages', - 'platlib': '{platbase}/{platlibdir}/python{py_version_short}/site-packages', + 'stdlib': '{installed_base}/{platlibdir}/{implementation_lower}{py_version_short}', + 'platstdlib': '{platbase}/{platlibdir}/{implementation_lower}{py_version_short}', + 'purelib': '{base}/lib/{implementation_lower}{py_version_short}/site-packages', + 'platlib': '{platbase}/{platlibdir}/{implementation_lower}{py_version_short}/site-packages', 'include': - '{installed_base}/include/python{py_version_short}{abiflags}', + '{installed_base}/include/{implementation_lower}{py_version_short}{abiflags}', 'platinclude': - '{installed_platbase}/include/python{py_version_short}{abiflags}', + '{installed_platbase}/include/{implementation_lower}{py_version_short}{abiflags}', 'scripts': '{base}/bin', 'data': '{base}', }, 'posix_home': { - 'stdlib': '{installed_base}/lib/python', - 'platstdlib': '{base}/lib/python', - 'purelib': '{base}/lib/python', - 'platlib': '{base}/lib/python', 
- 'include': '{installed_base}/include/python', - 'platinclude': '{installed_base}/include/python', + 'stdlib': '{installed_base}/lib/{implementation_lower}', + 'platstdlib': '{base}/lib/{implementation_lower}', + 'purelib': '{base}/lib/{implementation_lower}', + 'platlib': '{base}/lib/{implementation_lower}', + 'include': '{installed_base}/include/{implementation_lower}', + 'platinclude': '{installed_base}/include/{implementation_lower}', 'scripts': '{base}/bin', 'data': '{base}', }, @@ -75,14 +75,14 @@ # Downstream distributors who patch posix_prefix/nt scheme are encouraged to # leave the following schemes unchanged 'posix_venv': { - 'stdlib': '{installed_base}/{platlibdir}/python{py_version_short}', - 'platstdlib': '{platbase}/{platlibdir}/python{py_version_short}', - 'purelib': '{base}/lib/python{py_version_short}/site-packages', - 'platlib': '{platbase}/{platlibdir}/python{py_version_short}/site-packages', + 'stdlib': '{installed_base}/{platlibdir}/{implementation_lower}{py_version_short}', + 'platstdlib': '{platbase}/{platlibdir}/{implementation_lower}{py_version_short}', + 'purelib': '{base}/lib/{implementation_lower}{py_version_short}/site-packages', + 'platlib': '{platbase}/{platlibdir}/{implementation_lower}{py_version_short}/site-packages', 'include': - '{installed_base}/include/python{py_version_short}{abiflags}', + '{installed_base}/include/{implementation_lower}{py_version_short}{abiflags}', 'platinclude': - '{installed_platbase}/include/python{py_version_short}{abiflags}', + '{installed_platbase}/include/{implementation_lower}{py_version_short}{abiflags}', 'scripts': '{base}/bin', 'data': '{base}', }, @@ -104,6 +104,8 @@ else: _INSTALL_SCHEMES['venv'] = _INSTALL_SCHEMES['posix_venv'] +def _get_implementation(): + return 'Python' # NOTE: site.py has copy of this function. # Sync it when modify this function. 
@@ -121,7 +123,7 @@ def joinuser(*args): if os.name == "nt": base = os.environ.get("APPDATA") or "~" - return joinuser(base, "Python") + return joinuser(base, _get_implementation()) if sys.platform == "darwin" and sys._framework: return joinuser("~", "Library", sys._framework, @@ -135,29 +137,29 @@ def joinuser(*args): _INSTALL_SCHEMES |= { # NOTE: When modifying "purelib" scheme, update site._get_path() too. 'nt_user': { - 'stdlib': '{userbase}/Python{py_version_nodot_plat}', - 'platstdlib': '{userbase}/Python{py_version_nodot_plat}', - 'purelib': '{userbase}/Python{py_version_nodot_plat}/site-packages', - 'platlib': '{userbase}/Python{py_version_nodot_plat}/site-packages', - 'include': '{userbase}/Python{py_version_nodot_plat}/Include', - 'scripts': '{userbase}/Python{py_version_nodot_plat}/Scripts', + 'stdlib': '{userbase}/{implementation}{py_version_nodot_plat}', + 'platstdlib': '{userbase}/{implementation}{py_version_nodot_plat}', + 'purelib': '{userbase}/{implementation}{py_version_nodot_plat}/site-packages', + 'platlib': '{userbase}/{implementation}{py_version_nodot_plat}/site-packages', + 'include': '{userbase}/{implementation}{py_version_nodot_plat}/Include', + 'scripts': '{userbase}/{implementation}{py_version_nodot_plat}/Scripts', 'data': '{userbase}', }, 'posix_user': { - 'stdlib': '{userbase}/{platlibdir}/python{py_version_short}', - 'platstdlib': '{userbase}/{platlibdir}/python{py_version_short}', - 'purelib': '{userbase}/lib/python{py_version_short}/site-packages', - 'platlib': '{userbase}/lib/python{py_version_short}/site-packages', - 'include': '{userbase}/include/python{py_version_short}', + 'stdlib': '{userbase}/{platlibdir}/{implementation_lower}{py_version_short}', + 'platstdlib': '{userbase}/{platlibdir}/{implementation_lower}{py_version_short}', + 'purelib': '{userbase}/lib/{implementation_lower}{py_version_short}/site-packages', + 'platlib': '{userbase}/lib/{implementation_lower}{py_version_short}/site-packages', + 'include': 
'{userbase}/include/{implementation_lower}{py_version_short}', 'scripts': '{userbase}/bin', 'data': '{userbase}', }, 'osx_framework_user': { - 'stdlib': '{userbase}/lib/python', - 'platstdlib': '{userbase}/lib/python', - 'purelib': '{userbase}/lib/python/site-packages', - 'platlib': '{userbase}/lib/python/site-packages', - 'include': '{userbase}/include/python{py_version_short}', + 'stdlib': '{userbase}/lib/{implementation_lower}', + 'platstdlib': '{userbase}/lib/{implementation_lower}', + 'purelib': '{userbase}/lib/{implementation_lower}/site-packages', + 'platlib': '{userbase}/lib/{implementation_lower}/site-packages', + 'include': '{userbase}/include/{implementation_lower}{py_version_short}', 'scripts': '{userbase}/bin', 'data': '{userbase}', }, @@ -459,6 +461,8 @@ def _init_config_vars(): _CONFIG_VARS['platbase'] = _EXEC_PREFIX _CONFIG_VARS['projectbase'] = _PROJECT_BASE _CONFIG_VARS['platlibdir'] = sys.platlibdir + _CONFIG_VARS['implementation'] = _get_implementation() + _CONFIG_VARS['implementation_lower'] = _get_implementation().lower() try: _CONFIG_VARS['abiflags'] = sys.abiflags except AttributeError: diff --git a/Misc/NEWS.d/next/Library/2023-04-23-11-08-02.gh-issue-103708.Y17C7p.rst b/Misc/NEWS.d/next/Library/2023-04-23-11-08-02.gh-issue-103708.Y17C7p.rst new file mode 100644 index 00000000000000..4b7d747175df03 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-04-23-11-08-02.gh-issue-103708.Y17C7p.rst @@ -0,0 +1 @@ +Make hardcoded python name, a configurable parameter so that different implementations of python can override it instead of making huge diffs in sysconfig.py From 88cb9720001295f82c7771ab4ebf20f3cd0b31fb Mon Sep 17 00:00:00 2001 From: Samet YASLAN Date: Sat, 30 Dec 2023 09:17:02 +0100 Subject: [PATCH 17/71] gh-112536: Add support for thread sanitizer (TSAN) (gh-112648) --- Doc/using/configure.rst | 7 ++++++ Include/pyport.h | 5 ++++ Lib/test/libregrtest/utils.py | 7 ++++++ Lib/test/support/__init__.py | 19 ++++++++------ Lib/test/test_io.py 
| 9 ++++--- ...-12-17-18-23-02.gh-issue-112536.8lr3Ep.rst | 1 + configure | 25 +++++++++++++++++++ configure.ac | 18 +++++++++++++ 8 files changed, 81 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2023-12-17-18-23-02.gh-issue-112536.8lr3Ep.rst diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index cb7eda42fe3fad..aab9469b44828a 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -745,6 +745,13 @@ Debug options .. versionadded:: 3.6 +.. option:: --with-thread-sanitizer + + Enable ThreadSanitizer data race detector, ``tsan`` + (default is no). + + .. versionadded:: 3.13 + Linker options -------------- diff --git a/Include/pyport.h b/Include/pyport.h index 328471085f959d..9d7ef0061806ad 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -563,6 +563,11 @@ extern "C" { # define _Py_ADDRESS_SANITIZER # endif # endif +# if __has_feature(thread_sanitizer) +# if !defined(_Py_THREAD_SANITIZER) +# define _Py_THREAD_SANITIZER +# endif +# endif #elif defined(__GNUC__) # if defined(__SANITIZE_ADDRESS__) # define _Py_ADDRESS_SANITIZER diff --git a/Lib/test/libregrtest/utils.py b/Lib/test/libregrtest/utils.py index 26481e71221ade..b30025d962413c 100644 --- a/Lib/test/libregrtest/utils.py +++ b/Lib/test/libregrtest/utils.py @@ -340,6 +340,9 @@ def get_build_info(): # --with-undefined-behavior-sanitizer if support.check_sanitizer(ub=True): sanitizers.append("UBSAN") + # --with-thread-sanitizer + if support.check_sanitizer(thread=True): + sanitizers.append("TSAN") if sanitizers: build.append('+'.join(sanitizers)) @@ -634,6 +637,7 @@ def display_header(use_resources: tuple[str, ...], asan = support.check_sanitizer(address=True) msan = support.check_sanitizer(memory=True) ubsan = support.check_sanitizer(ub=True) + tsan = support.check_sanitizer(thread=True) sanitizers = [] if asan: sanitizers.append("address") @@ -641,12 +645,15 @@ def display_header(use_resources: tuple[str, ...], sanitizers.append("memory") if ubsan: 
sanitizers.append("undefined behavior") + if tsan: + sanitizers.append("thread") if sanitizers: print(f"== sanitizers: {', '.join(sanitizers)}") for sanitizer, env_var in ( (asan, "ASAN_OPTIONS"), (msan, "MSAN_OPTIONS"), (ubsan, "UBSAN_OPTIONS"), + (tsan, "TSAN_OPTIONS"), ): options= os.environ.get(env_var) if sanitizer and options is not None: diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index c8f73cede230d8..e5fb725a30b5b8 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -392,10 +392,10 @@ def skip_if_buildbot(reason=None): isbuildbot = False return unittest.skipIf(isbuildbot, reason) -def check_sanitizer(*, address=False, memory=False, ub=False): +def check_sanitizer(*, address=False, memory=False, ub=False, thread=False): """Returns True if Python is compiled with sanitizer support""" - if not (address or memory or ub): - raise ValueError('At least one of address, memory, or ub must be True') + if not (address or memory or ub or thread): + raise ValueError('At least one of address, memory, ub or thread must be True') cflags = sysconfig.get_config_var('CFLAGS') or '' @@ -412,18 +412,23 @@ def check_sanitizer(*, address=False, memory=False, ub=False): '-fsanitize=undefined' in cflags or '--with-undefined-behavior-sanitizer' in config_args ) + thread_sanitizer = ( + '-fsanitize=thread' in cflags or + '--with-thread-sanitizer' in config_args + ) return ( (memory and memory_sanitizer) or (address and address_sanitizer) or - (ub and ub_sanitizer) + (ub and ub_sanitizer) or + (thread and thread_sanitizer) ) -def skip_if_sanitizer(reason=None, *, address=False, memory=False, ub=False): +def skip_if_sanitizer(reason=None, *, address=False, memory=False, ub=False, thread=False): """Decorator raising SkipTest if running with a sanitizer active.""" if not reason: reason = 'not working with sanitizers active' - skip = check_sanitizer(address=address, memory=memory, ub=ub) + skip = check_sanitizer(address=address, 
memory=memory, ub=ub, thread=thread) return unittest.skipIf(skip, reason) # gh-89363: True if fork() can hang if Python is built with Address Sanitizer @@ -432,7 +437,7 @@ def skip_if_sanitizer(reason=None, *, address=False, memory=False, ub=False): def set_sanitizer_env_var(env, option): - for name in ('ASAN_OPTIONS', 'MSAN_OPTIONS', 'UBSAN_OPTIONS'): + for name in ('ASAN_OPTIONS', 'MSAN_OPTIONS', 'UBSAN_OPTIONS', 'TSAN_OPTIONS'): if name in env: env[name] += f':{option}' else: diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 09cced9baef99b..1d78876f2a1c84 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -1654,7 +1654,8 @@ def test_truncate_on_read_only(self): class CBufferedReaderTest(BufferedReaderTest, SizeofTest): tp = io.BufferedReader - @skip_if_sanitizer(memory=True, address=True, reason= "sanitizer defaults to crashing " + @skip_if_sanitizer(memory=True, address=True, thread=True, + reason="sanitizer defaults to crashing " "instead of returning NULL for malloc failure.") def test_constructor(self): BufferedReaderTest.test_constructor(self) @@ -2021,7 +2022,8 @@ def test_slow_close_from_thread(self): class CBufferedWriterTest(BufferedWriterTest, SizeofTest): tp = io.BufferedWriter - @skip_if_sanitizer(memory=True, address=True, reason= "sanitizer defaults to crashing " + @skip_if_sanitizer(memory=True, address=True, thread=True, + reason="sanitizer defaults to crashing " "instead of returning NULL for malloc failure.") def test_constructor(self): BufferedWriterTest.test_constructor(self) @@ -2520,7 +2522,8 @@ def test_interleaved_readline_write(self): class CBufferedRandomTest(BufferedRandomTest, SizeofTest): tp = io.BufferedRandom - @skip_if_sanitizer(memory=True, address=True, reason= "sanitizer defaults to crashing " + @skip_if_sanitizer(memory=True, address=True, thread=True, + reason="sanitizer defaults to crashing " "instead of returning NULL for malloc failure.") def test_constructor(self): 
BufferedRandomTest.test_constructor(self) diff --git a/Misc/NEWS.d/next/Build/2023-12-17-18-23-02.gh-issue-112536.8lr3Ep.rst b/Misc/NEWS.d/next/Build/2023-12-17-18-23-02.gh-issue-112536.8lr3Ep.rst new file mode 100644 index 00000000000000..a136eb47584993 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2023-12-17-18-23-02.gh-issue-112536.8lr3Ep.rst @@ -0,0 +1 @@ +Add support for thread sanitizer (TSAN) diff --git a/configure b/configure index 3322b7a682dd25..3cc9aecafad13e 100755 --- a/configure +++ b/configure @@ -1082,6 +1082,7 @@ with_dsymutil with_address_sanitizer with_memory_sanitizer with_undefined_behavior_sanitizer +with_thread_sanitizer with_hash_algorithm with_tzpath with_libs @@ -1860,6 +1861,8 @@ Optional Packages: --with-undefined-behavior-sanitizer enable UndefinedBehaviorSanitizer undefined behaviour detector, 'ubsan' (default is no) + --with-thread-sanitizer enable ThreadSanitizer data race detector, 'tsan' + (default is no) --with-hash-algorithm=[fnv|siphash13|siphash24] select hash algorithm for use in Python/pyhash.c (default is SipHash13) @@ -12506,6 +12509,28 @@ with_ubsan="no" fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --with-thread-sanitizer" >&5 +printf %s "checking for --with-thread-sanitizer... " >&6; } + +# Check whether --with-thread_sanitizer was given. +if test ${with_thread_sanitizer+y} +then : + withval=$with_thread_sanitizer; +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $withval" >&5 +printf "%s\n" "$withval" >&6; } +BASECFLAGS="-fsanitize=thread $BASECFLAGS" +LDFLAGS="-fsanitize=thread $LDFLAGS" +with_tsan="yes" + +else $as_nop + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +with_tsan="no" + +fi + + # Set info about shared libraries. 
diff --git a/configure.ac b/configure.ac index 13a6d746763d62..6a80a5d29a04ef 100644 --- a/configure.ac +++ b/configure.ac @@ -3067,6 +3067,24 @@ AC_MSG_RESULT([no]) with_ubsan="no" ]) +AC_MSG_CHECKING([for --with-thread-sanitizer]) +AC_ARG_WITH( + [thread_sanitizer], + [AS_HELP_STRING( + [--with-thread-sanitizer], + [enable ThreadSanitizer data race detector, 'tsan' (default is no)] + )], +[ +AC_MSG_RESULT([$withval]) +BASECFLAGS="-fsanitize=thread $BASECFLAGS" +LDFLAGS="-fsanitize=thread $LDFLAGS" +with_tsan="yes" +], +[ +AC_MSG_RESULT([no]) +with_tsan="no" +]) + # Set info about shared libraries. AC_SUBST([SHLIB_SUFFIX]) AC_SUBST([LDSHARED]) From f48a1bcb2914addee971814fd014e4d8075ea6a9 Mon Sep 17 00:00:00 2001 From: Ronald Oussoren Date: Sat, 30 Dec 2023 16:19:47 +0100 Subject: [PATCH 18/71] gh-89414: Document that SIGCLD is not available on macOS (#113580) Document that SIGCLD is not available on macOS --- Doc/library/signal.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Doc/library/signal.rst b/Doc/library/signal.rst index 7ee5ece8859825..85a073aad233ac 100644 --- a/Doc/library/signal.rst +++ b/Doc/library/signal.rst @@ -157,6 +157,8 @@ The variables defined in the :mod:`signal` module are: Alias to :data:`SIGCHLD`. + .. availability:: not macOS. + .. data:: SIGCONT Continue the process if it is currently stopped From 471aa752415029c508693fa7971076f5148022a6 Mon Sep 17 00:00:00 2001 From: Delgan <4193924+Delgan@users.noreply.github.com> Date: Sun, 31 Dec 2023 00:18:06 +0100 Subject: [PATCH 19/71] Update ConfigParser docs defining valid section name (#110506) --- Doc/library/configparser.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/configparser.rst b/Doc/library/configparser.rst index 12eee47613d186..4d0dad20287a90 100644 --- a/Doc/library/configparser.rst +++ b/Doc/library/configparser.rst @@ -271,7 +271,7 @@ out. 
Values can also span multiple lines, as long as they are indented deeper than the first line of the value. Depending on the parser's mode, blank lines may be treated as parts of multiline values or ignored. -By default, a valid section name can be any string that does not contain '\\n' or ']'. +By default, a valid section name can be any string that does not contain '\\n'. To change this, see :attr:`ConfigParser.SECTCRE`. Configuration files may include comments, prefixed by specific From 30a6d79fb8bc1ef96600c290c016720103b74b2d Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 31 Dec 2023 19:57:33 +0200 Subject: [PATCH 20/71] gh-101100: Fix Sphinx warnings in `library/configparser.rst` (#113598) Co-authored-by: Alex Waygood --- Doc/library/configparser.rst | 30 +++++++++++++++--------------- Doc/library/logging.config.rst | 6 +++--- Doc/tools/.nitignore | 1 - Doc/whatsnew/2.0.rst | 2 +- Doc/whatsnew/2.4.rst | 4 ++-- Doc/whatsnew/2.7.rst | 4 ++-- Doc/whatsnew/3.11.rst | 2 +- Doc/whatsnew/3.2.rst | 10 +++++----- Misc/NEWS.d/3.11.0a1.rst | 6 +++--- Misc/NEWS.d/3.11.0a6.rst | 2 +- Misc/NEWS.d/3.8.0a1.rst | 4 ++-- 11 files changed, 35 insertions(+), 36 deletions(-) diff --git a/Doc/library/configparser.rst b/Doc/library/configparser.rst index 4d0dad20287a90..0031737853e7b4 100644 --- a/Doc/library/configparser.rst +++ b/Doc/library/configparser.rst @@ -208,7 +208,7 @@ converters and customize the provided ones. [1]_ Fallback Values --------------- -As with a dictionary, you can use a section's :meth:`get` method to +As with a dictionary, you can use a section's :meth:`~ConfigParser.get` method to provide fallback values: .. 
doctest:: @@ -232,7 +232,7 @@ even if we specify a fallback: >>> topsecret.get('CompressionLevel', '3') '9' -One more thing to be aware of is that the parser-level :meth:`get` method +One more thing to be aware of is that the parser-level :meth:`~ConfigParser.get` method provides a custom, more complex interface, maintained for backwards compatibility. When using this method, a fallback value can be provided via the ``fallback`` keyword-only argument: @@ -481,7 +481,7 @@ historical background and it's very likely that you will want to customize some of the features. The most common way to change the way a specific config parser works is to use -the :meth:`__init__` options: +the :meth:`!__init__` options: * *defaults*, default value: ``None`` @@ -491,7 +491,7 @@ the :meth:`__init__` options: the documented default. Hint: if you want to specify default values for a specific section, use - :meth:`read_dict` before you read the actual file. + :meth:`~ConfigParser.read_dict` before you read the actual file. * *dict_type*, default value: :class:`dict` @@ -635,8 +635,8 @@ the :meth:`__init__` options: * *strict*, default value: ``True`` When set to ``True``, the parser will not allow for any section or option - duplicates while reading from a single source (using :meth:`read_file`, - :meth:`read_string` or :meth:`read_dict`). It is recommended to use strict + duplicates while reading from a single source (using :meth:`~ConfigParser.read_file`, + :meth:`~ConfigParser.read_string` or :meth:`~ConfigParser.read_dict`). It is recommended to use strict parsers in new applications. .. versionchanged:: 3.2 @@ -697,7 +697,7 @@ the :meth:`__init__` options: desirable, users may define them in a subclass or pass a dictionary where each key is a name of the converter and each value is a callable implementing said conversion. For instance, passing ``{'decimal': decimal.Decimal}`` would add - :meth:`getdecimal` on both the parser object and all section proxies. 
In + :meth:`!getdecimal` on both the parser object and all section proxies. In other words, it will be possible to write both ``parser_instance.getdecimal('section', 'key', fallback=0)`` and ``parser_instance['section'].getdecimal('key', 0)``. @@ -1062,11 +1062,11 @@ ConfigParser Objects yielding Unicode strings (for example files opened in text mode). Optional argument *source* specifies the name of the file being read. If - not given and *f* has a :attr:`name` attribute, that is used for + not given and *f* has a :attr:`!name` attribute, that is used for *source*; the default is ``''``. .. versionadded:: 3.2 - Replaces :meth:`readfp`. + Replaces :meth:`!readfp`. .. method:: read_string(string, source='') @@ -1214,7 +1214,7 @@ ConfigParser Objects .. data:: MAX_INTERPOLATION_DEPTH - The maximum depth for recursive interpolation for :meth:`get` when the *raw* + The maximum depth for recursive interpolation for :meth:`~configparser.ConfigParser.get` when the *raw* parameter is false. This is relevant only when the default *interpolation* is used. @@ -1287,13 +1287,13 @@ Exceptions .. exception:: DuplicateSectionError - Exception raised if :meth:`add_section` is called with the name of a section + Exception raised if :meth:`~ConfigParser.add_section` is called with the name of a section that is already present or in strict parsers when a section if found more than once in a single input file, string or dictionary. .. versionadded:: 3.2 Optional ``source`` and ``lineno`` attributes and arguments to - :meth:`__init__` were added. + :meth:`!__init__` were added. .. exception:: DuplicateOptionError @@ -1345,9 +1345,9 @@ Exceptions Exception raised when errors occur attempting to parse a file. -.. versionchanged:: 3.12 - The ``filename`` attribute and :meth:`__init__` constructor argument were - removed. They have been available using the name ``source`` since 3.2. + .. 
versionchanged:: 3.12 + The ``filename`` attribute and :meth:`!__init__` constructor argument were + removed. They have been available using the name ``source`` since 3.2. .. rubric:: Footnotes diff --git a/Doc/library/logging.config.rst b/Doc/library/logging.config.rst index 85a53e6aa7a78b..85a68cb11ee22c 100644 --- a/Doc/library/logging.config.rst +++ b/Doc/library/logging.config.rst @@ -93,8 +93,8 @@ in :mod:`logging` itself) and defining handlers which are declared either in :param fname: A filename, or a file-like object, or an instance derived from :class:`~configparser.RawConfigParser`. If a - ``RawConfigParser``-derived instance is passed, it is used as - is. Otherwise, a :class:`~configparser.Configparser` is + :class:`!RawConfigParser`-derived instance is passed, it is used as + is. Otherwise, a :class:`~configparser.ConfigParser` is instantiated, and the configuration read by it from the object passed in ``fname``. If that has a :meth:`readline` method, it is assumed to be a file-like object and read using @@ -103,7 +103,7 @@ in :mod:`logging` itself) and defining handlers which are declared either in :meth:`~configparser.ConfigParser.read`. - :param defaults: Defaults to be passed to the ConfigParser can be specified + :param defaults: Defaults to be passed to the :class:`!ConfigParser` can be specified in this argument. 
:param disable_existing_loggers: If specified as ``False``, loggers which diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index ab6baf819de97a..05df332fa7c9a8 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -33,7 +33,6 @@ Doc/library/asyncio-task.rst Doc/library/bdb.rst Doc/library/collections.rst Doc/library/concurrent.futures.rst -Doc/library/configparser.rst Doc/library/csv.rst Doc/library/datetime.rst Doc/library/dbm.rst diff --git a/Doc/whatsnew/2.0.rst b/Doc/whatsnew/2.0.rst index 6d6e51006d5bd8..b0e495b0651789 100644 --- a/Doc/whatsnew/2.0.rst +++ b/Doc/whatsnew/2.0.rst @@ -1030,7 +1030,7 @@ Module changes Lots of improvements and bugfixes were made to Python's extensive standard library; some of the affected modules include :mod:`readline`, -:mod:`ConfigParser`, :mod:`!cgi`, :mod:`calendar`, :mod:`posix`, :mod:`readline`, +:mod:`ConfigParser `, :mod:`!cgi`, :mod:`calendar`, :mod:`posix`, :mod:`readline`, :mod:`xmllib`, :mod:`!aifc`, :mod:`!chunk`, :mod:`wave`, :mod:`random`, :mod:`shelve`, and :mod:`!nntplib`. Consult the CVS logs for the exact patch-by-patch details. diff --git a/Doc/whatsnew/2.4.rst b/Doc/whatsnew/2.4.rst index bc748dd44f5f8e..6df59dd245ff55 100644 --- a/Doc/whatsnew/2.4.rst +++ b/Doc/whatsnew/2.4.rst @@ -1052,9 +1052,9 @@ complete list of changes, or look through the CVS logs for all the details. advantage of :class:`collections.deque` for improved performance. (Contributed by Raymond Hettinger.) -* The :mod:`ConfigParser` classes have been enhanced slightly. The :meth:`read` +* The :mod:`ConfigParser ` classes have been enhanced slightly. The :meth:`~configparser.ConfigParser.read` method now returns a list of the files that were successfully parsed, and the - :meth:`set` method raises :exc:`TypeError` if passed a *value* argument that + :meth:`~configparser.ConfigParser.set` method raises :exc:`TypeError` if passed a *value* argument that isn't a string. (Contributed by John Belmonte and David Goodger.) 
* The :mod:`curses` module now supports the ncurses extension diff --git a/Doc/whatsnew/2.7.rst b/Doc/whatsnew/2.7.rst index 5af700bd5d3506..81fe132d50e1f1 100644 --- a/Doc/whatsnew/2.7.rst +++ b/Doc/whatsnew/2.7.rst @@ -287,7 +287,7 @@ remains O(1). The standard library now supports use of ordered dictionaries in several modules. -* The :mod:`ConfigParser` module uses them by default, meaning that +* The :mod:`ConfigParser ` module uses them by default, meaning that configuration files can now be read, modified, and then written back in their original order. @@ -1134,7 +1134,7 @@ changes, or look through the Subversion logs for all the details. another type that isn't a :class:`Mapping`. (Fixed by Daniel Stutzbach; :issue:`8729`.) -* Constructors for the parsing classes in the :mod:`ConfigParser` module now +* Constructors for the parsing classes in the :mod:`ConfigParser ` module now take an *allow_no_value* parameter, defaulting to false; if true, options without values will be allowed. For example:: diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index 8db133b90a7a4b..cae5a26bae1148 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -1773,7 +1773,7 @@ Standard Library * the :class:`!configparser.SafeConfigParser` class * the :attr:`!configparser.ParsingError.filename` property - * the :meth:`configparser.RawConfigParser.readfp` method + * the :meth:`!configparser.RawConfigParser.readfp` method (Contributed by Hugo van Kemenade in :issue:`45173`.) diff --git a/Doc/whatsnew/3.2.rst b/Doc/whatsnew/3.2.rst index 5ef76968e9d86b..aad196478dd38b 100644 --- a/Doc/whatsnew/3.2.rst +++ b/Doc/whatsnew/3.2.rst @@ -183,7 +183,7 @@ PEP 391: Dictionary Based Configuration for Logging The :mod:`logging` module provided two kinds of configuration, one style with function calls for each option or another style driven by an external file saved -in a :mod:`ConfigParser` format. 
Those options did not provide the flexibility +in a :mod:`configparser` format. Those options did not provide the flexibility to create configurations from JSON or YAML files, nor did they support incremental configuration, which is needed for specifying logger options from a command line. @@ -2134,7 +2134,7 @@ configparser The :mod:`configparser` module was modified to improve usability and predictability of the default parser and its supported INI syntax. The old -:class:`ConfigParser` class was removed in favor of :class:`SafeConfigParser` +:class:`!ConfigParser` class was removed in favor of :class:`!SafeConfigParser` which has in turn been renamed to :class:`~configparser.ConfigParser`. Support for inline comments is now turned off by default and section or option duplicates are not allowed in a single configuration source. @@ -2414,7 +2414,7 @@ when one operand is much larger than the other (patch by Andress Bennetts in (:issue:`1569291` by Alexander Belopolsky). The :class:`BaseHTTPRequestHandler` has more efficient buffering (:issue:`3709` by Andrew Schaaf). The :func:`operator.attrgetter` function has been sped-up (:issue:`10160` by -Christos Georgiou). And :class:`ConfigParser` loads multi-line arguments a bit +Christos Georgiou). And :class:`~configparser.ConfigParser` loads multi-line arguments a bit faster (:issue:`7113` by Łukasz Langa). @@ -2614,8 +2614,8 @@ This section lists previously described changes and other bugfixes that may require changes to your code: * The :mod:`configparser` module has a number of clean-ups. The major change is - to replace the old :class:`ConfigParser` class with long-standing preferred - alternative :class:`SafeConfigParser`. In addition there are a number of + to replace the old :class:`!ConfigParser` class with long-standing preferred + alternative :class:`!SafeConfigParser`. 
In addition there are a number of smaller incompatibilities: * The interpolation syntax is now validated on diff --git a/Misc/NEWS.d/3.11.0a1.rst b/Misc/NEWS.d/3.11.0a1.rst index 26c44b6c1af0ed..1c96c0760a57b2 100644 --- a/Misc/NEWS.d/3.11.0a1.rst +++ b/Misc/NEWS.d/3.11.0a1.rst @@ -1642,9 +1642,9 @@ interval specified with nanosecond precision. .. nonce: UptGAn .. section: Library -Remove from the :mod:`configparser` module: the :class:`SafeConfigParser` -class, the :attr:`filename` property of the :class:`ParsingError` class, the -:meth:`readfp` method of the :class:`ConfigParser` class, deprecated since +Remove from the :mod:`configparser` module: the :class:`!SafeConfigParser` +class, the :attr:`!filename` property of the :class:`~configparser.ParsingError` class, the +:meth:`!readfp` method of the :class:`~configparser.ConfigParser` class, deprecated since Python 3.2. Patch by Hugo van Kemenade. diff --git a/Misc/NEWS.d/3.11.0a6.rst b/Misc/NEWS.d/3.11.0a6.rst index 52055b3fafd485..974d025c631a45 100644 --- a/Misc/NEWS.d/3.11.0a6.rst +++ b/Misc/NEWS.d/3.11.0a6.rst @@ -941,7 +941,7 @@ uvloop library. Make the :class:`configparser.ConfigParser` constructor raise :exc:`TypeError` if the ``interpolation`` parameter is not of type -:class:`configparser.Interpolation` +:class:`!configparser.Interpolation` .. diff --git a/Misc/NEWS.d/3.8.0a1.rst b/Misc/NEWS.d/3.8.0a1.rst index 2b9dbd5d63a87e..99f408661d9f69 100644 --- a/Misc/NEWS.d/3.8.0a1.rst +++ b/Misc/NEWS.d/3.8.0a1.rst @@ -5044,8 +5044,8 @@ functionality. .. nonce: C_K-J9 .. section: Library -`ConfigParser.items()` was fixed so that key-value pairs passed in via -`vars` are not included in the resulting output. +``ConfigParser.items()`` was fixed so that key-value pairs passed in via +:func:`vars` are not included in the resulting output. .. 
From 2849cbb53afc8c6a4465f1b3490c67c2455caf6f Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Sun, 31 Dec 2023 23:16:33 +0000 Subject: [PATCH 21/71] gh-101578: [doc] mention that PyErr_GetRaisedException returns NULL when the error indicator is not set (#113369) --- Doc/c-api/exceptions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index 284a9c71e420da..c7e3cd9463e5d7 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -440,7 +440,7 @@ Querying the error indicator .. c:function:: PyObject *PyErr_GetRaisedException(void) Return the exception currently being raised, clearing the error indicator at - the same time. + the same time. Return ``NULL`` if the error indicator is not set. This function is used by code that needs to catch exceptions, or code that needs to save and restore the error indicator temporarily. From 9ce6c01e38a2fc7a5ce832f1f8c8d9097132556d Mon Sep 17 00:00:00 2001 From: Parth Doshi Date: Mon, 1 Jan 2024 00:08:05 -0800 Subject: [PATCH 22/71] # gh-111700: Fix syntax highlighting for C code in the "What's New In Python 3.12" documentation (#113609) Fix PEP 684 syntax highlighting in what's new Python 3.12 --- Doc/whatsnew/3.12.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 8551b35438e2c3..9a2ccf7ebc6a68 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -343,7 +343,9 @@ cores. This is currently only available through the C-API, though a Python API is :pep:`anticipated for 3.13 <554>`. Use the new :c:func:`Py_NewInterpreterFromConfig` function to -create an interpreter with its own GIL:: +create an interpreter with its own GIL: + +.. 
code-block:: c PyInterpreterConfig config = { .check_multi_interp_extensions = 1, From 686d65aec1fa47ccc0e20f60d17c1b961183f8ea Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jan 2024 09:25:58 +0000 Subject: [PATCH 23/71] build(deps): bump actions/setup-python from 4 to 5 (#113612) Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4 to 5. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build.yml | 2 +- .github/workflows/lint.yml | 2 +- .github/workflows/mypy.yml | 2 +- .github/workflows/reusable-docs.yml | 4 ++-- .github/workflows/verify-ensurepip-wheels.yml | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index cfb36c8c32e18d..9f62c48b371902 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -128,7 +128,7 @@ jobs: if: needs.check_source.outputs.run_tests == 'true' steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.x' - name: Restore config.cache diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 6c1c29a58cf4fc..4a70ec6205a05b 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -20,7 +20,7 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: "3.x" - uses: pre-commit/action@v3.0.0 diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 792903a90a4880..11928e72b9b43a 100644 --- 
a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -46,7 +46,7 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: "3.11" cache: pip diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 1c4fa4239c1e34..e534751ee1011d 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -41,7 +41,7 @@ jobs: git fetch origin ${{ env.refspec_base }} --shallow-since="${DATE}" \ --no-tags --prune --no-recurse-submodules - name: 'Set up Python' - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3' cache: 'pip' @@ -72,7 +72,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: 'Set up Python' - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.11' # known to work with Sphinx 4.2 cache: 'pip' diff --git a/.github/workflows/verify-ensurepip-wheels.yml b/.github/workflows/verify-ensurepip-wheels.yml index 4a545037bf6e2b..83b007f1c9c2ef 100644 --- a/.github/workflows/verify-ensurepip-wheels.yml +++ b/.github/workflows/verify-ensurepip-wheels.yml @@ -26,7 +26,7 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3' - name: Compare checksum of bundled wheels to the ones published on PyPI From 9132f4287bf022a2fa79b2cc5f130df5188801ed Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jan 2024 09:30:30 +0000 Subject: [PATCH 24/71] build(deps): bump github/codeql-action from 2 to 3 (#113613) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2 to 3. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/v2...v3) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9f62c48b371902..e8b44a7c6952a4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -490,7 +490,7 @@ jobs: path: ./out/artifacts - name: Upload SARIF if: always() && steps.build.outcome == 'success' - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@v3 with: sarif_file: cifuzz-sarif/results.sarif checkout_path: cifuzz-sarif From 4036e48d59b0f9e8057e01458ab7df3dfd323a10 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jan 2024 09:42:44 +0000 Subject: [PATCH 25/71] build(deps): bump hypothesis from 6.91.0 to 6.92.2 in /Tools (#113615) Bumps [hypothesis](https://github.com/HypothesisWorks/hypothesis) from 6.91.0 to 6.92.2. - [Release notes](https://github.com/HypothesisWorks/hypothesis/releases) - [Commits](https://github.com/HypothesisWorks/hypothesis/compare/hypothesis-python-6.91.0...hypothesis-python-6.92.2) --- updated-dependencies: - dependency-name: hypothesis dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Tools/requirements-hypothesis.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/requirements-hypothesis.txt b/Tools/requirements-hypothesis.txt index 1bca5d2367f4b2..0e6e16ae198162 100644 --- a/Tools/requirements-hypothesis.txt +++ b/Tools/requirements-hypothesis.txt @@ -1,4 +1,4 @@ # Requirements file for hypothesis that # we use to run our property-based tests in CI. -hypothesis==6.91.0 +hypothesis==6.92.2 From 5f3cc90a12d6df404fd6f48a0df1334902e271f2 Mon Sep 17 00:00:00 2001 From: Jeffrey Kintscher <49998481+websurfer5@users.noreply.github.com> Date: Mon, 1 Jan 2024 08:24:24 -0800 Subject: [PATCH 26/71] gh-62260: Fix ctypes.Structure subclassing with multiple layers (GH-13374) The length field of StgDictObject for Structure class contains now the total number of items in ffi_type_pointer.elements (excluding the trailing null). The old behavior of using the number of elements in the parent class can cause the array to be truncated when it is copied, especially when there are multiple layers of subclassing. 
Co-authored-by: Serhiy Storchaka --- Lib/test/test_ctypes/test_structures.py | 63 ++++++++++++++++++- .../2019-05-17-07-22-33.bpo-18060.5mqTQM.rst | 2 + Modules/_ctypes/_ctypes.c | 10 +-- Modules/_ctypes/stgdict.c | 2 +- 4 files changed, 70 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-05-17-07-22-33.bpo-18060.5mqTQM.rst diff --git a/Lib/test/test_ctypes/test_structures.py b/Lib/test/test_ctypes/test_structures.py index 21039f04947507..3eafc77ca70aea 100644 --- a/Lib/test/test_ctypes/test_structures.py +++ b/Lib/test/test_ctypes/test_structures.py @@ -1,5 +1,5 @@ import _ctypes_test -import platform +from platform import architecture as _architecture import struct import sys import unittest @@ -8,6 +8,7 @@ c_uint8, c_uint16, c_uint32, c_short, c_ushort, c_int, c_uint, c_long, c_ulong, c_longlong, c_ulonglong, c_float, c_double) +from ctypes.util import find_library from struct import calcsize from collections import namedtuple from test import support @@ -472,6 +473,66 @@ class X(Structure): self.assertEqual(s.first, got.first) self.assertEqual(s.second, got.second) + def _test_issue18060(self, Vector): + # The call to atan2() should succeed if the + # class fields were correctly cloned in the + # subclasses. Otherwise, it will segfault. + if sys.platform == 'win32': + libm = CDLL(find_library('msvcrt.dll')) + else: + libm = CDLL(find_library('m')) + + libm.atan2.argtypes = [Vector] + libm.atan2.restype = c_double + + arg = Vector(y=0.0, x=-1.0) + self.assertAlmostEqual(libm.atan2(arg), 3.141592653589793) + + @unittest.skipIf(_architecture() == ('64bit', 'WindowsPE'), "can't test Windows x64 build") + @unittest.skipUnless(sys.byteorder == 'little', "can't test on this platform") + def test_issue18060_a(self): + # This test case calls + # PyCStructUnionType_update_stgdict() for each + # _fields_ assignment, and PyCStgDict_clone() + # for the Mid and Vector class definitions. 
+ class Base(Structure): + _fields_ = [('y', c_double), + ('x', c_double)] + class Mid(Base): + pass + Mid._fields_ = [] + class Vector(Mid): pass + self._test_issue18060(Vector) + + @unittest.skipIf(_architecture() == ('64bit', 'WindowsPE'), "can't test Windows x64 build") + @unittest.skipUnless(sys.byteorder == 'little', "can't test on this platform") + def test_issue18060_b(self): + # This test case calls + # PyCStructUnionType_update_stgdict() for each + # _fields_ assignment. + class Base(Structure): + _fields_ = [('y', c_double), + ('x', c_double)] + class Mid(Base): + _fields_ = [] + class Vector(Mid): + _fields_ = [] + self._test_issue18060(Vector) + + @unittest.skipIf(_architecture() == ('64bit', 'WindowsPE'), "can't test Windows x64 build") + @unittest.skipUnless(sys.byteorder == 'little', "can't test on this platform") + def test_issue18060_c(self): + # This test case calls + # PyCStructUnionType_update_stgdict() for each + # _fields_ assignment. + class Base(Structure): + _fields_ = [('y', c_double)] + class Mid(Base): + _fields_ = [] + class Vector(Mid): + _fields_ = [('x', c_double)] + self._test_issue18060(Vector) + def test_array_in_struct(self): # See bpo-22273 diff --git a/Misc/NEWS.d/next/Library/2019-05-17-07-22-33.bpo-18060.5mqTQM.rst b/Misc/NEWS.d/next/Library/2019-05-17-07-22-33.bpo-18060.5mqTQM.rst new file mode 100644 index 00000000000000..3fefbc3efb63c0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-05-17-07-22-33.bpo-18060.5mqTQM.rst @@ -0,0 +1,2 @@ +Fixed a class inheritance issue that can cause segfaults when deriving two or more levels of subclasses from a base class of Structure or Union. 
+ diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index f909a9496b6526..fc16b9176fd1c0 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -4354,10 +4354,10 @@ _init_pos_args(PyObject *self, PyTypeObject *type, return index; } - for (i = 0; - i < dict->length && (i+index) < PyTuple_GET_SIZE(args); + for (i = index; + i < dict->length && i < PyTuple_GET_SIZE(args); ++i) { - PyObject *pair = PySequence_GetItem(fields, i); + PyObject *pair = PySequence_GetItem(fields, i - index); PyObject *name, *val; int res; if (!pair) @@ -4367,7 +4367,7 @@ _init_pos_args(PyObject *self, PyTypeObject *type, Py_DECREF(pair); return -1; } - val = PyTuple_GET_ITEM(args, i + index); + val = PyTuple_GET_ITEM(args, i); if (kwds) { res = PyDict_Contains(kwds, name); if (res != 0) { @@ -4388,7 +4388,7 @@ _init_pos_args(PyObject *self, PyTypeObject *type, if (res == -1) return -1; } - return index + dict->length; + return dict->length; } static int diff --git a/Modules/_ctypes/stgdict.c b/Modules/_ctypes/stgdict.c index dfdb96b0e7258a..fb3e20e8db3e27 100644 --- a/Modules/_ctypes/stgdict.c +++ b/Modules/_ctypes/stgdict.c @@ -695,7 +695,7 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct stgdict->size = aligned_size; stgdict->align = total_align; - stgdict->length = len; /* ADD ffi_ofs? */ + stgdict->length = ffi_ofs + len; /* * The value of MAX_STRUCT_SIZE depends on the platform Python is running on. From d0b0e3d2eff30f699c620bc87c4dadd8cd4a77d5 Mon Sep 17 00:00:00 2001 From: Ronald Oussoren Date: Mon, 1 Jan 2024 19:38:29 +0100 Subject: [PATCH 27/71] gh-113536: Expose `os.waitid` on macOS (#113542) * gh-113536: Expose `os.waitid` on macOS This API has been available on macOS for a long time, but was explicitly excluded due to unspecified problems with the API in ancient versions of macOS. 
* Document that the API is available on macOS starting in Python 3.13 --- Doc/library/os.rst | 6 +++--- .../2023-12-28-12-18-39.gh-issue-113536.0ythg7.rst | 1 + Modules/clinic/posixmodule.c.h | 6 +++--- Modules/posixmodule.c | 14 +++++++------- 4 files changed, 14 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/macOS/2023-12-28-12-18-39.gh-issue-113536.0ythg7.rst diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 2af61f2960cc63..637191f2980a05 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -4986,11 +4986,11 @@ written in Python, such as a mail server's external command delivery program. .. availability:: Unix, not Emscripten, not WASI. - .. note:: - This function is not available on macOS. - .. versionadded:: 3.3 + .. versionchanged:: 3.13 + This function is now available on macOS as well. + .. function:: waitpid(pid, options, /) diff --git a/Misc/NEWS.d/next/macOS/2023-12-28-12-18-39.gh-issue-113536.0ythg7.rst b/Misc/NEWS.d/next/macOS/2023-12-28-12-18-39.gh-issue-113536.0ythg7.rst new file mode 100644 index 00000000000000..828b872d283627 --- /dev/null +++ b/Misc/NEWS.d/next/macOS/2023-12-28-12-18-39.gh-issue-113536.0ythg7.rst @@ -0,0 +1 @@ +:func:`os.waitid` is now available on macOS diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index b7639af4b78a9d..ba3e1cfa8dbc21 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -5467,7 +5467,7 @@ os_wait4(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw #endif /* defined(HAVE_WAIT4) */ -#if (defined(HAVE_WAITID) && !defined(__APPLE__)) +#if defined(HAVE_WAITID) PyDoc_STRVAR(os_waitid__doc__, "waitid($module, idtype, id, options, /)\n" @@ -5510,7 +5510,7 @@ os_waitid(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } -#endif /* (defined(HAVE_WAITID) && !defined(__APPLE__)) */ +#endif /* defined(HAVE_WAITID) */ #if defined(HAVE_WAITPID) @@ -12422,4 +12422,4 @@ 
os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=b82391c4f58231b6 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=18c128534c355d84 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index f4a18536e8f1e1..39b1f3cb7b2b9b 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -1024,7 +1024,7 @@ typedef struct { PyObject *TerminalSizeType; PyObject *TimesResultType; PyObject *UnameResultType; -#if defined(HAVE_WAITID) && !defined(__APPLE__) +#if defined(HAVE_WAITID) PyObject *WaitidResultType; #endif #if defined(HAVE_WAIT3) || defined(HAVE_WAIT4) @@ -2292,7 +2292,7 @@ static PyStructSequence_Desc statvfs_result_desc = { 10 }; -#if defined(HAVE_WAITID) && !defined(__APPLE__) +#if defined(HAVE_WAITID) PyDoc_STRVAR(waitid_result__doc__, "waitid_result: Result from waitid.\n\n\ This object may be accessed either as a tuple of\n\ @@ -2367,7 +2367,7 @@ _posix_clear(PyObject *module) Py_CLEAR(state->TerminalSizeType); Py_CLEAR(state->TimesResultType); Py_CLEAR(state->UnameResultType); -#if defined(HAVE_WAITID) && !defined(__APPLE__) +#if defined(HAVE_WAITID) Py_CLEAR(state->WaitidResultType); #endif #if defined(HAVE_WAIT3) || defined(HAVE_WAIT4) @@ -2392,7 +2392,7 @@ _posix_traverse(PyObject *module, visitproc visit, void *arg) Py_VISIT(state->TerminalSizeType); Py_VISIT(state->TimesResultType); Py_VISIT(state->UnameResultType); -#if defined(HAVE_WAITID) && !defined(__APPLE__) +#if defined(HAVE_WAITID) Py_VISIT(state->WaitidResultType); #endif #if defined(HAVE_WAIT3) || defined(HAVE_WAIT4) @@ -9518,7 +9518,7 @@ os_wait4_impl(PyObject *module, pid_t pid, int options) #endif /* HAVE_WAIT4 */ -#if defined(HAVE_WAITID) && !defined(__APPLE__) +#if defined(HAVE_WAITID) /*[clinic input] os.waitid @@ 
-9575,7 +9575,7 @@ os_waitid_impl(PyObject *module, idtype_t idtype, id_t id, int options) return result; } -#endif /* defined(HAVE_WAITID) && !defined(__APPLE__) */ +#endif /* defined(HAVE_WAITID) */ #if defined(HAVE_WAITPID) @@ -17309,7 +17309,7 @@ posixmodule_exec(PyObject *m) return -1; } -#if defined(HAVE_WAITID) && !defined(__APPLE__) +#if defined(HAVE_WAITID) waitid_result_desc.name = MODNAME ".waitid_result"; state->WaitidResultType = (PyObject *)PyStructSequence_NewType(&waitid_result_desc); if (PyModule_AddObjectRef(m, "waitid_result", state->WaitidResultType) < 0) { From b4b2cc101216ae1017898dfbe43c90da2fd0a308 Mon Sep 17 00:00:00 2001 From: AN Long Date: Tue, 2 Jan 2024 02:51:24 +0800 Subject: [PATCH 28/71] gh-53502: add a new option aware_datetime in plistlib to loads or dumps aware datetime. (#113363) * add options to loads and dumps aware datetime in plistlib --- Doc/library/plistlib.rst | 22 +++++- Lib/plistlib.py | 60 ++++++++++----- Lib/test/test_plistlib.py | 73 +++++++++++++++++++ ...3-12-21-23-47-42.gh-issue-53502.dercJI.rst | 2 + 4 files changed, 134 insertions(+), 23 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-12-21-23-47-42.gh-issue-53502.dercJI.rst diff --git a/Doc/library/plistlib.rst b/Doc/library/plistlib.rst index 732ef3536863cc..10f1a48fc70a72 100644 --- a/Doc/library/plistlib.rst +++ b/Doc/library/plistlib.rst @@ -52,7 +52,7 @@ or :class:`datetime.datetime` objects. This module defines the following functions: -.. function:: load(fp, *, fmt=None, dict_type=dict) +.. function:: load(fp, *, fmt=None, dict_type=dict, aware_datetime=False) Read a plist file. *fp* should be a readable and binary file object. Return the unpacked root object (which usually is a @@ -69,6 +69,10 @@ This module defines the following functions: The *dict_type* is the type used for dictionaries that are read from the plist file. 
+ When *aware_datetime* is true, fields with type ``datetime.datetime`` will + be created as :ref:`aware object `, with + :attr:`!tzinfo` as :attr:`datetime.UTC`. + XML data for the :data:`FMT_XML` format is parsed using the Expat parser from :mod:`xml.parsers.expat` -- see its documentation for possible exceptions on ill-formed XML. Unknown elements will simply be ignored @@ -79,8 +83,11 @@ This module defines the following functions: .. versionadded:: 3.4 + .. versionchanged:: 3.13 + The keyword-only parameter *aware_datetime* has been added. + -.. function:: loads(data, *, fmt=None, dict_type=dict) +.. function:: loads(data, *, fmt=None, dict_type=dict, aware_datetime=False) Load a plist from a bytes object. See :func:`load` for an explanation of the keyword arguments. @@ -88,7 +95,7 @@ This module defines the following functions: .. versionadded:: 3.4 -.. function:: dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False) +.. function:: dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False, aware_datetime=False) Write *value* to a plist file. *Fp* should be a writable, binary file object. @@ -107,6 +114,10 @@ This module defines the following functions: When *skipkeys* is false (the default) the function raises :exc:`TypeError` when a key of a dictionary is not a string, otherwise such keys are skipped. + When *aware_datetime* is true and any field with type ``datetime.datetime`` + is set as a :ref:`aware object `, it will convert to + UTC timezone before writing it. + A :exc:`TypeError` will be raised if the object is of an unsupported type or a container that contains objects of unsupported types. @@ -115,8 +126,11 @@ This module defines the following functions: .. versionadded:: 3.4 + .. versionchanged:: 3.13 + The keyword-only parameter *aware_datetime* has been added. + -.. function:: dumps(value, *, fmt=FMT_XML, sort_keys=True, skipkeys=False) +.. 
function:: dumps(value, *, fmt=FMT_XML, sort_keys=True, skipkeys=False, aware_datetime=False) Return *value* as a plist-formatted bytes object. See the documentation for :func:`dump` for an explanation of the keyword diff --git a/Lib/plistlib.py b/Lib/plistlib.py index 3292c30d5fb29b..0fc1b5cbfa8c49 100644 --- a/Lib/plistlib.py +++ b/Lib/plistlib.py @@ -140,7 +140,7 @@ def _decode_base64(s): _dateParser = re.compile(r"(?P\d\d\d\d)(?:-(?P\d\d)(?:-(?P\d\d)(?:T(?P\d\d)(?::(?P\d\d)(?::(?P\d\d))?)?)?)?)?Z", re.ASCII) -def _date_from_string(s): +def _date_from_string(s, aware_datetime): order = ('year', 'month', 'day', 'hour', 'minute', 'second') gd = _dateParser.match(s).groupdict() lst = [] @@ -149,10 +149,14 @@ def _date_from_string(s): if val is None: break lst.append(int(val)) + if aware_datetime: + return datetime.datetime(*lst, tzinfo=datetime.UTC) return datetime.datetime(*lst) -def _date_to_string(d): +def _date_to_string(d, aware_datetime): + if aware_datetime: + d = d.astimezone(datetime.UTC) return '%04d-%02d-%02dT%02d:%02d:%02dZ' % ( d.year, d.month, d.day, d.hour, d.minute, d.second @@ -171,11 +175,12 @@ def _escape(text): return text class _PlistParser: - def __init__(self, dict_type): + def __init__(self, dict_type, aware_datetime=False): self.stack = [] self.current_key = None self.root = None self._dict_type = dict_type + self._aware_datetime = aware_datetime def parse(self, fileobj): self.parser = ParserCreate() @@ -277,7 +282,8 @@ def end_data(self): self.add_object(_decode_base64(self.get_data())) def end_date(self): - self.add_object(_date_from_string(self.get_data())) + self.add_object(_date_from_string(self.get_data(), + aware_datetime=self._aware_datetime)) class _DumbXMLWriter: @@ -321,13 +327,14 @@ def writeln(self, line): class _PlistWriter(_DumbXMLWriter): def __init__( self, file, indent_level=0, indent=b"\t", writeHeader=1, - sort_keys=True, skipkeys=False): + sort_keys=True, skipkeys=False, aware_datetime=False): if writeHeader: 
file.write(PLISTHEADER) _DumbXMLWriter.__init__(self, file, indent_level, indent) self._sort_keys = sort_keys self._skipkeys = skipkeys + self._aware_datetime = aware_datetime def write(self, value): self.writeln("") @@ -360,7 +367,8 @@ def write_value(self, value): self.write_bytes(value) elif isinstance(value, datetime.datetime): - self.simple_element("date", _date_to_string(value)) + self.simple_element("date", + _date_to_string(value, self._aware_datetime)) elif isinstance(value, (tuple, list)): self.write_array(value) @@ -461,8 +469,9 @@ class _BinaryPlistParser: see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c """ - def __init__(self, dict_type): + def __init__(self, dict_type, aware_datetime=False): self._dict_type = dict_type + self._aware_datime = aware_datetime def parse(self, fp): try: @@ -556,8 +565,11 @@ def _read_object(self, ref): f = struct.unpack('>d', self._fp.read(8))[0] # timestamp 0 of binary plists corresponds to 1/1/2001 # (year of Mac OS X 10.0), instead of 1/1/1970. 
- result = (datetime.datetime(2001, 1, 1) + - datetime.timedelta(seconds=f)) + if self._aware_datime: + epoch = datetime.datetime(2001, 1, 1, tzinfo=datetime.UTC) + else: + epoch = datetime.datetime(2001, 1, 1) + result = epoch + datetime.timedelta(seconds=f) elif tokenH == 0x40: # data s = self._get_size(tokenL) @@ -629,10 +641,11 @@ def _count_to_size(count): _scalars = (str, int, float, datetime.datetime, bytes) class _BinaryPlistWriter (object): - def __init__(self, fp, sort_keys, skipkeys): + def __init__(self, fp, sort_keys, skipkeys, aware_datetime=False): self._fp = fp self._sort_keys = sort_keys self._skipkeys = skipkeys + self._aware_datetime = aware_datetime def write(self, value): @@ -778,7 +791,12 @@ def _write_object(self, value): self._fp.write(struct.pack('>Bd', 0x23, value)) elif isinstance(value, datetime.datetime): - f = (value - datetime.datetime(2001, 1, 1)).total_seconds() + if self._aware_datetime: + dt = value.astimezone(datetime.UTC) + offset = dt - datetime.datetime(2001, 1, 1, tzinfo=datetime.UTC) + f = offset.total_seconds() + else: + f = (value - datetime.datetime(2001, 1, 1)).total_seconds() self._fp.write(struct.pack('>Bd', 0x33, f)) elif isinstance(value, (bytes, bytearray)): @@ -862,7 +880,7 @@ def _is_fmt_binary(header): } -def load(fp, *, fmt=None, dict_type=dict): +def load(fp, *, fmt=None, dict_type=dict, aware_datetime=False): """Read a .plist file. 'fp' should be a readable and binary file object. Return the unpacked root object (which usually is a dictionary). """ @@ -880,32 +898,36 @@ def load(fp, *, fmt=None, dict_type=dict): else: P = _FORMATS[fmt]['parser'] - p = P(dict_type=dict_type) + p = P(dict_type=dict_type, aware_datetime=aware_datetime) return p.parse(fp) -def loads(value, *, fmt=None, dict_type=dict): +def loads(value, *, fmt=None, dict_type=dict, aware_datetime=False): """Read a .plist file from a bytes object. Return the unpacked root object (which usually is a dictionary). 
""" fp = BytesIO(value) - return load(fp, fmt=fmt, dict_type=dict_type) + return load(fp, fmt=fmt, dict_type=dict_type, aware_datetime=aware_datetime) -def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False): +def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False, + aware_datetime=False): """Write 'value' to a .plist file. 'fp' should be a writable, binary file object. """ if fmt not in _FORMATS: raise ValueError("Unsupported format: %r"%(fmt,)) - writer = _FORMATS[fmt]["writer"](fp, sort_keys=sort_keys, skipkeys=skipkeys) + writer = _FORMATS[fmt]["writer"](fp, sort_keys=sort_keys, skipkeys=skipkeys, + aware_datetime=aware_datetime) writer.write(value) -def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True): +def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True, + aware_datetime=False): """Return a bytes object with the contents for a .plist file. """ fp = BytesIO() - dump(value, fp, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys) + dump(value, fp, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys, + aware_datetime=aware_datetime) return fp.getvalue() diff --git a/Lib/test/test_plistlib.py b/Lib/test/test_plistlib.py index b08ababa341cfe..d41975f1b17184 100644 --- a/Lib/test/test_plistlib.py +++ b/Lib/test/test_plistlib.py @@ -13,6 +13,8 @@ import subprocess import binascii import collections +import time +import zoneinfo from test import support from test.support import os_helper from io import BytesIO @@ -838,6 +840,54 @@ def test_xml_plist_with_entity_decl(self): "XML entity declarations are not supported"): plistlib.loads(XML_PLIST_WITH_ENTITY, fmt=plistlib.FMT_XML) + def test_load_aware_datetime(self): + dt = plistlib.loads(b"2023-12-10T08:03:30Z", + aware_datetime=True) + self.assertEqual(dt.tzinfo, datetime.UTC) + + @unittest.skipUnless("America/Los_Angeles" in zoneinfo.available_timezones(), + "Can't find timezone datebase") + def test_dump_aware_datetime(self): + dt = datetime.datetime(2345, 6, 7, 8, 9, 10, + 
tzinfo=zoneinfo.ZoneInfo("America/Los_Angeles")) + for fmt in ALL_FORMATS: + s = plistlib.dumps(dt, fmt=fmt, aware_datetime=True) + loaded_dt = plistlib.loads(s, fmt=fmt, aware_datetime=True) + self.assertEqual(loaded_dt.tzinfo, datetime.UTC) + self.assertEqual(loaded_dt, dt) + + def test_dump_utc_aware_datetime(self): + dt = datetime.datetime(2345, 6, 7, 8, 9, 10, tzinfo=datetime.UTC) + for fmt in ALL_FORMATS: + s = plistlib.dumps(dt, fmt=fmt, aware_datetime=True) + loaded_dt = plistlib.loads(s, fmt=fmt, aware_datetime=True) + self.assertEqual(loaded_dt.tzinfo, datetime.UTC) + self.assertEqual(loaded_dt, dt) + + @unittest.skipUnless("America/Los_Angeles" in zoneinfo.available_timezones(), + "Can't find timezone datebase") + def test_dump_aware_datetime_without_aware_datetime_option(self): + dt = datetime.datetime(2345, 6, 7, 8, + tzinfo=zoneinfo.ZoneInfo("America/Los_Angeles")) + s = plistlib.dumps(dt, fmt=plistlib.FMT_XML, aware_datetime=False) + self.assertIn(b"2345-06-07T08:00:00Z", s) + + def test_dump_utc_aware_datetime_without_aware_datetime_option(self): + dt = datetime.datetime(2345, 6, 7, 8, tzinfo=datetime.UTC) + s = plistlib.dumps(dt, fmt=plistlib.FMT_XML, aware_datetime=False) + self.assertIn(b"2345-06-07T08:00:00Z", s) + + def test_dump_naive_datetime_with_aware_datetime_option(self): + # Save a naive datetime with aware_datetime set to true. This will lead + # to having different time as compared to the current machine's + # timezone, which is UTC. 
+ dt = datetime.datetime(2345, 6, 7, 8, tzinfo=None) + for fmt in ALL_FORMATS: + s = plistlib.dumps(dt, fmt=fmt, aware_datetime=True) + parsed = plistlib.loads(s, aware_datetime=False) + expected = dt + datetime.timedelta(seconds=time.timezone) + self.assertEqual(parsed, expected) + class TestBinaryPlistlib(unittest.TestCase): @@ -962,6 +1012,28 @@ def test_invalid_binary(self): with self.assertRaises(plistlib.InvalidFileException): plistlib.loads(b'bplist00' + data, fmt=plistlib.FMT_BINARY) + def test_load_aware_datetime(self): + data = (b'bplist003B\x04>\xd0d\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00' + b'\x01\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11') + self.assertEqual(plistlib.loads(data, aware_datetime=True), + datetime.datetime(2345, 6, 7, 8, tzinfo=datetime.UTC)) + + @unittest.skipUnless("America/Los_Angeles" in zoneinfo.available_timezones(), + "Can't find timezone datebase") + def test_dump_aware_datetime_without_aware_datetime_option(self): + dt = datetime.datetime(2345, 6, 7, 8, + tzinfo=zoneinfo.ZoneInfo("America/Los_Angeles")) + msg = "can't subtract offset-naive and offset-aware datetimes" + with self.assertRaisesRegex(TypeError, msg): + plistlib.dumps(dt, fmt=plistlib.FMT_BINARY, aware_datetime=False) + + def test_dump_utc_aware_datetime_without_aware_datetime_option(self): + dt = datetime.datetime(2345, 6, 7, 8, tzinfo=datetime.UTC) + msg = "can't subtract offset-naive and offset-aware datetimes" + with self.assertRaisesRegex(TypeError, msg): + plistlib.dumps(dt, fmt=plistlib.FMT_BINARY, aware_datetime=False) + class TestKeyedArchive(unittest.TestCase): def test_keyed_archive_data(self): @@ -1072,5 +1144,6 @@ def test_octal_and_hex(self): self.assertEqual(p.get("HexType"), 16777228) self.assertEqual(p.get("IntType"), 83) + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2023-12-21-23-47-42.gh-issue-53502.dercJI.rst 
b/Misc/NEWS.d/next/Library/2023-12-21-23-47-42.gh-issue-53502.dercJI.rst new file mode 100644 index 00000000000000..aa7274161d4166 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-21-23-47-42.gh-issue-53502.dercJI.rst @@ -0,0 +1,2 @@ +Add a new option ``aware_datetime`` in :mod:`plistlib` to loads or dumps +aware datetime. From 8e4ff5c7885abb04a66d079499335c4d46106aff Mon Sep 17 00:00:00 2001 From: Ronald Oussoren Date: Mon, 1 Jan 2024 21:31:43 +0100 Subject: [PATCH 29/71] gh-53502: Fixes for tests in gh-113363 (#113627) * gh-53502: Fixes for tests in gh-113363 * Use 32-bit compatible date in test_dump_naive_datetime_with_aware_datetime_option * Saving non-aware datetimes will use the old behaviour regardless of the aware_datimetime setting --- Lib/plistlib.py | 4 ++-- Lib/test/test_plistlib.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Lib/plistlib.py b/Lib/plistlib.py index 0fc1b5cbfa8c49..6eb70cedd7aec6 100644 --- a/Lib/plistlib.py +++ b/Lib/plistlib.py @@ -155,7 +155,7 @@ def _date_from_string(s, aware_datetime): def _date_to_string(d, aware_datetime): - if aware_datetime: + if aware_datetime and d.tzinfo is not None: d = d.astimezone(datetime.UTC) return '%04d-%02d-%02dT%02d:%02d:%02dZ' % ( d.year, d.month, d.day, @@ -791,7 +791,7 @@ def _write_object(self, value): self._fp.write(struct.pack('>Bd', 0x23, value)) elif isinstance(value, datetime.datetime): - if self._aware_datetime: + if self._aware_datetime and value.tzinfo is not None: dt = value.astimezone(datetime.UTC) offset = dt - datetime.datetime(2001, 1, 1, tzinfo=datetime.UTC) f = offset.total_seconds() diff --git a/Lib/test/test_plistlib.py b/Lib/test/test_plistlib.py index d41975f1b17184..010393a417b946 100644 --- a/Lib/test/test_plistlib.py +++ b/Lib/test/test_plistlib.py @@ -881,12 +881,11 @@ def test_dump_naive_datetime_with_aware_datetime_option(self): # Save a naive datetime with aware_datetime set to true. 
This will lead # to having different time as compared to the current machine's # timezone, which is UTC. - dt = datetime.datetime(2345, 6, 7, 8, tzinfo=None) + dt = datetime.datetime(2003, 6, 7, 8, tzinfo=None) for fmt in ALL_FORMATS: s = plistlib.dumps(dt, fmt=fmt, aware_datetime=True) parsed = plistlib.loads(s, aware_datetime=False) - expected = dt + datetime.timedelta(seconds=time.timezone) - self.assertEqual(parsed, expected) + self.assertEqual(parsed, dt) class TestBinaryPlistlib(unittest.TestCase): From b2566d89ce50e9924bb2fccb87dcfa3ceb6cc0d6 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 1 Jan 2024 15:04:09 -0800 Subject: [PATCH 30/71] GH-113633: Use module state structure for _testcapi. (GH-113634) Use module state structure for _testcapi. --- ...-01-01-14-40-02.gh-issue-113633.VOY5ai.rst | 1 + Modules/_testcapimodule.c | 115 ++++++++++-------- 2 files changed, 65 insertions(+), 51 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2024-01-01-14-40-02.gh-issue-113633.VOY5ai.rst diff --git a/Misc/NEWS.d/next/Tests/2024-01-01-14-40-02.gh-issue-113633.VOY5ai.rst b/Misc/NEWS.d/next/Tests/2024-01-01-14-40-02.gh-issue-113633.VOY5ai.rst new file mode 100644 index 00000000000000..150c0d91852cdf --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-01-01-14-40-02.gh-issue-113633.VOY5ai.rst @@ -0,0 +1 @@ +Use module state for the _testcapi extension module. 
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 3527dfa77279ac..6762c611fb12a2 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -33,15 +33,32 @@ // Forward declarations static struct PyModuleDef _testcapimodule; -static PyObject *TestError; /* set to exception object in init */ +// Module state +typedef struct { + PyObject *error; // _testcapi.error object +} testcapistate_t; + +static testcapistate_t* +get_testcapi_state(PyObject *module) +{ + void *state = PyModule_GetState(module); + assert(state != NULL); + return (testcapistate_t *)state; +} -/* Raise TestError with test_name + ": " + msg, and return NULL. */ +static PyObject * +get_testerror(PyObject *self) { + testcapistate_t *state = get_testcapi_state((PyObject *)Py_TYPE(self)); + return state->error; +} + +/* Raise _testcapi.error with test_name + ": " + msg, and return NULL. */ static PyObject * -raiseTestError(const char* test_name, const char* msg) +raiseTestError(PyObject *self, const char* test_name, const char* msg) { - PyErr_Format(TestError, "%s: %s", test_name, msg); + PyErr_Format(get_testerror(self), "%s: %s", test_name, msg); return NULL; } @@ -52,10 +69,10 @@ raiseTestError(const char* test_name, const char* msg) platforms have these hardcoded. Better safe than sorry. 
*/ static PyObject* -sizeof_error(const char* fatname, const char* typname, +sizeof_error(PyObject *self, const char* fatname, const char* typname, int expected, int got) { - PyErr_Format(TestError, + PyErr_Format(get_testerror(self), "%s #define == %d but sizeof(%s) == %d", fatname, expected, typname, got); return (PyObject*)NULL; @@ -66,7 +83,7 @@ test_config(PyObject *self, PyObject *Py_UNUSED(ignored)) { #define CHECK_SIZEOF(FATNAME, TYPE) \ if (FATNAME != sizeof(TYPE)) \ - return sizeof_error(#FATNAME, #TYPE, FATNAME, sizeof(TYPE)) + return sizeof_error(self, #FATNAME, #TYPE, FATNAME, sizeof(TYPE)) CHECK_SIZEOF(SIZEOF_SHORT, short); CHECK_SIZEOF(SIZEOF_INT, int); @@ -89,7 +106,7 @@ test_sizeof_c_types(PyObject *self, PyObject *Py_UNUSED(ignored)) #endif #define CHECK_SIZEOF(TYPE, EXPECTED) \ if (EXPECTED != sizeof(TYPE)) { \ - PyErr_Format(TestError, \ + PyErr_Format(get_testerror(self), \ "sizeof(%s) = %u instead of %u", \ #TYPE, sizeof(TYPE), EXPECTED); \ return (PyObject*)NULL; \ @@ -97,7 +114,7 @@ test_sizeof_c_types(PyObject *self, PyObject *Py_UNUSED(ignored)) #define IS_SIGNED(TYPE) (((TYPE)-1) < (TYPE)0) #define CHECK_SIGNNESS(TYPE, SIGNED) \ if (IS_SIGNED(TYPE) != SIGNED) { \ - PyErr_Format(TestError, \ + PyErr_Format(get_testerror(self), \ "%s signness is, instead of %i", \ #TYPE, IS_SIGNED(TYPE), SIGNED); \ return (PyObject*)NULL; \ @@ -170,7 +187,7 @@ test_list_api(PyObject *self, PyObject *Py_UNUSED(ignored)) for (i = 0; i < NLIST; ++i) { PyObject* anint = PyList_GET_ITEM(list, i); if (PyLong_AS_LONG(anint) != NLIST-1-i) { - PyErr_SetString(TestError, + PyErr_SetString(get_testerror(self), "test_list_api: reverse screwed up"); Py_DECREF(list); return (PyObject*)NULL; @@ -183,7 +200,7 @@ test_list_api(PyObject *self, PyObject *Py_UNUSED(ignored)) } static int -test_dict_inner(int count) +test_dict_inner(PyObject *self, int count) { Py_ssize_t pos = 0, iterations = 0; int i; @@ -231,7 +248,7 @@ test_dict_inner(int count) if (iterations != count) { 
PyErr_SetString( - TestError, + get_testerror(self), "test_dict_iteration: dict iteration went wrong "); return -1; } else { @@ -250,7 +267,7 @@ test_dict_iteration(PyObject* self, PyObject *Py_UNUSED(ignored)) int i; for (i = 0; i < 200; i++) { - if (test_dict_inner(i) < 0) { + if (test_dict_inner(self, i) < 0) { return NULL; } } @@ -334,14 +351,14 @@ test_lazy_hash_inheritance(PyObject* self, PyObject *Py_UNUSED(ignored)) if (obj == NULL) { PyErr_Clear(); PyErr_SetString( - TestError, + get_testerror(self), "test_lazy_hash_inheritance: failed to create object"); return NULL; } if (type->tp_dict != NULL) { PyErr_SetString( - TestError, + get_testerror(self), "test_lazy_hash_inheritance: type initialised too soon"); Py_DECREF(obj); return NULL; @@ -351,7 +368,7 @@ test_lazy_hash_inheritance(PyObject* self, PyObject *Py_UNUSED(ignored)) if ((hash == -1) && PyErr_Occurred()) { PyErr_Clear(); PyErr_SetString( - TestError, + get_testerror(self), "test_lazy_hash_inheritance: could not hash object"); Py_DECREF(obj); return NULL; @@ -359,7 +376,7 @@ test_lazy_hash_inheritance(PyObject* self, PyObject *Py_UNUSED(ignored)) if (type->tp_dict == NULL) { PyErr_SetString( - TestError, + get_testerror(self), "test_lazy_hash_inheritance: type not initialised by hash()"); Py_DECREF(obj); return NULL; @@ -367,7 +384,7 @@ test_lazy_hash_inheritance(PyObject* self, PyObject *Py_UNUSED(ignored)) if (type->tp_hash != PyType_Type.tp_hash) { PyErr_SetString( - TestError, + get_testerror(self), "test_lazy_hash_inheritance: unexpected hash function"); Py_DECREF(obj); return NULL; @@ -427,7 +444,7 @@ py_buildvalue_ints(PyObject *self, PyObject *args) } static int -test_buildvalue_N_error(const char *fmt) +test_buildvalue_N_error(PyObject *self, const char *fmt) { PyObject *arg, *res; @@ -443,7 +460,7 @@ test_buildvalue_N_error(const char *fmt) } Py_DECREF(res); if (Py_REFCNT(arg) != 1) { - PyErr_Format(TestError, "test_buildvalue_N: " + PyErr_Format(get_testerror(self), "test_buildvalue_N: 
" "arg was not decrefed in successful " "Py_BuildValue(\"%s\")", fmt); return -1; @@ -452,13 +469,13 @@ test_buildvalue_N_error(const char *fmt) Py_INCREF(arg); res = Py_BuildValue(fmt, raise_error, NULL, arg); if (res != NULL || !PyErr_Occurred()) { - PyErr_Format(TestError, "test_buildvalue_N: " + PyErr_Format(get_testerror(self), "test_buildvalue_N: " "Py_BuildValue(\"%s\") didn't complain", fmt); return -1; } PyErr_Clear(); if (Py_REFCNT(arg) != 1) { - PyErr_Format(TestError, "test_buildvalue_N: " + PyErr_Format(get_testerror(self), "test_buildvalue_N: " "arg was not decrefed in failed " "Py_BuildValue(\"%s\")", fmt); return -1; @@ -482,25 +499,25 @@ test_buildvalue_N(PyObject *self, PyObject *Py_UNUSED(ignored)) return NULL; } if (res != arg) { - return raiseTestError("test_buildvalue_N", + return raiseTestError(self, "test_buildvalue_N", "Py_BuildValue(\"N\") returned wrong result"); } if (Py_REFCNT(arg) != 2) { - return raiseTestError("test_buildvalue_N", + return raiseTestError(self, "test_buildvalue_N", "arg was not decrefed in Py_BuildValue(\"N\")"); } Py_DECREF(res); Py_DECREF(arg); - if (test_buildvalue_N_error("O&N") < 0) + if (test_buildvalue_N_error(self, "O&N") < 0) return NULL; - if (test_buildvalue_N_error("(O&N)") < 0) + if (test_buildvalue_N_error(self, "(O&N)") < 0) return NULL; - if (test_buildvalue_N_error("[O&N]") < 0) + if (test_buildvalue_N_error(self, "[O&N]") < 0) return NULL; - if (test_buildvalue_N_error("{O&N}") < 0) + if (test_buildvalue_N_error(self, "{O&N}") < 0) return NULL; - if (test_buildvalue_N_error("{()O&(())N}") < 0) + if (test_buildvalue_N_error(self, "{()O&(())N}") < 0) return NULL; Py_RETURN_NONE; @@ -910,7 +927,7 @@ test_string_to_double(PyObject *self, PyObject *Py_UNUSED(ignored)) { Py_RETURN_NONE; fail: - return raiseTestError("test_string_to_double", msg); + return raiseTestError(self, "test_string_to_double", msg); #undef CHECK_STRING #undef CHECK_INVALID } @@ -1061,7 +1078,7 @@ test_capsule(PyObject *self, 
PyObject *Py_UNUSED(ignored)) exit: if (error) { - return raiseTestError("test_capsule", error); + return raiseTestError(self, "test_capsule", error); } Py_RETURN_NONE; #undef FAIL @@ -1272,7 +1289,7 @@ test_from_contiguous(PyObject* self, PyObject *Py_UNUSED(ignored)) ptr = view.buf; for (i = 0; i < 5; i++) { if (ptr[2*i] != i) { - PyErr_SetString(TestError, + PyErr_SetString(get_testerror(self), "test_from_contiguous: incorrect result"); return NULL; } @@ -1285,7 +1302,7 @@ test_from_contiguous(PyObject* self, PyObject *Py_UNUSED(ignored)) ptr = view.buf; for (i = 0; i < 5; i++) { if (*(ptr-2*i) != i) { - PyErr_SetString(TestError, + PyErr_SetString(get_testerror(self), "test_from_contiguous: incorrect result"); return NULL; } @@ -1338,7 +1355,7 @@ test_pep3118_obsolete_write_locks(PyObject* self, PyObject *Py_UNUSED(ignored)) Py_RETURN_NONE; error: - PyErr_SetString(TestError, + PyErr_SetString(get_testerror(self), "test_pep3118_obsolete_write_locks: failure"); return NULL; } @@ -1959,7 +1976,7 @@ test_pythread_tss_key_state(PyObject *self, PyObject *args) { Py_tss_t tss_key = Py_tss_NEEDS_INIT; if (PyThread_tss_is_created(&tss_key)) { - return raiseTestError("test_pythread_tss_key_state", + return raiseTestError(self, "test_pythread_tss_key_state", "TSS key not in an uninitialized state at " "creation time"); } @@ -1968,19 +1985,19 @@ test_pythread_tss_key_state(PyObject *self, PyObject *args) return NULL; } if (!PyThread_tss_is_created(&tss_key)) { - return raiseTestError("test_pythread_tss_key_state", + return raiseTestError(self, "test_pythread_tss_key_state", "PyThread_tss_create succeeded, " "but with TSS key in an uninitialized state"); } if (PyThread_tss_create(&tss_key) != 0) { - return raiseTestError("test_pythread_tss_key_state", + return raiseTestError(self, "test_pythread_tss_key_state", "PyThread_tss_create unsuccessful with " "an already initialized key"); } #define CHECK_TSS_API(expr) \ (void)(expr); \ if (!PyThread_tss_is_created(&tss_key)) { \ 
- return raiseTestError("test_pythread_tss_key_state", \ + return raiseTestError(self, "test_pythread_tss_key_state", \ "TSS key initialization state was not " \ "preserved after calling " #expr); } CHECK_TSS_API(PyThread_tss_set(&tss_key, NULL)); @@ -1988,7 +2005,7 @@ test_pythread_tss_key_state(PyObject *self, PyObject *args) #undef CHECK_TSS_API PyThread_tss_delete(&tss_key); if (PyThread_tss_is_created(&tss_key)) { - return raiseTestError("test_pythread_tss_key_state", + return raiseTestError(self, "test_pythread_tss_key_state", "PyThread_tss_delete called, but did not " "set the key state to uninitialized"); } @@ -1999,7 +2016,7 @@ test_pythread_tss_key_state(PyObject *self, PyObject *args) return NULL; } if (PyThread_tss_is_created(ptr_key)) { - return raiseTestError("test_pythread_tss_key_state", + return raiseTestError(self, "test_pythread_tss_key_state", "TSS key not in an uninitialized state at " "allocation time"); } @@ -3831,14 +3848,9 @@ static PyTypeObject ContainerNoGC_type = { static struct PyModuleDef _testcapimodule = { PyModuleDef_HEAD_INIT, - "_testcapi", - NULL, - -1, - TestMethods, - NULL, - NULL, - NULL, - NULL + .m_name = "_testcapi", + .m_size = sizeof(testcapistate_t), + .m_methods = TestMethods, }; /* Per PEP 489, this module will not be converted to multi-phase initialization @@ -3933,9 +3945,10 @@ PyInit__testcapi(void) PyModule_AddIntConstant(m, "the_number_three", 3); PyModule_AddIntMacro(m, Py_C_RECURSION_LIMIT); - TestError = PyErr_NewException("_testcapi.error", NULL, NULL); - Py_INCREF(TestError); - PyModule_AddObject(m, "error", TestError); + testcapistate_t *state = get_testcapi_state(m); + state->error = PyErr_NewException("_testcapi.error", NULL, NULL); + Py_INCREF(state->error); + PyModule_AddObject(m, "error", state->error); if (PyType_Ready(&ContainerNoGC_type) < 0) { return NULL; From 3aadb9508592877c429083f213fa03bda1045ca1 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 2 Jan 2024 09:16:53 +0900 Subject: [PATCH 
31/71] no-issue: Use the official term "free-threading" for GitHub Action (gh-113622) --- .github/workflows/build.yml | 28 +++++++++++++------------- .github/workflows/reusable-macos.yml | 4 ++-- .github/workflows/reusable-windows.yml | 8 ++++---- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e8b44a7c6952a4..9f67f30ed07d74 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -187,13 +187,13 @@ jobs: if: needs.check_source.outputs.run_tests == 'true' uses: ./.github/workflows/reusable-windows.yml - build_windows_free_threaded: - name: 'Windows (free-threaded)' + build_windows_free_threading: + name: 'Windows (free-threading)' needs: check_source if: needs.check_source.outputs.run_tests == 'true' uses: ./.github/workflows/reusable-windows.yml with: - free-threaded: true + free-threading: true build_macos: name: 'macOS' @@ -203,14 +203,14 @@ jobs: with: config_hash: ${{ needs.check_source.outputs.config_hash }} - build_macos_free_threaded: - name: 'macOS (free-threaded)' + build_macos_free_threading: + name: 'macOS (free-threading)' needs: check_source if: needs.check_source.outputs.run_tests == 'true' uses: ./.github/workflows/reusable-macos.yml with: config_hash: ${{ needs.check_source.outputs.config_hash }} - free-threaded: true + free-threading: true build_ubuntu: name: 'Ubuntu' @@ -225,8 +225,8 @@ jobs: --with-pydebug \ --with-openssl=$OPENSSL_DIR - build_ubuntu_free_threaded: - name: 'Ubuntu (free-threaded)' + build_ubuntu_free_threading: + name: 'Ubuntu (free-threading)' needs: check_source if: needs.check_source.outputs.run_tests == 'true' uses: ./.github/workflows/reusable-ubuntu.yml @@ -504,12 +504,12 @@ jobs: - check-docs - check_generated_files - build_macos - - build_macos_free_threaded + - build_macos_free_threading - build_ubuntu - - build_ubuntu_free_threaded + - build_ubuntu_free_threading - build_ubuntu_ssltests - build_windows - - 
build_windows_free_threaded + - build_windows_free_threading - test_hypothesis - build_asan - cifuzz @@ -537,12 +537,12 @@ jobs: && ' check_generated_files, build_macos, - build_macos_free_threaded, + build_macos_free_threading, build_ubuntu, - build_ubuntu_free_threaded, + build_ubuntu_free_threading, build_ubuntu_ssltests, build_windows, - build_windows_free_threaded, + build_windows_free_threading, build_asan, ' || '' diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index 22f46d18e1b43a..c24b6e963ddfd6 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -4,7 +4,7 @@ on: config_hash: required: true type: string - free-threaded: + free-threading: required: false type: boolean default: false @@ -35,7 +35,7 @@ jobs: ./configure \ --config-cache \ --with-pydebug \ - ${{ inputs.free-threaded && '--disable-gil' || '' }} \ + ${{ inputs.free-threading && '--disable-gil' || '' }} \ --prefix=/opt/python-dev \ --with-openssl="$(brew --prefix openssl@3.0)" - name: Build CPython diff --git a/.github/workflows/reusable-windows.yml b/.github/workflows/reusable-windows.yml index 47a3c10d2ca4c1..ae27c108d8368c 100644 --- a/.github/workflows/reusable-windows.yml +++ b/.github/workflows/reusable-windows.yml @@ -1,7 +1,7 @@ on: workflow_call: inputs: - free-threaded: + free-threading: required: false type: boolean default: false @@ -16,7 +16,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Build CPython - run: .\PCbuild\build.bat -e -d -v -p Win32 ${{ inputs.free-threaded && '--disable-gil' || '' }} + run: .\PCbuild\build.bat -e -d -v -p Win32 ${{ inputs.free-threading && '--disable-gil' || '' }} - name: Display build info run: .\python.bat -m test.pythoninfo - name: Tests @@ -33,7 +33,7 @@ jobs: - name: Register MSVC problem matcher run: echo "::add-matcher::.github/problem-matchers/msvc.json" - name: Build CPython - run: .\PCbuild\build.bat -e -d -v -p x64 ${{ inputs.free-threaded && 
'--disable-gil' || '' }} + run: .\PCbuild\build.bat -e -d -v -p x64 ${{ inputs.free-threading && '--disable-gil' || '' }} - name: Display build info run: .\python.bat -m test.pythoninfo - name: Tests @@ -50,4 +50,4 @@ jobs: - name: Register MSVC problem matcher run: echo "::add-matcher::.github/problem-matchers/msvc.json" - name: Build CPython - run: .\PCbuild\build.bat -e -d -v -p arm64 ${{ inputs.free-threaded && '--disable-gil' || '' }} + run: .\PCbuild\build.bat -e -d -v -p arm64 ${{ inputs.free-threading && '--disable-gil' || '' }} From 7595380347610598a3f5529214a449660892537b Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Tue, 2 Jan 2024 09:37:37 +0200 Subject: [PATCH 32/71] gh-101100: Fix Sphinx warnings from removed `~!` references (#113629) Co-authored-by: Alex Waygood --- Doc/whatsnew/3.11.rst | 2 +- Doc/whatsnew/3.3.rst | 2 +- Doc/whatsnew/3.4.rst | 14 +++++++------- Doc/whatsnew/3.5.rst | 2 +- Doc/whatsnew/3.7.rst | 8 ++++---- Doc/whatsnew/3.9.rst | 2 +- Misc/NEWS.d/3.11.0a1.rst | 6 +++--- Misc/NEWS.d/3.11.0a7.rst | 4 ++-- Misc/NEWS.d/3.9.0a3.rst | 4 ++-- 9 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index cae5a26bae1148..ce4c98eba71443 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -1860,7 +1860,7 @@ Standard Library (Contributed by Erlend E. Aasland in :issue:`5846`.) -* :meth:`~!unittest.TestProgram.usageExit` is marked deprecated, to be removed +* :meth:`!unittest.TestProgram.usageExit` is marked deprecated, to be removed in 3.13. (Contributed by Carlos Damázio in :gh:`67048`.) 
diff --git a/Doc/whatsnew/3.3.rst b/Doc/whatsnew/3.3.rst index 5674bc7f359b72..79e2dd9dcee361 100644 --- a/Doc/whatsnew/3.3.rst +++ b/Doc/whatsnew/3.3.rst @@ -1052,7 +1052,7 @@ their ``__init__`` method (for example, file objects) or in their crypt ----- -Addition of salt and modular crypt format (hashing method) and the :func:`~!crypt.mksalt` +Addition of salt and modular crypt format (hashing method) and the :func:`!mksalt` function to the :mod:`!crypt` module. (:issue:`10924`) diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst index 72d12461d8f730..b26e3d36c4bfbc 100644 --- a/Doc/whatsnew/3.4.rst +++ b/Doc/whatsnew/3.4.rst @@ -605,15 +605,15 @@ Using ``ABC`` as a base class has essentially the same effect as specifying aifc ---- -The :meth:`~!aifc.aifc.getparams` method now returns a namedtuple rather than a +The :meth:`!getparams` method now returns a namedtuple rather than a plain tuple. (Contributed by Claudiu Popa in :issue:`17818`.) :func:`!aifc.open` now supports the context management protocol: when used in a -:keyword:`with` block, the :meth:`~!aifc.aifc.close` method of the returned +:keyword:`with` block, the :meth:`!close` method of the returned object will be called automatically at the end of the block. (Contributed by Serhiy Storchacha in :issue:`16486`.) -The :meth:`~!aifc.aifc.writeframesraw` and :meth:`~!aifc.aifc.writeframes` +The :meth:`!writeframesraw` and :meth:`!writeframes` methods now accept any :term:`bytes-like object`. (Contributed by Serhiy Storchaka in :issue:`8311`.) @@ -632,7 +632,7 @@ audioop :mod:`!audioop` now supports 24-bit samples. (Contributed by Serhiy Storchaka in :issue:`12866`.) -New :func:`~!audioop.byteswap` function converts big-endian samples to +New :func:`!byteswap` function converts big-endian samples to little-endian and vice versa. (Contributed by Serhiy Storchaka in :issue:`19641`.) @@ -1528,7 +1528,7 @@ work on Windows. This change was actually inadvertently made in 3.3.4. 
sunau ----- -The :meth:`~!sunau.getparams` method now returns a namedtuple rather than a +The :meth:`!getparams` method now returns a namedtuple rather than a plain tuple. (Contributed by Claudiu Popa in :issue:`18901`.) :meth:`!sunau.open` now supports the context management protocol: when used in a @@ -1540,8 +1540,8 @@ in :issue:`18878`.) support for writing 24 sample using the module. (Contributed by Serhiy Storchaka in :issue:`19261`.) -The :meth:`~!sunau.AU_write.writeframesraw` and -:meth:`~!sunau.AU_write.writeframes` methods now accept any :term:`bytes-like +The :meth:`!writeframesraw` and +:meth:`!writeframes` methods now accept any :term:`bytes-like object`. (Contributed by Serhiy Storchaka in :issue:`8311`.) diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index a32866094ffeb5..bbf2dc59a9f60a 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -1252,7 +1252,7 @@ Oberkirch in :issue:`21800`.) imghdr ------ -The :func:`~!imghdr.what` function now recognizes the +The :func:`!what` function now recognizes the `OpenEXR `_ format (contributed by Martin Vignali and Claudiu Popa in :issue:`20295`), and the `WebP `_ format diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index 616e51571388a8..775a45a1b3ff06 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -851,7 +851,7 @@ crypt The :mod:`!crypt` module now supports the Blowfish hashing method. (Contributed by Serhiy Storchaka in :issue:`31664`.) -The :func:`~!crypt.mksalt` function now allows specifying the number of rounds +The :func:`!mksalt` function now allows specifying the number of rounds for hashing. (Contributed by Serhiy Storchaka in :issue:`31702`.) 
@@ -2004,15 +2004,15 @@ importlib --------- Methods -:meth:`MetaPathFinder.find_module() ` +:meth:`!MetaPathFinder.find_module()` (replaced by :meth:`MetaPathFinder.find_spec() `) and -:meth:`PathEntryFinder.find_loader() ` +:meth:`!PathEntryFinder.find_loader()` (replaced by :meth:`PathEntryFinder.find_spec() `) both deprecated in Python 3.4 now emit :exc:`DeprecationWarning`. -(Contributed by Matthias Bussonnier in :issue:`29576`) +(Contributed by Matthias Bussonnier in :issue:`29576`.) The :class:`importlib.abc.ResourceLoader` ABC has been deprecated in favour of :class:`importlib.abc.ResourceReader`. diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index cb2482ee48d7fa..0c85fe15915518 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -585,7 +585,7 @@ queue. nntplib ------- -:class:`~!nntplib.NNTP` and :class:`~!nntplib.NNTP_SSL` now raise a :class:`ValueError` +:class:`!NNTP` and :class:`!NNTP_SSL` now raise a :class:`ValueError` if the given timeout for their constructor is zero to prevent the creation of a non-blocking socket. (Contributed by Donghee Na in :issue:`39259`.) diff --git a/Misc/NEWS.d/3.11.0a1.rst b/Misc/NEWS.d/3.11.0a1.rst index 1c96c0760a57b2..ba7fb515305ff5 100644 --- a/Misc/NEWS.d/3.11.0a1.rst +++ b/Misc/NEWS.d/3.11.0a1.rst @@ -3483,9 +3483,9 @@ Improved reprs of :mod:`threading` synchronization objects: Deprecated the following :mod:`unittest` functions, scheduled for removal in Python 3.13: -* :func:`~!unittest.findTestCases` -* :func:`~!unittest.makeSuite` -* :func:`~!unittest.getTestCaseNames` +* :func:`!findTestCases` +* :func:`!makeSuite` +* :func:`!getTestCaseNames` Use :class:`~unittest.TestLoader` methods instead: diff --git a/Misc/NEWS.d/3.11.0a7.rst b/Misc/NEWS.d/3.11.0a7.rst index 6e41f9cbd933b5..79557d5c436593 100644 --- a/Misc/NEWS.d/3.11.0a7.rst +++ b/Misc/NEWS.d/3.11.0a7.rst @@ -1038,8 +1038,8 @@ Add optional parameter *dir_fd* in :func:`shutil.rmtree`. .. nonce: AixHW7 .. 
section: Library -:meth:`~!unittest.TestProgram.usageExit` is marked deprecated, to be removed -in 3.13. +:meth:`!unittest.TestProgram.usageExit` is marked as deprecated, +to be removed in Python 3.13. .. diff --git a/Misc/NEWS.d/3.9.0a3.rst b/Misc/NEWS.d/3.9.0a3.rst index 8a94848427382b..bc7f4f9c5d39c1 100644 --- a/Misc/NEWS.d/3.9.0a3.rst +++ b/Misc/NEWS.d/3.9.0a3.rst @@ -454,7 +454,7 @@ resilients to inaccessible sys.path entries (importlib_metadata v1.4.0). .. nonce: _S5VjC .. section: Library -:class:`~!nntplib.NNTP` and :class:`~!nntplib.NNTP_SSL` now raise a +:class:`!NNTP` and :class:`!NNTP_SSL` now raise a :class:`ValueError` if the given timeout for their constructor is zero to prevent the creation of a non-blocking socket. Patch by Donghee Na. @@ -498,7 +498,7 @@ prevent the creation of a non-blocking socket. Patch by Donghee Na. .. section: Library Updated the Gmane domain from news.gmane.org to news.gmane.io which is used -for examples of :class:`~!nntplib.NNTP` news reader server and nntplib tests. +for examples of :class:`!NNTP` news reader server and nntplib tests. .. From 8ff44f855450244d965dbf82c7f0a31de666007c Mon Sep 17 00:00:00 2001 From: "John D. McDonald" <43117960+Rasputin2@users.noreply.github.com> Date: Tue, 2 Jan 2024 02:40:14 -0600 Subject: [PATCH 33/71] gh-81094: Refer to PEP 318 in compound_statements.rst (#113588) Co-authored-by: Hugo van Kemenade --- Doc/reference/compound_stmts.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Doc/reference/compound_stmts.rst b/Doc/reference/compound_stmts.rst index 7a735095bdecb2..374404bf33abbe 100644 --- a/Doc/reference/compound_stmts.rst +++ b/Doc/reference/compound_stmts.rst @@ -1362,12 +1362,15 @@ access the local variables of the function containing the def. See section :pep:`526` - Syntax for Variable Annotations Ability to type hint variable declarations, including class - variables and instance variables + variables and instance variables. 
:pep:`563` - Postponed Evaluation of Annotations Support for forward references within annotations by preserving annotations in a string form at runtime instead of eager evaluation. + :pep:`318` - Decorators for Functions and Methods + Function and method decorators were introduced. + Class decorators were introduced in :pep:`3129`. .. _class: From 9ed36d533ab8b256f0a589b5be6d7a2fdcf4aff2 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 2 Jan 2024 13:00:52 +0000 Subject: [PATCH 34/71] gh-113602: Bail out when the parser tries to override existing errors (#113607) Signed-off-by: Pablo Galindo --- Lib/test/test_syntax.py | 2 ++ .../2024-01-01-00-07-02.gh-issue-113602.cWuTzk.rst | 2 ++ Parser/pegen_errors.c | 4 ++++ 3 files changed, 8 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-01-00-07-02.gh-issue-113602.cWuTzk.rst diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index 8b3ca69c9fe155..83cbf5ec865dbb 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -2360,6 +2360,8 @@ def test_error_parenthesis(self): """ self._check_error(code, "parenthesis '\\)' does not match opening parenthesis '\\['") + self._check_error("match y:\n case e(e=v,v,", " was never closed") + # Examples with dencodings s = b'# coding=latin\n(aaaaaaaaaaaaaaaaa\naaaaaaaaaaa\xb5' self._check_error(s, r"'\(' was never closed") diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-01-00-07-02.gh-issue-113602.cWuTzk.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-01-00-07-02.gh-issue-113602.cWuTzk.rst new file mode 100644 index 00000000000000..5e064657348720 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-01-00-07-02.gh-issue-113602.cWuTzk.rst @@ -0,0 +1,2 @@ +Fix an error that was causing the parser to try to overwrite existing errors +and crashing in the process. 
Patch by Pablo Galindo diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c index 8a02aab1f4e504..e15673d02dd3b0 100644 --- a/Parser/pegen_errors.c +++ b/Parser/pegen_errors.c @@ -311,6 +311,10 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, Py_ssize_t end_lineno, Py_ssize_t end_col_offset, const char *errmsg, va_list va) { + // Bail out if we already have an error set. + if (p->error_indicator && PyErr_Occurred()) { + return NULL; + } PyObject *value = NULL; PyObject *errstr = NULL; PyObject *error_line = NULL; From 5d36a95e64e30606e8f8e332edf6bde91ac344cf Mon Sep 17 00:00:00 2001 From: Christopher Chavez Date: Tue, 2 Jan 2024 07:41:32 -0600 Subject: [PATCH 35/71] gh-111178: Avoid calling functions from incompatible pointer types in listobject.c (GH-112820) Fix undefined behavior warnings (UBSan -fsanitize=function), for example: Objects/object.c:674:11: runtime error: call to function list_repr through pointer to incorrect function type 'struct _object *(*)(struct _object *)' listobject.c:382: note: list_repr defined here SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior Objects/object.c:674:11 in --- Objects/listobject.c | 164 ++++++++++++++++++++++++------------------- 1 file changed, 92 insertions(+), 72 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 2d04218439bd20..dfb8cd2b106511 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -343,8 +343,9 @@ PyList_Append(PyObject *op, PyObject *newitem) /* Methods */ static void -list_dealloc(PyListObject *op) +list_dealloc(PyObject *self) { + PyListObject *op = (PyListObject *)self; Py_ssize_t i; PyObject_GC_UnTrack(op); Py_TRASHCAN_BEGIN(op, list_dealloc) @@ -378,8 +379,9 @@ list_dealloc(PyListObject *op) } static PyObject * -list_repr(PyListObject *v) +list_repr(PyObject *self) { + PyListObject *v = (PyListObject *)self; Py_ssize_t i; PyObject *s; _PyUnicodeWriter writer; @@ -434,14 +436,15 @@ list_repr(PyListObject *v) } static Py_ssize_t 
-list_length(PyListObject *a) +list_length(PyObject *a) { return Py_SIZE(a); } static int -list_contains(PyListObject *a, PyObject *el) +list_contains(PyObject *aa, PyObject *el) { + PyListObject *a = (PyListObject *)aa; PyObject *item; Py_ssize_t i; int cmp; @@ -456,8 +459,9 @@ list_contains(PyListObject *a, PyObject *el) } static PyObject * -list_item(PyListObject *a, Py_ssize_t i) +list_item(PyObject *aa, Py_ssize_t i) { + PyListObject *a = (PyListObject *)aa; if (!valid_index(i, Py_SIZE(a))) { PyErr_SetObject(PyExc_IndexError, &_Py_STR(list_err)); return NULL; @@ -512,8 +516,9 @@ PyList_GetSlice(PyObject *a, Py_ssize_t ilow, Py_ssize_t ihigh) } static PyObject * -list_concat(PyListObject *a, PyObject *bb) +list_concat(PyObject *aa, PyObject *bb) { + PyListObject *a = (PyListObject *)aa; Py_ssize_t size; Py_ssize_t i; PyObject **src, **dest; @@ -552,8 +557,9 @@ list_concat(PyListObject *a, PyObject *bb) } static PyObject * -list_repeat(PyListObject *a, Py_ssize_t n) +list_repeat(PyObject *aa, Py_ssize_t n) { + PyListObject *a = (PyListObject *)aa; const Py_ssize_t input_size = Py_SIZE(a); if (input_size == 0 || n <= 0) return PyList_New(0); @@ -616,9 +622,9 @@ list_clear(PyListObject *a) } static int -list_clear_slot(PyListObject *self) +list_clear_slot(PyObject *self) { - list_clear(self); + list_clear((PyListObject *)self); return 0; } @@ -745,8 +751,9 @@ PyList_SetSlice(PyObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v) } static PyObject * -list_inplace_repeat(PyListObject *self, Py_ssize_t n) +list_inplace_repeat(PyObject *_self, Py_ssize_t n) { + PyListObject *self = (PyListObject *)_self; Py_ssize_t input_size = PyList_GET_SIZE(self); if (input_size == 0 || n == 1) { return Py_NewRef(self); @@ -776,8 +783,9 @@ list_inplace_repeat(PyListObject *self, Py_ssize_t n) } static int -list_ass_item(PyListObject *a, Py_ssize_t i, PyObject *v) +list_ass_item(PyObject *aa, Py_ssize_t i, PyObject *v) { + PyListObject *a = (PyListObject *)aa; if 
(!valid_index(i, Py_SIZE(a))) { PyErr_SetString(PyExc_IndexError, "list assignment index out of range"); @@ -1044,8 +1052,9 @@ PyList_Clear(PyObject *self) static PyObject * -list_inplace_concat(PyListObject *self, PyObject *other) +list_inplace_concat(PyObject *_self, PyObject *other) { + PyListObject *self = (PyListObject *)_self; if (list_extend(self, other) < 0) { return NULL; } @@ -2756,8 +2765,9 @@ list_remove(PyListObject *self, PyObject *value) } static int -list_traverse(PyListObject *o, visitproc visit, void *arg) +list_traverse(PyObject *self, visitproc visit, void *arg) { + PyListObject *o = (PyListObject *)self; Py_ssize_t i; for (i = Py_SIZE(o); --i >= 0; ) @@ -2897,10 +2907,10 @@ list___sizeof___impl(PyListObject *self) } static PyObject *list_iter(PyObject *seq); -static PyObject *list_subscript(PyListObject*, PyObject*); +static PyObject *list_subscript(PyObject*, PyObject*); static PyMethodDef list_methods[] = { - {"__getitem__", (PyCFunction)list_subscript, METH_O|METH_COEXIST, + {"__getitem__", list_subscript, METH_O|METH_COEXIST, PyDoc_STR("__getitem__($self, index, /)\n--\n\nReturn self[index].")}, LIST___REVERSED___METHODDEF LIST___SIZEOF___METHODDEF @@ -2920,21 +2930,22 @@ static PyMethodDef list_methods[] = { }; static PySequenceMethods list_as_sequence = { - (lenfunc)list_length, /* sq_length */ - (binaryfunc)list_concat, /* sq_concat */ - (ssizeargfunc)list_repeat, /* sq_repeat */ - (ssizeargfunc)list_item, /* sq_item */ + list_length, /* sq_length */ + list_concat, /* sq_concat */ + list_repeat, /* sq_repeat */ + list_item, /* sq_item */ 0, /* sq_slice */ - (ssizeobjargproc)list_ass_item, /* sq_ass_item */ + list_ass_item, /* sq_ass_item */ 0, /* sq_ass_slice */ - (objobjproc)list_contains, /* sq_contains */ - (binaryfunc)list_inplace_concat, /* sq_inplace_concat */ - (ssizeargfunc)list_inplace_repeat, /* sq_inplace_repeat */ + list_contains, /* sq_contains */ + list_inplace_concat, /* sq_inplace_concat */ + list_inplace_repeat, /* 
sq_inplace_repeat */ }; static PyObject * -list_subscript(PyListObject* self, PyObject* item) +list_subscript(PyObject* _self, PyObject* item) { + PyListObject* self = (PyListObject*)_self; if (_PyIndex_Check(item)) { Py_ssize_t i; i = PyNumber_AsSsize_t(item, PyExc_IndexError); @@ -2942,7 +2953,7 @@ list_subscript(PyListObject* self, PyObject* item) return NULL; if (i < 0) i += PyList_GET_SIZE(self); - return list_item(self, i); + return list_item((PyObject *)self, i); } else if (PySlice_Check(item)) { Py_ssize_t start, stop, step, slicelength, i; @@ -2987,15 +2998,16 @@ list_subscript(PyListObject* self, PyObject* item) } static int -list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value) +list_ass_subscript(PyObject* _self, PyObject* item, PyObject* value) { + PyListObject *self = (PyListObject *)_self; if (_PyIndex_Check(item)) { Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); if (i == -1 && PyErr_Occurred()) return -1; if (i < 0) i += PyList_GET_SIZE(self); - return list_ass_item(self, i, value); + return list_ass_item((PyObject *)self, i, value); } else if (PySlice_Check(item)) { Py_ssize_t start, stop, step, slicelength; @@ -3149,9 +3161,9 @@ list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value) } static PyMappingMethods list_as_mapping = { - (lenfunc)list_length, - (binaryfunc)list_subscript, - (objobjargproc)list_ass_subscript + list_length, + list_subscript, + list_ass_subscript }; PyTypeObject PyList_Type = { @@ -3159,12 +3171,12 @@ PyTypeObject PyList_Type = { "list", sizeof(PyListObject), 0, - (destructor)list_dealloc, /* tp_dealloc */ + list_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_as_async */ - (reprfunc)list_repr, /* tp_repr */ + list_repr, /* tp_repr */ 0, /* tp_as_number */ &list_as_sequence, /* tp_as_sequence */ &list_as_mapping, /* tp_as_mapping */ @@ -3178,8 +3190,8 @@ PyTypeObject PyList_Type = { Py_TPFLAGS_BASETYPE | 
Py_TPFLAGS_LIST_SUBCLASS | _Py_TPFLAGS_MATCH_SELF | Py_TPFLAGS_SEQUENCE, /* tp_flags */ list___init____doc__, /* tp_doc */ - (traverseproc)list_traverse, /* tp_traverse */ - (inquiry)list_clear_slot, /* tp_clear */ + list_traverse, /* tp_traverse */ + list_clear_slot, /* tp_clear */ list_richcompare, /* tp_richcompare */ 0, /* tp_weaklistoffset */ list_iter, /* tp_iter */ @@ -3201,22 +3213,22 @@ PyTypeObject PyList_Type = { /*********************** List Iterator **************************/ -static void listiter_dealloc(_PyListIterObject *); -static int listiter_traverse(_PyListIterObject *, visitproc, void *); -static PyObject *listiter_next(_PyListIterObject *); -static PyObject *listiter_len(_PyListIterObject *, PyObject *); +static void listiter_dealloc(PyObject *); +static int listiter_traverse(PyObject *, visitproc, void *); +static PyObject *listiter_next(PyObject *); +static PyObject *listiter_len(PyObject *, PyObject *); static PyObject *listiter_reduce_general(void *_it, int forward); -static PyObject *listiter_reduce(_PyListIterObject *, PyObject *); -static PyObject *listiter_setstate(_PyListIterObject *, PyObject *state); +static PyObject *listiter_reduce(PyObject *, PyObject *); +static PyObject *listiter_setstate(PyObject *, PyObject *state); PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it))."); PyDoc_STRVAR(reduce_doc, "Return state information for pickling."); PyDoc_STRVAR(setstate_doc, "Set state information for unpickling."); static PyMethodDef listiter_methods[] = { - {"__length_hint__", (PyCFunction)listiter_len, METH_NOARGS, length_hint_doc}, - {"__reduce__", (PyCFunction)listiter_reduce, METH_NOARGS, reduce_doc}, - {"__setstate__", (PyCFunction)listiter_setstate, METH_O, setstate_doc}, + {"__length_hint__", listiter_len, METH_NOARGS, length_hint_doc}, + {"__reduce__", listiter_reduce, METH_NOARGS, reduce_doc}, + {"__setstate__", listiter_setstate, METH_O, setstate_doc}, {NULL, NULL} /* sentinel */ }; @@ 
-3226,7 +3238,7 @@ PyTypeObject PyListIter_Type = { sizeof(_PyListIterObject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ - (destructor)listiter_dealloc, /* tp_dealloc */ + listiter_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ @@ -3243,12 +3255,12 @@ PyTypeObject PyListIter_Type = { 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ 0, /* tp_doc */ - (traverseproc)listiter_traverse, /* tp_traverse */ + listiter_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ - (iternextfunc)listiter_next, /* tp_iternext */ + listiter_next, /* tp_iternext */ listiter_methods, /* tp_methods */ 0, /* tp_members */ }; @@ -3273,23 +3285,25 @@ list_iter(PyObject *seq) } static void -listiter_dealloc(_PyListIterObject *it) +listiter_dealloc(PyObject *self) { + _PyListIterObject *it = (_PyListIterObject *)self; _PyObject_GC_UNTRACK(it); Py_XDECREF(it->it_seq); PyObject_GC_Del(it); } static int -listiter_traverse(_PyListIterObject *it, visitproc visit, void *arg) +listiter_traverse(PyObject *it, visitproc visit, void *arg) { - Py_VISIT(it->it_seq); + Py_VISIT(((_PyListIterObject *)it)->it_seq); return 0; } static PyObject * -listiter_next(_PyListIterObject *it) +listiter_next(PyObject *self) { + _PyListIterObject *it = (_PyListIterObject *)self; PyListObject *seq; PyObject *item; @@ -3311,8 +3325,9 @@ listiter_next(_PyListIterObject *it) } static PyObject * -listiter_len(_PyListIterObject *it, PyObject *Py_UNUSED(ignored)) +listiter_len(PyObject *self, PyObject *Py_UNUSED(ignored)) { + _PyListIterObject *it = (_PyListIterObject *)self; Py_ssize_t len; if (it->it_seq) { len = PyList_GET_SIZE(it->it_seq) - it->it_index; @@ -3323,14 +3338,15 @@ listiter_len(_PyListIterObject *it, PyObject *Py_UNUSED(ignored)) } static PyObject * -listiter_reduce(_PyListIterObject *it, PyObject *Py_UNUSED(ignored)) +listiter_reduce(PyObject 
*it, PyObject *Py_UNUSED(ignored)) { return listiter_reduce_general(it, 1); } static PyObject * -listiter_setstate(_PyListIterObject *it, PyObject *state) +listiter_setstate(PyObject *self, PyObject *state) { + _PyListIterObject *it = (_PyListIterObject *)self; Py_ssize_t index = PyLong_AsSsize_t(state); if (index == -1 && PyErr_Occurred()) return NULL; @@ -3352,17 +3368,17 @@ typedef struct { PyListObject *it_seq; /* Set to NULL when iterator is exhausted */ } listreviterobject; -static void listreviter_dealloc(listreviterobject *); -static int listreviter_traverse(listreviterobject *, visitproc, void *); -static PyObject *listreviter_next(listreviterobject *); -static PyObject *listreviter_len(listreviterobject *, PyObject *); -static PyObject *listreviter_reduce(listreviterobject *, PyObject *); -static PyObject *listreviter_setstate(listreviterobject *, PyObject *); +static void listreviter_dealloc(PyObject *); +static int listreviter_traverse(PyObject *, visitproc, void *); +static PyObject *listreviter_next(PyObject *); +static PyObject *listreviter_len(PyObject *, PyObject *); +static PyObject *listreviter_reduce(PyObject *, PyObject *); +static PyObject *listreviter_setstate(PyObject *, PyObject *); static PyMethodDef listreviter_methods[] = { - {"__length_hint__", (PyCFunction)listreviter_len, METH_NOARGS, length_hint_doc}, - {"__reduce__", (PyCFunction)listreviter_reduce, METH_NOARGS, reduce_doc}, - {"__setstate__", (PyCFunction)listreviter_setstate, METH_O, setstate_doc}, + {"__length_hint__", listreviter_len, METH_NOARGS, length_hint_doc}, + {"__reduce__", listreviter_reduce, METH_NOARGS, reduce_doc}, + {"__setstate__", listreviter_setstate, METH_O, setstate_doc}, {NULL, NULL} /* sentinel */ }; @@ -3372,7 +3388,7 @@ PyTypeObject PyListRevIter_Type = { sizeof(listreviterobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ - (destructor)listreviter_dealloc, /* tp_dealloc */ + listreviter_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, 
/* tp_getattr */ 0, /* tp_setattr */ @@ -3389,12 +3405,12 @@ PyTypeObject PyListRevIter_Type = { 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ 0, /* tp_doc */ - (traverseproc)listreviter_traverse, /* tp_traverse */ + listreviter_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ - (iternextfunc)listreviter_next, /* tp_iternext */ + listreviter_next, /* tp_iternext */ listreviter_methods, /* tp_methods */ 0, }; @@ -3422,23 +3438,25 @@ list___reversed___impl(PyListObject *self) } static void -listreviter_dealloc(listreviterobject *it) +listreviter_dealloc(PyObject *self) { + listreviterobject *it = (listreviterobject *)self; PyObject_GC_UnTrack(it); Py_XDECREF(it->it_seq); PyObject_GC_Del(it); } static int -listreviter_traverse(listreviterobject *it, visitproc visit, void *arg) +listreviter_traverse(PyObject *it, visitproc visit, void *arg) { - Py_VISIT(it->it_seq); + Py_VISIT(((listreviterobject *)it)->it_seq); return 0; } static PyObject * -listreviter_next(listreviterobject *it) +listreviter_next(PyObject *self) { + listreviterobject *it = (listreviterobject *)self; PyObject *item; Py_ssize_t index; PyListObject *seq; @@ -3463,8 +3481,9 @@ listreviter_next(listreviterobject *it) } static PyObject * -listreviter_len(listreviterobject *it, PyObject *Py_UNUSED(ignored)) +listreviter_len(PyObject *self, PyObject *Py_UNUSED(ignored)) { + listreviterobject *it = (listreviterobject *)self; Py_ssize_t len = it->it_index + 1; if (it->it_seq == NULL || PyList_GET_SIZE(it->it_seq) < len) len = 0; @@ -3472,14 +3491,15 @@ listreviter_len(listreviterobject *it, PyObject *Py_UNUSED(ignored)) } static PyObject * -listreviter_reduce(listreviterobject *it, PyObject *Py_UNUSED(ignored)) +listreviter_reduce(PyObject *it, PyObject *Py_UNUSED(ignored)) { return listiter_reduce_general(it, 0); } static PyObject * -listreviter_setstate(listreviterobject *it, PyObject *state) 
+listreviter_setstate(PyObject *self, PyObject *state) { + listreviterobject *it = (listreviterobject *)self; Py_ssize_t index = PyLong_AsSsize_t(state); if (index == -1 && PyErr_Occurred()) return NULL; From acf4cf5ca5ef62407e35609fb365e7dfaa362648 Mon Sep 17 00:00:00 2001 From: Christopher Chavez Date: Tue, 2 Jan 2024 08:03:39 -0600 Subject: [PATCH 36/71] gh-111178: Avoid calling functions from incompatible pointer types in descrobject.c (GH-112861) Fix undefined behavior warnings (UBSan -fsanitize=function), for example: Python/generated_cases.c.h:3315:13: runtime error: call to function mappingproxy_dealloc through pointer to incorrect function type 'void (*)(struct _object *)' descrobject.c:1160: note: mappingproxy_dealloc defined here SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior Python/generated_cases.c.h:3315:13 in --- Objects/descrobject.c | 253 ++++++++++++++++++++++++------------------ 1 file changed, 147 insertions(+), 106 deletions(-) diff --git a/Objects/descrobject.c b/Objects/descrobject.c index 57921b110591e5..8d771adf307dc4 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -19,8 +19,9 @@ class property "propertyobject *" "&PyProperty_Type" /*[clinic end generated code: output=da39a3ee5e6b4b0d input=556352653fd4c02e]*/ static void -descr_dealloc(PyDescrObject *descr) +descr_dealloc(PyObject *self) { + PyDescrObject *descr = (PyDescrObject *)self; _PyObject_GC_UNTRACK(descr); Py_XDECREF(descr->d_type); Py_XDECREF(descr->d_name); @@ -47,28 +48,28 @@ descr_repr(PyDescrObject *descr, const char *format) } static PyObject * -method_repr(PyMethodDescrObject *descr) +method_repr(PyObject *descr) { return descr_repr((PyDescrObject *)descr, ""); } static PyObject * -member_repr(PyMemberDescrObject *descr) +member_repr(PyObject *descr) { return descr_repr((PyDescrObject *)descr, ""); } static PyObject * -getset_repr(PyGetSetDescrObject *descr) +getset_repr(PyObject *descr) { return descr_repr((PyDescrObject *)descr, ""); } static 
PyObject * -wrapperdescr_repr(PyWrapperDescrObject *descr) +wrapperdescr_repr(PyObject *descr) { return descr_repr((PyDescrObject *)descr, ""); @@ -90,8 +91,9 @@ descr_check(PyDescrObject *descr, PyObject *obj) } static PyObject * -classmethod_get(PyMethodDescrObject *descr, PyObject *obj, PyObject *type) +classmethod_get(PyObject *self, PyObject *obj, PyObject *type) { + PyMethodDescrObject *descr = (PyMethodDescrObject *)self; /* Ensure a valid type. Class methods ignore obj. */ if (type == NULL) { if (obj != NULL) @@ -132,8 +134,9 @@ classmethod_get(PyMethodDescrObject *descr, PyObject *obj, PyObject *type) } static PyObject * -method_get(PyMethodDescrObject *descr, PyObject *obj, PyObject *type) +method_get(PyObject *self, PyObject *obj, PyObject *type) { + PyMethodDescrObject *descr = (PyMethodDescrObject *)self; if (obj == NULL) { return Py_NewRef(descr); } @@ -156,8 +159,9 @@ method_get(PyMethodDescrObject *descr, PyObject *obj, PyObject *type) } static PyObject * -member_get(PyMemberDescrObject *descr, PyObject *obj, PyObject *type) +member_get(PyObject *self, PyObject *obj, PyObject *type) { + PyMemberDescrObject *descr = (PyMemberDescrObject *)self; if (obj == NULL) { return Py_NewRef(descr); } @@ -176,8 +180,9 @@ member_get(PyMemberDescrObject *descr, PyObject *obj, PyObject *type) } static PyObject * -getset_get(PyGetSetDescrObject *descr, PyObject *obj, PyObject *type) +getset_get(PyObject *self, PyObject *obj, PyObject *type) { + PyGetSetDescrObject *descr = (PyGetSetDescrObject *)self; if (obj == NULL) { return Py_NewRef(descr); } @@ -195,8 +200,9 @@ getset_get(PyGetSetDescrObject *descr, PyObject *obj, PyObject *type) } static PyObject * -wrapperdescr_get(PyWrapperDescrObject *descr, PyObject *obj, PyObject *type) +wrapperdescr_get(PyObject *self, PyObject *obj, PyObject *type) { + PyWrapperDescrObject *descr = (PyWrapperDescrObject *)self; if (obj == NULL) { return Py_NewRef(descr); } @@ -223,8 +229,9 @@ descr_setcheck(PyDescrObject *descr, 
PyObject *obj, PyObject *value) } static int -member_set(PyMemberDescrObject *descr, PyObject *obj, PyObject *value) +member_set(PyObject *self, PyObject *obj, PyObject *value) { + PyMemberDescrObject *descr = (PyMemberDescrObject *)self; if (descr_setcheck((PyDescrObject *)descr, obj, value) < 0) { return -1; } @@ -232,8 +239,9 @@ member_set(PyMemberDescrObject *descr, PyObject *obj, PyObject *value) } static int -getset_set(PyGetSetDescrObject *descr, PyObject *obj, PyObject *value) +getset_set(PyObject *self, PyObject *obj, PyObject *value) { + PyGetSetDescrObject *descr = (PyGetSetDescrObject *)self; if (descr_setcheck((PyDescrObject *)descr, obj, value) < 0) { return -1; } @@ -479,9 +487,10 @@ method_vectorcall_O( we implement this simply by calling __get__ and then calling the result. */ static PyObject * -classmethoddescr_call(PyMethodDescrObject *descr, PyObject *args, +classmethoddescr_call(PyObject *_descr, PyObject *args, PyObject *kwds) { + PyMethodDescrObject *descr = (PyMethodDescrObject *)_descr; Py_ssize_t argc = PyTuple_GET_SIZE(args); if (argc < 1) { PyErr_Format(PyExc_TypeError, @@ -492,7 +501,7 @@ classmethoddescr_call(PyMethodDescrObject *descr, PyObject *args, return NULL; } PyObject *self = PyTuple_GET_ITEM(args, 0); - PyObject *bound = classmethod_get(descr, NULL, self); + PyObject *bound = classmethod_get((PyObject *)descr, NULL, self); if (bound == NULL) { return NULL; } @@ -523,8 +532,9 @@ wrapperdescr_raw_call(PyWrapperDescrObject *descr, PyObject *self, } static PyObject * -wrapperdescr_call(PyWrapperDescrObject *descr, PyObject *args, PyObject *kwds) +wrapperdescr_call(PyObject *_descr, PyObject *args, PyObject *kwds) { + PyWrapperDescrObject *descr = (PyWrapperDescrObject *)_descr; Py_ssize_t argc; PyObject *self, *result; @@ -563,14 +573,16 @@ wrapperdescr_call(PyWrapperDescrObject *descr, PyObject *args, PyObject *kwds) static PyObject * -method_get_doc(PyMethodDescrObject *descr, void *closure) +method_get_doc(PyObject *_descr, 
void *closure) { + PyMethodDescrObject *descr = (PyMethodDescrObject *)_descr; return _PyType_GetDocFromInternalDoc(descr->d_method->ml_name, descr->d_method->ml_doc); } static PyObject * -method_get_text_signature(PyMethodDescrObject *descr, void *closure) +method_get_text_signature(PyObject *_descr, void *closure) { + PyMethodDescrObject *descr = (PyMethodDescrObject *)_descr; return _PyType_GetTextSignatureFromInternalDoc(descr->d_method->ml_name, descr->d_method->ml_doc, descr->d_method->ml_flags); @@ -605,22 +617,24 @@ calculate_qualname(PyDescrObject *descr) } static PyObject * -descr_get_qualname(PyDescrObject *descr, void *Py_UNUSED(ignored)) +descr_get_qualname(PyObject *self, void *Py_UNUSED(ignored)) { + PyDescrObject *descr = (PyDescrObject *)self; if (descr->d_qualname == NULL) descr->d_qualname = calculate_qualname(descr); return Py_XNewRef(descr->d_qualname); } static PyObject * -descr_reduce(PyDescrObject *descr, PyObject *Py_UNUSED(ignored)) +descr_reduce(PyObject *self, PyObject *Py_UNUSED(ignored)) { + PyDescrObject *descr = (PyDescrObject *)self; return Py_BuildValue("N(OO)", _PyEval_GetBuiltin(&_Py_ID(getattr)), PyDescr_TYPE(descr), PyDescr_NAME(descr)); } static PyMethodDef descr_methods[] = { - {"__reduce__", (PyCFunction)descr_reduce, METH_NOARGS, NULL}, + {"__reduce__", descr_reduce, METH_NOARGS, NULL}, {NULL, NULL} }; @@ -631,15 +645,16 @@ static PyMemberDef descr_members[] = { }; static PyGetSetDef method_getset[] = { - {"__doc__", (getter)method_get_doc}, - {"__qualname__", (getter)descr_get_qualname}, - {"__text_signature__", (getter)method_get_text_signature}, + {"__doc__", method_get_doc}, + {"__qualname__", descr_get_qualname}, + {"__text_signature__", method_get_text_signature}, {0} }; static PyObject * -member_get_doc(PyMemberDescrObject *descr, void *closure) +member_get_doc(PyObject *_descr, void *closure) { + PyMemberDescrObject *descr = (PyMemberDescrObject *)_descr; if (descr->d_member->doc == NULL) { Py_RETURN_NONE; } @@ 
-647,14 +662,15 @@ member_get_doc(PyMemberDescrObject *descr, void *closure) } static PyGetSetDef member_getset[] = { - {"__doc__", (getter)member_get_doc}, - {"__qualname__", (getter)descr_get_qualname}, + {"__doc__", member_get_doc}, + {"__qualname__", descr_get_qualname}, {0} }; static PyObject * -getset_get_doc(PyGetSetDescrObject *descr, void *closure) +getset_get_doc(PyObject *self, void *closure) { + PyGetSetDescrObject *descr = (PyGetSetDescrObject *)self; if (descr->d_getset->doc == NULL) { Py_RETURN_NONE; } @@ -662,28 +678,30 @@ getset_get_doc(PyGetSetDescrObject *descr, void *closure) } static PyGetSetDef getset_getset[] = { - {"__doc__", (getter)getset_get_doc}, - {"__qualname__", (getter)descr_get_qualname}, + {"__doc__", getset_get_doc}, + {"__qualname__", descr_get_qualname}, {0} }; static PyObject * -wrapperdescr_get_doc(PyWrapperDescrObject *descr, void *closure) +wrapperdescr_get_doc(PyObject *self, void *closure) { + PyWrapperDescrObject *descr = (PyWrapperDescrObject *)self; return _PyType_GetDocFromInternalDoc(descr->d_base->name, descr->d_base->doc); } static PyObject * -wrapperdescr_get_text_signature(PyWrapperDescrObject *descr, void *closure) +wrapperdescr_get_text_signature(PyObject *self, void *closure) { + PyWrapperDescrObject *descr = (PyWrapperDescrObject *)self; return _PyType_GetTextSignatureFromInternalDoc(descr->d_base->name, descr->d_base->doc, 0); } static PyGetSetDef wrapperdescr_getset[] = { - {"__doc__", (getter)wrapperdescr_get_doc}, - {"__qualname__", (getter)descr_get_qualname}, - {"__text_signature__", (getter)wrapperdescr_get_text_signature}, + {"__doc__", wrapperdescr_get_doc}, + {"__qualname__", descr_get_qualname}, + {"__text_signature__", wrapperdescr_get_text_signature}, {0} }; @@ -700,12 +718,12 @@ PyTypeObject PyMethodDescr_Type = { "method_descriptor", sizeof(PyMethodDescrObject), 0, - (destructor)descr_dealloc, /* tp_dealloc */ + descr_dealloc, /* tp_dealloc */ offsetof(PyMethodDescrObject, vectorcall), /* 
tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_as_async */ - (reprfunc)method_repr, /* tp_repr */ + method_repr, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ @@ -730,7 +748,7 @@ PyTypeObject PyMethodDescr_Type = { method_getset, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ - (descrgetfunc)method_get, /* tp_descr_get */ + method_get, /* tp_descr_get */ 0, /* tp_descr_set */ }; @@ -740,17 +758,17 @@ PyTypeObject PyClassMethodDescr_Type = { "classmethod_descriptor", sizeof(PyMethodDescrObject), 0, - (destructor)descr_dealloc, /* tp_dealloc */ + descr_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_as_async */ - (reprfunc)method_repr, /* tp_repr */ + method_repr, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ - (ternaryfunc)classmethoddescr_call, /* tp_call */ + classmethoddescr_call, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ @@ -768,7 +786,7 @@ PyTypeObject PyClassMethodDescr_Type = { method_getset, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ - (descrgetfunc)classmethod_get, /* tp_descr_get */ + classmethod_get, /* tp_descr_get */ 0, /* tp_descr_set */ }; @@ -777,12 +795,12 @@ PyTypeObject PyMemberDescr_Type = { "member_descriptor", sizeof(PyMemberDescrObject), 0, - (destructor)descr_dealloc, /* tp_dealloc */ + descr_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_as_async */ - (reprfunc)member_repr, /* tp_repr */ + member_repr, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ @@ -805,8 +823,8 @@ PyTypeObject PyMemberDescr_Type = { member_getset, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ - (descrgetfunc)member_get, /* tp_descr_get */ - (descrsetfunc)member_set, /* tp_descr_set */ + member_get, /* tp_descr_get */ + member_set, /* 
tp_descr_set */ }; PyTypeObject PyGetSetDescr_Type = { @@ -814,12 +832,12 @@ PyTypeObject PyGetSetDescr_Type = { "getset_descriptor", sizeof(PyGetSetDescrObject), 0, - (destructor)descr_dealloc, /* tp_dealloc */ + descr_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_as_async */ - (reprfunc)getset_repr, /* tp_repr */ + getset_repr, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ @@ -842,8 +860,8 @@ PyTypeObject PyGetSetDescr_Type = { getset_getset, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ - (descrgetfunc)getset_get, /* tp_descr_get */ - (descrsetfunc)getset_set, /* tp_descr_set */ + getset_get, /* tp_descr_get */ + getset_set, /* tp_descr_set */ }; PyTypeObject PyWrapperDescr_Type = { @@ -851,17 +869,17 @@ PyTypeObject PyWrapperDescr_Type = { "wrapper_descriptor", sizeof(PyWrapperDescrObject), 0, - (destructor)descr_dealloc, /* tp_dealloc */ + descr_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_as_async */ - (reprfunc)wrapperdescr_repr, /* tp_repr */ + wrapperdescr_repr, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ - (ternaryfunc)wrapperdescr_call, /* tp_call */ + wrapperdescr_call, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ @@ -880,7 +898,7 @@ PyTypeObject PyWrapperDescr_Type = { wrapperdescr_getset, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ - (descrgetfunc)wrapperdescr_get, /* tp_descr_get */ + wrapperdescr_get, /* tp_descr_get */ 0, /* tp_descr_set */ }; @@ -1022,20 +1040,22 @@ typedef struct { } mappingproxyobject; static Py_ssize_t -mappingproxy_len(mappingproxyobject *pp) +mappingproxy_len(PyObject *self) { + mappingproxyobject *pp = (mappingproxyobject *)self; return PyObject_Size(pp->mapping); } static PyObject * -mappingproxy_getitem(mappingproxyobject *pp, PyObject *key) 
+mappingproxy_getitem(PyObject *self, PyObject *key) { + mappingproxyobject *pp = (mappingproxyobject *)self; return PyObject_GetItem(pp->mapping, key); } static PyMappingMethods mappingproxy_as_mapping = { - (lenfunc)mappingproxy_len, /* mp_length */ - (binaryfunc)mappingproxy_getitem, /* mp_subscript */ + mappingproxy_len, /* mp_length */ + mappingproxy_getitem, /* mp_subscript */ 0, /* mp_ass_subscript */ }; @@ -1064,8 +1084,9 @@ static PyNumberMethods mappingproxy_as_number = { }; static int -mappingproxy_contains(mappingproxyobject *pp, PyObject *key) +mappingproxy_contains(PyObject *self, PyObject *key) { + mappingproxyobject *pp = (mappingproxyobject *)self; if (PyDict_CheckExact(pp->mapping)) return PyDict_Contains(pp->mapping, key); else @@ -1080,14 +1101,15 @@ static PySequenceMethods mappingproxy_as_sequence = { 0, /* sq_slice */ 0, /* sq_ass_item */ 0, /* sq_ass_slice */ - (objobjproc)mappingproxy_contains, /* sq_contains */ + mappingproxy_contains, /* sq_contains */ 0, /* sq_inplace_concat */ 0, /* sq_inplace_repeat */ }; static PyObject * -mappingproxy_get(mappingproxyobject *pp, PyObject *const *args, Py_ssize_t nargs) +mappingproxy_get(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { + mappingproxyobject *pp = (mappingproxyobject *)self; /* newargs: mapping, key, default=None */ PyObject *newargs[3]; newargs[0] = pp->mapping; @@ -1104,32 +1126,37 @@ mappingproxy_get(mappingproxyobject *pp, PyObject *const *args, Py_ssize_t nargs } static PyObject * -mappingproxy_keys(mappingproxyobject *pp, PyObject *Py_UNUSED(ignored)) +mappingproxy_keys(PyObject *self, PyObject *Py_UNUSED(ignored)) { + mappingproxyobject *pp = (mappingproxyobject *)self; return PyObject_CallMethodNoArgs(pp->mapping, &_Py_ID(keys)); } static PyObject * -mappingproxy_values(mappingproxyobject *pp, PyObject *Py_UNUSED(ignored)) +mappingproxy_values(PyObject *self, PyObject *Py_UNUSED(ignored)) { + mappingproxyobject *pp = (mappingproxyobject *)self; return 
PyObject_CallMethodNoArgs(pp->mapping, &_Py_ID(values)); } static PyObject * -mappingproxy_items(mappingproxyobject *pp, PyObject *Py_UNUSED(ignored)) +mappingproxy_items(PyObject *self, PyObject *Py_UNUSED(ignored)) { + mappingproxyobject *pp = (mappingproxyobject *)self; return PyObject_CallMethodNoArgs(pp->mapping, &_Py_ID(items)); } static PyObject * -mappingproxy_copy(mappingproxyobject *pp, PyObject *Py_UNUSED(ignored)) +mappingproxy_copy(PyObject *self, PyObject *Py_UNUSED(ignored)) { + mappingproxyobject *pp = (mappingproxyobject *)self; return PyObject_CallMethodNoArgs(pp->mapping, &_Py_ID(copy)); } static PyObject * -mappingproxy_reversed(mappingproxyobject *pp, PyObject *Py_UNUSED(ignored)) +mappingproxy_reversed(PyObject *self, PyObject *Py_UNUSED(ignored)) { + mappingproxyobject *pp = (mappingproxyobject *)self; return PyObject_CallMethodNoArgs(pp->mapping, &_Py_ID(__reversed__)); } @@ -1140,50 +1167,55 @@ static PyMethodDef mappingproxy_methods[] = { {"get", _PyCFunction_CAST(mappingproxy_get), METH_FASTCALL, PyDoc_STR("D.get(k[,d]) -> D[k] if k in D, else d." 
" d defaults to None.")}, - {"keys", (PyCFunction)mappingproxy_keys, METH_NOARGS, + {"keys", mappingproxy_keys, METH_NOARGS, PyDoc_STR("D.keys() -> a set-like object providing a view on D's keys")}, - {"values", (PyCFunction)mappingproxy_values, METH_NOARGS, + {"values", mappingproxy_values, METH_NOARGS, PyDoc_STR("D.values() -> an object providing a view on D's values")}, - {"items", (PyCFunction)mappingproxy_items, METH_NOARGS, + {"items", mappingproxy_items, METH_NOARGS, PyDoc_STR("D.items() -> a set-like object providing a view on D's items")}, - {"copy", (PyCFunction)mappingproxy_copy, METH_NOARGS, + {"copy", mappingproxy_copy, METH_NOARGS, PyDoc_STR("D.copy() -> a shallow copy of D")}, {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, - {"__reversed__", (PyCFunction)mappingproxy_reversed, METH_NOARGS, + {"__reversed__", mappingproxy_reversed, METH_NOARGS, PyDoc_STR("D.__reversed__() -> reverse iterator")}, {0} }; static void -mappingproxy_dealloc(mappingproxyobject *pp) +mappingproxy_dealloc(PyObject *self) { + mappingproxyobject *pp = (mappingproxyobject *)self; _PyObject_GC_UNTRACK(pp); Py_DECREF(pp->mapping); PyObject_GC_Del(pp); } static PyObject * -mappingproxy_getiter(mappingproxyobject *pp) +mappingproxy_getiter(PyObject *self) { + mappingproxyobject *pp = (mappingproxyobject *)self; return PyObject_GetIter(pp->mapping); } static Py_hash_t -mappingproxy_hash(mappingproxyobject *pp) +mappingproxy_hash(PyObject *self) { + mappingproxyobject *pp = (mappingproxyobject *)self; return PyObject_Hash(pp->mapping); } static PyObject * -mappingproxy_str(mappingproxyobject *pp) +mappingproxy_str(PyObject *self) { + mappingproxyobject *pp = (mappingproxyobject *)self; return PyObject_Str(pp->mapping); } static PyObject * -mappingproxy_repr(mappingproxyobject *pp) +mappingproxy_repr(PyObject *self) { + mappingproxyobject *pp = (mappingproxyobject *)self; return PyUnicode_FromFormat("mappingproxy(%R)", pp->mapping); } @@ -1196,8 
+1228,9 @@ mappingproxy_traverse(PyObject *self, visitproc visit, void *arg) } static PyObject * -mappingproxy_richcompare(mappingproxyobject *v, PyObject *w, int op) +mappingproxy_richcompare(PyObject *self, PyObject *w, int op) { + mappingproxyobject *v = (mappingproxyobject *)self; return PyObject_RichCompare(v->mapping, w, op); } @@ -1271,8 +1304,9 @@ typedef struct { #define Wrapper_Check(v) Py_IS_TYPE(v, &_PyMethodWrapper_Type) static void -wrapper_dealloc(wrapperobject *wp) +wrapper_dealloc(PyObject *self) { + wrapperobject *wp = (wrapperobject *)self; PyObject_GC_UnTrack(wp); Py_TRASHCAN_BEGIN(wp, wrapper_dealloc) Py_XDECREF(wp->descr); @@ -1308,8 +1342,9 @@ wrapper_richcompare(PyObject *a, PyObject *b, int op) } static Py_hash_t -wrapper_hash(wrapperobject *wp) +wrapper_hash(PyObject *self) { + wrapperobject *wp = (wrapperobject *)self; Py_hash_t x, y; x = _Py_HashPointer(wp->self); y = _Py_HashPointer(wp->descr); @@ -1320,8 +1355,9 @@ wrapper_hash(wrapperobject *wp) } static PyObject * -wrapper_repr(wrapperobject *wp) +wrapper_repr(PyObject *self) { + wrapperobject *wp = (wrapperobject *)self; return PyUnicode_FromFormat("", wp->descr->d_base->name, Py_TYPE(wp->self)->tp_name, @@ -1329,14 +1365,15 @@ wrapper_repr(wrapperobject *wp) } static PyObject * -wrapper_reduce(wrapperobject *wp, PyObject *Py_UNUSED(ignored)) +wrapper_reduce(PyObject *self, PyObject *Py_UNUSED(ignored)) { + wrapperobject *wp = (wrapperobject *)self; return Py_BuildValue("N(OO)", _PyEval_GetBuiltin(&_Py_ID(getattr)), wp->self, PyDescr_NAME(wp->descr)); } static PyMethodDef wrapper_methods[] = { - {"__reduce__", (PyCFunction)wrapper_reduce, METH_NOARGS, NULL}, + {"__reduce__", wrapper_reduce, METH_NOARGS, NULL}, {NULL, NULL} }; @@ -1346,52 +1383,56 @@ static PyMemberDef wrapper_members[] = { }; static PyObject * -wrapper_objclass(wrapperobject *wp, void *Py_UNUSED(ignored)) +wrapper_objclass(PyObject *wp, void *Py_UNUSED(ignored)) { - PyObject *c = (PyObject *)PyDescr_TYPE(wp->descr); 
+ PyObject *c = (PyObject *)PyDescr_TYPE(((wrapperobject *)wp)->descr); return Py_NewRef(c); } static PyObject * -wrapper_name(wrapperobject *wp, void *Py_UNUSED(ignored)) +wrapper_name(PyObject *wp, void *Py_UNUSED(ignored)) { - const char *s = wp->descr->d_base->name; + const char *s = ((wrapperobject *)wp)->descr->d_base->name; return PyUnicode_FromString(s); } static PyObject * -wrapper_doc(wrapperobject *wp, void *Py_UNUSED(ignored)) +wrapper_doc(PyObject *self, void *Py_UNUSED(ignored)) { + wrapperobject *wp = (wrapperobject *)self; return _PyType_GetDocFromInternalDoc(wp->descr->d_base->name, wp->descr->d_base->doc); } static PyObject * -wrapper_text_signature(wrapperobject *wp, void *Py_UNUSED(ignored)) +wrapper_text_signature(PyObject *self, void *Py_UNUSED(ignored)) { + wrapperobject *wp = (wrapperobject *)self; return _PyType_GetTextSignatureFromInternalDoc(wp->descr->d_base->name, wp->descr->d_base->doc, 0); } static PyObject * -wrapper_qualname(wrapperobject *wp, void *Py_UNUSED(ignored)) +wrapper_qualname(PyObject *self, void *Py_UNUSED(ignored)) { - return descr_get_qualname((PyDescrObject *)wp->descr, NULL); + wrapperobject *wp = (wrapperobject *)self; + return descr_get_qualname((PyObject *)wp->descr, NULL); } static PyGetSetDef wrapper_getsets[] = { - {"__objclass__", (getter)wrapper_objclass}, - {"__name__", (getter)wrapper_name}, - {"__qualname__", (getter)wrapper_qualname}, - {"__doc__", (getter)wrapper_doc}, - {"__text_signature__", (getter)wrapper_text_signature}, + {"__objclass__", wrapper_objclass}, + {"__name__", wrapper_name}, + {"__qualname__", wrapper_qualname}, + {"__doc__", wrapper_doc}, + {"__text_signature__", wrapper_text_signature}, {0} }; static PyObject * -wrapper_call(wrapperobject *wp, PyObject *args, PyObject *kwds) +wrapper_call(PyObject *self, PyObject *args, PyObject *kwds) { + wrapperobject *wp = (wrapperobject *)self; return wrapperdescr_raw_call(wp->descr, wp->self, args, kwds); } @@ -1410,17 +1451,17 @@ PyTypeObject 
_PyMethodWrapper_Type = { sizeof(wrapperobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ - (destructor)wrapper_dealloc, /* tp_dealloc */ + wrapper_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_as_async */ - (reprfunc)wrapper_repr, /* tp_repr */ + wrapper_repr, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ - (hashfunc)wrapper_hash, /* tp_hash */ - (ternaryfunc)wrapper_call, /* tp_call */ + wrapper_hash, /* tp_hash */ + wrapper_call, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ @@ -1910,18 +1951,18 @@ PyTypeObject PyDictProxy_Type = { sizeof(mappingproxyobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ - (destructor)mappingproxy_dealloc, /* tp_dealloc */ + mappingproxy_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_as_async */ - (reprfunc)mappingproxy_repr, /* tp_repr */ + mappingproxy_repr, /* tp_repr */ &mappingproxy_as_number, /* tp_as_number */ &mappingproxy_as_sequence, /* tp_as_sequence */ &mappingproxy_as_mapping, /* tp_as_mapping */ - (hashfunc)mappingproxy_hash, /* tp_hash */ + mappingproxy_hash, /* tp_hash */ 0, /* tp_call */ - (reprfunc)mappingproxy_str, /* tp_str */ + mappingproxy_str, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ @@ -1930,9 +1971,9 @@ PyTypeObject PyDictProxy_Type = { 0, /* tp_doc */ mappingproxy_traverse, /* tp_traverse */ 0, /* tp_clear */ - (richcmpfunc)mappingproxy_richcompare, /* tp_richcompare */ + mappingproxy_richcompare, /* tp_richcompare */ 0, /* tp_weaklistoffset */ - (getiterfunc)mappingproxy_getiter, /* tp_iter */ + mappingproxy_getiter, /* tp_iter */ 0, /* tp_iternext */ mappingproxy_methods, /* tp_methods */ 0, /* tp_members */ @@ -1972,7 +2013,7 @@ PyTypeObject PyProperty_Type = { Py_TPFLAGS_BASETYPE, /* tp_flags */ property_init__doc__, /* 
tp_doc */ property_traverse, /* tp_traverse */ - (inquiry)property_clear, /* tp_clear */ + property_clear, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ From a1eea1d032d4436131716aec3d8936042bcbfa9d Mon Sep 17 00:00:00 2001 From: Christopher Chavez Date: Tue, 2 Jan 2024 08:32:37 -0600 Subject: [PATCH 37/71] gh-111178: Avoid calling functions from incompatible pointer types in dictobject.c (#112892) Fix undefined behavior warnings (UBSan -fsanitize=function). --- Objects/dictobject.c | 226 ++++++++++++++++++++++++------------------- 1 file changed, 126 insertions(+), 100 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 70f424e07ece9a..2482a918ba983b 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -237,7 +237,7 @@ equally good collision statistics, needed less code & used less memory. static int dictresize(PyInterpreterState *interp, PyDictObject *mp, uint8_t log_newsize, int unicode); -static PyObject* dict_iter(PyDictObject *dict); +static PyObject* dict_iter(PyObject *dict); #include "clinic/dictobject.c.h" @@ -792,7 +792,7 @@ static PyDictKeysObject * clone_combined_dict_keys(PyDictObject *orig) { assert(PyDict_Check(orig)); - assert(Py_TYPE(orig)->tp_iter == (getiterfunc)dict_iter); + assert(Py_TYPE(orig)->tp_iter == dict_iter); assert(orig->ma_values == NULL); assert(orig->ma_keys != Py_EMPTY_KEYS); assert(orig->ma_keys->dk_refcnt == 1); @@ -2450,8 +2450,9 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value) /* Methods */ static void -dict_dealloc(PyDictObject *mp) +dict_dealloc(PyObject *self) { + PyDictObject *mp = (PyDictObject *)self; PyInterpreterState *interp = _PyInterpreterState_GET(); assert(Py_REFCNT(mp) == 0); Py_SET_REFCNT(mp, 1); @@ -2499,8 +2500,9 @@ dict_dealloc(PyDictObject *mp) static PyObject * -dict_repr(PyDictObject *mp) +dict_repr(PyObject *self) { + PyDictObject *mp = (PyDictObject *)self; Py_ssize_t i; PyObject *key = NULL, *value = NULL; 
_PyUnicodeWriter writer; @@ -2582,14 +2584,16 @@ dict_repr(PyDictObject *mp) } static Py_ssize_t -dict_length(PyDictObject *mp) +dict_length(PyObject *self) { + PyDictObject *mp = (PyDictObject *)self; return mp->ma_used; } static PyObject * -dict_subscript(PyDictObject *mp, PyObject *key) +dict_subscript(PyObject *self, PyObject *key) { + PyDictObject *mp = (PyDictObject *)self; Py_ssize_t ix; Py_hash_t hash; PyObject *value; @@ -2623,18 +2627,18 @@ dict_subscript(PyDictObject *mp, PyObject *key) } static int -dict_ass_sub(PyDictObject *mp, PyObject *v, PyObject *w) +dict_ass_sub(PyObject *mp, PyObject *v, PyObject *w) { if (w == NULL) - return PyDict_DelItem((PyObject *)mp, v); + return PyDict_DelItem(mp, v); else - return PyDict_SetItem((PyObject *)mp, v, w); + return PyDict_SetItem(mp, v, w); } static PyMappingMethods dict_as_mapping = { - (lenfunc)dict_length, /*mp_length*/ - (binaryfunc)dict_subscript, /*mp_subscript*/ - (objobjargproc)dict_ass_sub, /*mp_ass_subscript*/ + dict_length, /*mp_length*/ + dict_subscript, /*mp_subscript*/ + dict_ass_sub, /*mp_ass_subscript*/ }; static PyObject * @@ -2925,7 +2929,7 @@ dict_merge(PyInterpreterState *interp, PyObject *a, PyObject *b, int override) return -1; } mp = (PyDictObject*)a; - if (PyDict_Check(b) && (Py_TYPE(b)->tp_iter == (getiterfunc)dict_iter)) { + if (PyDict_Check(b) && (Py_TYPE(b)->tp_iter == dict_iter)) { other = (PyDictObject*)b; if (other == mp || other->ma_used == 0) /* a.update(a) or a.update({}); nothing to do */ @@ -3105,9 +3109,9 @@ _PyDict_MergeEx(PyObject *a, PyObject *b, int override) } static PyObject * -dict_copy(PyDictObject *mp, PyObject *Py_UNUSED(ignored)) +dict_copy(PyObject *mp, PyObject *Py_UNUSED(ignored)) { - return PyDict_Copy((PyObject*)mp); + return PyDict_Copy(mp); } PyObject * @@ -3155,7 +3159,7 @@ PyDict_Copy(PyObject *o) return (PyObject *)split_copy; } - if (Py_TYPE(mp)->tp_iter == (getiterfunc)dict_iter && + if (Py_TYPE(mp)->tp_iter == dict_iter && mp->ma_values == NULL && 
(mp->ma_used >= (mp->ma_keys->dk_nentries * 2) / 3)) { @@ -3509,9 +3513,9 @@ dict_setdefault_impl(PyDictObject *self, PyObject *key, } static PyObject * -dict_clear(PyDictObject *mp, PyObject *Py_UNUSED(ignored)) +dict_clear(PyObject *mp, PyObject *Py_UNUSED(ignored)) { - PyDict_Clear((PyObject *)mp); + PyDict_Clear(mp); Py_RETURN_NONE; } @@ -3700,8 +3704,9 @@ _PyDict_KeysSize(PyDictKeysObject *keys) } static PyObject * -dict_sizeof(PyDictObject *mp, PyObject *Py_UNUSED(ignored)) +dict_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored)) { + PyDictObject *mp = (PyDictObject *)self; return PyLong_FromSsize_t(_PyDict_SizeOf(mp)); } @@ -3763,9 +3768,9 @@ PyDoc_STRVAR(values__doc__, static PyMethodDef mapp_methods[] = { DICT___CONTAINS___METHODDEF - {"__getitem__", _PyCFunction_CAST(dict_subscript), METH_O | METH_COEXIST, + {"__getitem__", dict_subscript, METH_O | METH_COEXIST, getitem__doc__}, - {"__sizeof__", _PyCFunction_CAST(dict_sizeof), METH_NOARGS, + {"__sizeof__", dict_sizeof, METH_NOARGS, sizeof__doc__}, DICT_GET_METHODDEF DICT_SETDEFAULT_METHODDEF @@ -3780,9 +3785,9 @@ static PyMethodDef mapp_methods[] = { {"update", _PyCFunction_CAST(dict_update), METH_VARARGS | METH_KEYWORDS, update__doc__}, DICT_FROMKEYS_METHODDEF - {"clear", (PyCFunction)dict_clear, METH_NOARGS, + {"clear", dict_clear, METH_NOARGS, clear__doc__}, - {"copy", (PyCFunction)dict_copy, METH_NOARGS, + {"copy", dict_copy, METH_NOARGS, copy__doc__}, DICT___REVERSED___METHODDEF {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, @@ -3937,8 +3942,9 @@ dict_vectorcall(PyObject *type, PyObject * const*args, } static PyObject * -dict_iter(PyDictObject *dict) +dict_iter(PyObject *self) { + PyDictObject *dict = (PyDictObject *)self; return dictiter_new(dict, &PyDictIterKey_Type); } @@ -3958,12 +3964,12 @@ PyTypeObject PyDict_Type = { "dict", sizeof(PyDictObject), 0, - (destructor)dict_dealloc, /* tp_dealloc */ + dict_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset 
*/ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_as_async */ - (reprfunc)dict_repr, /* tp_repr */ + dict_repr, /* tp_repr */ &dict_as_number, /* tp_as_number */ &dict_as_sequence, /* tp_as_sequence */ &dict_as_mapping, /* tp_as_mapping */ @@ -3981,7 +3987,7 @@ PyTypeObject PyDict_Type = { dict_tp_clear, /* tp_clear */ dict_richcompare, /* tp_richcompare */ 0, /* tp_weaklistoffset */ - (getiterfunc)dict_iter, /* tp_iter */ + dict_iter, /* tp_iter */ 0, /* tp_iternext */ mapp_methods, /* tp_methods */ 0, /* tp_members */ @@ -4128,8 +4134,9 @@ dictiter_new(PyDictObject *dict, PyTypeObject *itertype) } static void -dictiter_dealloc(dictiterobject *di) +dictiter_dealloc(PyObject *self) { + dictiterobject *di = (dictiterobject *)self; /* bpo-31095: UnTrack is needed before calling any callbacks */ _PyObject_GC_UNTRACK(di); Py_XDECREF(di->di_dict); @@ -4138,16 +4145,18 @@ dictiter_dealloc(dictiterobject *di) } static int -dictiter_traverse(dictiterobject *di, visitproc visit, void *arg) +dictiter_traverse(PyObject *self, visitproc visit, void *arg) { + dictiterobject *di = (dictiterobject *)self; Py_VISIT(di->di_dict); Py_VISIT(di->di_result); return 0; } static PyObject * -dictiter_len(dictiterobject *di, PyObject *Py_UNUSED(ignored)) +dictiter_len(PyObject *self, PyObject *Py_UNUSED(ignored)) { + dictiterobject *di = (dictiterobject *)self; Py_ssize_t len = 0; if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used) len = di->len; @@ -4158,21 +4167,22 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it))."); static PyObject * -dictiter_reduce(dictiterobject *di, PyObject *Py_UNUSED(ignored)); +dictiter_reduce(PyObject *di, PyObject *Py_UNUSED(ignored)); PyDoc_STRVAR(reduce_doc, "Return state information for pickling."); static PyMethodDef dictiter_methods[] = { - {"__length_hint__", _PyCFunction_CAST(dictiter_len), METH_NOARGS, + {"__length_hint__", dictiter_len, METH_NOARGS, length_hint_doc}, - {"__reduce__", 
_PyCFunction_CAST(dictiter_reduce), METH_NOARGS, + {"__reduce__", dictiter_reduce, METH_NOARGS, reduce_doc}, {NULL, NULL} /* sentinel */ }; static PyObject* -dictiter_iternextkey(dictiterobject *di) +dictiter_iternextkey(PyObject *self) { + dictiterobject *di = (dictiterobject *)self; PyObject *key; Py_ssize_t i; PyDictKeysObject *k; @@ -4244,7 +4254,7 @@ PyTypeObject PyDictIterKey_Type = { sizeof(dictiterobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ - (destructor)dictiter_dealloc, /* tp_dealloc */ + dictiter_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ @@ -4261,19 +4271,20 @@ PyTypeObject PyDictIterKey_Type = { 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ 0, /* tp_doc */ - (traverseproc)dictiter_traverse, /* tp_traverse */ + dictiter_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ - (iternextfunc)dictiter_iternextkey, /* tp_iternext */ + dictiter_iternextkey, /* tp_iternext */ dictiter_methods, /* tp_methods */ 0, }; static PyObject * -dictiter_iternextvalue(dictiterobject *di) +dictiter_iternextvalue(PyObject *self) { + dictiterobject *di = (dictiterobject *)self; PyObject *value; Py_ssize_t i; PyDictObject *d = di->di_dict; @@ -4343,7 +4354,7 @@ PyTypeObject PyDictIterValue_Type = { sizeof(dictiterobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ - (destructor)dictiter_dealloc, /* tp_dealloc */ + dictiter_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ @@ -4360,19 +4371,20 @@ PyTypeObject PyDictIterValue_Type = { 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 0, /* tp_doc */ - (traverseproc)dictiter_traverse, /* tp_traverse */ + dictiter_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ - 
(iternextfunc)dictiter_iternextvalue, /* tp_iternext */ + dictiter_iternextvalue, /* tp_iternext */ dictiter_methods, /* tp_methods */ 0, }; static PyObject * -dictiter_iternextitem(dictiterobject *di) +dictiter_iternextitem(PyObject *self) { + dictiterobject *di = (dictiterobject *)self; PyObject *key, *value, *result; Py_ssize_t i; PyDictObject *d = di->di_dict; @@ -4467,7 +4479,7 @@ PyTypeObject PyDictIterItem_Type = { sizeof(dictiterobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ - (destructor)dictiter_dealloc, /* tp_dealloc */ + dictiter_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ @@ -4484,12 +4496,12 @@ PyTypeObject PyDictIterItem_Type = { 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ 0, /* tp_doc */ - (traverseproc)dictiter_traverse, /* tp_traverse */ + dictiter_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ - (iternextfunc)dictiter_iternextitem, /* tp_iternext */ + dictiter_iternextitem, /* tp_iternext */ dictiter_methods, /* tp_methods */ 0, }; @@ -4498,8 +4510,9 @@ PyTypeObject PyDictIterItem_Type = { /* dictreviter */ static PyObject * -dictreviter_iternext(dictiterobject *di) +dictreviter_iternext(PyObject *self) { + dictiterobject *di = (dictiterobject *)self; PyDictObject *d = di->di_dict; if (d == NULL) { @@ -4600,11 +4613,11 @@ PyTypeObject PyDictRevIterKey_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "dict_reversekeyiterator", sizeof(dictiterobject), - .tp_dealloc = (destructor)dictiter_dealloc, + .tp_dealloc = dictiter_dealloc, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, - .tp_traverse = (traverseproc)dictiter_traverse, + .tp_traverse = dictiter_traverse, .tp_iter = PyObject_SelfIter, - .tp_iternext = (iternextfunc)dictreviter_iternext, + .tp_iternext = dictreviter_iternext, .tp_methods = dictiter_methods }; @@ -4624,8 +4637,9 @@ 
dict___reversed___impl(PyDictObject *self) } static PyObject * -dictiter_reduce(dictiterobject *di, PyObject *Py_UNUSED(ignored)) +dictiter_reduce(PyObject *self, PyObject *Py_UNUSED(ignored)) { + dictiterobject *di = (dictiterobject *)self; /* copy the iterator state */ dictiterobject tmp = *di; Py_XINCREF(tmp.di_dict); @@ -4641,11 +4655,11 @@ PyTypeObject PyDictRevIterItem_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "dict_reverseitemiterator", sizeof(dictiterobject), - .tp_dealloc = (destructor)dictiter_dealloc, + .tp_dealloc = dictiter_dealloc, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, - .tp_traverse = (traverseproc)dictiter_traverse, + .tp_traverse = dictiter_traverse, .tp_iter = PyObject_SelfIter, - .tp_iternext = (iternextfunc)dictreviter_iternext, + .tp_iternext = dictreviter_iternext, .tp_methods = dictiter_methods }; @@ -4653,11 +4667,11 @@ PyTypeObject PyDictRevIterValue_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "dict_reversevalueiterator", sizeof(dictiterobject), - .tp_dealloc = (destructor)dictiter_dealloc, + .tp_dealloc = dictiter_dealloc, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, - .tp_traverse = (traverseproc)dictiter_traverse, + .tp_traverse = dictiter_traverse, .tp_iter = PyObject_SelfIter, - .tp_iternext = (iternextfunc)dictreviter_iternext, + .tp_iternext = dictreviter_iternext, .tp_methods = dictiter_methods }; @@ -4668,8 +4682,9 @@ PyTypeObject PyDictRevIterValue_Type = { /* The instance lay-out is the same for all three; but the type differs. 
*/ static void -dictview_dealloc(_PyDictViewObject *dv) +dictview_dealloc(PyObject *self) { + _PyDictViewObject *dv = (_PyDictViewObject *)self; /* bpo-31095: UnTrack is needed before calling any callbacks */ _PyObject_GC_UNTRACK(dv); Py_XDECREF(dv->dv_dict); @@ -4677,15 +4692,17 @@ dictview_dealloc(_PyDictViewObject *dv) } static int -dictview_traverse(_PyDictViewObject *dv, visitproc visit, void *arg) +dictview_traverse(PyObject *self, visitproc visit, void *arg) { + _PyDictViewObject *dv = (_PyDictViewObject *)self; Py_VISIT(dv->dv_dict); return 0; } static Py_ssize_t -dictview_len(_PyDictViewObject *dv) +dictview_len(PyObject *self) { + _PyDictViewObject *dv = (_PyDictViewObject *)self; Py_ssize_t len = 0; if (dv->dv_dict != NULL) len = dv->dv_dict->ma_used; @@ -4825,8 +4842,9 @@ dictview_richcompare(PyObject *self, PyObject *other, int op) } static PyObject * -dictview_repr(_PyDictViewObject *dv) +dictview_repr(PyObject *self) { + _PyDictViewObject *dv = (_PyDictViewObject *)self; PyObject *seq; PyObject *result = NULL; Py_ssize_t rc; @@ -4850,8 +4868,9 @@ dictview_repr(_PyDictViewObject *dv) /*** dict_keys ***/ static PyObject * -dictkeys_iter(_PyDictViewObject *dv) +dictkeys_iter(PyObject *self) { + _PyDictViewObject *dv = (_PyDictViewObject *)self; if (dv->dv_dict == NULL) { Py_RETURN_NONE; } @@ -4859,22 +4878,23 @@ dictkeys_iter(_PyDictViewObject *dv) } static int -dictkeys_contains(_PyDictViewObject *dv, PyObject *obj) +dictkeys_contains(PyObject *self, PyObject *obj) { + _PyDictViewObject *dv = (_PyDictViewObject *)self; if (dv->dv_dict == NULL) return 0; return PyDict_Contains((PyObject *)dv->dv_dict, obj); } static PySequenceMethods dictkeys_as_sequence = { - (lenfunc)dictview_len, /* sq_length */ + dictview_len, /* sq_length */ 0, /* sq_concat */ 0, /* sq_repeat */ 0, /* sq_item */ 0, /* sq_slice */ 0, /* sq_ass_item */ 0, /* sq_ass_slice */ - (objobjproc)dictkeys_contains, /* sq_contains */ + dictkeys_contains, /* sq_contains */ }; // Create a set 
object from dictviews object. @@ -4914,7 +4934,7 @@ dictviews_sub(PyObject *self, PyObject *other) } static int -dictitems_contains(_PyDictViewObject *dv, PyObject *obj); +dictitems_contains(PyObject *dv, PyObject *obj); PyObject * _PyDictView_Intersect(PyObject* self, PyObject *other) @@ -4924,7 +4944,7 @@ _PyDictView_Intersect(PyObject* self, PyObject *other) PyObject *key; Py_ssize_t len_self; int rv; - int (*dict_contains)(_PyDictViewObject *, PyObject *); + objobjproc dict_contains; /* Python interpreter swaps parameters when dict view is on right side of & */ @@ -4934,7 +4954,7 @@ _PyDictView_Intersect(PyObject* self, PyObject *other) self = tmp; } - len_self = dictview_len((_PyDictViewObject *)self); + len_self = dictview_len(self); /* if other is a set and self is smaller than other, reuse set intersection logic */ @@ -4946,7 +4966,7 @@ _PyDictView_Intersect(PyObject* self, PyObject *other) /* if other is another dict view, and it is bigger than self, swap them */ if (PyDictViewSet_Check(other)) { - Py_ssize_t len_other = dictview_len((_PyDictViewObject *)other); + Py_ssize_t len_other = dictview_len(other); if (len_other > len_self) { PyObject *tmp = other; other = self; @@ -4976,7 +4996,7 @@ _PyDictView_Intersect(PyObject* self, PyObject *other) } while ((key = PyIter_Next(it)) != NULL) { - rv = dict_contains((_PyDictViewObject *)self, key); + rv = dict_contains(self, key); if (rv < 0) { goto error; } @@ -5150,7 +5170,7 @@ dictviews_isdisjoint(PyObject *self, PyObject *other) PyObject *item = NULL; if (self == other) { - if (dictview_len((_PyDictViewObject *)self) == 0) + if (dictview_len(self) == 0) Py_RETURN_TRUE; else Py_RETURN_FALSE; @@ -5159,7 +5179,7 @@ dictviews_isdisjoint(PyObject *self, PyObject *other) /* Iterate over the shorter object (only if other is a set, * because PySequence_Contains may be expensive otherwise): */ if (PyAnySet_Check(other) || PyDictViewSet_Check(other)) { - Py_ssize_t len_self = dictview_len((_PyDictViewObject *)self); + 
Py_ssize_t len_self = dictview_len(self); Py_ssize_t len_other = PyObject_Size(other); if (len_other == -1) return NULL; @@ -5197,15 +5217,15 @@ dictviews_isdisjoint(PyObject *self, PyObject *other) PyDoc_STRVAR(isdisjoint_doc, "Return True if the view and the given iterable have a null intersection."); -static PyObject* dictkeys_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored)); +static PyObject* dictkeys_reversed(PyObject *dv, PyObject *Py_UNUSED(ignored)); PyDoc_STRVAR(reversed_keys_doc, "Return a reverse iterator over the dict keys."); static PyMethodDef dictkeys_methods[] = { - {"isdisjoint", (PyCFunction)dictviews_isdisjoint, METH_O, + {"isdisjoint", dictviews_isdisjoint, METH_O, isdisjoint_doc}, - {"__reversed__", _PyCFunction_CAST(dictkeys_reversed), METH_NOARGS, + {"__reversed__", dictkeys_reversed, METH_NOARGS, reversed_keys_doc}, {NULL, NULL} /* sentinel */ }; @@ -5216,12 +5236,12 @@ PyTypeObject PyDictKeys_Type = { sizeof(_PyDictViewObject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ - (destructor)dictview_dealloc, /* tp_dealloc */ + dictview_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_as_async */ - (reprfunc)dictview_repr, /* tp_repr */ + dictview_repr, /* tp_repr */ &dictviews_as_number, /* tp_as_number */ &dictkeys_as_sequence, /* tp_as_sequence */ 0, /* tp_as_mapping */ @@ -5233,11 +5253,11 @@ PyTypeObject PyDictKeys_Type = { 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ 0, /* tp_doc */ - (traverseproc)dictview_traverse, /* tp_traverse */ + dictview_traverse, /* tp_traverse */ 0, /* tp_clear */ dictview_richcompare, /* tp_richcompare */ 0, /* tp_weaklistoffset */ - (getiterfunc)dictkeys_iter, /* tp_iter */ + dictkeys_iter, /* tp_iter */ 0, /* tp_iternext */ dictkeys_methods, /* tp_methods */ .tp_getset = dictview_getset, @@ -5250,8 +5270,9 @@ dictkeys_new(PyObject *dict, PyObject *Py_UNUSED(ignored)) } static PyObject * 
-dictkeys_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored)) +dictkeys_reversed(PyObject *self, PyObject *Py_UNUSED(ignored)) { + _PyDictViewObject *dv = (_PyDictViewObject *)self; if (dv->dv_dict == NULL) { Py_RETURN_NONE; } @@ -5261,8 +5282,9 @@ dictkeys_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored)) /*** dict_items ***/ static PyObject * -dictitems_iter(_PyDictViewObject *dv) +dictitems_iter(PyObject *self) { + _PyDictViewObject *dv = (_PyDictViewObject *)self; if (dv->dv_dict == NULL) { Py_RETURN_NONE; } @@ -5270,8 +5292,9 @@ dictitems_iter(_PyDictViewObject *dv) } static int -dictitems_contains(_PyDictViewObject *dv, PyObject *obj) +dictitems_contains(PyObject *self, PyObject *obj) { + _PyDictViewObject *dv = (_PyDictViewObject *)self; int result; PyObject *key, *value, *found; if (dv->dv_dict == NULL) @@ -5289,25 +5312,25 @@ dictitems_contains(_PyDictViewObject *dv, PyObject *obj) } static PySequenceMethods dictitems_as_sequence = { - (lenfunc)dictview_len, /* sq_length */ + dictview_len, /* sq_length */ 0, /* sq_concat */ 0, /* sq_repeat */ 0, /* sq_item */ 0, /* sq_slice */ 0, /* sq_ass_item */ 0, /* sq_ass_slice */ - (objobjproc)dictitems_contains, /* sq_contains */ + dictitems_contains, /* sq_contains */ }; -static PyObject* dictitems_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored)); +static PyObject* dictitems_reversed(PyObject *dv, PyObject *Py_UNUSED(ignored)); PyDoc_STRVAR(reversed_items_doc, "Return a reverse iterator over the dict items."); static PyMethodDef dictitems_methods[] = { - {"isdisjoint", (PyCFunction)dictviews_isdisjoint, METH_O, + {"isdisjoint", dictviews_isdisjoint, METH_O, isdisjoint_doc}, - {"__reversed__", (PyCFunction)dictitems_reversed, METH_NOARGS, + {"__reversed__", dictitems_reversed, METH_NOARGS, reversed_items_doc}, {NULL, NULL} /* sentinel */ }; @@ -5318,12 +5341,12 @@ PyTypeObject PyDictItems_Type = { sizeof(_PyDictViewObject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ - 
(destructor)dictview_dealloc, /* tp_dealloc */ + dictview_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_as_async */ - (reprfunc)dictview_repr, /* tp_repr */ + dictview_repr, /* tp_repr */ &dictviews_as_number, /* tp_as_number */ &dictitems_as_sequence, /* tp_as_sequence */ 0, /* tp_as_mapping */ @@ -5335,11 +5358,11 @@ PyTypeObject PyDictItems_Type = { 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ 0, /* tp_doc */ - (traverseproc)dictview_traverse, /* tp_traverse */ + dictview_traverse, /* tp_traverse */ 0, /* tp_clear */ dictview_richcompare, /* tp_richcompare */ 0, /* tp_weaklistoffset */ - (getiterfunc)dictitems_iter, /* tp_iter */ + dictitems_iter, /* tp_iter */ 0, /* tp_iternext */ dictitems_methods, /* tp_methods */ .tp_getset = dictview_getset, @@ -5352,8 +5375,9 @@ dictitems_new(PyObject *dict, PyObject *Py_UNUSED(ignored)) } static PyObject * -dictitems_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored)) +dictitems_reversed(PyObject *self, PyObject *Py_UNUSED(ignored)) { + _PyDictViewObject *dv = (_PyDictViewObject *)self; if (dv->dv_dict == NULL) { Py_RETURN_NONE; } @@ -5363,8 +5387,9 @@ dictitems_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored)) /*** dict_values ***/ static PyObject * -dictvalues_iter(_PyDictViewObject *dv) +dictvalues_iter(PyObject *self) { + _PyDictViewObject *dv = (_PyDictViewObject *)self; if (dv->dv_dict == NULL) { Py_RETURN_NONE; } @@ -5372,7 +5397,7 @@ dictvalues_iter(_PyDictViewObject *dv) } static PySequenceMethods dictvalues_as_sequence = { - (lenfunc)dictview_len, /* sq_length */ + dictview_len, /* sq_length */ 0, /* sq_concat */ 0, /* sq_repeat */ 0, /* sq_item */ @@ -5382,13 +5407,13 @@ static PySequenceMethods dictvalues_as_sequence = { (objobjproc)0, /* sq_contains */ }; -static PyObject* dictvalues_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored)); +static PyObject* dictvalues_reversed(PyObject *dv, 
PyObject *Py_UNUSED(ignored)); PyDoc_STRVAR(reversed_values_doc, "Return a reverse iterator over the dict values."); static PyMethodDef dictvalues_methods[] = { - {"__reversed__", (PyCFunction)dictvalues_reversed, METH_NOARGS, + {"__reversed__", dictvalues_reversed, METH_NOARGS, reversed_values_doc}, {NULL, NULL} /* sentinel */ }; @@ -5399,12 +5424,12 @@ PyTypeObject PyDictValues_Type = { sizeof(_PyDictViewObject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ - (destructor)dictview_dealloc, /* tp_dealloc */ + dictview_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_as_async */ - (reprfunc)dictview_repr, /* tp_repr */ + dictview_repr, /* tp_repr */ 0, /* tp_as_number */ &dictvalues_as_sequence, /* tp_as_sequence */ 0, /* tp_as_mapping */ @@ -5416,11 +5441,11 @@ PyTypeObject PyDictValues_Type = { 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ 0, /* tp_doc */ - (traverseproc)dictview_traverse, /* tp_traverse */ + dictview_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ - (getiterfunc)dictvalues_iter, /* tp_iter */ + dictvalues_iter, /* tp_iter */ 0, /* tp_iternext */ dictvalues_methods, /* tp_methods */ .tp_getset = dictview_getset, @@ -5433,8 +5458,9 @@ dictvalues_new(PyObject *dict, PyObject *Py_UNUSED(ignored)) } static PyObject * -dictvalues_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored)) +dictvalues_reversed(PyObject *self, PyObject *Py_UNUSED(ignored)) { + _PyDictViewObject *dv = (_PyDictViewObject *)self; if (dv->dv_dict == NULL) { Py_RETURN_NONE; } From f637b44dd279a7e42d34dc3a00959315b1778072 Mon Sep 17 00:00:00 2001 From: Christopher Chavez Date: Tue, 2 Jan 2024 08:51:32 -0600 Subject: [PATCH 38/71] gh-111178: Avoid calling functions from incompatible pointer types in _tkinter.c (GH-112893) Fix undefined behavior warnings (UBSan -fsanitize=function). 
--- Modules/_tkinter.c | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index 64e752c305aae1..f6181168a85ae1 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -735,8 +735,9 @@ newPyTclObject(Tcl_Obj *arg) } static void -PyTclObject_dealloc(PyTclObject *self) +PyTclObject_dealloc(PyObject *_self) { + PyTclObject *self = (PyTclObject *)_self; PyObject *tp = (PyObject *) Py_TYPE(self); Tcl_DecrRefCount(self->value); Py_XDECREF(self->string); @@ -749,8 +750,9 @@ PyDoc_STRVAR(PyTclObject_string__doc__, "the string representation of this object, either as str or bytes"); static PyObject * -PyTclObject_string(PyTclObject *self, void *ignored) +PyTclObject_string(PyObject *_self, void *ignored) { + PyTclObject *self = (PyTclObject *)_self; if (!self->string) { self->string = unicodeFromTclObj(self->value); if (!self->string) @@ -760,8 +762,9 @@ PyTclObject_string(PyTclObject *self, void *ignored) } static PyObject * -PyTclObject_str(PyTclObject *self) +PyTclObject_str(PyObject *_self) { + PyTclObject *self = (PyTclObject *)_self; if (self->string) { return Py_NewRef(self->string); } @@ -770,9 +773,10 @@ PyTclObject_str(PyTclObject *self) } static PyObject * -PyTclObject_repr(PyTclObject *self) +PyTclObject_repr(PyObject *_self) { - PyObject *repr, *str = PyTclObject_str(self); + PyTclObject *self = (PyTclObject *)_self; + PyObject *repr, *str = PyTclObject_str(_self); if (str == NULL) return NULL; repr = PyUnicode_FromFormat("<%s object: %R>", @@ -809,23 +813,24 @@ PyTclObject_richcompare(PyObject *self, PyObject *other, int op) PyDoc_STRVAR(get_typename__doc__, "name of the Tcl type"); static PyObject* -get_typename(PyTclObject* obj, void* ignored) +get_typename(PyObject *self, void* ignored) { + PyTclObject *obj = (PyTclObject *)self; return unicodeFromTclString(obj->value->typePtr->name); } static PyGetSetDef PyTclObject_getsetlist[] = { - {"typename", 
(getter)get_typename, NULL, get_typename__doc__}, - {"string", (getter)PyTclObject_string, NULL, + {"typename", get_typename, NULL, get_typename__doc__}, + {"string", PyTclObject_string, NULL, PyTclObject_string__doc__}, {0}, }; static PyType_Slot PyTclObject_Type_slots[] = { - {Py_tp_dealloc, (destructor)PyTclObject_dealloc}, - {Py_tp_repr, (reprfunc)PyTclObject_repr}, - {Py_tp_str, (reprfunc)PyTclObject_str}, + {Py_tp_dealloc, PyTclObject_dealloc}, + {Py_tp_repr, PyTclObject_repr}, + {Py_tp_str, PyTclObject_str}, {Py_tp_getattro, PyObject_GenericGetAttr}, {Py_tp_richcompare, PyTclObject_richcompare}, {Py_tp_getset, PyTclObject_getsetlist}, @@ -1306,8 +1311,9 @@ Tkapp_ObjectResult(TkappObject *self) hold the Python lock. */ static int -Tkapp_CallProc(Tkapp_CallEvent *e, int flags) +Tkapp_CallProc(Tcl_Event *evPtr, int flags) { + Tkapp_CallEvent *e = (Tkapp_CallEvent *)evPtr; Tcl_Obj *objStore[ARGSZ]; Tcl_Obj **objv; int objc; @@ -1385,7 +1391,7 @@ Tkapp_Call(PyObject *selfptr, PyObject *args) PyErr_NoMemory(); return NULL; } - ev->ev.proc = (Tcl_EventProc*)Tkapp_CallProc; + ev->ev.proc = Tkapp_CallProc; ev->self = self; ev->args = args; ev->res = &res; @@ -1624,8 +1630,9 @@ var_perform(VarEvent *ev) } static int -var_proc(VarEvent* ev, int flags) +var_proc(Tcl_Event *evPtr, int flags) { + VarEvent *ev = (VarEvent *)evPtr; ENTER_PYTHON var_perform(ev); Tcl_MutexLock(&var_mutex); @@ -1663,7 +1670,7 @@ var_invoke(EventFunc func, PyObject *selfptr, PyObject *args, int flags) ev->res = &res; ev->exc = &exc; ev->cond = &cond; - ev->ev.proc = (Tcl_EventProc*)var_proc; + ev->ev.proc = var_proc; Tkapp_ThreadSend(self, (Tcl_Event*)ev, &cond, &var_mutex); Tcl_ConditionFinalize(&cond); if (!res) { @@ -2236,8 +2243,9 @@ typedef struct CommandEvent{ } CommandEvent; static int -Tkapp_CommandProc(CommandEvent *ev, int flags) +Tkapp_CommandProc(Tcl_Event *evPtr, int flags) { + CommandEvent *ev = (CommandEvent *)evPtr; if (ev->create) *ev->status = Tcl_CreateObjCommand( ev->interp, 
ev->name, PythonCmd, @@ -2290,7 +2298,7 @@ _tkinter_tkapp_createcommand_impl(TkappObject *self, const char *name, PyMem_Free(data); return NULL; } - ev->ev.proc = (Tcl_EventProc*)Tkapp_CommandProc; + ev->ev.proc = Tkapp_CommandProc; ev->interp = self->interp; ev->create = 1; ev->name = name; @@ -2343,7 +2351,7 @@ _tkinter_tkapp_deletecommand_impl(TkappObject *self, const char *name) PyErr_NoMemory(); return NULL; } - ev->ev.proc = (Tcl_EventProc*)Tkapp_CommandProc; + ev->ev.proc = Tkapp_CommandProc; ev->interp = self->interp; ev->create = 0; ev->name = name; From ce7a8eef79c1f81358e00aa84b906540edd91458 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Jan 2024 18:19:01 +0200 Subject: [PATCH 39/71] build(deps): bump actions/stale from 8 to 9 (#113611) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/stale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 94676f5ee5fffc..07608fe91b4dbe 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -16,7 +16,7 @@ jobs: steps: - name: "Check PRs" - uses: actions/stale@v8 + uses: actions/stale@v9 with: repo-token: ${{ secrets.GITHUB_TOKEN }} stale-pr-message: 'This PR is stale because it has been open for 30 days with no activity.' 
From fff1e8a50b4eeea83090f4c11e21b4577e8d09e3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Jan 2024 18:20:17 +0200 Subject: [PATCH 40/71] build(deps): bump actions/upload-artifact from 3 to 4 (#113614) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9f67f30ed07d74..2168ec101cf3d9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -395,7 +395,7 @@ jobs: -x test_subprocess \ -x test_signal \ -x test_sysconfig - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: always() with: name: hypothesis-example-db @@ -483,7 +483,7 @@ jobs: output-sarif: true sanitizer: ${{ matrix.sanitizer }} - name: Upload crash - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: failure() && steps.build.outcome == 'success' with: name: ${{ matrix.sanitizer }}-artifacts From 50b093f5c7060c0b44c264808411346cee7becf0 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 2 Jan 2024 21:45:36 +0200 Subject: [PATCH 41/71] gh-53502: Fix plistlib.dump() for naive datetime with aware_datetime option (GH-113645) --- Lib/plistlib.py | 4 ++-- Lib/test/test_plistlib.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/plistlib.py b/Lib/plistlib.py index 6eb70cedd7aec6..0fc1b5cbfa8c49 100644 --- a/Lib/plistlib.py +++ b/Lib/plistlib.py @@ -155,7 +155,7 @@ def _date_from_string(s, aware_datetime): def _date_to_string(d, aware_datetime): - if aware_datetime and d.tzinfo is not None: + if aware_datetime: d = d.astimezone(datetime.UTC) return '%04d-%02d-%02dT%02d:%02d:%02dZ' % ( d.year, d.month, d.day, @@ -791,7 +791,7 @@ def _write_object(self, value): self._fp.write(struct.pack('>Bd', 0x23, value)) elif isinstance(value, datetime.datetime): - 
if self._aware_datetime and value.tzinfo is not None: + if self._aware_datetime: dt = value.astimezone(datetime.UTC) offset = dt - datetime.datetime(2001, 1, 1, tzinfo=datetime.UTC) f = offset.total_seconds() diff --git a/Lib/test/test_plistlib.py b/Lib/test/test_plistlib.py index 010393a417b946..1d2e14a30c4e13 100644 --- a/Lib/test/test_plistlib.py +++ b/Lib/test/test_plistlib.py @@ -885,7 +885,8 @@ def test_dump_naive_datetime_with_aware_datetime_option(self): for fmt in ALL_FORMATS: s = plistlib.dumps(dt, fmt=fmt, aware_datetime=True) parsed = plistlib.loads(s, aware_datetime=False) - self.assertEqual(parsed, dt) + expected = dt.astimezone(datetime.UTC).replace(tzinfo=None) + self.assertEqual(parsed, expected) class TestBinaryPlistlib(unittest.TestCase): From bab0758ea4a1d4666a973ae2d65f21a09e4478ba Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Tue, 2 Jan 2024 15:29:08 -0600 Subject: [PATCH 42/71] gh-110824 Temporarily skip test_sysconfig.test_library on macOS framework builds. (GH-113298) Co-authored-by: Ned Deily --- Lib/test/test_sysconfig.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py index a19c04b1b2cde5..be609a0abd29c8 100644 --- a/Lib/test/test_sysconfig.py +++ b/Lib/test/test_sysconfig.py @@ -43,6 +43,7 @@ def setUp(self): self.name = os.name self.platform = sys.platform self.version = sys.version + self._framework = sys._framework self.sep = os.sep self.join = os.path.join self.isabs = os.path.isabs @@ -66,6 +67,7 @@ def tearDown(self): os.name = self.name sys.platform = self.platform sys.version = self.version + sys._framework = self._framework os.sep = self.sep os.path.join = self.join os.path.isabs = self.isabs @@ -139,7 +141,7 @@ def test_get_preferred_schemes(self): # Mac, framework build. 
os.name = 'posix' sys.platform = 'darwin' - sys._framework = True + sys._framework = "MyPython" self.assertIsInstance(schemes, dict) self.assertEqual(set(schemes), expected_schemes) @@ -413,7 +415,10 @@ def test_library(self): else: self.assertTrue(library.startswith(f'libpython{major}.{minor}')) self.assertTrue(library.endswith('.a')) - self.assertTrue(ldlibrary.startswith(f'libpython{major}.{minor}')) + if sys.platform == 'darwin' and sys._framework: + self.skipTest('gh-110824: skip LDLIBRARY test for framework build') + else: + self.assertTrue(ldlibrary.startswith(f'libpython{major}.{minor}')) @unittest.skipUnless(sys.platform == "darwin", "test only relevant on MacOSX") @requires_subprocess() From b0fb074d5983f07517cec76a37268f13c986d314 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 2 Jan 2024 14:09:57 -0800 Subject: [PATCH 43/71] GH-113657: Add back missing _SET_IP uops in tier two (GH-113662) --- .../2024-01-02-11-14-29.gh-issue-113657.CQo9vF.rst | 2 ++ Python/optimizer_analysis.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-02-11-14-29.gh-issue-113657.CQo9vF.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-02-11-14-29.gh-issue-113657.CQo9vF.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-02-11-14-29.gh-issue-113657.CQo9vF.rst new file mode 100644 index 00000000000000..b520b5c2529425 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-02-11-14-29.gh-issue-113657.CQo9vF.rst @@ -0,0 +1,2 @@ +Fix an issue that caused important instruction pointer updates to be +optimized out of tier two traces. 
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8b471d70a10d7d..4eb2d9711f5e56 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -4,6 +4,7 @@ #include "pycore_opcode_metadata.h" #include "pycore_opcode_utils.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() +#include "pycore_uop_metadata.h" #include "pycore_uops.h" #include "pycore_long.h" #include "cpython/optimizer.h" @@ -35,13 +36,13 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) break; } else { - if (OPCODE_HAS_ESCAPES(opcode)) { + if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { maybe_invalid = true; if (last_set_ip >= 0) { buffer[last_set_ip].opcode = _SET_IP; } } - if (OPCODE_HAS_ERROR(opcode) || opcode == _PUSH_FRAME) { + if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) { if (last_set_ip >= 0) { buffer[last_set_ip].opcode = _SET_IP; } From 5dc79e3d7f26a6a871a89ce3efc9f1bcee7bb447 Mon Sep 17 00:00:00 2001 From: Itamar Oren Date: Tue, 2 Jan 2024 16:30:53 -0800 Subject: [PATCH 44/71] gh-113628: Fix test_site test with long stdlib paths (#113640) --- Lib/test/test_site.py | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py index 33d0975bda8eaa..9f199d9069d207 100644 --- a/Lib/test/test_site.py +++ b/Lib/test/test_site.py @@ -641,10 +641,24 @@ def _calc_sys_path_for_underpth_nosite(self, sys_prefix, lines): sys_path.append(abs_path) return sys_path + def _get_pth_lines(self, libpath: str, *, import_site: bool): + pth_lines = ['fake-path-name'] + # include 200 lines of `libpath` in _pth lines (or fewer + # if the `libpath` is long enough to get close to 32KB + # see https://github.com/python/cpython/issues/113628) + encoded_libpath_length = len(libpath.encode("utf-8")) + repetitions = min(200, 30000 // encoded_libpath_length) + if repetitions <= 2: + self.skipTest( + f"Python stdlib path is too long 
({encoded_libpath_length:,} bytes)") + pth_lines.extend(libpath for _ in range(repetitions)) + pth_lines.extend(['', '# comment']) + if import_site: + pth_lines.append('import site') + return pth_lines + @support.requires_subprocess() def test_underpth_basic(self): - libpath = test.support.STDLIB_DIR - exe_prefix = os.path.dirname(sys.executable) pth_lines = ['#.', '# ..', *sys.path, '.', '..'] exe_file = self._create_underpth_exe(pth_lines) sys_path = self._calc_sys_path_for_underpth_nosite( @@ -666,12 +680,7 @@ def test_underpth_basic(self): def test_underpth_nosite_file(self): libpath = test.support.STDLIB_DIR exe_prefix = os.path.dirname(sys.executable) - pth_lines = [ - 'fake-path-name', - *[libpath for _ in range(200)], - '', - '# comment', - ] + pth_lines = self._get_pth_lines(libpath, import_site=False) exe_file = self._create_underpth_exe(pth_lines) sys_path = self._calc_sys_path_for_underpth_nosite( os.path.dirname(exe_file), @@ -695,13 +704,8 @@ def test_underpth_nosite_file(self): def test_underpth_file(self): libpath = test.support.STDLIB_DIR exe_prefix = os.path.dirname(sys.executable) - exe_file = self._create_underpth_exe([ - 'fake-path-name', - *[libpath for _ in range(200)], - '', - '# comment', - 'import site' - ]) + exe_file = self._create_underpth_exe( + self._get_pth_lines(libpath, import_site=True)) sys_prefix = os.path.dirname(exe_file) env = os.environ.copy() env['PYTHONPATH'] = 'from-env' @@ -720,13 +724,8 @@ def test_underpth_file(self): def test_underpth_dll_file(self): libpath = test.support.STDLIB_DIR exe_prefix = os.path.dirname(sys.executable) - exe_file = self._create_underpth_exe([ - 'fake-path-name', - *[libpath for _ in range(200)], - '', - '# comment', - 'import site' - ], exe_pth=False) + exe_file = self._create_underpth_exe( + self._get_pth_lines(libpath, import_site=True), exe_pth=False) sys_prefix = os.path.dirname(exe_file) env = os.environ.copy() env['PYTHONPATH'] = 'from-env' From dc8df6e84024b79aa96e85a64f354bf8e827bcba 
Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 3 Jan 2024 11:01:13 +0000 Subject: [PATCH 45/71] GH-113595: Don't enter invalid executor (GH-113596) --- Python/bytecodes.c | 28 +++++++++++++++++++--------- Python/generated_cases.c.h | 30 ++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 29e1dab184ef4e..2eeeac53e1dd7e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2364,17 +2364,27 @@ dummy_func( PyCodeObject *code = _PyFrame_GetCode(frame); _PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255]; - Py_INCREF(executor); - if (executor->execute == _PyUOpExecute) { - current_executor = (_PyUOpExecutorObject *)executor; - GOTO_TIER_TWO(); + if (executor->vm_data.valid) { + Py_INCREF(executor); + if (executor->execute == _PyUOpExecute) { + current_executor = (_PyUOpExecutorObject *)executor; + GOTO_TIER_TWO(); + } + next_instr = executor->execute(executor, frame, stack_pointer); + frame = tstate->current_frame; + if (next_instr == NULL) { + goto resume_with_error; + } + stack_pointer = _PyFrame_GetStackPointer(frame); } - next_instr = executor->execute(executor, frame, stack_pointer); - frame = tstate->current_frame; - if (next_instr == NULL) { - goto resume_with_error; + else { + opcode = this_instr->op.code = executor->vm_data.opcode; + this_instr->op.arg = executor->vm_data.oparg; + oparg = (oparg & (~255)) | executor->vm_data.oparg; + code->co_executors->executors[oparg&255] = NULL; + Py_DECREF(executor); + DISPATCH_GOTO(); } - stack_pointer = _PyFrame_GetStackPointer(frame); } replaced op(_POP_JUMP_IF_FALSE, (cond -- )) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ce31967b7912d7..99fd169ca4fec3 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2371,24 +2371,34 @@ } TARGET(ENTER_EXECUTOR) { - frame->instr_ptr = next_instr; + _Py_CODEUNIT *this_instr = 
frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(ENTER_EXECUTOR); TIER_ONE_ONLY CHECK_EVAL_BREAKER(); PyCodeObject *code = _PyFrame_GetCode(frame); _PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255]; - Py_INCREF(executor); - if (executor->execute == _PyUOpExecute) { - current_executor = (_PyUOpExecutorObject *)executor; - GOTO_TIER_TWO(); + if (executor->vm_data.valid) { + Py_INCREF(executor); + if (executor->execute == _PyUOpExecute) { + current_executor = (_PyUOpExecutorObject *)executor; + GOTO_TIER_TWO(); + } + next_instr = executor->execute(executor, frame, stack_pointer); + frame = tstate->current_frame; + if (next_instr == NULL) { + goto resume_with_error; + } + stack_pointer = _PyFrame_GetStackPointer(frame); } - next_instr = executor->execute(executor, frame, stack_pointer); - frame = tstate->current_frame; - if (next_instr == NULL) { - goto resume_with_error; + else { + opcode = this_instr->op.code = executor->vm_data.opcode; + this_instr->op.arg = executor->vm_data.oparg; + oparg = (oparg & (~255)) | executor->vm_data.oparg; + code->co_executors->executors[oparg&255] = NULL; + Py_DECREF(executor); + DISPATCH_GOTO(); } - stack_pointer = _PyFrame_GetStackPointer(frame); DISPATCH(); } From ea978c645edd7bc29d811c61477dff766d7318b6 Mon Sep 17 00:00:00 2001 From: Ege Akman Date: Wed, 3 Jan 2024 14:22:38 +0300 Subject: [PATCH 46/71] gh-113637: Let c_annotations.py to handle the spacing of Limited/Unstable API & Stable ABI translation strings (#113638) --- Doc/tools/extensions/c_annotations.py | 13 +++++++------ Doc/tools/templates/dummy.html | 12 ++++++------ Misc/ACKS | 1 + 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/Doc/tools/extensions/c_annotations.py b/Doc/tools/extensions/c_annotations.py index 42c2f10e0be260..ba37634545c2cf 100644 --- a/Doc/tools/extensions/c_annotations.py +++ b/Doc/tools/extensions/c_annotations.py @@ -126,7 +126,8 @@ def add_annotations(self, app, doctree): 
f"Object type mismatch in limited API annotation " f"for {name}: {record['role']!r} != {objtype!r}") stable_added = record['added'] - message = sphinx_gettext(' Part of the ') + message = sphinx_gettext('Part of the') + message = message.center(len(message) + 2) emph_node = nodes.emphasis(message, message, classes=['stableabi']) ref_node = addnodes.pending_xref( @@ -139,27 +140,27 @@ def add_annotations(self, app, doctree): ref_node += nodes.Text(sphinx_gettext('Stable ABI')) emph_node += ref_node if struct_abi_kind == 'opaque': - emph_node += nodes.Text(sphinx_gettext(' (as an opaque struct)')) + emph_node += nodes.Text(' ' + sphinx_gettext('(as an opaque struct)')) elif struct_abi_kind == 'full-abi': - emph_node += nodes.Text(sphinx_gettext(' (including all members)')) + emph_node += nodes.Text(' ' + sphinx_gettext('(including all members)')) if record['ifdef_note']: emph_node += nodes.Text(' ' + record['ifdef_note']) if stable_added == '3.2': # Stable ABI was introduced in 3.2. pass else: - emph_node += nodes.Text(sphinx_gettext(' since version %s') % stable_added) + emph_node += nodes.Text(' ' + sphinx_gettext('since version %s') % stable_added) emph_node += nodes.Text('.') if struct_abi_kind == 'members': emph_node += nodes.Text( - sphinx_gettext(' (Only some members are part of the stable ABI.)')) + ' ' + sphinx_gettext('(Only some members are part of the stable ABI.)')) node.insert(0, emph_node) # Unstable API annotation. 
if name.startswith('PyUnstable'): warn_node = nodes.admonition( classes=['unstable-c-api', 'warning']) - message = sphinx_gettext('This is ') + message = sphinx_gettext('This is') + ' ' emph_node = nodes.emphasis(message, message) ref_node = addnodes.pending_xref( 'Unstable API', refdomain="std", diff --git a/Doc/tools/templates/dummy.html b/Doc/tools/templates/dummy.html index 3a0acab8836b11..49c2a71a5e40cf 100644 --- a/Doc/tools/templates/dummy.html +++ b/Doc/tools/templates/dummy.html @@ -9,14 +9,14 @@ In extensions/c_annotations.py: -{% trans %} Part of the {% endtrans %} +{% trans %}Part of the{% endtrans %} {% trans %}Limited API{% endtrans %} {% trans %}Stable ABI{% endtrans %} -{% trans %} (as an opaque struct){% endtrans %} -{% trans %} (including all members){% endtrans %} -{% trans %} since version %s{% endtrans %} -{% trans %} (Only some members are part of the stable ABI.){% endtrans %} -{% trans %}This is {% endtrans %} +{% trans %}(as an opaque struct){% endtrans %} +{% trans %}(including all members){% endtrans %} +{% trans %}since version %s{% endtrans %} +{% trans %}(Only some members are part of the stable ABI.){% endtrans %} +{% trans %}This is{% endtrans %} {% trans %}Unstable API{% endtrans %} {% trans %}. 
It may change without warning in minor releases.{% endtrans %} {% trans %}Return value: Always NULL.{% endtrans %} diff --git a/Misc/ACKS b/Misc/ACKS index 6b98be32905391..ab1255be2d58fa 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -31,6 +31,7 @@ Farhan Ahmad Matthew Ahrens Nir Aides Akira +Ege Akman Yaniv Aknin Jyrki Alakuijala Tatiana Al-Chueyr From 4de468cce106221968d7ac08ddd94571b903c194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rodrigo=20Gir=C3=A3o=20Serr=C3=A3o?= <5621605+rodrigogiraoserrao@users.noreply.github.com> Date: Wed, 3 Jan 2024 12:50:44 +0000 Subject: [PATCH 47/71] `functools.partial` docs: Use the more common spelling for "referenceable" (#113675) --- Doc/library/functools.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst index 69ec1eb3ecd89d..6749a5137b446f 100644 --- a/Doc/library/functools.rst +++ b/Doc/library/functools.rst @@ -742,7 +742,7 @@ have three read-only attributes: called. :class:`partial` objects are like :class:`function` objects in that they are -callable, weak referencable, and can have attributes. There are some important +callable, weak referenceable, and can have attributes. There are some important differences. For instance, the :attr:`~definition.__name__` and :attr:`__doc__` attributes are not created automatically. 
Also, :class:`partial` objects defined in classes behave like static methods and do not transform into bound methods From fab7ad62ceca1f88767bca4e1f06f8e4b1faef2f Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Wed, 3 Jan 2024 15:04:26 +0200 Subject: [PATCH 48/71] gh-101100: Fix Sphinx warnings for removed dead batteries (#113669) Co-authored-by: Alex Waygood --- Doc/whatsnew/2.4.rst | 2 +- Doc/whatsnew/2.6.rst | 2 +- Doc/whatsnew/3.10.rst | 6 +++--- Doc/whatsnew/3.11.rst | 12 ++++++------ Doc/whatsnew/3.2.rst | 12 ++++++------ Doc/whatsnew/3.3.rst | 2 +- Doc/whatsnew/3.4.rst | 6 +++--- Doc/whatsnew/3.5.rst | 22 +++++++++++----------- Doc/whatsnew/3.6.rst | 14 +++++++------- Doc/whatsnew/3.7.rst | 4 ++-- Doc/whatsnew/3.8.rst | 2 +- Doc/whatsnew/3.9.rst | 2 +- Misc/NEWS.d/3.10.0a1.rst | 2 +- Misc/NEWS.d/3.11.0a1.rst | 4 ++-- Misc/NEWS.d/3.11.0a7.rst | 4 ++-- Misc/NEWS.d/3.12.0a1.rst | 2 +- Misc/NEWS.d/3.12.0a2.rst | 2 +- Misc/NEWS.d/3.8.0a1.rst | 4 ++-- Misc/NEWS.d/3.8.0a4.rst | 2 +- 19 files changed, 53 insertions(+), 53 deletions(-) diff --git a/Doc/whatsnew/2.4.rst b/Doc/whatsnew/2.4.rst index 6df59dd245ff55..e9a59f4a62551a 100644 --- a/Doc/whatsnew/2.4.rst +++ b/Doc/whatsnew/2.4.rst @@ -995,7 +995,7 @@ fixes. Here's a partial list of the most notable changes, sorted alphabetically by module name. Consult the :file:`Misc/NEWS` file in the source tree for a more complete list of changes, or look through the CVS logs for all the details. -* The :mod:`asyncore` module's :func:`loop` function now has a *count* parameter +* The :mod:`!asyncore` module's :func:`!loop` function now has a *count* parameter that lets you perform a limited number of passes through the polling loop. The default is still to loop forever. diff --git a/Doc/whatsnew/2.6.rst b/Doc/whatsnew/2.6.rst index e8c1709c42abac..d947f61b50cfe0 100644 --- a/Doc/whatsnew/2.6.rst +++ b/Doc/whatsnew/2.6.rst @@ -1789,7 +1789,7 @@ changes, sorted alphabetically by module name. 
Consult the :file:`Misc/NEWS` file in the source tree for a more complete list of changes, or look through the Subversion logs for all the details. -* The :mod:`asyncore` and :mod:`asynchat` modules are +* The :mod:`!asyncore` and :mod:`!asynchat` modules are being actively maintained again, and a number of patches and bugfixes were applied. (Maintained by Josiah Carlson; see :issue:`1736190` for one patch.) diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 2da90b7ed55744..a8a27bfd3dc1bc 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -1278,7 +1278,7 @@ Add negative indexing support to :attr:`PurePath.parents (Contributed by Yaroslav Pankovych in :issue:`21041`.) Add :meth:`Path.hardlink_to ` method that -supersedes :meth:`~pathlib.Path.link_to`. The new method has the same argument +supersedes :meth:`!link_to`. The new method has the same argument order as :meth:`~pathlib.Path.symlink_to`. (Contributed by Barney Gale in :issue:`39950`.) @@ -1740,7 +1740,7 @@ Deprecated (Contributed by Jelle Zijlstra in :gh:`87889`.) -* :meth:`pathlib.Path.link_to` is deprecated and slated for removal in +* :meth:`!pathlib.Path.link_to` is deprecated and slated for removal in Python 3.12. Use :meth:`pathlib.Path.hardlink_to` instead. (Contributed by Barney Gale in :issue:`39950`.) @@ -1771,7 +1771,7 @@ Deprecated * NPN features like :meth:`ssl.SSLSocket.selected_npn_protocol` and :meth:`ssl.SSLContext.set_npn_protocols` are replaced by ALPN. -* The threading debug (:envvar:`PYTHONTHREADDEBUG` environment variable) is +* The threading debug (:envvar:`!PYTHONTHREADDEBUG` environment variable) is deprecated in Python 3.10 and will be removed in Python 3.12. This feature requires a :ref:`debug build of Python `. (Contributed by Victor Stinner in :issue:`44584`.) 
diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index ce4c98eba71443..cb646a54df3607 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -1747,7 +1747,7 @@ Modules (Contributed by Brett Cannon in :issue:`47061` and Victor Stinner in :gh:`68966`.) -* The :mod:`asynchat`, :mod:`asyncore` and :mod:`smtpd` modules have been +* The :mod:`!asynchat`, :mod:`!asyncore` and :mod:`!smtpd` modules have been deprecated since at least Python 3.6. Their documentation and deprecation warnings have now been updated to note they will be removed in Python 3.12. (Contributed by Hugo van Kemenade in :issue:`47022`.) @@ -1877,8 +1877,8 @@ and will be removed in Python 3.12. C APIs pending removal are :ref:`listed separately `. -* The :mod:`asynchat` module -* The :mod:`asyncore` module +* The :mod:`!asynchat` module +* The :mod:`!asyncore` module * The :ref:`entire distutils package ` * The :mod:`!imp` module * The :class:`typing.io ` namespace @@ -1902,10 +1902,10 @@ C APIs pending removal are * :func:`!importlib.util.set_package_wrapper` * :class:`!pkgutil.ImpImporter` * :class:`!pkgutil.ImpLoader` -* :meth:`pathlib.Path.link_to` +* :meth:`!pathlib.Path.link_to` * :func:`!sqlite3.enable_shared_cache` * :func:`!sqlite3.OptimizedUnicode` -* :envvar:`PYTHONTHREADDEBUG` environment variable +* :envvar:`!PYTHONTHREADDEBUG` environment variable * The following deprecated aliases in :mod:`unittest`: ============================ =============================== =============== @@ -2007,7 +2007,7 @@ Removed C APIs are :ref:`listed separately `. because it was not used and added by mistake in previous versions. (Contributed by Nikita Sobolev in :issue:`46483`.) -* Removed the :class:`!MailmanProxy` class in the :mod:`smtpd` module, +* Removed the :class:`!MailmanProxy` class in the :mod:`!smtpd` module, as it is unusable without the external :mod:`!mailman` package. (Contributed by Donghee Na in :issue:`35800`.) 
diff --git a/Doc/whatsnew/3.2.rst b/Doc/whatsnew/3.2.rst index aad196478dd38b..9834bc03dc4b74 100644 --- a/Doc/whatsnew/3.2.rst +++ b/Doc/whatsnew/3.2.rst @@ -1858,12 +1858,12 @@ structure. asyncore -------- -:class:`asyncore.dispatcher` now provides a -:meth:`~asyncore.dispatcher.handle_accepted()` method +:class:`!asyncore.dispatcher` now provides a +:meth:`!handle_accepted()` method returning a ``(sock, addr)`` pair which is called when a connection has actually been established with a new remote endpoint. This is supposed to be used as a -replacement for old :meth:`~asyncore.dispatcher.handle_accept()` and avoids -the user to call :meth:`~asyncore.dispatcher.accept()` directly. +replacement for old :meth:`!handle_accept()` and avoids +the user to call :meth:`!accept()` directly. (Contributed by Giampaolo Rodolà; :issue:`6706`.) @@ -2737,8 +2737,8 @@ require changes to your code: thread-state aware APIs (such as :c:func:`PyEval_SaveThread` and :c:func:`PyEval_RestoreThread`) should be used instead. -* Due to security risks, :func:`asyncore.handle_accept` has been deprecated, and - a new function, :func:`asyncore.handle_accepted`, was added to replace it. +* Due to security risks, :func:`!asyncore.handle_accept` has been deprecated, and + a new function, :func:`!asyncore.handle_accepted`, was added to replace it. (Contributed by Giampaolo Rodola in :issue:`6706`.) diff --git a/Doc/whatsnew/3.3.rst b/Doc/whatsnew/3.3.rst index 79e2dd9dcee361..760324ae66a3af 100644 --- a/Doc/whatsnew/3.3.rst +++ b/Doc/whatsnew/3.3.rst @@ -1845,7 +1845,7 @@ signal smtpd ----- -The :mod:`smtpd` module now supports :rfc:`5321` (extended SMTP) and :rfc:`1870` +The :mod:`!smtpd` module now supports :rfc:`5321` (extended SMTP) and :rfc:`1870` (size extension). Per the standard, these extensions are enabled if and only if the client initiates the session with an ``EHLO`` command. 
diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst index b26e3d36c4bfbc..e07eda985d9bad 100644 --- a/Doc/whatsnew/3.4.rst +++ b/Doc/whatsnew/3.4.rst @@ -1369,9 +1369,9 @@ error. (Contributed by Atsuo Ishimoto and Hynek Schlawack in smtpd ----- -The :class:`~smtpd.SMTPServer` and :class:`~smtpd.SMTPChannel` classes now +The :class:`!SMTPServer` and :class:`!SMTPChannel` classes now accept a *map* keyword argument which, if specified, is passed in to -:class:`asynchat.async_chat` as its *map* argument. This allows an application +:class:`!asynchat.async_chat` as its *map* argument. This allows an application to avoid affecting the global socket map. (Contributed by Vinay Sajip in :issue:`11959`.) @@ -2370,7 +2370,7 @@ Changes in the Python API :issue:`18011`.) Note: this change was also inadvertently applied in Python 3.3.3. -* The :attr:`~cgi.FieldStorage.file` attribute is now automatically closed when +* The :attr:`!file` attribute is now automatically closed when the creating :class:`!cgi.FieldStorage` instance is garbage collected. If you were pulling the file object out separately from the :class:`!cgi.FieldStorage` instance and not keeping the instance alive, then you should either store the diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index bbf2dc59a9f60a..1c7a9270af0aab 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -878,7 +878,7 @@ size of decompressed data. (Contributed by Nikolaus Rath in :issue:`15955`.) cgi --- -The :class:`~cgi.FieldStorage` class now supports the :term:`context manager` +The :class:`!FieldStorage` class now supports the :term:`context manager` protocol. (Contributed by Berker Peksag in :issue:`20289`.) @@ -1663,34 +1663,34 @@ during debugging, instead of integer "magic numbers". 
smtpd ----- -Both the :class:`~smtpd.SMTPServer` and :class:`~smtpd.SMTPChannel` classes now +Both the :class:`!SMTPServer` and :class:`!SMTPChannel` classes now accept a *decode_data* keyword argument to determine if the ``DATA`` portion of the SMTP transaction is decoded using the ``"utf-8"`` codec or is instead provided to the -:meth:`SMTPServer.process_message() ` +:meth:`!SMTPServer.process_message()` method as a byte string. The default is ``True`` for backward compatibility reasons, but will change to ``False`` in Python 3.6. If *decode_data* is set to ``False``, the ``process_message`` method must be prepared to accept keyword arguments. (Contributed by Maciej Szulik in :issue:`19662`.) -The :class:`~smtpd.SMTPServer` class now advertises the ``8BITMIME`` extension +The :class:`!SMTPServer` class now advertises the ``8BITMIME`` extension (:rfc:`6152`) if *decode_data* has been set ``True``. If the client specifies ``BODY=8BITMIME`` on the ``MAIL`` command, it is passed to -:meth:`SMTPServer.process_message() ` +:meth:`!SMTPServer.process_message()` via the *mail_options* keyword. (Contributed by Milan Oberkirch and R. David Murray in :issue:`21795`.) -The :class:`~smtpd.SMTPServer` class now also supports the ``SMTPUTF8`` +The :class:`!SMTPServer` class now also supports the ``SMTPUTF8`` extension (:rfc:`6531`: Internationalized Email). If the client specified ``SMTPUTF8 BODY=8BITMIME`` on the ``MAIL`` command, they are passed to -:meth:`SMTPServer.process_message() ` +:meth:`!SMTPServer.process_message()` via the *mail_options* keyword. It is the responsibility of the ``process_message`` method to correctly handle the ``SMTPUTF8`` data. (Contributed by Milan Oberkirch in :issue:`21725`.) It is now possible to provide, directly or via name resolution, IPv6 -addresses in the :class:`~smtpd.SMTPServer` constructor, and have it +addresses in the :class:`!SMTPServer` constructor, and have it successfully connect. 
(Contributed by Milan Oberkirch in :issue:`14758`.) @@ -1714,7 +1714,7 @@ support :rfc:`6531` (SMTPUTF8). sndhdr ------ -The :func:`~sndhdr.what` and :func:`~sndhdr.whathdr` functions now return +The :func:`!what` and :func:`!whathdr` functions now return a :func:`~collections.namedtuple`. (Contributed by Claudiu Popa in :issue:`18615`.) @@ -2296,9 +2296,9 @@ slated for removal in Python 3.6. The :func:`asyncio.async` function is deprecated in favor of :func:`~asyncio.ensure_future`. -The :mod:`smtpd` module has in the past always decoded the DATA portion of +The :mod:`!smtpd` module has in the past always decoded the DATA portion of email messages using the ``utf-8`` codec. This can now be controlled by the -new *decode_data* keyword to :class:`~smtpd.SMTPServer`. The default value is +new *decode_data* keyword to :class:`!SMTPServer`. The default value is ``True``, but this default is deprecated. Specify the *decode_data* keyword with an appropriate value to avoid the deprecation warning. diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst index 5a3cea0ec87cb2..11e1d73232a96d 100644 --- a/Doc/whatsnew/3.6.rst +++ b/Doc/whatsnew/3.6.rst @@ -1961,14 +1961,14 @@ Deprecated Python modules, functions and methods asynchat ~~~~~~~~ -The :mod:`asynchat` has been deprecated in favor of :mod:`asyncio`. +The :mod:`!asynchat` has been deprecated in favor of :mod:`asyncio`. (Contributed by Mariatta in :issue:`25002`.) asyncore ~~~~~~~~ -The :mod:`asyncore` has been deprecated in favor of :mod:`asyncio`. +The :mod:`!asyncore` has been deprecated in favor of :mod:`asyncio`. (Contributed by Mariatta in :issue:`25002`.) 
@@ -2189,7 +2189,7 @@ Changes in the Python API :mod:`calendar`, :mod:`!cgi`, :mod:`csv`, :mod:`~xml.etree.ElementTree`, :mod:`enum`, :mod:`fileinput`, :mod:`ftplib`, :mod:`logging`, :mod:`mailbox`, - :mod:`mimetypes`, :mod:`optparse`, :mod:`plistlib`, :mod:`smtpd`, + :mod:`mimetypes`, :mod:`optparse`, :mod:`plistlib`, :mod:`!smtpd`, :mod:`subprocess`, :mod:`tarfile`, :mod:`threading` and :mod:`wave`. This means they will export new symbols when ``import *`` is used. @@ -2219,11 +2219,11 @@ Changes in the Python API an error (e.g. ``EBADF``) was reported by the underlying system call. (Contributed by Martin Panter in :issue:`26685`.) -* The *decode_data* argument for the :class:`smtpd.SMTPChannel` and - :class:`smtpd.SMTPServer` constructors is now ``False`` by default. +* The *decode_data* argument for the :class:`!smtpd.SMTPChannel` and + :class:`!smtpd.SMTPServer` constructors is now ``False`` by default. This means that the argument passed to - :meth:`~smtpd.SMTPServer.process_message` is now a bytes object by - default, and ``process_message()`` will be passed keyword arguments. + :meth:`!process_message` is now a bytes object by + default, and :meth:`!process_message` will be passed keyword arguments. Code that has already been updated in accordance with the deprecation warning generated by 3.5 will not be affected. diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index 775a45a1b3ff06..402b15a277e53d 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -2304,9 +2304,9 @@ Changes in the Python API * The :attr:`struct.Struct.format` type is now :class:`str` instead of :class:`bytes`. (Contributed by Victor Stinner in :issue:`21071`.) 
-* :func:`~cgi.parse_multipart` now accepts the *encoding* and *errors* +* :func:`!cgi.parse_multipart` now accepts the *encoding* and *errors* arguments and returns the same results as - :class:`~FieldStorage`: for non-file fields, the value associated to a key + :class:`!FieldStorage`: for non-file fields, the value associated to a key is a list of strings, not bytes. (Contributed by Pierre Quentel in :issue:`29979`.) diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index e4dcb9bf872e28..d373fa163ff737 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -1086,7 +1086,7 @@ pathlib contain characters unrepresentable at the OS level. (Contributed by Serhiy Storchaka in :issue:`33721`.) -Added :meth:`pathlib.Path.link_to()` which creates a hard link pointing +Added :meth:`!pathlib.Path.link_to()` which creates a hard link pointing to a path. (Contributed by Joannah Nanjekye in :issue:`26978`) Note that ``link_to`` was deprecated in 3.10 and removed in 3.12 in diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 0c85fe15915518..f7ad4372325ccb 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -931,7 +931,7 @@ Deprecated * Passing ``None`` as the first argument to the :func:`shlex.split` function has been deprecated. (Contributed by Zackery Spytz in :issue:`33262`.) -* :func:`smtpd.MailmanProxy` is now deprecated as it is unusable without +* :func:`!smtpd.MailmanProxy` is now deprecated as it is unusable without an external module, ``mailman``. (Contributed by Samuel Colvin in :issue:`35800`.) * The :mod:`!lib2to3` module now emits a :exc:`PendingDeprecationWarning`. diff --git a/Misc/NEWS.d/3.10.0a1.rst b/Misc/NEWS.d/3.10.0a1.rst index 731eed3447d2bc..3186de75efd9c5 100644 --- a/Misc/NEWS.d/3.10.0a1.rst +++ b/Misc/NEWS.d/3.10.0a1.rst @@ -2527,7 +2527,7 @@ in Python 3.4 and removed in Python 3.5. .. nonce: BE7zbu .. section: Library -Fix `cgi.parse_multipart` without content_length. 
Patch by Roger Duran +Fix ``cgi.parse_multipart`` without content_length. Patch by Roger Duran .. diff --git a/Misc/NEWS.d/3.11.0a1.rst b/Misc/NEWS.d/3.11.0a1.rst index ba7fb515305ff5..63abcbd5a6499e 100644 --- a/Misc/NEWS.d/3.11.0a1.rst +++ b/Misc/NEWS.d/3.11.0a1.rst @@ -819,7 +819,7 @@ always available when needed. Patch by Mark Shannon. .. nonce: qKnSqV .. section: Core and Builtins -The threading debug (:envvar:`PYTHONTHREADDEBUG` environment variable) is +The threading debug (:envvar:`!PYTHONTHREADDEBUG` environment variable) is deprecated in Python 3.10 and will be removed in Python 3.12. This feature requires a debug build of Python. Patch by Victor Stinner. @@ -2808,7 +2808,7 @@ behaves differently than the similar implementation in :mod:`sysconfig`. .. nonce: 3hmkWw .. section: Library -:class:`smtpd.MailmanProxy` is now removed as it is unusable without an +:class:`!smtpd.MailmanProxy` is now removed as it is unusable without an external module, ``mailman``. Patch by Donghee Na. .. diff --git a/Misc/NEWS.d/3.11.0a7.rst b/Misc/NEWS.d/3.11.0a7.rst index 79557d5c436593..76699632db223a 100644 --- a/Misc/NEWS.d/3.11.0a7.rst +++ b/Misc/NEWS.d/3.11.0a7.rst @@ -717,7 +717,7 @@ Fix :class:`asyncio.Semaphore` re-aquiring FIFO order. .. nonce: uaEDcI .. section: Library -The :mod:`asynchat`, :mod:`asyncore` and :mod:`smtpd` modules have been +The :mod:`!asynchat`, :mod:`!asyncore` and :mod:`!smtpd` modules have been deprecated since at least Python 3.6. Their documentation and deprecation warnings and have now been updated to note they will removed in Python 3.12 (:pep:`594`). @@ -1324,7 +1324,7 @@ extensions. .. section: Tests A test case for :func:`os.sendfile` is converted from deprecated -:mod:`asyncore` (see :pep:`594`) to :mod:`asyncio`. Patch by Oleg Iarygin. +:mod:`!asyncore` (see :pep:`594`) to :mod:`asyncio`. Patch by Oleg Iarygin. .. 
diff --git a/Misc/NEWS.d/3.12.0a1.rst b/Misc/NEWS.d/3.12.0a1.rst index 29d04fa0e175bf..81ef69093005e8 100644 --- a/Misc/NEWS.d/3.12.0a1.rst +++ b/Misc/NEWS.d/3.12.0a1.rst @@ -3617,7 +3617,7 @@ allow access to handlers by name. .. nonce: uw6x5z .. section: Library -The :mod:`smtpd` module was removed per the schedule in :pep:`594`. +The :mod:`!smtpd` module was removed per the schedule in :pep:`594`. .. diff --git a/Misc/NEWS.d/3.12.0a2.rst b/Misc/NEWS.d/3.12.0a2.rst index 1a04ed473f329d..dbc743abe8a767 100644 --- a/Misc/NEWS.d/3.12.0a2.rst +++ b/Misc/NEWS.d/3.12.0a2.rst @@ -695,7 +695,7 @@ Make sure ``patch.dict()`` can be applied on async functions. .. nonce: jUpzF3 .. section: Library -Remove modules :mod:`asyncore` and :mod:`asynchat`, which were deprecated by +Remove modules :mod:`!asyncore` and :mod:`!asynchat`, which were deprecated by :pep:`594`. .. diff --git a/Misc/NEWS.d/3.8.0a1.rst b/Misc/NEWS.d/3.8.0a1.rst index 99f408661d9f69..b56cda86f11faa 100644 --- a/Misc/NEWS.d/3.8.0a1.rst +++ b/Misc/NEWS.d/3.8.0a1.rst @@ -2006,8 +2006,8 @@ Improved support of custom data descriptors in :func:`help` and .. nonce: V4kNN3 .. section: Library -The `crypt` module now internally uses the `crypt_r()` library function -instead of `crypt()` when available. +The ``crypt`` module now internally uses the ``crypt_r()`` library function +instead of ``crypt()`` when available. .. diff --git a/Misc/NEWS.d/3.8.0a4.rst b/Misc/NEWS.d/3.8.0a4.rst index 7e8bfa5c4364a9..3097245b74a511 100644 --- a/Misc/NEWS.d/3.8.0a4.rst +++ b/Misc/NEWS.d/3.8.0a4.rst @@ -255,7 +255,7 @@ all tags in a namespace. Patch by Stefan Behnel. .. nonce: Lpm-SI .. section: Library -`pathlib.path.link_to()` is now implemented. It creates a hard link pointing +``pathlib.path.link_to()`` is now implemented. It creates a hard link pointing to a path. .. 
From 0c3455a9693cfabcd991c4c33db7cccb1387de58 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 3 Jan 2024 22:25:27 +0900 Subject: [PATCH 49/71] gh-111926: Set up basic sementics of weakref API for freethreading (gh-113621) --------- Co-authored-by: Sam Gross --- Include/internal/pycore_weakref.h | 56 ++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/Include/internal/pycore_weakref.h b/Include/internal/pycore_weakref.h index eacbe14c903289..dea267b49039e7 100644 --- a/Include/internal/pycore_weakref.h +++ b/Include/internal/pycore_weakref.h @@ -9,48 +9,66 @@ extern "C" { #endif #include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION() +#include "pycore_object.h" // _Py_REF_IS_MERGED() -static inline PyObject* _PyWeakref_GET_REF(PyObject *ref_obj) { +static inline int _is_dead(PyObject *obj) +{ + // Explanation for the Py_REFCNT() check: when a weakref's target is part + // of a long chain of deallocations which triggers the trashcan mechanism, + // clearing the weakrefs can be delayed long after the target's refcount + // has dropped to zero. In the meantime, code accessing the weakref will + // be able to "see" the target object even though it is supposed to be + // unreachable. See issue gh-60806. 
+#if defined(Py_GIL_DISABLED) + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&obj->ob_ref_shared); + return shared == _Py_REF_SHARED(0, _Py_REF_MERGED); +#else + return (Py_REFCNT(obj) == 0); +#endif +} + +static inline PyObject* _PyWeakref_GET_REF(PyObject *ref_obj) +{ assert(PyWeakref_Check(ref_obj)); + PyObject *ret = NULL; + Py_BEGIN_CRITICAL_SECTION(ref_obj); PyWeakReference *ref = _Py_CAST(PyWeakReference*, ref_obj); PyObject *obj = ref->wr_object; if (obj == Py_None) { // clear_weakref() was called - return NULL; + goto end; } - // Explanation for the Py_REFCNT() check: when a weakref's target is part - // of a long chain of deallocations which triggers the trashcan mechanism, - // clearing the weakrefs can be delayed long after the target's refcount - // has dropped to zero. In the meantime, code accessing the weakref will - // be able to "see" the target object even though it is supposed to be - // unreachable. See issue gh-60806. - Py_ssize_t refcnt = Py_REFCNT(obj); - if (refcnt == 0) { - return NULL; + if (_is_dead(obj)) { + goto end; } - - assert(refcnt > 0); - return Py_NewRef(obj); +#if !defined(Py_GIL_DISABLED) + assert(Py_REFCNT(obj) > 0); +#endif + ret = Py_NewRef(obj); +end: + Py_END_CRITICAL_SECTION(); + return ret; } -static inline int _PyWeakref_IS_DEAD(PyObject *ref_obj) { +static inline int _PyWeakref_IS_DEAD(PyObject *ref_obj) +{ assert(PyWeakref_Check(ref_obj)); - int is_dead; + int ret = 0; Py_BEGIN_CRITICAL_SECTION(ref_obj); PyWeakReference *ref = _Py_CAST(PyWeakReference*, ref_obj); PyObject *obj = ref->wr_object; if (obj == Py_None) { // clear_weakref() was called - is_dead = 1; + ret = 1; } else { // See _PyWeakref_GET_REF() for the rationale of this test - is_dead = (Py_REFCNT(obj) == 0); + ret = _is_dead(obj); } Py_END_CRITICAL_SECTION(); - return is_dead; + return ret; } extern Py_ssize_t _PyWeakref_GetWeakrefCount(PyWeakReference *head); From 7d01fb48089872155e1721ba0a8cc27ee5c4fecd Mon Sep 17 00:00:00 2001 From: Irit Katriel 
<1055913+iritkatriel@users.noreply.github.com> Date: Wed, 3 Jan 2024 16:57:48 +0000 Subject: [PATCH 50/71] gh-113603: Compiler no longer tries to maintain the no-empty-block invariant (#113636) --- Lib/test/test_compile.py | 13 ++ ...-01-01-23-57-24.gh-issue-113603.ySwovr.rst | 1 + Python/flowgraph.c | 116 ++++++------------ 3 files changed, 52 insertions(+), 78 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-01-23-57-24.gh-issue-113603.ySwovr.rst diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 906e16cc9437fb..7850977428985f 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -448,6 +448,19 @@ def test_condition_expression_with_dead_blocks_compiles(self): # See gh-113054 compile('if (5 if 5 else T): 0', '', 'exec') + def test_condition_expression_with_redundant_comparisons_compiles(self): + # See gh-113054 + compile('if 9<9<9and 9or 9:9', '', 'exec') + + def test_dead_code_with_except_handler_compiles(self): + compile(textwrap.dedent(""" + if None: + with CM: + x = 1 + else: + x = 2 + """), '', 'exec') + def test_compile_invalid_namedexpr(self): # gh-109351 m = ast.Module( diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-01-23-57-24.gh-issue-113603.ySwovr.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-01-23-57-24.gh-issue-113603.ySwovr.rst new file mode 100644 index 00000000000000..5fe6d80dedd19d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-01-23-57-24.gh-issue-113603.ySwovr.rst @@ -0,0 +1 @@ +Fixed bug where a redundant NOP is not removed, causing an assertion to fail in the compiler in debug mode. 
diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 0e6ffbc32e1526..5bb11980b8ca37 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -449,6 +449,15 @@ _PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc) } +static basicblock * +next_nonempty_block(basicblock *b) +{ + while (b && b->b_iused == 0) { + b = b->b_next; + } + return b; +} + /***** debugging helpers *****/ #ifndef NDEBUG @@ -464,24 +473,16 @@ no_redundant_nops(cfg_builder *g) { return true; } -static bool -no_empty_basic_blocks(cfg_builder *g) { - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - if (b->b_iused == 0) { - return false; - } - } - return true; -} - static bool no_redundant_jumps(cfg_builder *g) { for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { cfg_instr *last = basicblock_last_instr(b); if (last != NULL) { if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) { - assert(last->i_target != b->b_next); - if (last->i_target == b->b_next) { + basicblock *next = next_nonempty_block(b->b_next); + basicblock *jump_target = next_nonempty_block(last->i_target); + assert(jump_target != next); + if (jump_target == next) { return false; } } @@ -961,42 +962,6 @@ mark_reachable(basicblock *entryblock) { return SUCCESS; } -static void -eliminate_empty_basic_blocks(cfg_builder *g) { - /* Eliminate empty blocks */ - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - basicblock *next = b->b_next; - while (next && next->b_iused == 0) { - next = next->b_next; - } - b->b_next = next; - } - while(g->g_entryblock && g->g_entryblock->b_iused == 0) { - g->g_entryblock = g->g_entryblock->b_next; - } - int next_lbl = get_max_label(g->g_entryblock) + 1; - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - assert(b->b_iused > 0); - for (int i = 0; i < b->b_iused; i++) { - cfg_instr *instr = &b->b_instr[i]; - if (HAS_TARGET(instr->i_opcode)) { - basicblock *target = instr->i_target; - while (target->b_iused == 0) { - 
target = target->b_next; - } - if (instr->i_target != target) { - if (!IS_LABEL(target->b_label)) { - target->b_label.id = next_lbl++; - } - instr->i_target = target; - instr->i_oparg = target->b_label.id; - } - assert(instr->i_target && instr->i_target->b_iused > 0); - } - } - } -} - static int remove_redundant_nops(basicblock *bb) { /* Remove NOPs when legal to do so. */ @@ -1025,10 +990,7 @@ remove_redundant_nops(basicblock *bb) { } } else { - basicblock* next = bb->b_next; - while (next && next->b_iused == 0) { - next = next->b_next; - } + basicblock *next = next_nonempty_block(bb->b_next); /* or if last instruction in BB and next BB has same line number */ if (next) { location next_loc = NO_LOCATION; @@ -1112,25 +1074,22 @@ remove_redundant_jumps(cfg_builder *g) { * can be deleted. */ - assert(no_empty_basic_blocks(g)); - - bool remove_empty_blocks = false; for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { cfg_instr *last = basicblock_last_instr(b); - assert(last != NULL); + if (last == NULL) { + continue; + } assert(!IS_ASSEMBLER_OPCODE(last->i_opcode)); if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) { - if (last->i_target == NULL) { + basicblock* jump_target = next_nonempty_block(last->i_target); + if (jump_target == NULL) { PyErr_SetString(PyExc_SystemError, "jump with NULL target"); return ERROR; } - if (last->i_target == b->b_next) { - assert(b->b_next->b_iused); + basicblock *next = next_nonempty_block(b->b_next); + if (jump_target == next) { if (last->i_loc.lineno == NO_LOCATION.lineno) { b->b_iused--; - if (b->b_iused == 0) { - remove_empty_blocks = true; - } } else { INSTR_SET_OP0(last, NOP); @@ -1138,10 +1097,6 @@ remove_redundant_jumps(cfg_builder *g) { } } } - if (remove_empty_blocks) { - eliminate_empty_basic_blocks(g); - } - assert(no_empty_basic_blocks(g)); return SUCCESS; } @@ -1749,11 +1704,9 @@ optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache) { assert(PyDict_CheckExact(const_cache)); 
RETURN_IF_ERROR(check_cfg(g)); - eliminate_empty_basic_blocks(g); for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { RETURN_IF_ERROR(inline_small_exit_blocks(b)); } - assert(no_empty_basic_blocks(g)); for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts)); assert(b->b_predecessors == 0); @@ -1768,14 +1721,21 @@ optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache) for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { if (b->b_predecessors == 0) { b->b_iused = 0; + b->b_except_handler = 0; } } for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { remove_redundant_nops(b); } - eliminate_empty_basic_blocks(g); - assert(no_redundant_nops(g)); RETURN_IF_ERROR(remove_redundant_jumps(g)); + + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { + remove_redundant_nops(b); + } + + RETURN_IF_ERROR(remove_redundant_jumps(g)); + + assert(no_redundant_jumps(g)); return SUCCESS; } @@ -1825,7 +1785,6 @@ insert_superinstructions(cfg_builder *g) for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { remove_redundant_nops(b); } - eliminate_empty_basic_blocks(g); assert(no_redundant_nops(g)); } @@ -2299,8 +2258,6 @@ is_exit_without_lineno(basicblock *b) { static int duplicate_exits_without_lineno(cfg_builder *g) { - assert(no_empty_basic_blocks(g)); - int next_lbl = get_max_label(g->g_entryblock) + 1; /* Copy all exit blocks without line number that are targets of a jump. 
@@ -2308,9 +2265,11 @@ duplicate_exits_without_lineno(cfg_builder *g) basicblock *entryblock = g->g_entryblock; for (basicblock *b = entryblock; b != NULL; b = b->b_next) { cfg_instr *last = basicblock_last_instr(b); - assert(last != NULL); + if (last == NULL) { + continue; + } if (is_jump(last)) { - basicblock *target = last->i_target; + basicblock *target = next_nonempty_block(last->i_target); if (is_exit_without_lineno(target) && target->b_predecessors > 1) { basicblock *new_target = copy_basicblock(g, target); if (new_target == NULL) { @@ -2367,9 +2326,10 @@ propagate_line_numbers(basicblock *entryblock) { } } if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) { - assert(b->b_next->b_iused); - if (b->b_next->b_instr[0].i_loc.lineno < 0) { - b->b_next->b_instr[0].i_loc = prev_location; + if (b->b_next->b_iused > 0) { + if (b->b_next->b_instr[0].i_loc.lineno < 0) { + b->b_next->b_instr[0].i_loc = prev_location; + } } } if (is_jump(last)) { From 178919cf2132a67bc03ae5994769d93cfb7e2cd3 Mon Sep 17 00:00:00 2001 From: Itamar Oren Date: Wed, 3 Jan 2024 09:30:20 -0800 Subject: [PATCH 51/71] gh-113258: Write frozen modules to the build tree on Windows (GH-113303) This ensures the source directory is not modified at build time, and different builds (e.g. different versions or GIL vs no-GIL) do not have conflicts. 
--- ...-12-23-09-35-48.gh-issue-113258.GlsAyH.rst | 2 + PCbuild/_freeze_module.vcxproj | 98 +++++++++---------- PCbuild/pyproject.props | 1 + PCbuild/pythoncore.vcxproj | 8 +- Tools/build/freeze_modules.py | 8 +- 5 files changed, 61 insertions(+), 56 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2023-12-23-09-35-48.gh-issue-113258.GlsAyH.rst diff --git a/Misc/NEWS.d/next/Build/2023-12-23-09-35-48.gh-issue-113258.GlsAyH.rst b/Misc/NEWS.d/next/Build/2023-12-23-09-35-48.gh-issue-113258.GlsAyH.rst new file mode 100644 index 00000000000000..e7256ea423b3e0 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2023-12-23-09-35-48.gh-issue-113258.GlsAyH.rst @@ -0,0 +1,2 @@ +Changed the Windows build to write out generated frozen modules into the +build tree instead of the source tree. diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index f8c5fafa561efa..292bfa76519507 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -266,117 +266,117 @@ importlib._bootstrap $(IntDir)importlib._bootstrap.g.h - $(PySourcePath)Python\frozen_modules\importlib._bootstrap.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\importlib._bootstrap.h importlib._bootstrap_external $(IntDir)importlib._bootstrap_external.g.h - $(PySourcePath)Python\frozen_modules\importlib._bootstrap_external.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\importlib._bootstrap_external.h zipimport $(IntDir)zipimport.g.h - $(PySourcePath)Python\frozen_modules\zipimport.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\zipimport.h abc $(IntDir)abc.g.h - $(PySourcePath)Python\frozen_modules\abc.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\abc.h codecs $(IntDir)codecs.g.h - $(PySourcePath)Python\frozen_modules\codecs.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\codecs.h io $(IntDir)io.g.h - $(PySourcePath)Python\frozen_modules\io.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\io.h _collections_abc $(IntDir)_collections_abc.g.h 
- $(PySourcePath)Python\frozen_modules\_collections_abc.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\_collections_abc.h _sitebuiltins $(IntDir)_sitebuiltins.g.h - $(PySourcePath)Python\frozen_modules\_sitebuiltins.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\_sitebuiltins.h genericpath $(IntDir)genericpath.g.h - $(PySourcePath)Python\frozen_modules\genericpath.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\genericpath.h ntpath $(IntDir)ntpath.g.h - $(PySourcePath)Python\frozen_modules\ntpath.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\ntpath.h posixpath $(IntDir)posixpath.g.h - $(PySourcePath)Python\frozen_modules\posixpath.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\posixpath.h os $(IntDir)os.g.h - $(PySourcePath)Python\frozen_modules\os.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\os.h site $(IntDir)site.g.h - $(PySourcePath)Python\frozen_modules\site.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\site.h stat $(IntDir)stat.g.h - $(PySourcePath)Python\frozen_modules\stat.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\stat.h importlib.util $(IntDir)importlib.util.g.h - $(PySourcePath)Python\frozen_modules\importlib.util.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\importlib.util.h importlib.machinery $(IntDir)importlib.machinery.g.h - $(PySourcePath)Python\frozen_modules\importlib.machinery.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\importlib.machinery.h runpy $(IntDir)runpy.g.h - $(PySourcePath)Python\frozen_modules\runpy.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\runpy.h __hello__ $(IntDir)__hello__.g.h - $(PySourcePath)Python\frozen_modules\__hello__.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\__hello__.h __phello__ $(IntDir)__phello__.g.h - $(PySourcePath)Python\frozen_modules\__phello__.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\__phello__.h __phello__.ham $(IntDir)__phello__.ham.g.h - $(PySourcePath)Python\frozen_modules\__phello__.ham.h + 
$(GeneratedFrozenModulesDir)Python\frozen_modules\__phello__.ham.h __phello__.ham.eggs $(IntDir)__phello__.ham.eggs.g.h - $(PySourcePath)Python\frozen_modules\__phello__.ham.eggs.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\__phello__.ham.eggs.h __phello__.spam $(IntDir)__phello__.spam.g.h - $(PySourcePath)Python\frozen_modules\__phello__.spam.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\__phello__.spam.h frozen_only $(IntDir)frozen_only.g.h - $(PySourcePath)Python\frozen_modules\frozen_only.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\frozen_only.h @@ -385,34 +385,34 @@ getpath $(IntDir)getpath.g.h - $(PySourcePath)Python\frozen_modules\getpath.h + $(GeneratedFrozenModulesDir)Python\frozen_modules\getpath.h - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + @@ -484,7 +484,7 @@ $(IntDir)\deepfreeze_mappings.txt Overwrite="true" Lines="@(FrozenModule->'%(FullPath):%(FrozenId)')" /> - + @@ -493,7 +493,7 @@ $(IntDir)\deepfreeze_mappings.txt - + diff --git a/PCbuild/pyproject.props b/PCbuild/pyproject.props index 68c0550f7603b7..d69b43b0406ce0 100644 --- a/PCbuild/pyproject.props +++ b/PCbuild/pyproject.props @@ -12,6 +12,7 @@ $(IntDir.Replace(`\\`, `\`)) $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)$(ArchName)_$(Configuration)\pythoncore\ + $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_frozen_$(Configuration)\ $(ProjectName) $(TargetName)$(PyDebugExt) false diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index c90ad1a3592f67..be5b34220aa0bc 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -111,6 +111,7 @@ + $(GeneratedFrozenModulesDir);%(AdditionalIncludeDirectories) PREFIX=NULL; EXEC_PREFIX=NULL; @@ -120,7 +121,6 @@ PLATLIBDIR="DLLs"; %(PreprocessorDefinitions) - $(PySourcePath);%(AdditionalIncludeDirectories) @@ -562,7 +562,9 @@ - + + $(GeneratedFrozenModulesDir)Python;%(AdditionalIncludeDirectories) + @@ -617,7 +619,7 @@ - + diff 
--git a/Tools/build/freeze_modules.py b/Tools/build/freeze_modules.py index 6a54f45bac3a86..a541b4b33c519b 100644 --- a/Tools/build/freeze_modules.py +++ b/Tools/build/freeze_modules.py @@ -658,7 +658,7 @@ def regen_pcbuild(modules): filterlines = [] corelines = [] deepfreezemappingsfile = f'$(IntDir)\\{DEEPFREEZE_MAPPING_FNAME}' - deepfreezerules = [f' '] + deepfreezerules = [f' '] deepfreezemappings = [] for src in _iter_sources(modules): pyfile = relpath_for_windows_display(src.pyfile, ROOT_DIR) @@ -667,15 +667,15 @@ def regen_pcbuild(modules): projlines.append(f' ') projlines.append(f' {src.frozenid}') projlines.append(f' $(IntDir){intfile}') - projlines.append(f' $(PySourcePath){header}') + projlines.append(f' $(GeneratedFrozenModulesDir){header}') projlines.append(f' ') filterlines.append(f' ') filterlines.append(' Python Files') filterlines.append(' ') - deepfreezemappings.append(f' \n') + deepfreezemappings.append(f' \n') - corelines.append(f' ') + corelines.append(f' ') print(f'# Updating {os.path.relpath(PCBUILD_PROJECT)}') with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile): From f1f839243251fef7422c31d6a7c3c747e0b5e27c Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Wed, 3 Jan 2024 19:29:24 +0000 Subject: [PATCH 52/71] Document the `co_lines` method on code objects (#113682) Co-authored-by: Hugo van Kemenade --- Doc/library/dis.rst | 9 +++++---- Doc/reference/datamodel.rst | 39 +++++++++++++++++++++++++++++++++++-- Doc/whatsnew/3.10.rst | 6 ++++-- Doc/whatsnew/3.13.rst | 3 ++- Misc/NEWS.d/3.12.0a4.rst | 4 ++-- Objects/lnotab_notes.txt | 2 +- 6 files changed, 51 insertions(+), 12 deletions(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 5823142cc75998..7492ae85c4ea46 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -342,17 +342,18 @@ operation is being performed, so the intermediate analysis object isn't useful: .. 
function:: findlinestarts(code) - This generator function uses the ``co_lines`` method - of the code object *code* to find the offsets which are starts of + This generator function uses the :meth:`~codeobject.co_lines` method + of the :ref:`code object ` *code* to find the offsets which + are starts of lines in the source code. They are generated as ``(offset, lineno)`` pairs. .. versionchanged:: 3.6 Line numbers can be decreasing. Before, they were always increasing. .. versionchanged:: 3.10 - The :pep:`626` ``co_lines`` method is used instead of the + The :pep:`626` :meth:`~codeobject.co_lines` method is used instead of the :attr:`~codeobject.co_firstlineno` and :attr:`~codeobject.co_lnotab` - attributes of the code object. + attributes of the :ref:`code object `. .. versionchanged:: 3.13 Line numbers can be ``None`` for bytecode that does not map to source lines. diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index b3af5c6298d02d..d611bda298b509 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1219,8 +1219,8 @@ If a code object represents a function, the first item in :attr:`~codeobject.co_consts` is the documentation string of the function, or ``None`` if undefined. -The :meth:`!co_positions` method -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Methods on code objects +~~~~~~~~~~~~~~~~~~~~~~~ .. method:: codeobject.co_positions() @@ -1255,6 +1255,41 @@ The :meth:`!co_positions` method :option:`-X` ``no_debug_ranges`` command line flag or the :envvar:`PYTHONNODEBUGRANGES` environment variable can be used. +.. method:: codeobject.co_lines() + + Returns an iterator that yields information about successive ranges of + :term:`bytecode`\s. 
Each item yielded is a ``(start, end, lineno)`` + :class:`tuple`: + + * ``start`` (an :class:`int`) represents the offset (inclusive) of the start + of the :term:`bytecode` range + * ``end`` (an :class:`int`) represents the offset (exclusive) of the end of + the :term:`bytecode` range + * ``lineno`` is an :class:`int` representing the line number of the + :term:`bytecode` range, or ``None`` if the bytecodes in the given range + have no line number + + The items yielded will have the following properties: + + * The first range yielded will have a ``start`` of 0. + * The ``(start, end)`` ranges will be non-decreasing and consecutive. That + is, for any pair of :class:`tuple`\s, the ``start`` of the second will be + equal to the ``end`` of the first. + * No range will be backwards: ``end >= start`` for all triples. + * The last :class:`tuple` yielded will have ``end`` equal to the size of the + :term:`bytecode`. + + Zero-width ranges, where ``start == end``, are allowed. Zero-width ranges + are used for lines that are present in the source code, but have been + eliminated by the :term:`bytecode` compiler. + + .. versionadded:: 3.10 + + .. seealso:: + + :pep:`626` - Precise line numbers for debugging and other tools. + The PEP that introduced the :meth:`!co_lines` method. + .. _frame-objects: diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index a8a27bfd3dc1bc..cd86c82caffc56 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -402,9 +402,11 @@ Tracing events, with the correct line number, are generated for all lines of cod The :attr:`~frame.f_lineno` attribute of frame objects will always contain the expected line number. -The :attr:`~codeobject.co_lnotab` attribute of code objects is deprecated and +The :attr:`~codeobject.co_lnotab` attribute of +:ref:`code objects ` is deprecated and will be removed in 3.12. -Code that needs to convert from offset to line number should use the new ``co_lines()`` method instead. 
+Code that needs to convert from offset to line number should use the new +:meth:`~codeobject.co_lines` method instead. PEP 634: Structural Pattern Matching ------------------------------------ diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 888ebd0402d0e7..3ab6d1ddc6ef21 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -816,7 +816,8 @@ although there is currently no date scheduled for their removal. * :mod:`!sre_compile`, :mod:`!sre_constants` and :mod:`!sre_parse` modules. -* :attr:`~codeobject.co_lnotab`: use the ``co_lines`` attribute instead. +* :attr:`codeobject.co_lnotab`: use the :meth:`codeobject.co_lines` method + instead. * :class:`typing.Text` (:gh:`92332`). diff --git a/Misc/NEWS.d/3.12.0a4.rst b/Misc/NEWS.d/3.12.0a4.rst index 75246f3f13503e..ce2814bbe2e5ab 100644 --- a/Misc/NEWS.d/3.12.0a4.rst +++ b/Misc/NEWS.d/3.12.0a4.rst @@ -147,8 +147,8 @@ clinic. .. nonce: yRWQ1y .. section: Core and Builtins -Improve the output of ``co_lines`` by emitting only one entry for each line -range. +Improve the output of :meth:`codeobject.co_lines` by emitting only one entry +for each line range. .. diff --git a/Objects/lnotab_notes.txt b/Objects/lnotab_notes.txt index d45d09d4ab9a50..0f3599340318f0 100644 --- a/Objects/lnotab_notes.txt +++ b/Objects/lnotab_notes.txt @@ -60,7 +60,7 @@ Final form: Iterating over the table. ------------------------- -For the `co_lines` attribute we want to emit the full form, omitting the (350, 360, No line number) and empty entries. +For the `co_lines` method we want to emit the full form, omitting the (350, 360, No line number) and empty entries. 
The code is as follows: From 4c4b08dd2bd5f2cad4e41bf29119a3daa2956f6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20=C5=81apkiewicz?= <80906036+fipachu@users.noreply.github.com> Date: Wed, 3 Jan 2024 20:37:34 +0100 Subject: [PATCH 53/71] gh-52161: Enhance Cmd support for docstrings (#110987) In `cmd.Cmd.do_help` call `inspect.cleandoc()` to clean indentation and remove leading/trailing empty lines from a docstring before printing. --- Lib/cmd.py | 3 ++- .../next/Library/2023-10-17-16-11-03.gh-issue-52161.WBYyCJ.rst | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2023-10-17-16-11-03.gh-issue-52161.WBYyCJ.rst diff --git a/Lib/cmd.py b/Lib/cmd.py index 2e358d6cd5a02d..a37d16cd7bde16 100644 --- a/Lib/cmd.py +++ b/Lib/cmd.py @@ -42,7 +42,7 @@ functions respectively. """ -import string, sys +import inspect, string, sys __all__ = ["Cmd"] @@ -305,6 +305,7 @@ def do_help(self, arg): except AttributeError: try: doc=getattr(self, 'do_' + arg).__doc__ + doc = inspect.cleandoc(doc) if doc: self.stdout.write("%s\n"%str(doc)) return diff --git a/Misc/NEWS.d/next/Library/2023-10-17-16-11-03.gh-issue-52161.WBYyCJ.rst b/Misc/NEWS.d/next/Library/2023-10-17-16-11-03.gh-issue-52161.WBYyCJ.rst new file mode 100644 index 00000000000000..3f598d40e4ae93 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-10-17-16-11-03.gh-issue-52161.WBYyCJ.rst @@ -0,0 +1,2 @@ +:meth:`cmd.Cmd.do_help` now cleans docstrings with :func:`inspect.cleandoc` +before writing them. Patch by Filip Łapkiewicz. 
From 35ef8cb25917bfd6cbbd7c2bb55dd4f82131c9cf Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 4 Jan 2024 03:14:15 -0800 Subject: [PATCH 54/71] GH-113689: Fix broken handling of invalid executors (GH-113694) --- Python/bytecodes.c | 3 ++- Python/generated_cases.c.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2eeeac53e1dd7e..e1a6a256fbdf96 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2378,11 +2378,12 @@ dummy_func( stack_pointer = _PyFrame_GetStackPointer(frame); } else { + code->co_executors->executors[oparg & 255] = NULL; opcode = this_instr->op.code = executor->vm_data.opcode; this_instr->op.arg = executor->vm_data.oparg; oparg = (oparg & (~255)) | executor->vm_data.oparg; - code->co_executors->executors[oparg&255] = NULL; Py_DECREF(executor); + next_instr = this_instr; DISPATCH_GOTO(); } } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 99fd169ca4fec3..8226d827cde514 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2392,11 +2392,12 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } else { + code->co_executors->executors[oparg & 255] = NULL; opcode = this_instr->op.code = executor->vm_data.opcode; this_instr->op.arg = executor->vm_data.oparg; oparg = (oparg & (~255)) | executor->vm_data.oparg; - code->co_executors->executors[oparg&255] = NULL; Py_DECREF(executor); + next_instr = this_instr; DISPATCH_GOTO(); } DISPATCH(); From 1ae7ceba29771baf8f2e8d2d4c50a0355cb6b5c8 Mon Sep 17 00:00:00 2001 From: Jamie Phan Date: Fri, 5 Jan 2024 01:05:31 +1100 Subject: [PATCH 55/71] gh-113696: Docs: Annotate PyObject_CallOneArg and PyObject_CallNoArgs as returning a strong reference (#113697) --- Doc/data/refcounts.dat | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index ef9ac1617a284b..0b48512083ced4 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ 
-1589,6 +1589,13 @@ PyObject_Call:PyObject*:callable_object:0: PyObject_Call:PyObject*:args:0: PyObject_Call:PyObject*:kw:0: +PyObject_CallNoArgs:PyObject*::+1: +PyObject_CallNoArgs:PyObject*:callable_object:0: + +PyObject_CallOneArg:PyObject*::+1: +PyObject_CallOneArg:PyObject*:callable_object:0: +PyObject_CallOneArg:PyObject*:arg:0: + PyObject_CallFunction:PyObject*::+1: PyObject_CallFunction:PyObject*:callable_object:0: PyObject_CallFunction:const char*:format:: From 1600d78e2d090319930c6538b496ffcca120a696 Mon Sep 17 00:00:00 2001 From: wookie184 Date: Thu, 4 Jan 2024 19:11:34 +0000 Subject: [PATCH 56/71] gh-113569: Display calls in Mock.assert_has_calls failure when empty (GH-113573) --- Lib/test/test_unittest/testmock/testmock.py | 36 +++++++++++-------- Lib/unittest/mock.py | 8 ++--- ...-12-29-17-57-45.gh-issue-113569.qcRCEI.rst | 2 ++ 3 files changed, 28 insertions(+), 18 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-12-29-17-57-45.gh-issue-113569.qcRCEI.rst diff --git a/Lib/test/test_unittest/testmock/testmock.py b/Lib/test/test_unittest/testmock/testmock.py index 6af8acc3b0617e..1725406bcfb9e4 100644 --- a/Lib/test/test_unittest/testmock/testmock.py +++ b/Lib/test/test_unittest/testmock/testmock.py @@ -1547,25 +1547,33 @@ def f(x=None): pass mock = Mock(spec=f) mock(1) - with self.assertRaisesRegex( - AssertionError, - '^{}$'.format( - re.escape('Calls not found.\n' - 'Expected: [call()]\n' - ' Actual: [call(1)]'))) as cm: + with self.assertRaises(AssertionError) as cm: mock.assert_has_calls([call()]) + self.assertEqual(str(cm.exception), + 'Calls not found.\n' + 'Expected: [call()]\n' + ' Actual: [call(1)]' + ) self.assertIsNone(cm.exception.__cause__) + uncalled_mock = Mock() + with self.assertRaises(AssertionError) as cm: + uncalled_mock.assert_has_calls([call()]) + self.assertEqual(str(cm.exception), + 'Calls not found.\n' + 'Expected: [call()]\n' + ' Actual: []' + ) + self.assertIsNone(cm.exception.__cause__) - with 
self.assertRaisesRegex( - AssertionError, - '^{}$'.format( - re.escape( - 'Error processing expected calls.\n' - "Errors: [None, TypeError('too many positional arguments')]\n" - "Expected: [call(), call(1, 2)]\n" - ' Actual: [call(1)]'))) as cm: + with self.assertRaises(AssertionError) as cm: mock.assert_has_calls([call(), call(1, 2)]) + self.assertEqual(str(cm.exception), + 'Error processing expected calls.\n' + "Errors: [None, TypeError('too many positional arguments')]\n" + 'Expected: [call(), call(1, 2)]\n' + ' Actual: [call(1)]' + ) self.assertIsInstance(cm.exception.__cause__, TypeError) def test_assert_any_call(self): diff --git a/Lib/unittest/mock.py b/Lib/unittest/mock.py index 2adb3d70662b1a..93f4d9743ed2fa 100644 --- a/Lib/unittest/mock.py +++ b/Lib/unittest/mock.py @@ -1010,8 +1010,8 @@ def assert_has_calls(self, calls, any_order=False): for e in expected]) raise AssertionError( f'{problem}\n' - f'Expected: {_CallList(calls)}' - f'{self._calls_repr(prefix=" Actual").rstrip(".")}' + f'Expected: {_CallList(calls)}\n' + f' Actual: {safe_repr(self.mock_calls)}' ) from cause return @@ -1085,7 +1085,7 @@ def _get_child_mock(self, /, **kw): return klass(**kw) - def _calls_repr(self, prefix="Calls"): + def _calls_repr(self): """Renders self.mock_calls as a string. Example: "\nCalls: [call(1), call(2)]." @@ -1095,7 +1095,7 @@ def _calls_repr(self, prefix="Calls"): """ if not self.mock_calls: return "" - return f"\n{prefix}: {safe_repr(self.mock_calls)}." + return f"\nCalls: {safe_repr(self.mock_calls)}." 
# Denylist for forbidden attribute names in safe mode diff --git a/Misc/NEWS.d/next/Library/2023-12-29-17-57-45.gh-issue-113569.qcRCEI.rst b/Misc/NEWS.d/next/Library/2023-12-29-17-57-45.gh-issue-113569.qcRCEI.rst new file mode 100644 index 00000000000000..9b63fc940991fe --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-29-17-57-45.gh-issue-113569.qcRCEI.rst @@ -0,0 +1,2 @@ +Indicate if there were no actual calls in unittest +:meth:`~unittest.mock.Mock.assert_has_calls` failure. From 4681a5271a8598b46021cbc556ac8098ab8a1d81 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 4 Jan 2024 12:20:21 -0800 Subject: [PATCH 57/71] gh-113538: Don't error in stream reader protocol callback when task is cancelled (#113690) --- Lib/asyncio/streams.py | 3 +++ Lib/test/test_asyncio/test_streams.py | 20 ++++++++++++------- ...-01-03-14-19-26.gh-issue-113538.ahuBCo.rst | 5 +++++ 3 files changed, 21 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-03-14-19-26.gh-issue-113538.ahuBCo.rst diff --git a/Lib/asyncio/streams.py b/Lib/asyncio/streams.py index ffb48b9cd6cb70..df58b7a799a5ad 100644 --- a/Lib/asyncio/streams.py +++ b/Lib/asyncio/streams.py @@ -246,6 +246,9 @@ def connection_made(self, transport): self._stream_writer) if coroutines.iscoroutine(res): def callback(task): + if task.cancelled(): + transport.close() + return exc = task.exception() if exc is not None: self._loop.call_exception_handler({ diff --git a/Lib/test/test_asyncio/test_streams.py b/Lib/test/test_asyncio/test_streams.py index b408cd1f7da205..3c8cc5f3649180 100644 --- a/Lib/test/test_asyncio/test_streams.py +++ b/Lib/test/test_asyncio/test_streams.py @@ -1129,7 +1129,7 @@ async def inner(httpd): self.assertEqual(messages, []) - def test_unhandled_exceptions(self) -> None: + def _basetest_unhandled_exceptions(self, handle_echo): port = socket_helper.find_unused_port() messages = [] @@ -1143,9 +1143,6 @@ async def client(): await wr.wait_closed() async def main(): - async 
def handle_echo(reader, writer): - raise Exception('test') - server = await asyncio.start_server( handle_echo, 'localhost', port) await server.start_serving() @@ -1154,11 +1151,20 @@ async def handle_echo(reader, writer): await server.wait_closed() self.loop.run_until_complete(main()) + return messages + def test_unhandled_exception(self): + async def handle_echo(reader, writer): + raise Exception('test') + messages = self._basetest_unhandled_exceptions(handle_echo) self.assertEqual(messages[0]['message'], - 'Unhandled exception in client_connected_cb') - # Break explicitly reference cycle - messages = None + 'Unhandled exception in client_connected_cb') + + def test_unhandled_cancel(self): + async def handle_echo(reader, writer): + asyncio.current_task().cancel() + messages = self._basetest_unhandled_exceptions(handle_echo) + self.assertEqual(messages, []) if __name__ == '__main__': diff --git a/Misc/NEWS.d/next/Library/2024-01-03-14-19-26.gh-issue-113538.ahuBCo.rst b/Misc/NEWS.d/next/Library/2024-01-03-14-19-26.gh-issue-113538.ahuBCo.rst new file mode 100644 index 00000000000000..a52076501b7bf4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-03-14-19-26.gh-issue-113538.ahuBCo.rst @@ -0,0 +1,5 @@ +In :meth:`asyncio.StreamReaderProtocol.connection_made`, there is a callback +that logs an error if the task wrapping the "connected callback" fails. This +callback would itself fail if the task was cancelled. Prevent this by +checking whether the task was cancelled first. If so, close the transport +but don't log an error. From c2e8298eba3f8d75a58e5b3636f8edc8d60e68da Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 4 Jan 2024 20:48:26 +0000 Subject: [PATCH 58/71] GH-113225: Speed up `pathlib.Path.glob()` (#113226) Use `os.DirEntry.path` as the string representation of child paths, unless the parent path is empty, in which case we use the entry `name`. 
--- Lib/pathlib/__init__.py | 8 +++++++- .../2023-12-17-04-43-57.gh-issue-113225.dhxhiZ.rst | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2023-12-17-04-43-57.gh-issue-113225.dhxhiZ.rst diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 2b4193c400a099..79b8b4917f6cc4 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -301,7 +301,13 @@ def _scandir(self): def _make_child_entry(self, entry): # Transform an entry yielded from _scandir() into a path object. - return self._make_child_relpath(entry.name) + path_str = entry.name if str(self) == '.' else entry.path + path = self.with_segments(path_str) + path._str = path_str + path._drv = self.drive + path._root = self.root + path._tail_cached = self._tail + [entry.name] + return path def absolute(self): """Return an absolute version of this path diff --git a/Misc/NEWS.d/next/Library/2023-12-17-04-43-57.gh-issue-113225.dhxhiZ.rst b/Misc/NEWS.d/next/Library/2023-12-17-04-43-57.gh-issue-113225.dhxhiZ.rst new file mode 100644 index 00000000000000..7160cca2e11366 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-17-04-43-57.gh-issue-113225.dhxhiZ.rst @@ -0,0 +1 @@ +Speed up :meth:`pathlib.Path.glob` by using :attr:`os.DirEntry.path` where possible. From fcb3c2a444709d2a53faa20c5b43541674064018 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 4 Jan 2024 17:21:40 -0500 Subject: [PATCH 59/71] gh-112532: Isolate abandoned segments by interpreter (#113717) * gh-112532: Isolate abandoned segments by interpreter Mimalloc segments are data structures that contain memory allocations along with metadata. Each segment is "owned" by a thread. When a thread exits, it abandons its segments to a global pool to be later reclaimed by other threads. This changes the pool to be per-interpreter instead of process-wide. This will be important for when we use mimalloc to find GC objects in the `--disable-gil` builds. 
We want heaps to only store Python objects from a single interpreter. Absent this change, the abandoning and reclaiming process could break this isolation. * Add missing '&_mi_abandoned_default' to 'tld_empty' --- Include/internal/mimalloc/mimalloc/internal.h | 20 +--- Include/internal/mimalloc/mimalloc/types.h | 40 ++++++++ Include/internal/pycore_interp.h | 5 + Include/internal/pycore_mimalloc.h | 6 ++ Objects/mimalloc/init.c | 5 +- Objects/mimalloc/segment.c | 97 ++++++++----------- Python/pystate.c | 5 + 7 files changed, 102 insertions(+), 76 deletions(-) diff --git a/Include/internal/mimalloc/mimalloc/internal.h b/Include/internal/mimalloc/mimalloc/internal.h index cb6e211de5bb63..afd7d18a13ed8f 100644 --- a/Include/internal/mimalloc/mimalloc/internal.h +++ b/Include/internal/mimalloc/mimalloc/internal.h @@ -23,23 +23,6 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_trace_message(...) #endif -#define MI_CACHE_LINE 64 -#if defined(_MSC_VER) -#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) -#pragma warning(disable:26812) // unscoped enum warning -#define mi_decl_noinline __declspec(noinline) -#define mi_decl_thread __declspec(thread) -#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) -#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc -#define mi_decl_noinline __attribute__((noinline)) -#define mi_decl_thread __thread -#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) -#else -#define mi_decl_noinline -#define mi_decl_thread __thread // hope for the best :-) -#define mi_decl_cache_align -#endif - #if defined(__EMSCRIPTEN__) && !defined(__wasi__) #define __wasi__ #endif @@ -131,6 +114,7 @@ void _mi_segment_map_allocated_at(const mi_segment_t* segment); void _mi_segment_map_freed_at(const mi_segment_t* segment); // "segment.c" +extern mi_abandoned_pool_t _mi_abandoned_default; // global abandoned pool 
mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); @@ -145,7 +129,7 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, m uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); -void _mi_abandoned_await_readers(void); +void _mi_abandoned_await_readers(mi_abandoned_pool_t *pool); void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld); // "page.c" diff --git a/Include/internal/mimalloc/mimalloc/types.h b/Include/internal/mimalloc/mimalloc/types.h index 7616f37e4b978f..ab41b1ce990827 100644 --- a/Include/internal/mimalloc/mimalloc/types.h +++ b/Include/internal/mimalloc/mimalloc/types.h @@ -33,6 +33,23 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) #endif +#define MI_CACHE_LINE 64 +#if defined(_MSC_VER) +#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) +#pragma warning(disable:26812) // unscoped enum warning +#define mi_decl_noinline __declspec(noinline) +#define mi_decl_thread __declspec(thread) +#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) +#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc +#define mi_decl_noinline __attribute__((noinline)) +#define mi_decl_thread __thread +#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) +#else +#define mi_decl_noinline +#define mi_decl_thread __thread // hope for the best :-) +#define mi_decl_cache_align +#endif + // ------------------------------------------------------ // Variants // ------------------------------------------------------ @@ -445,6 +462,28 @@ typedef struct mi_segment_s { mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment } mi_segment_t; +typedef uintptr_t mi_tagged_segment_t; + +// Segments unowned by any thread are put in a shared pool +typedef struct mi_abandoned_pool_s { + // This is a list of visited abandoned pages that were full at the time. + // this list migrates to `abandoned` when that becomes NULL. The use of + // this list reduces contention and the rate at which segments are visited. 
+ mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL + + // The abandoned page list (tagged as it supports pop) + mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL + + // Maintain these for debug purposes (these counts may be a bit off) + mi_decl_cache_align _Atomic(size_t) abandoned_count; + mi_decl_cache_align _Atomic(size_t) abandoned_visited_count; + + // We also maintain a count of current readers of the abandoned list + // in order to prevent resetting/decommitting segment memory if it might + // still be read. + mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0 +} mi_abandoned_pool_t; + // ------------------------------------------------------ // Heaps @@ -654,6 +693,7 @@ typedef struct mi_segments_tld_s { size_t peak_size; // peak size of all segments mi_stats_t* stats; // points to tld stats mi_os_tld_t* os; // points to os stats + mi_abandoned_pool_t* abandoned; // pool of abandoned segments } mi_segments_tld_t; // Thread local data diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 04d7a6a615e370..4512b1edb4b9b3 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -27,6 +27,7 @@ extern "C" { #include "pycore_import.h" // struct _import_state #include "pycore_instruments.h" // _PY_MONITORING_EVENTS #include "pycore_list.h" // struct _Py_list_state +#include "pycore_mimalloc.h" // struct _mimalloc_interp_state #include "pycore_object_state.h" // struct _py_object_state #include "pycore_obmalloc.h" // struct _obmalloc_state #include "pycore_tstate.h" // _PyThreadStateImpl @@ -166,6 +167,10 @@ struct _is { struct _warnings_runtime_state warnings; struct atexit_state atexit; +#if defined(Py_GIL_DISABLED) + struct _mimalloc_interp_state mimalloc; +#endif + struct _obmalloc_state obmalloc; PyObject *audit_hooks; diff --git a/Include/internal/pycore_mimalloc.h b/Include/internal/pycore_mimalloc.h index adebb559dae658..1e7ed5a4ca62e2 100644 
--- a/Include/internal/pycore_mimalloc.h +++ b/Include/internal/pycore_mimalloc.h @@ -35,6 +35,12 @@ typedef enum { #endif #ifdef Py_GIL_DISABLED +struct _mimalloc_interp_state { + // When exiting, threads place any segments with live blocks in this + // shared pool for other threads to claim and reuse. + mi_abandoned_pool_t abandoned_pool; +}; + struct _mimalloc_thread_state { mi_heap_t *current_object_heap; mi_heap_t heaps[_Py_MIMALLOC_HEAP_COUNT]; diff --git a/Objects/mimalloc/init.c b/Objects/mimalloc/init.c index 376e14b49b7c7b..0446021fdc514e 100644 --- a/Objects/mimalloc/init.c +++ b/Objects/mimalloc/init.c @@ -131,7 +131,7 @@ mi_decl_cache_align static const mi_tld_t tld_empty = { 0, false, NULL, NULL, - { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, tld_empty_stats, tld_empty_os }, // segments + { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, tld_empty_stats, tld_empty_os, &_mi_abandoned_default }, // segments { 0, tld_empty_stats }, // os { MI_STATS_NULL } // stats }; @@ -148,7 +148,7 @@ extern mi_heap_t _mi_heap_main; static mi_tld_t tld_main = { 0, false, &_mi_heap_main, & _mi_heap_main, - { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments + { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os, &_mi_abandoned_default }, // segments { 0, &tld_main.stats }, // os { MI_STATS_NULL } // stats }; @@ -308,6 +308,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) { _mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld)); tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; + tld->segments.abandoned = &_mi_abandoned_default; tld->os.stats = &tld->stats; tld->heap_backing = bheap; tld->heaps = bheap; diff --git a/Objects/mimalloc/segment.c b/Objects/mimalloc/segment.c index 033e0f97c36c14..1040da0d9af3e9 100644 --- a/Objects/mimalloc/segment.c +++ b/Objects/mimalloc/segment.c @@ -395,7 +395,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { const size_t size = 
mi_segment_size(segment); const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - _mi_abandoned_await_readers(); // wait until safe to free + _mi_abandoned_await_readers(tld->abandoned); // wait until safe to free _mi_arena_free(segment, mi_segment_size(segment), csize, segment->memid, tld->stats); } @@ -1059,7 +1059,6 @@ would be spread among all other segments in the arenas. // Use the bottom 20-bits (on 64-bit) of the aligned segment pointers // to put in a tag that increments on update to avoid the A-B-A problem. #define MI_TAGGED_MASK MI_SEGMENT_MASK -typedef uintptr_t mi_tagged_segment_t; static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) { return (mi_segment_t*)(ts & ~MI_TAGGED_MASK); @@ -1071,55 +1070,40 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se return ((uintptr_t)segment | tag); } -// This is a list of visited abandoned pages that were full at the time. -// this list migrates to `abandoned` when that becomes NULL. The use of -// this list reduces contention and the rate at which segments are visited. -static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL - -// The abandoned page list (tagged as it supports pop) -static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL - -// Maintain these for debug purposes (these counts may be a bit off) -static mi_decl_cache_align _Atomic(size_t) abandoned_count; -static mi_decl_cache_align _Atomic(size_t) abandoned_visited_count; - -// We also maintain a count of current readers of the abandoned list -// in order to prevent resetting/decommitting segment memory if it might -// still be read. 
-static mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0 +mi_abandoned_pool_t _mi_abandoned_default; // Push on the visited list -static void mi_abandoned_visited_push(mi_segment_t* segment) { +static void mi_abandoned_visited_push(mi_abandoned_pool_t *pool, mi_segment_t* segment) { mi_assert_internal(segment->thread_id == 0); mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL); mi_assert_internal(segment->next == NULL); mi_assert_internal(segment->used > 0); - mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited); + mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &pool->abandoned_visited); do { mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, anext); - } while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &abandoned_visited, &anext, segment)); - mi_atomic_increment_relaxed(&abandoned_visited_count); + } while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &pool->abandoned_visited, &anext, segment)); + mi_atomic_increment_relaxed(&pool->abandoned_visited_count); } // Move the visited list to the abandoned list. 
-static bool mi_abandoned_visited_revisit(void) +static bool mi_abandoned_visited_revisit(mi_abandoned_pool_t *pool) { // quick check if the visited list is empty - if (mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false; + if (mi_atomic_load_ptr_relaxed(mi_segment_t, &pool->abandoned_visited) == NULL) return false; // grab the whole visited list - mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &abandoned_visited, NULL); + mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &pool->abandoned_visited, NULL); if (first == NULL) return false; // first try to swap directly if the abandoned list happens to be NULL mi_tagged_segment_t afirst; - mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); + mi_tagged_segment_t ts = mi_atomic_load_relaxed(&pool->abandoned); if (mi_tagged_segment_ptr(ts)==NULL) { - size_t count = mi_atomic_load_relaxed(&abandoned_visited_count); + size_t count = mi_atomic_load_relaxed(&pool->abandoned_visited_count); afirst = mi_tagged_segment(first, ts); - if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) { - mi_atomic_add_relaxed(&abandoned_count, count); - mi_atomic_sub_relaxed(&abandoned_visited_count, count); + if (mi_atomic_cas_strong_acq_rel(&pool->abandoned, &ts, afirst)) { + mi_atomic_add_relaxed(&pool->abandoned_count, count); + mi_atomic_sub_relaxed(&pool->abandoned_visited_count, count); return true; } } @@ -1133,51 +1117,51 @@ static bool mi_abandoned_visited_revisit(void) // and atomically prepend to the abandoned list // (no need to increase the readers as we don't access the abandoned segments) - mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned); + mi_tagged_segment_t anext = mi_atomic_load_relaxed(&pool->abandoned); size_t count; do { - count = mi_atomic_load_relaxed(&abandoned_visited_count); + count = mi_atomic_load_relaxed(&pool->abandoned_visited_count); mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, 
mi_tagged_segment_ptr(anext)); afirst = mi_tagged_segment(first, anext); - } while (!mi_atomic_cas_weak_release(&abandoned, &anext, afirst)); - mi_atomic_add_relaxed(&abandoned_count, count); - mi_atomic_sub_relaxed(&abandoned_visited_count, count); + } while (!mi_atomic_cas_weak_release(&pool->abandoned, &anext, afirst)); + mi_atomic_add_relaxed(&pool->abandoned_count, count); + mi_atomic_sub_relaxed(&pool->abandoned_visited_count, count); return true; } // Push on the abandoned list. -static void mi_abandoned_push(mi_segment_t* segment) { +static void mi_abandoned_push(mi_abandoned_pool_t* pool, mi_segment_t* segment) { mi_assert_internal(segment->thread_id == 0); mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); mi_assert_internal(segment->next == NULL); mi_assert_internal(segment->used > 0); mi_tagged_segment_t next; - mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); + mi_tagged_segment_t ts = mi_atomic_load_relaxed(&pool->abandoned); do { mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts)); next = mi_tagged_segment(segment, ts); - } while (!mi_atomic_cas_weak_release(&abandoned, &ts, next)); - mi_atomic_increment_relaxed(&abandoned_count); + } while (!mi_atomic_cas_weak_release(&pool->abandoned, &ts, next)); + mi_atomic_increment_relaxed(&pool->abandoned_count); } // Wait until there are no more pending reads on segments that used to be in the abandoned list // called for example from `arena.c` before decommitting -void _mi_abandoned_await_readers(void) { +void _mi_abandoned_await_readers(mi_abandoned_pool_t* pool) { size_t n; do { - n = mi_atomic_load_acquire(&abandoned_readers); + n = mi_atomic_load_acquire(&pool->abandoned_readers); if (n != 0) mi_atomic_yield(); } while (n != 0); } // Pop from the abandoned list -static mi_segment_t* mi_abandoned_pop(void) { +static mi_segment_t* mi_abandoned_pop(mi_abandoned_pool_t* pool) { mi_segment_t* segment; // Check 
efficiently if it is empty (or if the visited list needs to be moved) - mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); + mi_tagged_segment_t ts = mi_atomic_load_relaxed(&pool->abandoned); segment = mi_tagged_segment_ptr(ts); if mi_likely(segment == NULL) { - if mi_likely(!mi_abandoned_visited_revisit()) { // try to swap in the visited list on NULL + if mi_likely(!mi_abandoned_visited_revisit(pool)) { // try to swap in the visited list on NULL return NULL; } } @@ -1186,20 +1170,20 @@ static mi_segment_t* mi_abandoned_pop(void) { // a segment to be decommitted while a read is still pending, // and a tagged pointer to prevent A-B-A link corruption. // (this is called from `region.c:_mi_mem_free` for example) - mi_atomic_increment_relaxed(&abandoned_readers); // ensure no segment gets decommitted + mi_atomic_increment_relaxed(&pool->abandoned_readers); // ensure no segment gets decommitted mi_tagged_segment_t next = 0; - ts = mi_atomic_load_acquire(&abandoned); + ts = mi_atomic_load_acquire(&pool->abandoned); do { segment = mi_tagged_segment_ptr(ts); if (segment != NULL) { mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next); next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted } - } while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next)); - mi_atomic_decrement_relaxed(&abandoned_readers); // release reader lock + } while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&pool->abandoned, &ts, next)); + mi_atomic_decrement_relaxed(&pool->abandoned_readers); // release reader lock if (segment != NULL) { mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); - mi_atomic_decrement_relaxed(&abandoned_count); + mi_atomic_decrement_relaxed(&pool->abandoned_count); } return segment; } @@ -1237,7 +1221,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { segment->thread_id = 0; 
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); segment->abandoned_visits = 1; // from 0 to 1 to signify it is abandoned - mi_abandoned_push(segment); + mi_abandoned_push(tld->abandoned, segment); } void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { @@ -1381,7 +1365,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { mi_segment_t* segment; - while ((segment = mi_abandoned_pop()) != NULL) { + while ((segment = mi_abandoned_pop(tld->abandoned)) != NULL) { mi_segment_reclaim(segment, heap, 0, NULL, tld); } } @@ -1391,7 +1375,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice *reclaimed = false; mi_segment_t* segment; long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times - while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { + while ((max_tries-- > 0) && ((segment = mi_abandoned_pop(tld->abandoned)) != NULL)) { segment->abandoned_visits++; // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments // and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way? @@ -1418,7 +1402,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice else { // otherwise, push on the visited list so it gets not looked at too quickly again mi_segment_try_purge(segment, true /* force? 
*/, tld->stats); // force purge if needed as we may not visit soon again - mi_abandoned_visited_push(segment); + mi_abandoned_visited_push(tld->abandoned, segment); } } return NULL; @@ -1428,11 +1412,12 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) { mi_segment_t* segment; + mi_abandoned_pool_t* pool = tld->abandoned; int max_tries = (force ? 16*1024 : 1024); // limit latency if (force) { - mi_abandoned_visited_revisit(); + mi_abandoned_visited_revisit(pool); } - while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { + while ((max_tries-- > 0) && ((segment = mi_abandoned_pop(pool)) != NULL)) { mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees) if (segment->used == 0) { // free the segment (by forced reclaim) to make it available to other threads. @@ -1444,7 +1429,7 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) // otherwise, purge if needed and push on the visited list // note: forced purge can be expensive if many threads are destroyed/created as in mstress. mi_segment_try_purge(segment, force, tld->stats); - mi_abandoned_visited_push(segment); + mi_abandoned_visited_push(pool, segment); } } } diff --git a/Python/pystate.c b/Python/pystate.c index 84e2d6ea172f2b..5f515cf475dab5 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2533,6 +2533,11 @@ tstate_mimalloc_bind(PyThreadState *tstate) mi_tld_t *tld = &mts->tld; _mi_tld_init(tld, &mts->heaps[_Py_MIMALLOC_HEAP_MEM]); + // Exiting threads push any remaining in-use segments to the abandoned + // pool to be re-claimed later by other threads. We use per-interpreter + // pools to keep Python objects from different interpreters separate. 
+ tld->segments.abandoned = &tstate->interp->mimalloc.abandoned_pool; + // Initialize each heap for (Py_ssize_t i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) { _mi_heap_init_ex(&mts->heaps[i], tld, _mi_arena_id_none()); From ed6ea3ea79fac68b127c7eb457c7ecb996461010 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Fri, 5 Jan 2024 01:01:48 +0000 Subject: [PATCH 60/71] gh-113320: Reduce the number of dangerous `getattr()` calls when constructing protocol classes (#113401) - Only attempt to figure out whether protocol members are "method members" or not if the class is marked as a runtime protocol. This information is irrelevant for non-runtime protocols; we can safely skip the risky introspection for them. - Only do the risky getattr() calls in one place (the runtime_checkable class decorator), rather than in three places (_ProtocolMeta.__init__, _ProtocolMeta.__instancecheck__ and _ProtocolMeta.__subclasscheck__). This reduces the number of locations in typing.py where the risky introspection could go wrong. - For runtime protocols, if determining whether a protocol member is callable or not fails, give a better error message. I think it's reasonable for us to reject runtime protocols that have members which raise strange exceptions when you try to access them. PEP-544 clearly states that all protocol members must be callable for issubclass() calls against the protocol to be valid -- and if a member raises when we try to access it, there's no way for us to figure out whether it's a callable member or not!
--- Lib/test/test_typing.py | 38 ++++++++++++++- Lib/typing.py | 46 +++++++++++-------- ...-12-22-11-30-57.gh-issue-113320.Vp5suS.rst | 4 ++ 3 files changed, 68 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-12-22-11-30-57.gh-issue-113320.Vp5suS.rst diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index 2d10c39840ddf3..8edab0cd6e34db 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -3448,8 +3448,8 @@ def meth(self): pass self.assertNotIn("__protocol_attrs__", vars(NonP)) self.assertNotIn("__protocol_attrs__", vars(NonPR)) - self.assertNotIn("__callable_proto_members_only__", vars(NonP)) - self.assertNotIn("__callable_proto_members_only__", vars(NonPR)) + self.assertNotIn("__non_callable_proto_members__", vars(NonP)) + self.assertNotIn("__non_callable_proto_members__", vars(NonPR)) self.assertEqual(get_protocol_members(P), {"x"}) self.assertEqual(get_protocol_members(PR), {"meth"}) @@ -4105,6 +4105,7 @@ def method(self) -> None: ... self.assertNotIsInstance(42, ProtocolWithMixedMembers) def test_protocol_issubclass_error_message(self): + @runtime_checkable class Vec2D(Protocol): x: float y: float @@ -4120,6 +4121,39 @@ def square_norm(self) -> float: with self.assertRaisesRegex(TypeError, re.escape(expected_error_message)): issubclass(int, Vec2D) + def test_nonruntime_protocol_interaction_with_evil_classproperty(self): + class classproperty: + def __get__(self, instance, type): + raise RuntimeError("NO") + + class Commentable(Protocol): + evil = classproperty() + + # recognised as a protocol attr, + # but not actually accessed by the protocol metaclass + # (which would raise RuntimeError) for non-runtime protocols. 
+ # See gh-113320 + self.assertEqual(get_protocol_members(Commentable), {"evil"}) + + def test_runtime_protocol_interaction_with_evil_classproperty(self): + class CustomError(Exception): pass + + class classproperty: + def __get__(self, instance, type): + raise CustomError + + with self.assertRaises(TypeError) as cm: + @runtime_checkable + class Commentable(Protocol): + evil = classproperty() + + exc = cm.exception + self.assertEqual( + exc.args[0], + "Failed to determine whether protocol member 'evil' is a method member" + ) + self.assertIs(type(exc.__cause__), CustomError) + class GenericTests(BaseTestCase): diff --git a/Lib/typing.py b/Lib/typing.py index d7d793539b35b1..d278b4effc7eba 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -1670,7 +1670,7 @@ class _TypingEllipsis: _TYPING_INTERNALS = frozenset({ '__parameters__', '__orig_bases__', '__orig_class__', '_is_protocol', '_is_runtime_protocol', '__protocol_attrs__', - '__callable_proto_members_only__', '__type_params__', + '__non_callable_proto_members__', '__type_params__', }) _SPECIAL_NAMES = frozenset({ @@ -1833,11 +1833,6 @@ def __init__(cls, *args, **kwargs): super().__init__(*args, **kwargs) if getattr(cls, "_is_protocol", False): cls.__protocol_attrs__ = _get_protocol_attrs(cls) - # PEP 544 prohibits using issubclass() - # with protocols that have non-method members. 
- cls.__callable_proto_members_only__ = all( - callable(getattr(cls, attr, None)) for attr in cls.__protocol_attrs__ - ) def __subclasscheck__(cls, other): if cls is Protocol: @@ -1846,25 +1841,23 @@ def __subclasscheck__(cls, other): getattr(cls, '_is_protocol', False) and not _allow_reckless_class_checks() ): + if not getattr(cls, '_is_runtime_protocol', False): + _type_check_issubclass_arg_1(other) + raise TypeError( + "Instance and class checks can only be used with " + "@runtime_checkable protocols" + ) if ( - not cls.__callable_proto_members_only__ + # this attribute is set by @runtime_checkable: + cls.__non_callable_proto_members__ and cls.__dict__.get("__subclasshook__") is _proto_hook ): _type_check_issubclass_arg_1(other) - non_method_attrs = sorted( - attr for attr in cls.__protocol_attrs__ - if not callable(getattr(cls, attr, None)) - ) + non_method_attrs = sorted(cls.__non_callable_proto_members__) raise TypeError( "Protocols with non-method members don't support issubclass()." f" Non-method members: {str(non_method_attrs)[1:-1]}." ) - if not getattr(cls, '_is_runtime_protocol', False): - _type_check_issubclass_arg_1(other) - raise TypeError( - "Instance and class checks can only be used with " - "@runtime_checkable protocols" - ) return _abc_subclasscheck(cls, other) def __instancecheck__(cls, instance): @@ -1892,7 +1885,8 @@ def __instancecheck__(cls, instance): val = getattr_static(instance, attr) except AttributeError: break - if val is None and callable(getattr(cls, attr, None)): + # this attribute is set by @runtime_checkable: + if val is None and attr not in cls.__non_callable_proto_members__: break else: return True @@ -2114,6 +2108,22 @@ def close(self): ... raise TypeError('@runtime_checkable can be only applied to protocol classes,' ' got %r' % cls) cls._is_runtime_protocol = True + # PEP 544 prohibits using issubclass() + # with protocols that have non-method members. 
+ # See gh-113320 for why we compute this attribute here, + # rather than in `_ProtocolMeta.__init__` + cls.__non_callable_proto_members__ = set() + for attr in cls.__protocol_attrs__: + try: + is_callable = callable(getattr(cls, attr, None)) + except Exception as e: + raise TypeError( + f"Failed to determine whether protocol member {attr!r} " + "is a method member" + ) from e + else: + if not is_callable: + cls.__non_callable_proto_members__.add(attr) return cls diff --git a/Misc/NEWS.d/next/Library/2023-12-22-11-30-57.gh-issue-113320.Vp5suS.rst b/Misc/NEWS.d/next/Library/2023-12-22-11-30-57.gh-issue-113320.Vp5suS.rst new file mode 100644 index 00000000000000..6cf74f335d4d7d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-22-11-30-57.gh-issue-113320.Vp5suS.rst @@ -0,0 +1,4 @@ +Fix regression in Python 3.12 where :class:`~typing.Protocol` classes that +were not marked as :func:`runtime-checkable <typing.runtime_checkable>` +would be unnecessarily introspected, potentially causing exceptions to be +raised if the protocol had problematic members. Patch by Alex Waygood. From 0ae60b66dea5140382190463a676bafe706608f5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 5 Jan 2024 09:45:22 +0000 Subject: [PATCH 61/71] GH-113486: Do not emit spurious PY_UNWIND events for optimized calls to classes.
(GH-113680) --- Include/cpython/code.h | 2 + Lib/test/test_monitoring.py | 78 +++++++++++++------ ...-12-31-07-46-01.gh-issue-113486.uki19C.rst | 1 + Python/ceval.c | 3 + Python/instrumentation.c | 6 +- Python/specialize.c | 2 +- 6 files changed, 64 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-12-31-07-46-01.gh-issue-113486.uki19C.rst diff --git a/Include/cpython/code.h b/Include/cpython/code.h index cf715c55a2b3b8..1f47d99fb60443 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -208,6 +208,8 @@ struct PyCodeObject _PyCode_DEF(1); #define CO_FUTURE_GENERATOR_STOP 0x800000 #define CO_FUTURE_ANNOTATIONS 0x1000000 +#define CO_NO_MONITORING_EVENTS 0x2000000 + /* This should be defined if a future statement modifies the syntax. For example, when a keyword is added. */ diff --git a/Lib/test/test_monitoring.py b/Lib/test/test_monitoring.py index a2efbc95f556c6..a64d1ed79decd8 100644 --- a/Lib/test/test_monitoring.py +++ b/Lib/test/test_monitoring.py @@ -750,7 +750,7 @@ class UnwindRecorder(ExceptionRecorder): event_type = E.PY_UNWIND def __call__(self, code, offset, exc): - self.events.append(("unwind", type(exc))) + self.events.append(("unwind", type(exc), code.co_name)) class ExceptionHandledRecorder(ExceptionRecorder): @@ -766,8 +766,27 @@ class ThrowRecorder(ExceptionRecorder): def __call__(self, code, offset, exc): self.events.append(("throw", type(exc))) -class ExceptionMonitoringTest(CheckEvents): +class CallRecorder: + + event_type = E.CALL + + def __init__(self, events): + self.events = events + + def __call__(self, code, offset, func, arg): + self.events.append(("call", func.__name__, arg)) + +class ReturnRecorder: + + event_type = E.PY_RETURN + + def __init__(self, events): + self.events = events + def __call__(self, code, offset, val): + self.events.append(("return", code.co_name, val)) + +class ExceptionMonitoringTest(CheckEvents): exception_recorders = ( ExceptionRecorder, @@ -936,26 +955,48 @@ 
def func(): ) self.assertEqual(events[0], ("throw", IndexError)) -class LineRecorder: + def test_no_unwind_for_shim_frame(self): - event_type = E.LINE + class B: + def __init__(self): + raise ValueError() + + def f(): + try: + return B() + except ValueError: + pass + for _ in range(100): + f() + recorders = ( + ReturnRecorder, + UnwindRecorder + ) + events = self.get_events(f, TEST_TOOL, recorders) + adaptive_insts = dis.get_instructions(f, adaptive=True) + self.assertIn( + "CALL_ALLOC_AND_ENTER_INIT", + [i.opname for i in adaptive_insts] + ) + #There should be only one unwind event + expected = [ + ('unwind', ValueError, '__init__'), + ('return', 'f', None), + ] - def __init__(self, events): - self.events = events + self.assertEqual(events, expected) - def __call__(self, code, line): - self.events.append(("line", code.co_name, line - code.co_firstlineno)) +class LineRecorder: -class CallRecorder: + event_type = E.LINE - event_type = E.CALL def __init__(self, events): self.events = events - def __call__(self, code, offset, func, arg): - self.events.append(("call", func.__name__, arg)) + def __call__(self, code, line): + self.events.append(("line", code.co_name, line - code.co_firstlineno)) class CEventRecorder: @@ -1351,15 +1392,6 @@ class BranchRecorder(JumpRecorder): event_type = E.BRANCH name = "branch" -class ReturnRecorder: - - event_type = E.PY_RETURN - - def __init__(self, events): - self.events = events - - def __call__(self, code, offset, val): - self.events.append(("return", val)) JUMP_AND_BRANCH_RECORDERS = JumpRecorder, BranchRecorder @@ -1449,11 +1481,11 @@ def func(): ('branch', 'func', 4, 4), ('line', 'func', 5), ('line', 'meth', 1), - ('return', None), + ('return', 'meth', None), ('jump', 'func', 5, 5), ('jump', 'func', 5, '[offset=114]'), ('branch', 'func', '[offset=120]', '[offset=124]'), - ('return', None), + ('return', 'func', None), ('line', 'get_events', 11)]) class TestLoadSuperAttr(CheckEvents): diff --git a/Misc/NEWS.d/next/Core and 
Builtins/2023-12-31-07-46-01.gh-issue-113486.uki19C.rst b/Misc/NEWS.d/next/Core and Builtins/2023-12-31-07-46-01.gh-issue-113486.uki19C.rst new file mode 100644 index 00000000000000..42ff4a2feb15f2 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-12-31-07-46-01.gh-issue-113486.uki19C.rst @@ -0,0 +1 @@ +No longer issue spurious ``PY_UNWIND`` events for optimized calls to classes. diff --git a/Python/ceval.c b/Python/ceval.c index 1fea9747488102..b3b542f8ddea37 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2111,6 +2111,9 @@ do_monitor_exc(PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *instr, int event) { assert(event < _PY_MONITORING_UNGROUPED_EVENTS); + if (_PyFrame_GetCode(frame)->co_flags & CO_NO_MONITORING_EVENTS) { + return 0; + } PyObject *exc = PyErr_GetRaisedException(); assert(exc != NULL); int err = _Py_call_instrumentation_arg(tstate, event, frame, instr, exc); diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 35b0e7a8f35c56..533aece210202b 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -1576,13 +1576,11 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) } _Py_Executors_InvalidateDependency(interp, code); int code_len = (int)Py_SIZE(code); - /* code->_co_firsttraceable >= code_len indicates - * that no instrumentation can be inserted. - * Exit early to avoid creating instrumentation + /* Exit early to avoid creating instrumentation * data for potential statically allocated code * objects. 
* See https://github.com/python/cpython/issues/108390 */ - if (code->_co_firsttraceable >= code_len) { + if (code->co_flags & CO_NO_MONITORING_EVENTS) { return 0; } if (update_instrumentation_data(code, interp)) { diff --git a/Python/specialize.c b/Python/specialize.c index 369b962a545f4e..7b63393803b430 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2534,7 +2534,7 @@ const struct _PyCode_DEF(8) _Py_InitCleanup = { .co_consts = (PyObject *)&_Py_SINGLETON(tuple_empty), .co_names = (PyObject *)&_Py_SINGLETON(tuple_empty), .co_exceptiontable = (PyObject *)&_Py_SINGLETON(bytes_empty), - .co_flags = CO_OPTIMIZED, + .co_flags = CO_OPTIMIZED | CO_NO_MONITORING_EVENTS, .co_localsplusnames = (PyObject *)&_Py_SINGLETON(tuple_empty), .co_localspluskinds = (PyObject *)&_Py_SINGLETON(bytes_empty), .co_filename = &_Py_ID(__init__), From 3003fbbf00422bce6e327646063e97470afa9091 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Fri, 5 Jan 2024 12:16:46 +0000 Subject: [PATCH 62/71] gh-113703: Correctly identify incomplete f-strings in the codeop module (#113709) --- Lib/test/test_codeop.py | 3 +++ .../2024-01-04-17-15-30.gh-issue-113703.Zsk0pY.rst | 2 ++ Parser/lexer/lexer.c | 8 ++++++-- 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-04-17-15-30.gh-issue-113703.Zsk0pY.rst diff --git a/Lib/test/test_codeop.py b/Lib/test/test_codeop.py index 2abb6c6d935b7e..787bd1b6a79e20 100644 --- a/Lib/test/test_codeop.py +++ b/Lib/test/test_codeop.py @@ -223,6 +223,9 @@ def test_incomplete(self): ai("(x for x in") ai("(x for x in (") + ai('a = f"""') + ai('a = \\') + def test_invalid(self): ai = self.assertInvalid ai("a b") diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-04-17-15-30.gh-issue-113703.Zsk0pY.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-04-17-15-30.gh-issue-113703.Zsk0pY.rst new file mode 100644 index 00000000000000..5db93e344724fb --- /dev/null +++ b/Misc/NEWS.d/next/Core and 
Builtins/2024-01-04-17-15-30.gh-issue-113703.Zsk0pY.rst @@ -0,0 +1,2 @@ +Fix a regression in the :mod:`codeop` module that was causing it to incorrectly +identify incomplete f-strings. Patch by Pablo Galindo diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index ea4bdf7ce4a24c..ebf7686773ff45 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -1355,9 +1355,13 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct tok->lineno = the_current_tok->f_string_line_start; if (current_tok->f_string_quote_size == 3) { - return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, + _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted f-string literal" - " (detected at line %d)", start)); + " (detected at line %d)", start); + if (c != '\n') { + tok->done = E_EOFS; + } + return MAKE_TOKEN(ERRORTOKEN); } else { return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, From e56c53334f1adf5b4ea940036c76d18e80fe809d Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Fri, 5 Jan 2024 15:31:28 +0200 Subject: [PATCH 63/71] gh-101100: Fix Sphinx warnings for 2.6 deprecations and removals (#113725) Co-authored-by: Alex Waygood Co-authored-by: Hugo van Kemenade --- Doc/library/subprocess.rst | 4 +-- Doc/whatsnew/2.0.rst | 8 ++--- Doc/whatsnew/2.2.rst | 4 +-- Doc/whatsnew/2.4.rst | 8 ++--- Doc/whatsnew/2.5.rst | 6 ++-- Doc/whatsnew/2.6.rst | 70 +++++++++++++++++++------------------- Doc/whatsnew/2.7.rst | 6 ++-- Doc/whatsnew/3.0.rst | 8 ++--- 8 files changed, 57 insertions(+), 57 deletions(-) diff --git a/Doc/library/subprocess.rst b/Doc/library/subprocess.rst index d6b892a7ed957d..91e9fcf0263d8d 100644 --- a/Doc/library/subprocess.rst +++ b/Doc/library/subprocess.rst @@ -1461,8 +1461,8 @@ Return code handling translates as follows:: print("There were some errors") -Replacing functions from the :mod:`popen2` module -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Replacing functions from the :mod:`!popen2` 
module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. note:: diff --git a/Doc/whatsnew/2.0.rst b/Doc/whatsnew/2.0.rst index b0e495b0651789..620eb7829d559a 100644 --- a/Doc/whatsnew/2.0.rst +++ b/Doc/whatsnew/2.0.rst @@ -1095,8 +1095,8 @@ module. GNU gettext message catalog library. (Integrated by Barry Warsaw, from separate contributions by Martin von Löwis, Peter Funk, and James Henstridge.) -* :mod:`linuxaudiodev`: Support for the :file:`/dev/audio` device on Linux, a - twin to the existing :mod:`sunaudiodev` module. (Contributed by Peter Bosch, +* :mod:`!linuxaudiodev`: Support for the :file:`/dev/audio` device on Linux, a + twin to the existing :mod:`!sunaudiodev` module. (Contributed by Peter Bosch, with fixes by Jeremy Hylton.) * :mod:`mmap`: An interface to memory-mapped files on both Windows and Unix. A @@ -1139,8 +1139,8 @@ module. Unix, not to be confused with :program:`gzip`\ -format files (which are supported by the :mod:`gzip` module) (Contributed by James C. Ahlstrom.) -* :mod:`imputil`: A module that provides a simpler way for writing customized - import hooks, in comparison to the existing :mod:`ihooks` module. (Implemented +* :mod:`!imputil`: A module that provides a simpler way for writing customized + import hooks, in comparison to the existing :mod:`!ihooks` module. (Implemented by Greg Stein, with much discussion on python-dev along the way.) .. 
====================================================================== diff --git a/Doc/whatsnew/2.2.rst b/Doc/whatsnew/2.2.rst index 6dfe79cef00987..6efc23a82de923 100644 --- a/Doc/whatsnew/2.2.rst +++ b/Doc/whatsnew/2.2.rst @@ -143,8 +143,8 @@ To make the set of types complete, new type objects such as :func:`dict` and return fcntl.lockf(self.fileno(), operation, length, start, whence) -The now-obsolete :mod:`posixfile` module contained a class that emulated all of -a file object's methods and also added a :meth:`lock` method, but this class +The now-obsolete :mod:`!posixfile` module contained a class that emulated all of +a file object's methods and also added a :meth:`!lock` method, but this class couldn't be passed to internal functions that expected a built-in file, something which is possible with our new :class:`LockableFile`. diff --git a/Doc/whatsnew/2.4.rst b/Doc/whatsnew/2.4.rst index e9a59f4a62551a..15d4003622c506 100644 --- a/Doc/whatsnew/2.4.rst +++ b/Doc/whatsnew/2.4.rst @@ -387,13 +387,13 @@ The standard library provides a number of ways to execute a subprocess, offering different features and different levels of complexity. ``os.system(command)`` is easy to use, but slow (it runs a shell process which executes the command) and dangerous (you have to be careful about escaping -the shell's metacharacters). The :mod:`popen2` module offers classes that can +the shell's metacharacters). The :mod:`!popen2` module offers classes that can capture standard output and standard error from the subprocess, but the naming is confusing. The :mod:`subprocess` module cleans this up, providing a unified interface that offers all the features you might need. 
-Instead of :mod:`popen2`'s collection of classes, :mod:`subprocess` contains a -single class called :class:`Popen` whose constructor supports a number of +Instead of :mod:`!popen2`'s collection of classes, :mod:`subprocess` contains a +single class called :class:`subprocess.Popen` whose constructor supports a number of different keyword arguments. :: class Popen(args, bufsize=0, executable=None, @@ -1529,7 +1529,7 @@ code: will now always be unequal, and relative comparisons (``<``, ``>``) will raise a :exc:`TypeError`. -* :func:`dircache.listdir` now passes exceptions to the caller instead of +* :func:`!dircache.listdir` now passes exceptions to the caller instead of returning empty lists. * :func:`LexicalHandler.startDTD` used to receive the public and system IDs in diff --git a/Doc/whatsnew/2.5.rst b/Doc/whatsnew/2.5.rst index 627c918dd6d8b4..f45d70ea5a19a0 100644 --- a/Doc/whatsnew/2.5.rst +++ b/Doc/whatsnew/2.5.rst @@ -1680,7 +1680,7 @@ The ctypes package The :mod:`ctypes` package, written by Thomas Heller, has been added to the standard library. :mod:`ctypes` lets you call arbitrary functions in shared -libraries or DLLs. Long-time users may remember the :mod:`dl` module, which +libraries or DLLs. Long-time users may remember the :mod:`!dl` module, which provides functions for loading shared libraries and calling functions in them. The :mod:`ctypes` package is much fancier. @@ -1877,12 +1877,12 @@ The hashlib package ------------------- A new :mod:`hashlib` module, written by Gregory P. Smith, has been added to -replace the :mod:`md5` and :mod:`sha` modules. :mod:`hashlib` adds support for +replace the :mod:`!md5` and :mod:`!sha` modules. :mod:`hashlib` adds support for additional secure hashes (SHA-224, SHA-256, SHA-384, and SHA-512). When available, the module uses OpenSSL for fast platform optimized implementations of algorithms. 
-The old :mod:`md5` and :mod:`sha` modules still exist as wrappers around hashlib +The old :mod:`!md5` and :mod:`!sha` modules still exist as wrappers around hashlib to preserve backwards compatibility. The new module's interface is very close to that of the old modules, but not identical. The most significant difference is that the constructor functions for creating new hashing objects are named diff --git a/Doc/whatsnew/2.6.rst b/Doc/whatsnew/2.6.rst index d947f61b50cfe0..93ddc7a027c324 100644 --- a/Doc/whatsnew/2.6.rst +++ b/Doc/whatsnew/2.6.rst @@ -2916,8 +2916,8 @@ Deprecations and Removals * Changes to the :class:`Exception` interface as dictated by :pep:`352` continue to be made. For 2.6, - the :attr:`message` attribute is being deprecated in favor of the - :attr:`args` attribute. + the :attr:`!message` attribute is being deprecated in favor of the + :attr:`~BaseException.args` attribute. * (3.0-warning mode) Python 3.0 will feature a reorganized standard library that will drop many outdated modules and rename others. @@ -2925,51 +2925,51 @@ Deprecations and Removals when they are imported. The list of deprecated modules is: - :mod:`audiodev`, - :mod:`bgenlocations`, - :mod:`buildtools`, - :mod:`bundlebuilder`, - :mod:`Canvas`, - :mod:`compiler`, - :mod:`dircache`, - :mod:`dl`, - :mod:`fpformat`, - :mod:`gensuitemodule`, - :mod:`ihooks`, - :mod:`imageop`, - :mod:`imgfile`, - :mod:`linuxaudiodev`, - :mod:`mhlib`, - :mod:`mimetools`, - :mod:`multifile`, - :mod:`new`, - :mod:`pure`, - :mod:`statvfs`, - :mod:`sunaudiodev`, - :mod:`test.testall`, and - :mod:`toaiff`. - -* The :mod:`gopherlib` module has been removed. 
- -* The :mod:`MimeWriter` module and :mod:`mimify` module + :mod:`!audiodev`, + :mod:`!bgenlocations`, + :mod:`!buildtools`, + :mod:`!bundlebuilder`, + :mod:`!Canvas`, + :mod:`!compiler`, + :mod:`!dircache`, + :mod:`!dl`, + :mod:`!fpformat`, + :mod:`!gensuitemodule`, + :mod:`!ihooks`, + :mod:`!imageop`, + :mod:`!imgfile`, + :mod:`!linuxaudiodev`, + :mod:`!mhlib`, + :mod:`!mimetools`, + :mod:`!multifile`, + :mod:`!new`, + :mod:`!pure`, + :mod:`!statvfs`, + :mod:`!sunaudiodev`, + :mod:`!test.testall`, and + :mod:`!toaiff`. + +* The :mod:`!gopherlib` module has been removed. + +* The :mod:`!MimeWriter` module and :mod:`!mimify` module have been deprecated; use the :mod:`email` package instead. -* The :mod:`md5` module has been deprecated; use the :mod:`hashlib` module +* The :mod:`!md5` module has been deprecated; use the :mod:`hashlib` module instead. -* The :mod:`posixfile` module has been deprecated; :func:`fcntl.lockf` +* The :mod:`!posixfile` module has been deprecated; :func:`fcntl.lockf` provides better locking. -* The :mod:`popen2` module has been deprecated; use the :mod:`subprocess` +* The :mod:`!popen2` module has been deprecated; use the :mod:`subprocess` module. -* The :mod:`rgbimg` module has been removed. +* The :mod:`!rgbimg` module has been removed. -* The :mod:`sets` module has been deprecated; it's better to +* The :mod:`!sets` module has been deprecated; it's better to use the built-in :class:`set` and :class:`frozenset` types. -* The :mod:`sha` module has been deprecated; use the :mod:`hashlib` module +* The :mod:`!sha` module has been deprecated; use the :mod:`hashlib` module instead. diff --git a/Doc/whatsnew/2.7.rst b/Doc/whatsnew/2.7.rst index 81fe132d50e1f1..fcad4bb8acdceb 100644 --- a/Doc/whatsnew/2.7.rst +++ b/Doc/whatsnew/2.7.rst @@ -1315,9 +1315,9 @@ changes, or look through the Subversion logs for all the details. giving the source address that will be used for the connection. (Contributed by Eldon Ziegler; :issue:`3972`.) 
-* The :mod:`ihooks` module now supports relative imports. Note that - :mod:`ihooks` is an older module for customizing imports, - superseded by the :mod:`imputil` module added in Python 2.0. +* The :mod:`!ihooks` module now supports relative imports. Note that + :mod:`!ihooks` is an older module for customizing imports, + superseded by the :mod:`!imputil` module added in Python 2.0. (Relative import support added by Neil Schemenauer.) .. revision 75423 diff --git a/Doc/whatsnew/3.0.rst b/Doc/whatsnew/3.0.rst index 89e12062abaddd..1df5209f22c6a5 100644 --- a/Doc/whatsnew/3.0.rst +++ b/Doc/whatsnew/3.0.rst @@ -555,8 +555,8 @@ very extensive changes to the standard library. :pep:`3108` is the reference for the major changes to the library. Here's a capsule review: -* Many old modules were removed. Some, like :mod:`gopherlib` (no - longer used) and :mod:`md5` (replaced by :mod:`hashlib`), were +* Many old modules were removed. Some, like :mod:`!gopherlib` (no + longer used) and :mod:`!md5` (replaced by :mod:`hashlib`), were already deprecated by :pep:`4`. Others were removed as a result of the removal of support for various platforms such as Irix, BeOS and Mac OS 9 (see :pep:`11`). Some modules were also selected for @@ -626,7 +626,7 @@ review: Some other changes to standard library modules, not covered by :pep:`3108`: -* Killed :mod:`sets`. Use the built-in :func:`set` class. +* Killed :mod:`!sets`. Use the built-in :func:`set` class. * Cleanup of the :mod:`sys` module: removed :func:`sys.exitfunc`, :func:`sys.exc_clear`, :data:`sys.exc_type`, :data:`sys.exc_value`, @@ -648,7 +648,7 @@ Some other changes to standard library modules, not covered by * Cleanup of the :mod:`random` module: removed the :func:`jumpahead` API. -* The :mod:`new` module is gone. +* The :mod:`!new` module is gone. 
* The functions :func:`os.tmpnam`, :func:`os.tempnam` and :func:`os.tmpfile` have been removed in favor of the :mod:`tempfile` From 5e1916ba1bf521d6ff9d2c553c057f3ef7008977 Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Fri, 5 Jan 2024 07:34:25 -0800 Subject: [PATCH 64/71] gh-80532: Do not set ipv6type when cross-compiling (#17956) Co-authored-by: Xavier de Gaye --- Misc/NEWS.d/next/Build/2020-01-11-23-49-17.bpo-36351.ce8BBh.rst | 1 + configure | 2 +- configure.ac | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2020-01-11-23-49-17.bpo-36351.ce8BBh.rst diff --git a/Misc/NEWS.d/next/Build/2020-01-11-23-49-17.bpo-36351.ce8BBh.rst b/Misc/NEWS.d/next/Build/2020-01-11-23-49-17.bpo-36351.ce8BBh.rst new file mode 100644 index 00000000000000..d3cfbfc7ea1000 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2020-01-11-23-49-17.bpo-36351.ce8BBh.rst @@ -0,0 +1 @@ +Do not set ipv6type when cross-compiling. diff --git a/configure b/configure index 3cc9aecafad13e..b1153df4d7ec52 100755 --- a/configure +++ b/configure @@ -16538,7 +16538,7 @@ ipv6type=unknown ipv6lib=none ipv6trylibc=no -if test "$ipv6" = "yes"; then +if test "$ipv6" = yes -a "$cross_compiling" = no; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking ipv6 stack type" >&5 printf %s "checking ipv6 stack type... 
" >&6; } for i in inria kame linux-glibc linux-inet6 solaris toshiba v6d zeta; diff --git a/configure.ac b/configure.ac index 6a80a5d29a04ef..9587e6d63499aa 100644 --- a/configure.ac +++ b/configure.ac @@ -4367,7 +4367,7 @@ ipv6type=unknown ipv6lib=none ipv6trylibc=no -if test "$ipv6" = "yes"; then +if test "$ipv6" = yes -a "$cross_compiling" = no; then AC_MSG_CHECKING([ipv6 stack type]) for i in inria kame linux-glibc linux-inet6 solaris toshiba v6d zeta; do From eb53750757062255b1793969ca4cb12ef82b91c6 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Fri, 5 Jan 2024 21:15:07 +0200 Subject: [PATCH 65/71] gh-101100: Fix Sphinx warnings in `library/pyclbr.rst` (#113739) Co-authored-by: Alex Waygood --- Doc/library/pyclbr.rst | 103 ++++++++++++++++++++++------------------- Doc/tools/.nitignore | 1 - Doc/whatsnew/3.10.rst | 4 +- 3 files changed, 58 insertions(+), 50 deletions(-) diff --git a/Doc/library/pyclbr.rst b/Doc/library/pyclbr.rst index 1c40ba4838ca75..1e9876849b02f3 100644 --- a/Doc/library/pyclbr.rst +++ b/Doc/library/pyclbr.rst @@ -58,106 +58,115 @@ of these classes. Function Objects ---------------- -Class :class:`Function` instances describe functions defined by def -statements. They have the following attributes: +.. class:: Function -.. attribute:: Function.file + Class :class:`!Function` instances describe functions defined by def + statements. They have the following attributes: - Name of the file in which the function is defined. + .. attribute:: file -.. attribute:: Function.module + Name of the file in which the function is defined. - The name of the module defining the function described. + .. attribute:: module -.. attribute:: Function.name + The name of the module defining the function described. - The name of the function. + .. attribute:: name -.. attribute:: Function.lineno + The name of the function. - The line number in the file where the definition starts. + .. attribute:: lineno -.. 
attribute:: Function.parent + The line number in the file where the definition starts. - For top-level functions, None. For nested functions, the parent. - .. versionadded:: 3.7 + .. attribute:: parent + For top-level functions, ``None``. For nested functions, the parent. -.. attribute:: Function.children + .. versionadded:: 3.7 - A dictionary mapping names to descriptors for nested functions and - classes. - .. versionadded:: 3.7 + .. attribute:: children + A :class:`dictionary <dict>` mapping names to descriptors for nested functions and + classes. -.. attribute:: Function.is_async + .. versionadded:: 3.7 - ``True`` for functions that are defined with the ``async`` prefix, ``False`` otherwise. - .. versionadded:: 3.10 + .. attribute:: is_async + + ``True`` for functions that are defined with the + :keyword:`async <async def>` prefix, ``False`` otherwise. + + .. versionadded:: 3.10 .. _pyclbr-class-objects: Class Objects ------------- -Class :class:`Class` instances describe classes defined by class -statements. They have the same attributes as Functions and two more. + +.. class:: Class + + Class :class:`!Class` instances describe classes defined by class + statements. They have the same attributes as :class:`Functions <Function>` + and two more. -.. attribute:: Class.file + .. attribute:: file - Name of the file in which the class is defined. + Name of the file in which the class is defined. -.. attribute:: Class.module + .. attribute:: module - The name of the module defining the class described. + The name of the module defining the class described. -.. attribute:: Class.name + .. attribute:: name - The name of the class. + The name of the class. -.. attribute:: Class.lineno + .. attribute:: lineno - The line number in the file where the definition starts. + The line number in the file where the definition starts. -.. attribute:: Class.parent + .. attribute:: parent - For top-level classes, None. For nested classes, the parent. + For top-level classes, None.
For nested classes, the parent. - .. versionadded:: 3.7 + .. versionadded:: 3.7 -.. attribute:: Class.children + .. attribute:: children - A dictionary mapping names to descriptors for nested functions and - classes. + A dictionary mapping names to descriptors for nested functions and + classes. - .. versionadded:: 3.7 + .. versionadded:: 3.7 -.. attribute:: Class.super + .. attribute:: super - A list of :class:`Class` objects which describe the immediate base - classes of the class being described. Classes which are named as - superclasses but which are not discoverable by :func:`readmodule_ex` - are listed as a string with the class name instead of as - :class:`Class` objects. + A list of :class:`!Class` objects which describe the immediate base + classes of the class being described. Classes which are named as + superclasses but which are not discoverable by :func:`readmodule_ex` + are listed as a string with the class name instead of as + :class:`!Class` objects. -.. attribute:: Class.methods + .. attribute:: methods - A dictionary mapping method names to line numbers. This can be - derived from the newer children dictionary, but remains for - back-compatibility. + A :class:`dictionary <dict>` mapping method names to line numbers. + This can be derived from the newer :attr:`children` dictionary, + but remains for + back-compatibility.
diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 05df332fa7c9a8..c4f9b07dc916c9 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -67,7 +67,6 @@ Doc/library/pickletools.rst Doc/library/platform.rst Doc/library/plistlib.rst Doc/library/profile.rst -Doc/library/pyclbr.rst Doc/library/pydoc.rst Doc/library/pyexpat.rst Doc/library/readline.rst diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index cd86c82caffc56..7dc06e9af694d9 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -1316,8 +1316,8 @@ pyclbr ------ Add an ``end_lineno`` attribute to the ``Function`` and ``Class`` -objects in the tree returned by :func:`pyclbr.readline` and -:func:`pyclbr.readline_ex`. It matches the existing (start) ``lineno``. +objects in the tree returned by :func:`pyclbr.readmodule` and +:func:`pyclbr.readmodule_ex`. It matches the existing (start) ``lineno``. (Contributed by Aviral Srivastava in :issue:`38307`.) shelve From 0b7476080b58ea2ee71c6c1229994a3bb62fe4fa Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 5 Jan 2024 15:08:50 -0500 Subject: [PATCH 66/71] gh-112532: Tag mimalloc heaps and pages (#113742) * gh-112532: Tag mimalloc heaps and pages Mimalloc pages are data structures that contain contiguous allocations of the same block size. Note that they are distinct from operating system pages. Mimalloc pages are contained in segments. When a thread exits, it abandons any segments and contained pages that have live allocations. These segments and pages may be later reclaimed by another thread. To support GC and certain thread-safety guarantees in free-threaded builds, we want pages to only be reclaimed by the corresponding heap in the claimant thread. For example, we want pages containing GC objects to only be claimed by GC heaps. This allows heaps and pages to be tagged with an integer tag that is used to ensure that abandoned pages are only claimed by heaps with the same tag. 
Heaps can be initialized with a tag (0-15); any page allocated by that heap copies the corresponding tag. * Fix conversion warning --- Include/internal/mimalloc/mimalloc/internal.h | 2 +- Include/internal/mimalloc/mimalloc/types.h | 2 ++ Objects/mimalloc/heap.c | 14 +++++++------ Objects/mimalloc/init.c | 8 ++++---- Objects/mimalloc/page.c | 1 + Objects/mimalloc/segment.c | 20 ++++++++++++++++--- Python/pystate.c | 4 ++-- 7 files changed, 35 insertions(+), 16 deletions(-) diff --git a/Include/internal/mimalloc/mimalloc/internal.h b/Include/internal/mimalloc/mimalloc/internal.h index afd7d18a13ed8f..887bf26c956982 100644 --- a/Include/internal/mimalloc/mimalloc/internal.h +++ b/Include/internal/mimalloc/mimalloc/internal.h @@ -155,7 +155,7 @@ size_t _mi_bin_size(uint8_t bin); // for stats uint8_t _mi_bin(size_t size); // for stats // "heap.c" -void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id); +void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool no_reclaim, uint8_t tag); void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); diff --git a/Include/internal/mimalloc/mimalloc/types.h b/Include/internal/mimalloc/mimalloc/types.h index ab41b1ce990827..b8cae24507fc5e 100644 --- a/Include/internal/mimalloc/mimalloc/types.h +++ b/Include/internal/mimalloc/mimalloc/types.h @@ -311,6 +311,7 @@ typedef struct mi_page_s { uint32_t slice_offset; // distance from the actual page data slice (0 if a page) uint8_t is_committed : 1; // `true` if the page virtual memory is committed uint8_t is_zero_init : 1; // `true` if the page was initially zero initialized + uint8_t tag : 4; // tag from the owning heap // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` @@ -551,6 +552,7 @@ struct mi_heap_s { size_t page_retired_max; // 
largest retired index into the `pages` array. mi_heap_t* next; // list of heaps per thread bool no_reclaim; // `true` if this heap should not reclaim abandoned pages + uint8_t tag; // custom identifier for this heap }; diff --git a/Objects/mimalloc/heap.c b/Objects/mimalloc/heap.c index c50e3b05590b6f..6468999a7d5766 100644 --- a/Objects/mimalloc/heap.c +++ b/Objects/mimalloc/heap.c @@ -209,7 +209,7 @@ mi_heap_t* mi_heap_get_backing(void) { return bheap; } -void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id) +void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool no_reclaim, uint8_t tag) { _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = tld; @@ -224,17 +224,19 @@ void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id) heap->cookie = _mi_heap_random_next(heap) | 1; heap->keys[0] = _mi_heap_random_next(heap); heap->keys[1] = _mi_heap_random_next(heap); + heap->no_reclaim = no_reclaim; + heap->tag = tag; + // push on the thread local heaps list + heap->next = heap->tld->heaps; + heap->tld->heaps = heap; } mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? if (heap == NULL) return NULL; - _mi_heap_init_ex(heap, bheap->tld, arena_id); - heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe - // push on the thread local heaps list - heap->next = heap->tld->heaps; - heap->tld->heaps = heap; + // don't reclaim abandoned pages or otherwise destroy is unsafe + _mi_heap_init_ex(heap, bheap->tld, arena_id, true, 0); return heap; } diff --git a/Objects/mimalloc/init.c b/Objects/mimalloc/init.c index 0446021fdc514e..5897f0512f8ef9 100644 --- a/Objects/mimalloc/init.c +++ b/Objects/mimalloc/init.c @@ -14,7 +14,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, + 0, false, false, false, 0, 0, // capacity 0, // reserved capacity { 0 }, // flags @@ -121,7 +121,8 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next - false + false, + 0 }; #define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats))) @@ -298,7 +299,7 @@ static bool _mi_heap_init(void) { if (td == NULL) return false; _mi_tld_init(&td->tld, &td->heap); - _mi_heap_init_ex(&td->heap, &td->tld, _mi_arena_id_none()); + _mi_heap_init_ex(&td->heap, &td->tld, _mi_arena_id_none(), false, 0); _mi_heap_set_default_direct(&td->heap); } return false; @@ -311,7 +312,6 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) { tld->segments.abandoned = &_mi_abandoned_default; tld->os.stats = &tld->stats; tld->heap_backing = bheap; - tld->heaps = bheap; } // Free the thread local default heap (called from `mi_thread_done`) diff --git a/Objects/mimalloc/page.c b/Objects/mimalloc/page.c index 4610cf27afff75..8f0ce920156e04 100644 --- a/Objects/mimalloc/page.c +++ b/Objects/mimalloc/page.c @@ -660,6 +660,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(block_size > 0); // set fields mi_page_set_heap(page, heap); + page->tag = heap->tag; page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? 
(uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start size_t page_size; const void* page_start = _mi_segment_page_start(segment, page, &page_size); diff --git a/Objects/mimalloc/segment.c b/Objects/mimalloc/segment.c index 1040da0d9af3e9..d9b39b03fd6c5f 100644 --- a/Objects/mimalloc/segment.c +++ b/Objects/mimalloc/segment.c @@ -1299,6 +1299,18 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s return has_page; } +static mi_heap_t* mi_heap_by_tag(mi_heap_t* heap, uint8_t tag) { + if (heap->tag == tag) { + return heap; + } + for (mi_heap_t *curr = heap->tld->heaps; curr != NULL; curr = curr->next) { + if (curr->tag == tag) { + return curr; + } + } + return NULL; +} + // Reclaim an abandoned segment; returns NULL if the segment was freed // set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full. static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) { @@ -1321,6 +1333,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, if (mi_slice_is_used(slice)) { // in use: reclaim the page in our heap mi_page_t* page = mi_slice_to_page(slice); + mi_heap_t* target_heap = mi_heap_by_tag(heap, page->tag); mi_assert_internal(page->is_committed); mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); mi_assert_internal(mi_page_heap(page) == NULL); @@ -1328,7 +1341,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, _mi_stat_decrease(&tld->stats->pages_abandoned, 1); segment->abandoned--; // set the heap again and allow delayed free again - mi_page_set_heap(page, heap); + mi_page_set_heap(page, target_heap); _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) _mi_page_free_collect(page, false); // ensure used count is up to date if 
(mi_page_all_free(page)) { @@ -1337,8 +1350,9 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, } else { // otherwise reclaim it into the heap - _mi_page_reclaim(heap, page); - if (requested_block_size == page->xblock_size && mi_page_has_any_available(page)) { + _mi_page_reclaim(target_heap, page); + if (requested_block_size == page->xblock_size && mi_page_has_any_available(page) && + heap == target_heap) { if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; } } } diff --git a/Python/pystate.c b/Python/pystate.c index 5f515cf475dab5..21f16b7bcdff0d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2539,8 +2539,8 @@ tstate_mimalloc_bind(PyThreadState *tstate) tld->segments.abandoned = &tstate->interp->mimalloc.abandoned_pool; // Initialize each heap - for (Py_ssize_t i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) { - _mi_heap_init_ex(&mts->heaps[i], tld, _mi_arena_id_none()); + for (uint8_t i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) { + _mi_heap_init_ex(&mts->heaps[i], tld, _mi_arena_id_none(), false, i); } // By default, object allocations use _Py_MIMALLOC_HEAP_OBJECT. From 99854ce1701ca4d1a0d153e501a29f9eec306ce5 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 5 Jan 2024 15:17:16 -0500 Subject: [PATCH 67/71] gh-113688: Split up gcmodule.c (gh-113715) This splits part of Modules/gcmodule.c of into Python/gc.c, which now contains the core garbage collection implementation. The Python module remain in the Modules/gcmodule.c file. 
--- Include/internal/pycore_gc.h | 31 + Lib/test/test_gc.py | 2 +- Makefile.pre.in | 1 + Modules/gcmodule.c | 2001 +----------------------- PCbuild/_freeze_module.vcxproj | 1 + PCbuild/_freeze_module.vcxproj.filters | 3 + PCbuild/pythoncore.vcxproj | 1 + PCbuild/pythoncore.vcxproj.filters | 3 + Python/gc.c | 1958 +++++++++++++++++++++++ 9 files changed, 2036 insertions(+), 1965 deletions(-) create mode 100644 Python/gc.c diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 2d33aa76d78229..2a79c403803ed1 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -64,6 +64,26 @@ static inline int _PyObject_GC_MAY_BE_TRACKED(PyObject *obj) { #define _PyGC_PREV_SHIFT (2) #define _PyGC_PREV_MASK (((uintptr_t) -1) << _PyGC_PREV_SHIFT) +/* set for debugging information */ +#define _PyGC_DEBUG_STATS (1<<0) /* print collection statistics */ +#define _PyGC_DEBUG_COLLECTABLE (1<<1) /* print collectable objects */ +#define _PyGC_DEBUG_UNCOLLECTABLE (1<<2) /* print uncollectable objects */ +#define _PyGC_DEBUG_SAVEALL (1<<5) /* save all garbage in gc.garbage */ +#define _PyGC_DEBUG_LEAK _PyGC_DEBUG_COLLECTABLE | \ + _PyGC_DEBUG_UNCOLLECTABLE | \ + _PyGC_DEBUG_SAVEALL + +typedef enum { + // GC was triggered by heap allocation + _Py_GC_REASON_HEAP, + + // GC was called during shutdown + _Py_GC_REASON_SHUTDOWN, + + // GC was called by gc.collect() or PyGC_Collect() + _Py_GC_REASON_MANUAL +} _PyGC_Reason; + // Lowest bit of _gc_next is used for flags only in GC. // But it is always 0 for normal code. static inline PyGC_Head* _PyGCHead_NEXT(PyGC_Head *gc) { @@ -203,8 +223,19 @@ struct _gc_runtime_state { extern void _PyGC_InitState(struct _gc_runtime_state *); +extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, + _PyGC_Reason reason); extern Py_ssize_t _PyGC_CollectNoFail(PyThreadState *tstate); +/* Freeze objects tracked by the GC and ignore them in future collections. 
*/ +extern void _PyGC_Freeze(PyInterpreterState *interp); +/* Unfreezes objects placing them in the oldest generation */ +extern void _PyGC_Unfreeze(PyInterpreterState *interp); +/* Number of frozen objects */ +extern Py_ssize_t _PyGC_GetFreezeCount(PyInterpreterState *interp); + +extern PyObject *_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation); +extern PyObject *_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs); // Functions to clear types free lists extern void _PyTuple_ClearFreeList(PyInterpreterState *interp); diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index db7cb9ace6e5f3..1d71dd9e262a6a 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1225,7 +1225,7 @@ def test_refcount_errors(self): p.stderr.close() # Verify that stderr has a useful error message: self.assertRegex(stderr, - br'gcmodule\.c:[0-9]+: gc_decref: Assertion "gc_get_refs\(g\) > 0" failed.') + br'gc\.c:[0-9]+: gc_decref: Assertion "gc_get_refs\(g\) > 0" failed.') self.assertRegex(stderr, br'refcount is too small') # "address : 0x7fb5062efc18" diff --git a/Makefile.pre.in b/Makefile.pre.in index 6a64547e97d266..44e8e4bd3f9557 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -417,6 +417,7 @@ PYTHON_OBJS= \ Python/frame.o \ Python/frozenmain.o \ Python/future.o \ + Python/gc.o \ Python/getargs.o \ Python/getcompiler.o \ Python/getcopyright.o \ diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 2d1f381e622226..9a827cb79d73ab 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -1,1530 +1,27 @@ /* - - Reference Cycle Garbage Collection - ================================== - - Neil Schemenauer - - Based on a post on the python-dev list. Ideas from Guido van Rossum, - Eric Tiedemann, and various others. - - http://www.arctrix.com/nas/python/gc/ - - The following mailing list threads provide a historical perspective on - the design of this module. Note that a fair amount of refinement has - occurred since those discussions. 
- - http://mail.python.org/pipermail/python-dev/2000-March/002385.html - http://mail.python.org/pipermail/python-dev/2000-March/002434.html - http://mail.python.org/pipermail/python-dev/2000-March/002497.html - - For a highlevel view of the collection process, read the collect - function. - -*/ - -#include "Python.h" -#include "pycore_ceval.h" // _Py_set_eval_breaker_bit() -#include "pycore_context.h" -#include "pycore_dict.h" // _PyDict_MaybeUntrack() -#include "pycore_initconfig.h" -#include "pycore_interp.h" // PyInterpreterState.gc -#include "pycore_object.h" -#include "pycore_pyerrors.h" -#include "pycore_pystate.h" // _PyThreadState_GET() -#include "pycore_weakref.h" // _PyWeakref_ClearRef() -#include "pydtrace.h" - -typedef struct _gc_runtime_state GCState; - -/*[clinic input] -module gc -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5c9690ecc842d79]*/ - - -#ifdef Py_DEBUG -# define GC_DEBUG -#endif - -#define GC_NEXT _PyGCHead_NEXT -#define GC_PREV _PyGCHead_PREV - -// update_refs() set this bit for all objects in current generation. -// subtract_refs() and move_unreachable() uses this to distinguish -// visited object is in GCing or not. -// -// move_unreachable() removes this flag from reachable objects. -// Only unreachable objects have this flag. -// -// No objects in interpreter have this flag after GC ends. -#define PREV_MASK_COLLECTING _PyGC_PREV_MASK_COLLECTING - -// Lowest bit of _gc_next is used for UNREACHABLE flag. -// -// This flag represents the object is in unreachable list in move_unreachable() -// -// Although this flag is used only in move_unreachable(), move_unreachable() -// doesn't clear this flag to skip unnecessary iteration. -// move_legacy_finalizers() removes this flag instead. -// Between them, unreachable list is not normal list and we can not use -// most gc_list_* functions for it. 
-#define NEXT_MASK_UNREACHABLE (1) - -#define AS_GC(op) _Py_AS_GC(op) -#define FROM_GC(gc) _Py_FROM_GC(gc) - -// Automatically choose the generation that needs collecting. -#define GENERATION_AUTO (-1) - -typedef enum { - // GC was triggered by heap allocation - _Py_GC_REASON_HEAP, - - // GC was called during shutdown - _Py_GC_REASON_SHUTDOWN, - - // GC was called by gc.collect() or PyGC_Collect() - _Py_GC_REASON_MANUAL -} _PyGC_Reason; - - -static inline int -gc_is_collecting(PyGC_Head *g) -{ - return (g->_gc_prev & PREV_MASK_COLLECTING) != 0; -} - -static inline void -gc_clear_collecting(PyGC_Head *g) -{ - g->_gc_prev &= ~PREV_MASK_COLLECTING; -} - -static inline Py_ssize_t -gc_get_refs(PyGC_Head *g) -{ - return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT); -} - -static inline void -gc_set_refs(PyGC_Head *g, Py_ssize_t refs) -{ - g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK) - | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT); -} - -static inline void -gc_reset_refs(PyGC_Head *g, Py_ssize_t refs) -{ - g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED) - | PREV_MASK_COLLECTING - | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT); -} - -static inline void -gc_decref(PyGC_Head *g) -{ - _PyObject_ASSERT_WITH_MSG(FROM_GC(g), - gc_get_refs(g) > 0, - "refcount is too small"); - g->_gc_prev -= 1 << _PyGC_PREV_SHIFT; -} - -/* set for debugging information */ -#define DEBUG_STATS (1<<0) /* print collection statistics */ -#define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */ -#define DEBUG_UNCOLLECTABLE (1<<2) /* print uncollectable objects */ -#define DEBUG_SAVEALL (1<<5) /* save all garbage in gc.garbage */ -#define DEBUG_LEAK DEBUG_COLLECTABLE | \ - DEBUG_UNCOLLECTABLE | \ - DEBUG_SAVEALL - -#define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head) - - -static GCState * -get_gc_state(void) -{ - PyInterpreterState *interp = _PyInterpreterState_GET(); - return &interp->gc; -} - - -void -_PyGC_InitState(GCState *gcstate) -{ -#define INIT_HEAD(GEN) \ - do { \ - 
GEN.head._gc_next = (uintptr_t)&GEN.head; \ - GEN.head._gc_prev = (uintptr_t)&GEN.head; \ - } while (0) - - for (int i = 0; i < NUM_GENERATIONS; i++) { - assert(gcstate->generations[i].count == 0); - INIT_HEAD(gcstate->generations[i]); - }; - gcstate->generation0 = GEN_HEAD(gcstate, 0); - INIT_HEAD(gcstate->permanent_generation); - -#undef INIT_HEAD -} - - -PyStatus -_PyGC_Init(PyInterpreterState *interp) -{ - GCState *gcstate = &interp->gc; - - gcstate->garbage = PyList_New(0); - if (gcstate->garbage == NULL) { - return _PyStatus_NO_MEMORY(); - } - - gcstate->callbacks = PyList_New(0); - if (gcstate->callbacks == NULL) { - return _PyStatus_NO_MEMORY(); - } - - return _PyStatus_OK(); -} - - -/* -_gc_prev values ---------------- - -Between collections, _gc_prev is used for doubly linked list. - -Lowest two bits of _gc_prev are used for flags. -PREV_MASK_COLLECTING is used only while collecting and cleared before GC ends -or _PyObject_GC_UNTRACK() is called. - -During a collection, _gc_prev is temporary used for gc_refs, and the gc list -is singly linked until _gc_prev is restored. - -gc_refs - At the start of a collection, update_refs() copies the true refcount - to gc_refs, for each object in the generation being collected. - subtract_refs() then adjusts gc_refs so that it equals the number of - times an object is referenced directly from outside the generation - being collected. - -PREV_MASK_COLLECTING - Objects in generation being collected are marked PREV_MASK_COLLECTING in - update_refs(). - - -_gc_next values ---------------- - -_gc_next takes these values: - -0 - The object is not tracked - -!= 0 - Pointer to the next object in the GC list. - Additionally, lowest bit is used temporary for - NEXT_MASK_UNREACHABLE flag described below. - -NEXT_MASK_UNREACHABLE - move_unreachable() then moves objects not reachable (whether directly or - indirectly) from outside the generation into an "unreachable" set and - set this flag. 
- - Objects that are found to be reachable have gc_refs set to 1. - When this flag is set for the reachable object, the object must be in - "unreachable" set. - The flag is unset and the object is moved back to "reachable" set. - - move_legacy_finalizers() will remove this flag from "unreachable" set. -*/ - -/*** list functions ***/ - -static inline void -gc_list_init(PyGC_Head *list) -{ - // List header must not have flags. - // We can assign pointer by simple cast. - list->_gc_prev = (uintptr_t)list; - list->_gc_next = (uintptr_t)list; -} - -static inline int -gc_list_is_empty(PyGC_Head *list) -{ - return (list->_gc_next == (uintptr_t)list); -} - -/* Append `node` to `list`. */ -static inline void -gc_list_append(PyGC_Head *node, PyGC_Head *list) -{ - PyGC_Head *last = (PyGC_Head *)list->_gc_prev; - - // last <-> node - _PyGCHead_SET_PREV(node, last); - _PyGCHead_SET_NEXT(last, node); - - // node <-> list - _PyGCHead_SET_NEXT(node, list); - list->_gc_prev = (uintptr_t)node; -} - -/* Remove `node` from the gc list it's currently in. */ -static inline void -gc_list_remove(PyGC_Head *node) -{ - PyGC_Head *prev = GC_PREV(node); - PyGC_Head *next = GC_NEXT(node); - - _PyGCHead_SET_NEXT(prev, next); - _PyGCHead_SET_PREV(next, prev); - - node->_gc_next = 0; /* object is not currently tracked */ -} - -/* Move `node` from the gc list it's currently in (which is not explicitly - * named here) to the end of `list`. This is semantically the same as - * gc_list_remove(node) followed by gc_list_append(node, list). - */ -static void -gc_list_move(PyGC_Head *node, PyGC_Head *list) -{ - /* Unlink from current list. */ - PyGC_Head *from_prev = GC_PREV(node); - PyGC_Head *from_next = GC_NEXT(node); - _PyGCHead_SET_NEXT(from_prev, from_next); - _PyGCHead_SET_PREV(from_next, from_prev); - - /* Relink at end of new list. */ - // list must not have flags. So we can skip macros. 
- PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev; - _PyGCHead_SET_PREV(node, to_prev); - _PyGCHead_SET_NEXT(to_prev, node); - list->_gc_prev = (uintptr_t)node; - _PyGCHead_SET_NEXT(node, list); -} - -/* append list `from` onto list `to`; `from` becomes an empty list */ -static void -gc_list_merge(PyGC_Head *from, PyGC_Head *to) -{ - assert(from != to); - if (!gc_list_is_empty(from)) { - PyGC_Head *to_tail = GC_PREV(to); - PyGC_Head *from_head = GC_NEXT(from); - PyGC_Head *from_tail = GC_PREV(from); - assert(from_head != from); - assert(from_tail != from); - - _PyGCHead_SET_NEXT(to_tail, from_head); - _PyGCHead_SET_PREV(from_head, to_tail); - - _PyGCHead_SET_NEXT(from_tail, to); - _PyGCHead_SET_PREV(to, from_tail); - } - gc_list_init(from); -} - -static Py_ssize_t -gc_list_size(PyGC_Head *list) -{ - PyGC_Head *gc; - Py_ssize_t n = 0; - for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) { - n++; - } - return n; -} - -/* Walk the list and mark all objects as non-collecting */ -static inline void -gc_list_clear_collecting(PyGC_Head *collectable) -{ - PyGC_Head *gc; - for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) { - gc_clear_collecting(gc); - } -} - -/* Append objects in a GC list to a Python list. - * Return 0 if all OK, < 0 if error (out of memory for list) - */ -static int -append_objects(PyObject *py_list, PyGC_Head *gc_list) -{ - PyGC_Head *gc; - for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) { - PyObject *op = FROM_GC(gc); - if (op != py_list) { - if (PyList_Append(py_list, op)) { - return -1; /* exception */ - } - } - } - return 0; -} - -// Constants for validate_list's flags argument. -enum flagstates {collecting_clear_unreachable_clear, - collecting_clear_unreachable_set, - collecting_set_unreachable_clear, - collecting_set_unreachable_set}; - -#ifdef GC_DEBUG -// validate_list checks list consistency. And it works as document -// describing when flags are expected to be set / unset. 
-// `head` must be a doubly-linked gc list, although it's fine (expected!) if -// the prev and next pointers are "polluted" with flags. -// What's checked: -// - The `head` pointers are not polluted. -// - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all -// `set or clear, as specified by the 'flags' argument. -// - The prev and next pointers are mutually consistent. -static void -validate_list(PyGC_Head *head, enum flagstates flags) -{ - assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0); - assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0); - uintptr_t prev_value = 0, next_value = 0; - switch (flags) { - case collecting_clear_unreachable_clear: - break; - case collecting_set_unreachable_clear: - prev_value = PREV_MASK_COLLECTING; - break; - case collecting_clear_unreachable_set: - next_value = NEXT_MASK_UNREACHABLE; - break; - case collecting_set_unreachable_set: - prev_value = PREV_MASK_COLLECTING; - next_value = NEXT_MASK_UNREACHABLE; - break; - default: - assert(! "bad internal flags argument"); - } - PyGC_Head *prev = head; - PyGC_Head *gc = GC_NEXT(head); - while (gc != head) { - PyGC_Head *trueprev = GC_PREV(gc); - PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE); - assert(truenext != NULL); - assert(trueprev == prev); - assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value); - assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value); - prev = gc; - gc = truenext; - } - assert(prev == GC_PREV(head)); -} -#else -#define validate_list(x, y) do{}while(0) -#endif - -/*** end of list stuff ***/ - - -/* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 and - * PREV_MASK_COLLECTING bit is set for all objects in containers. 
- */ -static void -update_refs(PyGC_Head *containers) -{ - PyGC_Head *next; - PyGC_Head *gc = GC_NEXT(containers); - - while (gc != containers) { - next = GC_NEXT(gc); - /* Move any object that might have become immortal to the - * permanent generation as the reference count is not accurately - * reflecting the actual number of live references to this object - */ - if (_Py_IsImmortal(FROM_GC(gc))) { - gc_list_move(gc, &get_gc_state()->permanent_generation.head); - gc = next; - continue; - } - gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc))); - /* Python's cyclic gc should never see an incoming refcount - * of 0: if something decref'ed to 0, it should have been - * deallocated immediately at that time. - * Possible cause (if the assert triggers): a tp_dealloc - * routine left a gc-aware object tracked during its teardown - * phase, and did something-- or allowed something to happen -- - * that called back into Python. gc can trigger then, and may - * see the still-tracked dying object. Before this assert - * was added, such mistakes went on to allow gc to try to - * delete the object again. In a debug build, that caused - * a mysterious segfault, when _Py_ForgetReference tried - * to remove the object from the doubly-linked list of all - * objects a second time. In a release build, an actual - * double deallocation occurred, which leads to corruption - * of the allocator's internal bookkeeping pointers. That's - * so serious that maybe this should be a release-build - * check instead of an assert? - */ - _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0); - gc = next; - } -} - -/* A traversal callback for subtract_refs. 
*/ -static int -visit_decref(PyObject *op, void *parent) -{ - OBJECT_STAT_INC(object_visits); - _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op)); - - if (_PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - /* We're only interested in gc_refs for objects in the - * generation being collected, which can be recognized - * because only they have positive gc_refs. - */ - if (gc_is_collecting(gc)) { - gc_decref(gc); - } - } - return 0; -} - -/* Subtract internal references from gc_refs. After this, gc_refs is >= 0 - * for all objects in containers, and is GC_REACHABLE for all tracked gc - * objects not in containers. The ones with gc_refs > 0 are directly - * reachable from outside containers, and so can't be collected. - */ -static void -subtract_refs(PyGC_Head *containers) -{ - traverseproc traverse; - PyGC_Head *gc = GC_NEXT(containers); - for (; gc != containers; gc = GC_NEXT(gc)) { - PyObject *op = FROM_GC(gc); - traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_decref, - op); - } -} - -/* A traversal callback for move_unreachable. */ -static int -visit_reachable(PyObject *op, void *arg) -{ - PyGC_Head *reachable = arg; - OBJECT_STAT_INC(object_visits); - if (!_PyObject_IS_GC(op)) { - return 0; - } - - PyGC_Head *gc = AS_GC(op); - const Py_ssize_t gc_refs = gc_get_refs(gc); - - // Ignore objects in other generation. - // This also skips objects "to the left" of the current position in - // move_unreachable's scan of the 'young' list - they've already been - // traversed, and no longer have the PREV_MASK_COLLECTING flag. - if (! gc_is_collecting(gc)) { - return 0; - } - // It would be a logic error elsewhere if the collecting flag were set on - // an untracked object. - assert(gc->_gc_next != 0); - - if (gc->_gc_next & NEXT_MASK_UNREACHABLE) { - /* This had gc_refs = 0 when move_unreachable got - * to it, but turns out it's reachable after all. 
- * Move it back to move_unreachable's 'young' list, - * and move_unreachable will eventually get to it - * again. - */ - // Manually unlink gc from unreachable list because the list functions - // don't work right in the presence of NEXT_MASK_UNREACHABLE flags. - PyGC_Head *prev = GC_PREV(gc); - PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE); - _PyObject_ASSERT(FROM_GC(prev), - prev->_gc_next & NEXT_MASK_UNREACHABLE); - _PyObject_ASSERT(FROM_GC(next), - next->_gc_next & NEXT_MASK_UNREACHABLE); - prev->_gc_next = gc->_gc_next; // copy NEXT_MASK_UNREACHABLE - _PyGCHead_SET_PREV(next, prev); - - gc_list_append(gc, reachable); - gc_set_refs(gc, 1); - } - else if (gc_refs == 0) { - /* This is in move_unreachable's 'young' list, but - * the traversal hasn't yet gotten to it. All - * we need to do is tell move_unreachable that it's - * reachable. - */ - gc_set_refs(gc, 1); - } - /* Else there's nothing to do. - * If gc_refs > 0, it must be in move_unreachable's 'young' - * list, and move_unreachable will eventually get to it. - */ - else { - _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small"); - } - return 0; -} - -/* Move the unreachable objects from young to unreachable. After this, - * all objects in young don't have PREV_MASK_COLLECTING flag and - * unreachable have the flag. - * All objects in young after this are directly or indirectly reachable - * from outside the original young; and all objects in unreachable are - * not. - * - * This function restores _gc_prev pointer. young and unreachable are - * doubly linked list after this function. - * But _gc_next in unreachable list has NEXT_MASK_UNREACHABLE flag. - * So we can not gc_list_* functions for unreachable until we remove the flag. - */ -static void -move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) -{ - // previous elem in the young list, used for restore gc_prev. 
- PyGC_Head *prev = young; - PyGC_Head *gc = GC_NEXT(young); - - /* Invariants: all objects "to the left" of us in young are reachable - * (directly or indirectly) from outside the young list as it was at entry. - * - * All other objects from the original young "to the left" of us are in - * unreachable now, and have NEXT_MASK_UNREACHABLE. All objects to the - * left of us in 'young' now have been scanned, and no objects here - * or to the right have been scanned yet. - */ - - while (gc != young) { - if (gc_get_refs(gc)) { - /* gc is definitely reachable from outside the - * original 'young'. Mark it as such, and traverse - * its pointers to find any other objects that may - * be directly reachable from it. Note that the - * call to tp_traverse may append objects to young, - * so we have to wait until it returns to determine - * the next object to visit. - */ - PyObject *op = FROM_GC(gc); - traverseproc traverse = Py_TYPE(op)->tp_traverse; - _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0, - "refcount is too small"); - // NOTE: visit_reachable may change gc->_gc_next when - // young->_gc_prev == gc. Don't do gc = GC_NEXT(gc) before! - (void) traverse(op, - visit_reachable, - (void *)young); - // relink gc_prev to prev element. - _PyGCHead_SET_PREV(gc, prev); - // gc is not COLLECTING state after here. - gc_clear_collecting(gc); - prev = gc; - } - else { - /* This *may* be unreachable. To make progress, - * assume it is. gc isn't directly reachable from - * any object we've already traversed, but may be - * reachable from an object we haven't gotten to yet. - * visit_reachable will eventually move gc back into - * young if that's so, and we'll see it again. - */ - // Move gc to unreachable. - // No need to gc->next->prev = prev because it is single linked. - prev->_gc_next = gc->_gc_next; - - // We can't use gc_list_append() here because we use - // NEXT_MASK_UNREACHABLE here. 
- PyGC_Head *last = GC_PREV(unreachable); - // NOTE: Since all objects in unreachable set has - // NEXT_MASK_UNREACHABLE flag, we set it unconditionally. - // But this may pollute the unreachable list head's 'next' pointer - // too. That's semantically senseless but expedient here - the - // damage is repaired when this function ends. - last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc); - _PyGCHead_SET_PREV(gc, last); - gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable); - unreachable->_gc_prev = (uintptr_t)gc; - } - gc = (PyGC_Head*)prev->_gc_next; - } - // young->_gc_prev must be last element remained in the list. - young->_gc_prev = (uintptr_t)prev; - // don't let the pollution of the list head's next pointer leak - unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE; -} - -static void -untrack_tuples(PyGC_Head *head) -{ - PyGC_Head *next, *gc = GC_NEXT(head); - while (gc != head) { - PyObject *op = FROM_GC(gc); - next = GC_NEXT(gc); - if (PyTuple_CheckExact(op)) { - _PyTuple_MaybeUntrack(op); - } - gc = next; - } -} - -/* Try to untrack all currently tracked dictionaries */ -static void -untrack_dicts(PyGC_Head *head) -{ - PyGC_Head *next, *gc = GC_NEXT(head); - while (gc != head) { - PyObject *op = FROM_GC(gc); - next = GC_NEXT(gc); - if (PyDict_CheckExact(op)) { - _PyDict_MaybeUntrack(op); - } - gc = next; - } -} - -/* Return true if object has a pre-PEP 442 finalization method. */ -static int -has_legacy_finalizer(PyObject *op) -{ - return Py_TYPE(op)->tp_del != NULL; -} - -/* Move the objects in unreachable with tp_del slots into `finalizers`. - * - * This function also removes NEXT_MASK_UNREACHABLE flag - * from _gc_next in unreachable. - */ -static void -move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) -{ - PyGC_Head *gc, *next; - assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0); - - /* March over unreachable. Move objects with finalizers into - * `finalizers`. 
- */ - for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { - PyObject *op = FROM_GC(gc); - - _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE); - gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; - next = (PyGC_Head*)gc->_gc_next; - - if (has_legacy_finalizer(op)) { - gc_clear_collecting(gc); - gc_list_move(gc, finalizers); - } - } -} - -static inline void -clear_unreachable_mask(PyGC_Head *unreachable) -{ - /* Check that the list head does not have the unreachable bit set */ - assert(((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0); - - PyGC_Head *gc, *next; - assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0); - for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { - _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE); - gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; - next = (PyGC_Head*)gc->_gc_next; - } - validate_list(unreachable, collecting_set_unreachable_clear); -} - -/* A traversal callback for move_legacy_finalizer_reachable. */ -static int -visit_move(PyObject *op, void *arg) -{ - PyGC_Head *tolist = arg; - OBJECT_STAT_INC(object_visits); - if (_PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (gc_is_collecting(gc)) { - gc_list_move(gc, tolist); - gc_clear_collecting(gc); - } - } - return 0; -} - -/* Move objects that are reachable from finalizers, from the unreachable set - * into finalizers set. - */ -static void -move_legacy_finalizer_reachable(PyGC_Head *finalizers) -{ - traverseproc traverse; - PyGC_Head *gc = GC_NEXT(finalizers); - for (; gc != finalizers; gc = GC_NEXT(gc)) { - /* Note that the finalizers list may grow during this. */ - traverse = Py_TYPE(FROM_GC(gc))->tp_traverse; - (void) traverse(FROM_GC(gc), - visit_move, - (void *)finalizers); - } -} - -/* Clear all weakrefs to unreachable objects, and if such a weakref has a - * callback, invoke it if necessary. 
Note that it's possible for such - * weakrefs to be outside the unreachable set -- indeed, those are precisely - * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for - * overview & some details. Some weakrefs with callbacks may be reclaimed - * directly by this routine; the number reclaimed is the return value. Other - * weakrefs with callbacks may be moved into the `old` generation. Objects - * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in - * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns, - * no object in `unreachable` is weakly referenced anymore. - */ -static int -handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) -{ - PyGC_Head *gc; - PyObject *op; /* generally FROM_GC(gc) */ - PyWeakReference *wr; /* generally a cast of op */ - PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */ - PyGC_Head *next; - int num_freed = 0; - - gc_list_init(&wrcb_to_call); - - /* Clear all weakrefs to the objects in unreachable. If such a weakref - * also has a callback, move it into `wrcb_to_call` if the callback - * needs to be invoked. Note that we cannot invoke any callbacks until - * all weakrefs to unreachable objects are cleared, lest the callback - * resurrect an unreachable object via a still-active weakref. We - * make another pass over wrcb_to_call, invoking callbacks, after this - * pass completes. - */ - for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { - PyWeakReference **wrlist; - - op = FROM_GC(gc); - next = GC_NEXT(gc); - - if (PyWeakref_Check(op)) { - /* A weakref inside the unreachable set must be cleared. If we - * allow its callback to execute inside delete_garbage(), it - * could expose objects that have tp_clear already called on - * them. Or, it could resurrect unreachable objects. One way - * this can happen is if some container objects do not implement - * tp_traverse. 
Then, wr_object can be outside the unreachable - * set but can be deallocated as a result of breaking the - * reference cycle. If we don't clear the weakref, the callback - * will run and potentially cause a crash. See bpo-38006 for - * one example. - */ - _PyWeakref_ClearRef((PyWeakReference *)op); - } - - if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) - continue; - - /* It supports weakrefs. Does it have any? - * - * This is never triggered for static types so we can avoid the - * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). - */ - wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); - - /* `op` may have some weakrefs. March over the list, clear - * all the weakrefs, and move the weakrefs with callbacks - * that must be called into wrcb_to_call. - */ - for (wr = *wrlist; wr != NULL; wr = *wrlist) { - PyGC_Head *wrasgc; /* AS_GC(wr) */ - - /* _PyWeakref_ClearRef clears the weakref but leaves - * the callback pointer intact. Obscure: it also - * changes *wrlist. - */ - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); - _PyWeakref_ClearRef(wr); - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); - if (wr->wr_callback == NULL) { - /* no callback */ - continue; - } - - /* Headache time. `op` is going away, and is weakly referenced by - * `wr`, which has a callback. Should the callback be invoked? If wr - * is also trash, no: - * - * 1. There's no need to call it. The object and the weakref are - * both going away, so it's legitimate to pretend the weakref is - * going away first. The user has to ensure a weakref outlives its - * referent if they want a guarantee that the wr callback will get - * invoked. - * - * 2. It may be catastrophic to call it. If the callback is also in - * cyclic trash (CT), then although the CT is unreachable from - * outside the current generation, CT may be reachable from the - * callback. Then the callback could resurrect insane objects. 
- * - * Since the callback is never needed and may be unsafe in this case, - * wr is simply left in the unreachable set. Note that because we - * already called _PyWeakref_ClearRef(wr), its callback will never - * trigger. - * - * OTOH, if wr isn't part of CT, we should invoke the callback: the - * weakref outlived the trash. Note that since wr isn't CT in this - * case, its callback can't be CT either -- wr acted as an external - * root to this generation, and therefore its callback did too. So - * nothing in CT is reachable from the callback either, so it's hard - * to imagine how calling it later could create a problem for us. wr - * is moved to wrcb_to_call in this case. - */ - if (gc_is_collecting(AS_GC((PyObject *)wr))) { - /* it should already have been cleared above */ - assert(wr->wr_object == Py_None); - continue; - } - - /* Create a new reference so that wr can't go away - * before we can process it again. - */ - Py_INCREF(wr); - - /* Move wr to wrcb_to_call, for the next pass. */ - wrasgc = AS_GC((PyObject *)wr); - assert(wrasgc != next); /* wrasgc is reachable, but - next isn't, so they can't - be the same */ - gc_list_move(wrasgc, &wrcb_to_call); - } - } - - /* Invoke the callbacks we decided to honor. It's safe to invoke them - * because they can't reference unreachable objects. - */ - while (! gc_list_is_empty(&wrcb_to_call)) { - PyObject *temp; - PyObject *callback; - - gc = (PyGC_Head*)wrcb_to_call._gc_next; - op = FROM_GC(gc); - _PyObject_ASSERT(op, PyWeakref_Check(op)); - wr = (PyWeakReference *)op; - callback = wr->wr_callback; - _PyObject_ASSERT(op, callback != NULL); - - /* copy-paste of weakrefobject.c's handle_callback() */ - temp = PyObject_CallOneArg(callback, (PyObject *)wr); - if (temp == NULL) - PyErr_WriteUnraisable(callback); - else - Py_DECREF(temp); - - /* Give up the reference we created in the first pass. When - * op's refcount hits 0 (which it may or may not do right now), - * op's tp_dealloc will decref op->wr_callback too. 
Note - * that the refcount probably will hit 0 now, and because this - * weakref was reachable to begin with, gc didn't already - * add it to its count of freed objects. Example: a reachable - * weak value dict maps some key to this reachable weakref. - * The callback removes this key->weakref mapping from the - * dict, leaving no other references to the weakref (excepting - * ours). - */ - Py_DECREF(op); - if (wrcb_to_call._gc_next == (uintptr_t)gc) { - /* object is still alive -- move it */ - gc_list_move(gc, old); - } - else { - ++num_freed; - } - } - - return num_freed; -} - -static void -debug_cycle(const char *msg, PyObject *op) -{ - PySys_FormatStderr("gc: %s <%s %p>\n", - msg, Py_TYPE(op)->tp_name, op); -} - -/* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable - * only from such cycles). - * If DEBUG_SAVEALL, all objects in finalizers are appended to the module - * garbage list (a Python list), else only the objects in finalizers with - * __del__ methods are appended to garbage. All objects in finalizers are - * merged into the old list regardless. - */ -static void -handle_legacy_finalizers(PyThreadState *tstate, - GCState *gcstate, - PyGC_Head *finalizers, PyGC_Head *old) -{ - assert(!_PyErr_Occurred(tstate)); - assert(gcstate->garbage != NULL); - - PyGC_Head *gc = GC_NEXT(finalizers); - for (; gc != finalizers; gc = GC_NEXT(gc)) { - PyObject *op = FROM_GC(gc); - - if ((gcstate->debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) { - if (PyList_Append(gcstate->garbage, op) < 0) { - _PyErr_Clear(tstate); - break; - } - } - } - - gc_list_merge(finalizers, old); -} - -/* Run first-time finalizers (if any) on all the objects in collectable. - * Note that this may remove some (or even all) of the objects from the - * list, due to refcounts falling to 0. 
- */ -static void -finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable) -{ - destructor finalize; - PyGC_Head seen; - - /* While we're going through the loop, `finalize(op)` may cause op, or - * other objects, to be reclaimed via refcounts falling to zero. So - * there's little we can rely on about the structure of the input - * `collectable` list across iterations. For safety, we always take the - * first object in that list and move it to a temporary `seen` list. - * If objects vanish from the `collectable` and `seen` lists we don't - * care. - */ - gc_list_init(&seen); - - while (!gc_list_is_empty(collectable)) { - PyGC_Head *gc = GC_NEXT(collectable); - PyObject *op = FROM_GC(gc); - gc_list_move(gc, &seen); - if (!_PyGCHead_FINALIZED(gc) && - (finalize = Py_TYPE(op)->tp_finalize) != NULL) { - _PyGCHead_SET_FINALIZED(gc); - Py_INCREF(op); - finalize(op); - assert(!_PyErr_Occurred(tstate)); - Py_DECREF(op); - } - } - gc_list_merge(&seen, collectable); -} - -/* Break reference cycles by clearing the containers involved. This is - * tricky business as the lists can be changing and we don't know which - * objects may be freed. It is possible I screwed something up here. 
- */ -static void -delete_garbage(PyThreadState *tstate, GCState *gcstate, - PyGC_Head *collectable, PyGC_Head *old) -{ - assert(!_PyErr_Occurred(tstate)); - - while (!gc_list_is_empty(collectable)) { - PyGC_Head *gc = GC_NEXT(collectable); - PyObject *op = FROM_GC(gc); - - _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0, - "refcount is too small"); - - if (gcstate->debug & DEBUG_SAVEALL) { - assert(gcstate->garbage != NULL); - if (PyList_Append(gcstate->garbage, op) < 0) { - _PyErr_Clear(tstate); - } - } - else { - inquiry clear; - if ((clear = Py_TYPE(op)->tp_clear) != NULL) { - Py_INCREF(op); - (void) clear(op); - if (_PyErr_Occurred(tstate)) { - PyErr_FormatUnraisable("Exception ignored in tp_clear of %s", - Py_TYPE(op)->tp_name); - } - Py_DECREF(op); - } - } - if (GC_NEXT(collectable) == gc) { - /* object is still alive, move it, it may die later */ - gc_clear_collecting(gc); - gc_list_move(gc, old); - } - } -} - -/* Clear all free lists - * All free lists are cleared during the collection of the highest generation. - * Allocated items in the free list may keep a pymalloc arena occupied. - * Clearing the free lists may give back memory to the OS earlier. 
- */ -static void -clear_freelists(PyInterpreterState *interp) -{ - _PyTuple_ClearFreeList(interp); - _PyFloat_ClearFreeList(interp); - _PyList_ClearFreeList(interp); - _PyDict_ClearFreeList(interp); - _PyAsyncGen_ClearFreeLists(interp); - _PyContext_ClearFreeList(interp); -} - -// Show stats for objects in each generations -static void -show_stats_each_generations(GCState *gcstate) -{ - char buf[100]; - size_t pos = 0; - - for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) { - pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos, - " %zd", - gc_list_size(GEN_HEAD(gcstate, i))); - } - - PySys_FormatStderr( - "gc: objects in each generation:%s\n" - "gc: objects in permanent generation: %zd\n", - buf, gc_list_size(&gcstate->permanent_generation.head)); -} - -/* Deduce which objects among "base" are unreachable from outside the list - and move them to 'unreachable'. The process consist in the following steps: - -1. Copy all reference counts to a different field (gc_prev is used to hold - this copy to save memory). -2. Traverse all objects in "base" and visit all referred objects using - "tp_traverse" and for every visited object, subtract 1 to the reference - count (the one that we copied in the previous step). After this step, all - objects that can be reached directly from outside must have strictly positive - reference count, while all unreachable objects must have a count of exactly 0. -3. Identify all unreachable objects (the ones with 0 reference count) and move - them to the "unreachable" list. This step also needs to move back to "base" all - objects that were initially marked as unreachable but are referred transitively - by the reachable objects (the ones with strictly positive reference count). - -Contracts: - - * The "base" has to be a valid list with no mask set. - - * The "unreachable" list must be uninitialized (this function calls - gc_list_init over 'unreachable'). 
- -IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE -flag set but it does not clear it to skip unnecessary iteration. Before the -flag is cleared (for example, by using 'clear_unreachable_mask' function or -by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal -list and we can not use most gc_list_* functions for it. */ -static inline void -deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { - validate_list(base, collecting_clear_unreachable_clear); - /* Using ob_refcnt and gc_refs, calculate which objects in the - * container set are reachable from outside the set (i.e., have a - * refcount greater than 0 when all the references within the - * set are taken into account). - */ - update_refs(base); // gc_prev is used for gc_refs - subtract_refs(base); - - /* Leave everything reachable from outside base in base, and move - * everything else (in base) to unreachable. - * - * NOTE: This used to move the reachable objects into a reachable - * set instead. But most things usually turn out to be reachable, - * so it's more efficient to move the unreachable things. It "sounds slick" - * to move the unreachable objects, until you think about it - the reason it - * pays isn't actually obvious. - * - * Suppose we create objects A, B, C in that order. They appear in the young - * generation in the same order. If B points to A, and C to B, and C is - * reachable from outside, then the adjusted refcounts will be 0, 0, and 1 - * respectively. - * - * When move_unreachable finds A, A is moved to the unreachable list. The - * same for B when it's first encountered. Then C is traversed, B is moved - * _back_ to the reachable list. B is eventually traversed, and then A is - * moved back to the reachable list. - * - * So instead of not moving at all, the reachable objects B and A are moved - * twice each. Why is this a win? 
A straightforward algorithm to move the - * reachable objects instead would move A, B, and C once each. - * - * The key is that this dance leaves the objects in order C, B, A - it's - * reversed from the original order. On all _subsequent_ scans, none of - * them will move. Since most objects aren't in cycles, this can save an - * unbounded number of moves across an unbounded number of later collections. - * It can cost more only the first time the chain is scanned. - * - * Drawback: move_unreachable is also used to find out what's still trash - * after finalizers may resurrect objects. In _that_ case most unreachable - * objects will remain unreachable, so it would be more efficient to move - * the reachable objects instead. But this is a one-time cost, probably not - * worth complicating the code to speed just a little. - */ - gc_list_init(unreachable); - move_unreachable(base, unreachable); // gc_prev is pointer again - validate_list(base, collecting_clear_unreachable_clear); - validate_list(unreachable, collecting_set_unreachable_set); -} - -/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving - them to 'old_generation' and placing the rest on 'still_unreachable'. - - Contracts: - * After this function 'unreachable' must not be used anymore and 'still_unreachable' - will contain the objects that did not resurrect. - - * The "still_unreachable" list must be uninitialized (this function calls - gc_list_init over 'still_unreachable'). - -IMPORTANT: After a call to this function, the 'still_unreachable' set will have the -PREV_MARK_COLLECTING set, but the objects in this set are going to be removed so -we can skip the expense of clearing the flag to avoid extra iteration. 
*/ -static inline void -handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, - PyGC_Head *old_generation) -{ - // Remove the PREV_MASK_COLLECTING from unreachable - // to prepare it for a new call to 'deduce_unreachable' - gc_list_clear_collecting(unreachable); - - // After the call to deduce_unreachable, the 'still_unreachable' set will - // have the PREV_MARK_COLLECTING set, but the objects are going to be - // removed so we can skip the expense of clearing the flag. - PyGC_Head* resurrected = unreachable; - deduce_unreachable(resurrected, still_unreachable); - clear_unreachable_mask(still_unreachable); - - // Move the resurrected objects to the old generation for future collection. - gc_list_merge(resurrected, old_generation); -} - - -/* Invoke progress callbacks to notify clients that garbage collection - * is starting or stopping + * Python interface to the garbage collector. + * + * See Python/gc.c for the implementation of the garbage collector. */ -static void -invoke_gc_callback(PyThreadState *tstate, const char *phase, - int generation, Py_ssize_t collected, - Py_ssize_t uncollectable) -{ - assert(!_PyErr_Occurred(tstate)); - - /* we may get called very early */ - GCState *gcstate = &tstate->interp->gc; - if (gcstate->callbacks == NULL) { - return; - } - - /* The local variable cannot be rebound, check it for sanity */ - assert(PyList_CheckExact(gcstate->callbacks)); - PyObject *info = NULL; - if (PyList_GET_SIZE(gcstate->callbacks) != 0) { - info = Py_BuildValue("{sisnsn}", - "generation", generation, - "collected", collected, - "uncollectable", uncollectable); - if (info == NULL) { - PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); - return; - } - } - - PyObject *phase_obj = PyUnicode_FromString(phase); - if (phase_obj == NULL) { - Py_XDECREF(info); - PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); - return; - } - - PyObject *stack[] = {phase_obj, info}; - for (Py_ssize_t i=0; 
icallbacks); i++) { - PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i); - Py_INCREF(cb); /* make sure cb doesn't go away */ - r = PyObject_Vectorcall(cb, stack, 2, NULL); - if (r == NULL) { - PyErr_WriteUnraisable(cb); - } - else { - Py_DECREF(r); - } - Py_DECREF(cb); - } - Py_DECREF(phase_obj); - Py_XDECREF(info); - assert(!_PyErr_Occurred(tstate)); -} - -/* Find the oldest generation (highest numbered) where the count - * exceeds the threshold. Objects in the that generation and - * generations younger than it will be collected. */ -static int -gc_select_generation(GCState *gcstate) -{ - for (int i = NUM_GENERATIONS-1; i >= 0; i--) { - if (gcstate->generations[i].count > gcstate->generations[i].threshold) { - /* Avoid quadratic performance degradation in number - of tracked objects (see also issue #4074): - - To limit the cost of garbage collection, there are two strategies; - - make each collection faster, e.g. by scanning fewer objects - - do less collections - This heuristic is about the latter strategy. - - In addition to the various configurable thresholds, we only trigger a - full collection if the ratio - - long_lived_pending / long_lived_total - - is above a given value (hardwired to 25%). - - The reason is that, while "non-full" collections (i.e., collections of - the young and middle generations) will always examine roughly the same - number of objects -- determined by the aforementioned thresholds --, - the cost of a full collection is proportional to the total number of - long-lived objects, which is virtually unbounded. - - Indeed, it has been remarked that doing a full collection every - of object creations entails a dramatic performance - degradation in workloads which consist in creating and storing lots of - long-lived objects (e.g. building a large list of GC-tracked objects would - show quadratic performance, instead of linear as expected: see issue #4074). 
- - Using the above ratio, instead, yields amortized linear performance in - the total number of objects (the effect of which can be summarized - thusly: "each full garbage collection is more and more costly as the - number of objects grows, but we do fewer and fewer of them"). - - This heuristic was suggested by Martin von Löwis on python-dev in - June 2008. His original analysis and proposal can be found at: - http://mail.python.org/pipermail/python-dev/2008-June/080579.html - */ - if (i == NUM_GENERATIONS - 1 - && gcstate->long_lived_pending < gcstate->long_lived_total / 4) - continue; - return i; - } - } - return -1; -} +#include "Python.h" +#include "pycore_gc.h" +#include "pycore_object.h" // _PyObject_IS_GC() +#include "pycore_pystate.h" // _PyInterpreterState_GET() +typedef struct _gc_runtime_state GCState; -/* This is the main function. Read this to understand how the - * collection process works. */ -static Py_ssize_t -gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) +static GCState * +get_gc_state(void) { - int i; - Py_ssize_t m = 0; /* # objects collected */ - Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */ - PyGC_Head *young; /* the generation we are examining */ - PyGC_Head *old; /* next older generation */ - PyGC_Head unreachable; /* non-problematic unreachable trash */ - PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ - PyGC_Head *gc; - _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */ - GCState *gcstate = &tstate->interp->gc; - - // gc_collect_main() must not be called before _PyGC_Init - // or after _PyGC_Fini() - assert(gcstate->garbage != NULL); - assert(!_PyErr_Occurred(tstate)); - - int expected = 0; - if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) { - // Don't start a garbage collection if one is already in progress. - return 0; - } - - if (generation == GENERATION_AUTO) { - // Select the oldest generation that needs collecting. 
We will collect - // objects from that generation and all generations younger than it. - generation = gc_select_generation(gcstate); - if (generation < 0) { - // No generation needs to be collected. - _Py_atomic_store_int(&gcstate->collecting, 0); - return 0; - } - } - - assert(generation >= 0 && generation < NUM_GENERATIONS); - -#ifdef Py_STATS - if (_Py_stats) { - _Py_stats->object_stats.object_visits = 0; - } -#endif - GC_STAT_ADD(generation, collections, 1); - - if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(tstate, "start", generation, 0, 0); - } - - if (gcstate->debug & DEBUG_STATS) { - PySys_WriteStderr("gc: collecting generation %d...\n", generation); - show_stats_each_generations(gcstate); - t1 = _PyTime_GetPerfCounter(); - } - - if (PyDTrace_GC_START_ENABLED()) - PyDTrace_GC_START(generation); - - /* update collection and allocation counters */ - if (generation+1 < NUM_GENERATIONS) - gcstate->generations[generation+1].count += 1; - for (i = 0; i <= generation; i++) - gcstate->generations[i].count = 0; - - /* merge younger generations with one we are currently collecting */ - for (i = 0; i < generation; i++) { - gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation)); - } - - /* handy references */ - young = GEN_HEAD(gcstate, generation); - if (generation < NUM_GENERATIONS-1) - old = GEN_HEAD(gcstate, generation+1); - else - old = young; - validate_list(old, collecting_clear_unreachable_clear); - - deduce_unreachable(young, &unreachable); - - untrack_tuples(young); - /* Move reachable objects to next generation. */ - if (young != old) { - if (generation == NUM_GENERATIONS - 2) { - gcstate->long_lived_pending += gc_list_size(young); - } - gc_list_merge(young, old); - } - else { - /* We only un-track dicts in full collections, to avoid quadratic - dict build-up. See issue #14775. 
*/ - untrack_dicts(young); - gcstate->long_lived_pending = 0; - gcstate->long_lived_total = gc_list_size(young); - } - - /* All objects in unreachable are trash, but objects reachable from - * legacy finalizers (e.g. tp_del) can't safely be deleted. - */ - gc_list_init(&finalizers); - // NEXT_MASK_UNREACHABLE is cleared here. - // After move_legacy_finalizers(), unreachable is normal list. - move_legacy_finalizers(&unreachable, &finalizers); - /* finalizers contains the unreachable objects with a legacy finalizer; - * unreachable objects reachable *from* those are also uncollectable, - * and we move those into the finalizers list too. - */ - move_legacy_finalizer_reachable(&finalizers); - - validate_list(&finalizers, collecting_clear_unreachable_clear); - validate_list(&unreachable, collecting_set_unreachable_clear); - - /* Print debugging information. */ - if (gcstate->debug & DEBUG_COLLECTABLE) { - for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) { - debug_cycle("collectable", FROM_GC(gc)); - } - } - - /* Clear weakrefs and invoke callbacks as necessary. */ - m += handle_weakrefs(&unreachable, old); - - validate_list(old, collecting_clear_unreachable_clear); - validate_list(&unreachable, collecting_set_unreachable_clear); - - /* Call tp_finalize on objects which have one. */ - finalize_garbage(tstate, &unreachable); - - /* Handle any objects that may have resurrected after the call - * to 'finalize_garbage' and continue the collection with the - * objects that are still unreachable */ - PyGC_Head final_unreachable; - handle_resurrected_objects(&unreachable, &final_unreachable, old); - - /* Call tp_clear on objects in the final_unreachable set. This will cause - * the reference cycles to be broken. It may also cause some objects - * in finalizers to be freed. 
- */ - m += gc_list_size(&final_unreachable); - delete_garbage(tstate, gcstate, &final_unreachable, old); - - /* Collect statistics on uncollectable objects found and print - * debugging information. */ - for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) { - n++; - if (gcstate->debug & DEBUG_UNCOLLECTABLE) - debug_cycle("uncollectable", FROM_GC(gc)); - } - if (gcstate->debug & DEBUG_STATS) { - double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1); - PySys_WriteStderr( - "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n", - n+m, n, d); - } - - /* Append instances in the uncollectable set to a Python - * reachable list of garbage. The programmer has to deal with - * this if they insist on creating this type of structure. - */ - handle_legacy_finalizers(tstate, gcstate, &finalizers, old); - validate_list(old, collecting_clear_unreachable_clear); - - /* Clear free list only during the collection of the highest - * generation */ - if (generation == NUM_GENERATIONS-1) { - clear_freelists(tstate->interp); - } - - if (_PyErr_Occurred(tstate)) { - if (reason == _Py_GC_REASON_SHUTDOWN) { - _PyErr_Clear(tstate); - } - else { - PyErr_FormatUnraisable("Exception ignored in garbage collection"); - } - } - - /* Update stats */ - struct gc_generation_stats *stats = &gcstate->generation_stats[generation]; - stats->collections++; - stats->collected += m; - stats->uncollectable += n; - - GC_STAT_ADD(generation, objects_collected, m); -#ifdef Py_STATS - if (_Py_stats) { - GC_STAT_ADD(generation, object_visits, - _Py_stats->object_stats.object_visits); - _Py_stats->object_stats.object_visits = 0; - } -#endif - - if (PyDTrace_GC_DONE_ENABLED()) { - PyDTrace_GC_DONE(n + m); - } - - if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(tstate, "stop", generation, m, n); - } - - assert(!_PyErr_Occurred(tstate)); - _Py_atomic_store_int(&gcstate->collecting, 0); - return n + m; + PyInterpreterState *interp = _PyInterpreterState_GET(); + 
return &interp->gc; } +/*[clinic input] +module gc +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5c9690ecc842d79]*/ #include "clinic/gcmodule.c.h" /*[clinic input] @@ -1593,7 +90,7 @@ gc_collect_impl(PyObject *module, int generation) return -1; } - return gc_collect_main(tstate, generation, _Py_GC_REASON_MANUAL); + return _PyGC_Collect(tstate, generation, _Py_GC_REASON_MANUAL); } /*[clinic input] @@ -1693,36 +190,6 @@ gc_get_count_impl(PyObject *module) gcstate->generations[2].count); } -static int -referrersvisit(PyObject* obj, void *arg) -{ - PyObject *objs = arg; - Py_ssize_t i; - for (i = 0; i < PyTuple_GET_SIZE(objs); i++) - if (PyTuple_GET_ITEM(objs, i) == obj) - return 1; - return 0; -} - -static int -gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist) -{ - PyGC_Head *gc; - PyObject *obj; - traverseproc traverse; - for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) { - obj = FROM_GC(gc); - traverse = Py_TYPE(obj)->tp_traverse; - if (obj == objs || obj == resultlist) - continue; - if (traverse(obj, referrersvisit, objs)) { - if (PyList_Append(resultlist, obj) < 0) - return 0; /* error */ - } - } - return 1; /* no error */ -} - PyDoc_STRVAR(gc_get_referrers__doc__, "get_referrers(*objs) -> list\n\ Return the list of objects that directly refer to any of objs."); @@ -1734,19 +201,8 @@ gc_get_referrers(PyObject *self, PyObject *args) return NULL; } - PyObject *result = PyList_New(0); - if (!result) { - return NULL; - } - - GCState *gcstate = get_gc_state(); - for (int i = 0; i < NUM_GENERATIONS; i++) { - if (!(gc_referrers_for(args, GEN_HEAD(gcstate, i), result))) { - Py_DECREF(result); - return NULL; - } - } - return result; + PyInterpreterState *interp = _PyInterpreterState_GET(); + return _PyGC_GetReferrers(interp, args); } /* Append obj to list; return true if error (out of memory), false if OK. 
*/ @@ -1805,54 +261,25 @@ static PyObject * gc_get_objects_impl(PyObject *module, Py_ssize_t generation) /*[clinic end generated code: output=48b35fea4ba6cb0e input=ef7da9df9806754c]*/ { - PyThreadState *tstate = _PyThreadState_GET(); - int i; - PyObject* result; - GCState *gcstate = &tstate->interp->gc; - if (PySys_Audit("gc.get_objects", "n", generation) < 0) { return NULL; } - result = PyList_New(0); - if (result == NULL) { - return NULL; - } - - /* If generation is passed, we extract only that generation */ - if (generation != -1) { - if (generation >= NUM_GENERATIONS) { - _PyErr_Format(tstate, PyExc_ValueError, - "generation parameter must be less than the number of " - "available generations (%i)", - NUM_GENERATIONS); - goto error; - } - - if (generation < 0) { - _PyErr_SetString(tstate, PyExc_ValueError, - "generation parameter cannot be negative"); - goto error; - } - - if (append_objects(result, GEN_HEAD(gcstate, generation))) { - goto error; - } - - return result; + if (generation >= NUM_GENERATIONS) { + return PyErr_Format(PyExc_ValueError, + "generation parameter must be less than the number of " + "available generations (%i)", + NUM_GENERATIONS); } - /* If generation is not passed or None, get all objects from all generations */ - for (i = 0; i < NUM_GENERATIONS; i++) { - if (append_objects(result, GEN_HEAD(gcstate, i))) { - goto error; - } + if (generation < -1) { + PyErr_SetString(PyExc_ValueError, + "generation parameter cannot be negative"); + return NULL; } - return result; -error: - Py_DECREF(result); - return NULL; + PyInterpreterState *interp = _PyInterpreterState_GET(); + return _PyGC_GetObjects(interp, generation); } /*[clinic input] @@ -1960,11 +387,8 @@ static PyObject * gc_freeze_impl(PyObject *module) /*[clinic end generated code: output=502159d9cdc4c139 input=b602b16ac5febbe5]*/ { - GCState *gcstate = get_gc_state(); - for (int i = 0; i < NUM_GENERATIONS; ++i) { - gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head); 
- gcstate->generations[i].count = 0; - } + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyGC_Freeze(interp); Py_RETURN_NONE; } @@ -1980,9 +404,8 @@ static PyObject * gc_unfreeze_impl(PyObject *module) /*[clinic end generated code: output=1c15f2043b25e169 input=2dd52b170f4cef6c]*/ { - GCState *gcstate = get_gc_state(); - gc_list_merge(&gcstate->permanent_generation.head, - GEN_HEAD(gcstate, NUM_GENERATIONS-1)); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyGC_Unfreeze(interp); Py_RETURN_NONE; } @@ -1996,8 +419,8 @@ static Py_ssize_t gc_get_freeze_count_impl(PyObject *module) /*[clinic end generated code: output=61cbd9f43aa032e1 input=45ffbc65cfe2a6ed]*/ { - GCState *gcstate = get_gc_state(); - return gc_list_size(&gcstate->permanent_generation.head); + PyInterpreterState *interp = _PyInterpreterState_GET(); + return _PyGC_GetFreezeCount(interp); } @@ -2063,7 +486,7 @@ gcmodule_exec(PyObject *module) return -1; } -#define ADD_INT(NAME) if (PyModule_AddIntConstant(module, #NAME, NAME) < 0) { return -1; } +#define ADD_INT(NAME) if (PyModule_AddIntConstant(module, #NAME, _PyGC_ ## NAME) < 0) { return -1; } ADD_INT(DEBUG_STATS); ADD_INT(DEBUG_COLLECTABLE); ADD_INT(DEBUG_UNCOLLECTABLE); @@ -2093,353 +516,3 @@ PyInit_gc(void) { return PyModuleDef_Init(&gcmodule); } - -/* C API for controlling the state of the garbage collector */ -int -PyGC_Enable(void) -{ - GCState *gcstate = get_gc_state(); - int old_state = gcstate->enabled; - gcstate->enabled = 1; - return old_state; -} - -int -PyGC_Disable(void) -{ - GCState *gcstate = get_gc_state(); - int old_state = gcstate->enabled; - gcstate->enabled = 0; - return old_state; -} - -int -PyGC_IsEnabled(void) -{ - GCState *gcstate = get_gc_state(); - return gcstate->enabled; -} - -/* Public API to invoke gc.collect() from C */ -Py_ssize_t -PyGC_Collect(void) -{ - PyThreadState *tstate = _PyThreadState_GET(); - GCState *gcstate = &tstate->interp->gc; - - if (!gcstate->enabled) { - return 0; - } - - 
Py_ssize_t n; - PyObject *exc = _PyErr_GetRaisedException(tstate); - n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL); - _PyErr_SetRaisedException(tstate, exc); - - return n; -} - -Py_ssize_t -_PyGC_CollectNoFail(PyThreadState *tstate) -{ - /* Ideally, this function is only called on interpreter shutdown, - and therefore not recursively. Unfortunately, when there are daemon - threads, a daemon thread can start a cyclic garbage collection - during interpreter shutdown (and then never finish it). - See http://bugs.python.org/issue8713#msg195178 for an example. - */ - return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN); -} - -void -_PyGC_DumpShutdownStats(PyInterpreterState *interp) -{ - GCState *gcstate = &interp->gc; - if (!(gcstate->debug & DEBUG_SAVEALL) - && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) { - const char *message; - if (gcstate->debug & DEBUG_UNCOLLECTABLE) - message = "gc: %zd uncollectable objects at " \ - "shutdown"; - else - message = "gc: %zd uncollectable objects at " \ - "shutdown; use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them"; - /* PyErr_WarnFormat does too many things and we are at shutdown, - the warnings module's dependencies (e.g. linecache) may be gone - already. 
*/ - if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0, - "gc", NULL, message, - PyList_GET_SIZE(gcstate->garbage))) - PyErr_WriteUnraisable(NULL); - if (gcstate->debug & DEBUG_UNCOLLECTABLE) { - PyObject *repr = NULL, *bytes = NULL; - repr = PyObject_Repr(gcstate->garbage); - if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) - PyErr_WriteUnraisable(gcstate->garbage); - else { - PySys_WriteStderr( - " %s\n", - PyBytes_AS_STRING(bytes) - ); - } - Py_XDECREF(repr); - Py_XDECREF(bytes); - } - } -} - - -void -_PyGC_Fini(PyInterpreterState *interp) -{ - GCState *gcstate = &interp->gc; - Py_CLEAR(gcstate->garbage); - Py_CLEAR(gcstate->callbacks); - - /* We expect that none of this interpreters objects are shared - with other interpreters. - See https://github.com/python/cpython/issues/90228. */ -} - -/* for debugging */ -void -_PyGC_Dump(PyGC_Head *g) -{ - _PyObject_Dump(FROM_GC(g)); -} - - -#ifdef Py_DEBUG -static int -visit_validate(PyObject *op, void *parent_raw) -{ - PyObject *parent = _PyObject_CAST(parent_raw); - if (_PyObject_IsFreed(op)) { - _PyObject_ASSERT_FAILED_MSG(parent, - "PyObject_GC_Track() object is not valid"); - } - return 0; -} -#endif - - -/* extension modules might be compiled with GC support so these - functions must always be available */ - -void -PyObject_GC_Track(void *op_raw) -{ - PyObject *op = _PyObject_CAST(op_raw); - if (_PyObject_GC_IS_TRACKED(op)) { - _PyObject_ASSERT_FAILED_MSG(op, - "object already tracked " - "by the garbage collector"); - } - _PyObject_GC_TRACK(op); - -#ifdef Py_DEBUG - /* Check that the object is valid: validate objects traversed - by tp_traverse() */ - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void)traverse(op, visit_validate, op); -#endif -} - -void -PyObject_GC_UnTrack(void *op_raw) -{ - PyObject *op = _PyObject_CAST(op_raw); - /* Obscure: the Py_TRASHCAN mechanism requires that we be able to - * call PyObject_GC_UnTrack twice on an object. 
- */ - if (_PyObject_GC_IS_TRACKED(op)) { - _PyObject_GC_UNTRACK(op); - } -} - -int -PyObject_IS_GC(PyObject *obj) -{ - return _PyObject_IS_GC(obj); -} - -void -_Py_ScheduleGC(PyInterpreterState *interp) -{ - _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1); -} - -void -_PyObject_GC_Link(PyObject *op) -{ - PyGC_Head *g = AS_GC(op); - assert(((uintptr_t)g & (sizeof(uintptr_t)-1)) == 0); // g must be correctly aligned - - PyThreadState *tstate = _PyThreadState_GET(); - GCState *gcstate = &tstate->interp->gc; - g->_gc_next = 0; - g->_gc_prev = 0; - gcstate->generations[0].count++; /* number of allocated GC objects */ - if (gcstate->generations[0].count > gcstate->generations[0].threshold && - gcstate->enabled && - gcstate->generations[0].threshold && - !_Py_atomic_load_int_relaxed(&gcstate->collecting) && - !_PyErr_Occurred(tstate)) - { - _Py_ScheduleGC(tstate->interp); - } -} - -void -_Py_RunGC(PyThreadState *tstate) -{ - gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP); -} - -static PyObject * -gc_alloc(size_t basicsize, size_t presize) -{ - PyThreadState *tstate = _PyThreadState_GET(); - if (basicsize > PY_SSIZE_T_MAX - presize) { - return _PyErr_NoMemory(tstate); - } - size_t size = presize + basicsize; - char *mem = PyObject_Malloc(size); - if (mem == NULL) { - return _PyErr_NoMemory(tstate); - } - ((PyObject **)mem)[0] = NULL; - ((PyObject **)mem)[1] = NULL; - PyObject *op = (PyObject *)(mem + presize); - _PyObject_GC_Link(op); - return op; -} - -PyObject * -_PyObject_GC_New(PyTypeObject *tp) -{ - size_t presize = _PyType_PreHeaderSize(tp); - PyObject *op = gc_alloc(_PyObject_SIZE(tp), presize); - if (op == NULL) { - return NULL; - } - _PyObject_Init(op, tp); - return op; -} - -PyVarObject * -_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems) -{ - PyVarObject *op; - - if (nitems < 0) { - PyErr_BadInternalCall(); - return NULL; - } - size_t presize = _PyType_PreHeaderSize(tp); - size_t size = _PyObject_VAR_SIZE(tp, nitems); - op = 
(PyVarObject *)gc_alloc(size, presize); - if (op == NULL) { - return NULL; - } - _PyObject_InitVar(op, tp, nitems); - return op; -} - -PyObject * -PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size) -{ - size_t presize = _PyType_PreHeaderSize(tp); - PyObject *op = gc_alloc(_PyObject_SIZE(tp) + extra_size, presize); - if (op == NULL) { - return NULL; - } - memset(op, 0, _PyObject_SIZE(tp) + extra_size); - _PyObject_Init(op, tp); - return op; -} - -PyVarObject * -_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems) -{ - const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems); - const size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type); - _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op)); - if (basicsize > (size_t)PY_SSIZE_T_MAX - presize) { - return (PyVarObject *)PyErr_NoMemory(); - } - char *mem = (char *)op - presize; - mem = (char *)PyObject_Realloc(mem, presize + basicsize); - if (mem == NULL) { - return (PyVarObject *)PyErr_NoMemory(); - } - op = (PyVarObject *) (mem + presize); - Py_SET_SIZE(op, nitems); - return op; -} - -void -PyObject_GC_Del(void *op) -{ - size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type); - PyGC_Head *g = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op)) { - gc_list_remove(g); -#ifdef Py_DEBUG - PyObject *exc = PyErr_GetRaisedException(); - if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0, - "gc", NULL, "Object of type %s is not untracked before destruction", - ((PyObject*)op)->ob_type->tp_name)) { - PyErr_WriteUnraisable(NULL); - } - PyErr_SetRaisedException(exc); -#endif - } - GCState *gcstate = get_gc_state(); - if (gcstate->generations[0].count > 0) { - gcstate->generations[0].count--; - } - PyObject_Free(((char *)op)-presize); -} - -int -PyObject_GC_IsTracked(PyObject* obj) -{ - if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) { - return 1; - } - return 0; -} - -int -PyObject_GC_IsFinalized(PyObject *obj) -{ - if (_PyObject_IS_GC(obj) && 
_PyGC_FINALIZED(obj)) { - return 1; - } - return 0; -} - -void -PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) -{ - size_t i; - GCState *gcstate = get_gc_state(); - int origenstate = gcstate->enabled; - gcstate->enabled = 0; - for (i = 0; i < NUM_GENERATIONS; i++) { - PyGC_Head *gc_list, *gc; - gc_list = GEN_HEAD(gcstate, i); - for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) { - PyObject *op = FROM_GC(gc); - Py_INCREF(op); - int res = callback(op, arg); - Py_DECREF(op); - if (!res) { - goto done; - } - } - } -done: - gcstate->enabled = origenstate; -} diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 292bfa76519507..f16a763772e42e 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -207,6 +207,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 1c5a6d623f4dad..7f03cfea1b3e6f 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -166,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index be5b34220aa0bc..163adfdc51c6a8 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -566,6 +566,7 @@ $(GeneratedFrozenModulesDir)Python;%(AdditionalIncludeDirectories) + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index a96ca24cf08b66..a45a0881f7113d 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -1280,6 +1280,9 @@ Python + + Python + Python diff --git a/Python/gc.c b/Python/gc.c new file mode 100644 index 00000000000000..f47c74f87a9166 --- /dev/null +++ b/Python/gc.c @@ -0,0 +1,1958 @@ +// This implements the reference cycle garbage collector. +// The Python module inteface to the collector is in gcmodule.c. 
+// See https://devguide.python.org/internals/garbage-collector/ + +#include "Python.h" +#include "pycore_ceval.h" // _Py_set_eval_breaker_bit() +#include "pycore_context.h" +#include "pycore_dict.h" // _PyDict_MaybeUntrack() +#include "pycore_initconfig.h" +#include "pycore_interp.h" // PyInterpreterState.gc +#include "pycore_object.h" +#include "pycore_pyerrors.h" +#include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_weakref.h" // _PyWeakref_ClearRef() +#include "pydtrace.h" + +typedef struct _gc_runtime_state GCState; + +#ifdef Py_DEBUG +# define GC_DEBUG +#endif + +#define GC_NEXT _PyGCHead_NEXT +#define GC_PREV _PyGCHead_PREV + +// update_refs() set this bit for all objects in current generation. +// subtract_refs() and move_unreachable() uses this to distinguish +// visited object is in GCing or not. +// +// move_unreachable() removes this flag from reachable objects. +// Only unreachable objects have this flag. +// +// No objects in interpreter have this flag after GC ends. +#define PREV_MASK_COLLECTING _PyGC_PREV_MASK_COLLECTING + +// Lowest bit of _gc_next is used for UNREACHABLE flag. +// +// This flag represents the object is in unreachable list in move_unreachable() +// +// Although this flag is used only in move_unreachable(), move_unreachable() +// doesn't clear this flag to skip unnecessary iteration. +// move_legacy_finalizers() removes this flag instead. +// Between them, unreachable list is not normal list and we can not use +// most gc_list_* functions for it. +#define NEXT_MASK_UNREACHABLE (1) + +#define AS_GC(op) _Py_AS_GC(op) +#define FROM_GC(gc) _Py_FROM_GC(gc) + +// Automatically choose the generation that needs collecting. 
+#define GENERATION_AUTO (-1) + +static inline int +gc_is_collecting(PyGC_Head *g) +{ + return (g->_gc_prev & PREV_MASK_COLLECTING) != 0; +} + +static inline void +gc_clear_collecting(PyGC_Head *g) +{ + g->_gc_prev &= ~PREV_MASK_COLLECTING; +} + +static inline Py_ssize_t +gc_get_refs(PyGC_Head *g) +{ + return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT); +} + +static inline void +gc_set_refs(PyGC_Head *g, Py_ssize_t refs) +{ + g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK) + | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT); +} + +static inline void +gc_reset_refs(PyGC_Head *g, Py_ssize_t refs) +{ + g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED) + | PREV_MASK_COLLECTING + | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT); +} + +static inline void +gc_decref(PyGC_Head *g) +{ + _PyObject_ASSERT_WITH_MSG(FROM_GC(g), + gc_get_refs(g) > 0, + "refcount is too small"); + g->_gc_prev -= 1 << _PyGC_PREV_SHIFT; +} + + +#define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head) + + +static GCState * +get_gc_state(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + return &interp->gc; +} + + +void +_PyGC_InitState(GCState *gcstate) +{ +#define INIT_HEAD(GEN) \ + do { \ + GEN.head._gc_next = (uintptr_t)&GEN.head; \ + GEN.head._gc_prev = (uintptr_t)&GEN.head; \ + } while (0) + + for (int i = 0; i < NUM_GENERATIONS; i++) { + assert(gcstate->generations[i].count == 0); + INIT_HEAD(gcstate->generations[i]); + }; + gcstate->generation0 = GEN_HEAD(gcstate, 0); + INIT_HEAD(gcstate->permanent_generation); + +#undef INIT_HEAD +} + + +PyStatus +_PyGC_Init(PyInterpreterState *interp) +{ + GCState *gcstate = &interp->gc; + + gcstate->garbage = PyList_New(0); + if (gcstate->garbage == NULL) { + return _PyStatus_NO_MEMORY(); + } + + gcstate->callbacks = PyList_New(0); + if (gcstate->callbacks == NULL) { + return _PyStatus_NO_MEMORY(); + } + + return _PyStatus_OK(); +} + + +/* +_gc_prev values +--------------- + +Between collections, _gc_prev is used for doubly linked list. 
+ +Lowest two bits of _gc_prev are used for flags. +PREV_MASK_COLLECTING is used only while collecting and cleared before GC ends +or _PyObject_GC_UNTRACK() is called. + +During a collection, _gc_prev is temporary used for gc_refs, and the gc list +is singly linked until _gc_prev is restored. + +gc_refs + At the start of a collection, update_refs() copies the true refcount + to gc_refs, for each object in the generation being collected. + subtract_refs() then adjusts gc_refs so that it equals the number of + times an object is referenced directly from outside the generation + being collected. + +PREV_MASK_COLLECTING + Objects in generation being collected are marked PREV_MASK_COLLECTING in + update_refs(). + + +_gc_next values +--------------- + +_gc_next takes these values: + +0 + The object is not tracked + +!= 0 + Pointer to the next object in the GC list. + Additionally, lowest bit is used temporary for + NEXT_MASK_UNREACHABLE flag described below. + +NEXT_MASK_UNREACHABLE + move_unreachable() then moves objects not reachable (whether directly or + indirectly) from outside the generation into an "unreachable" set and + set this flag. + + Objects that are found to be reachable have gc_refs set to 1. + When this flag is set for the reachable object, the object must be in + "unreachable" set. + The flag is unset and the object is moved back to "reachable" set. + + move_legacy_finalizers() will remove this flag from "unreachable" set. +*/ + +/*** list functions ***/ + +static inline void +gc_list_init(PyGC_Head *list) +{ + // List header must not have flags. + // We can assign pointer by simple cast. + list->_gc_prev = (uintptr_t)list; + list->_gc_next = (uintptr_t)list; +} + +static inline int +gc_list_is_empty(PyGC_Head *list) +{ + return (list->_gc_next == (uintptr_t)list); +} + +/* Append `node` to `list`. 
*/ +static inline void +gc_list_append(PyGC_Head *node, PyGC_Head *list) +{ + PyGC_Head *last = (PyGC_Head *)list->_gc_prev; + + // last <-> node + _PyGCHead_SET_PREV(node, last); + _PyGCHead_SET_NEXT(last, node); + + // node <-> list + _PyGCHead_SET_NEXT(node, list); + list->_gc_prev = (uintptr_t)node; +} + +/* Remove `node` from the gc list it's currently in. */ +static inline void +gc_list_remove(PyGC_Head *node) +{ + PyGC_Head *prev = GC_PREV(node); + PyGC_Head *next = GC_NEXT(node); + + _PyGCHead_SET_NEXT(prev, next); + _PyGCHead_SET_PREV(next, prev); + + node->_gc_next = 0; /* object is not currently tracked */ +} + +/* Move `node` from the gc list it's currently in (which is not explicitly + * named here) to the end of `list`. This is semantically the same as + * gc_list_remove(node) followed by gc_list_append(node, list). + */ +static void +gc_list_move(PyGC_Head *node, PyGC_Head *list) +{ + /* Unlink from current list. */ + PyGC_Head *from_prev = GC_PREV(node); + PyGC_Head *from_next = GC_NEXT(node); + _PyGCHead_SET_NEXT(from_prev, from_next); + _PyGCHead_SET_PREV(from_next, from_prev); + + /* Relink at end of new list. */ + // list must not have flags. So we can skip macros. 
+ PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev; + _PyGCHead_SET_PREV(node, to_prev); + _PyGCHead_SET_NEXT(to_prev, node); + list->_gc_prev = (uintptr_t)node; + _PyGCHead_SET_NEXT(node, list); +} + +/* append list `from` onto list `to`; `from` becomes an empty list */ +static void +gc_list_merge(PyGC_Head *from, PyGC_Head *to) +{ + assert(from != to); + if (!gc_list_is_empty(from)) { + PyGC_Head *to_tail = GC_PREV(to); + PyGC_Head *from_head = GC_NEXT(from); + PyGC_Head *from_tail = GC_PREV(from); + assert(from_head != from); + assert(from_tail != from); + + _PyGCHead_SET_NEXT(to_tail, from_head); + _PyGCHead_SET_PREV(from_head, to_tail); + + _PyGCHead_SET_NEXT(from_tail, to); + _PyGCHead_SET_PREV(to, from_tail); + } + gc_list_init(from); +} + +static Py_ssize_t +gc_list_size(PyGC_Head *list) +{ + PyGC_Head *gc; + Py_ssize_t n = 0; + for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) { + n++; + } + return n; +} + +/* Walk the list and mark all objects as non-collecting */ +static inline void +gc_list_clear_collecting(PyGC_Head *collectable) +{ + PyGC_Head *gc; + for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) { + gc_clear_collecting(gc); + } +} + +/* Append objects in a GC list to a Python list. + * Return 0 if all OK, < 0 if error (out of memory for list) + */ +static int +append_objects(PyObject *py_list, PyGC_Head *gc_list) +{ + PyGC_Head *gc; + for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) { + PyObject *op = FROM_GC(gc); + if (op != py_list) { + if (PyList_Append(py_list, op)) { + return -1; /* exception */ + } + } + } + return 0; +} + +// Constants for validate_list's flags argument. +enum flagstates {collecting_clear_unreachable_clear, + collecting_clear_unreachable_set, + collecting_set_unreachable_clear, + collecting_set_unreachable_set}; + +#ifdef GC_DEBUG +// validate_list checks list consistency. And it works as document +// describing when flags are expected to be set / unset. 
+// `head` must be a doubly-linked gc list, although it's fine (expected!) if +// the prev and next pointers are "polluted" with flags. +// What's checked: +// - The `head` pointers are not polluted. +// - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all +// `set or clear, as specified by the 'flags' argument. +// - The prev and next pointers are mutually consistent. +static void +validate_list(PyGC_Head *head, enum flagstates flags) +{ + assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0); + assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0); + uintptr_t prev_value = 0, next_value = 0; + switch (flags) { + case collecting_clear_unreachable_clear: + break; + case collecting_set_unreachable_clear: + prev_value = PREV_MASK_COLLECTING; + break; + case collecting_clear_unreachable_set: + next_value = NEXT_MASK_UNREACHABLE; + break; + case collecting_set_unreachable_set: + prev_value = PREV_MASK_COLLECTING; + next_value = NEXT_MASK_UNREACHABLE; + break; + default: + assert(! "bad internal flags argument"); + } + PyGC_Head *prev = head; + PyGC_Head *gc = GC_NEXT(head); + while (gc != head) { + PyGC_Head *trueprev = GC_PREV(gc); + PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE); + assert(truenext != NULL); + assert(trueprev == prev); + assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value); + assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value); + prev = gc; + gc = truenext; + } + assert(prev == GC_PREV(head)); +} +#else +#define validate_list(x, y) do{}while(0) +#endif + +/*** end of list stuff ***/ + + +/* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 and + * PREV_MASK_COLLECTING bit is set for all objects in containers. 
+ */ +static void +update_refs(PyGC_Head *containers) +{ + PyGC_Head *next; + PyGC_Head *gc = GC_NEXT(containers); + + while (gc != containers) { + next = GC_NEXT(gc); + /* Move any object that might have become immortal to the + * permanent generation as the reference count is not accurately + * reflecting the actual number of live references to this object + */ + if (_Py_IsImmortal(FROM_GC(gc))) { + gc_list_move(gc, &get_gc_state()->permanent_generation.head); + gc = next; + continue; + } + gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc))); + /* Python's cyclic gc should never see an incoming refcount + * of 0: if something decref'ed to 0, it should have been + * deallocated immediately at that time. + * Possible cause (if the assert triggers): a tp_dealloc + * routine left a gc-aware object tracked during its teardown + * phase, and did something-- or allowed something to happen -- + * that called back into Python. gc can trigger then, and may + * see the still-tracked dying object. Before this assert + * was added, such mistakes went on to allow gc to try to + * delete the object again. In a debug build, that caused + * a mysterious segfault, when _Py_ForgetReference tried + * to remove the object from the doubly-linked list of all + * objects a second time. In a release build, an actual + * double deallocation occurred, which leads to corruption + * of the allocator's internal bookkeeping pointers. That's + * so serious that maybe this should be a release-build + * check instead of an assert? + */ + _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0); + gc = next; + } +} + +/* A traversal callback for subtract_refs. 
*/ +static int +visit_decref(PyObject *op, void *parent) +{ + OBJECT_STAT_INC(object_visits); + _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op)); + + if (_PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + /* We're only interested in gc_refs for objects in the + * generation being collected, which can be recognized + * because only they have positive gc_refs. + */ + if (gc_is_collecting(gc)) { + gc_decref(gc); + } + } + return 0; +} + +/* Subtract internal references from gc_refs. After this, gc_refs is >= 0 + * for all objects in containers, and is GC_REACHABLE for all tracked gc + * objects not in containers. The ones with gc_refs > 0 are directly + * reachable from outside containers, and so can't be collected. + */ +static void +subtract_refs(PyGC_Head *containers) +{ + traverseproc traverse; + PyGC_Head *gc = GC_NEXT(containers); + for (; gc != containers; gc = GC_NEXT(gc)) { + PyObject *op = FROM_GC(gc); + traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + visit_decref, + op); + } +} + +/* A traversal callback for move_unreachable. */ +static int +visit_reachable(PyObject *op, void *arg) +{ + PyGC_Head *reachable = arg; + OBJECT_STAT_INC(object_visits); + if (!_PyObject_IS_GC(op)) { + return 0; + } + + PyGC_Head *gc = AS_GC(op); + const Py_ssize_t gc_refs = gc_get_refs(gc); + + // Ignore objects in other generation. + // This also skips objects "to the left" of the current position in + // move_unreachable's scan of the 'young' list - they've already been + // traversed, and no longer have the PREV_MASK_COLLECTING flag. + if (! gc_is_collecting(gc)) { + return 0; + } + // It would be a logic error elsewhere if the collecting flag were set on + // an untracked object. + assert(gc->_gc_next != 0); + + if (gc->_gc_next & NEXT_MASK_UNREACHABLE) { + /* This had gc_refs = 0 when move_unreachable got + * to it, but turns out it's reachable after all. 
+ * Move it back to move_unreachable's 'young' list, + * and move_unreachable will eventually get to it + * again. + */ + // Manually unlink gc from unreachable list because the list functions + // don't work right in the presence of NEXT_MASK_UNREACHABLE flags. + PyGC_Head *prev = GC_PREV(gc); + PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE); + _PyObject_ASSERT(FROM_GC(prev), + prev->_gc_next & NEXT_MASK_UNREACHABLE); + _PyObject_ASSERT(FROM_GC(next), + next->_gc_next & NEXT_MASK_UNREACHABLE); + prev->_gc_next = gc->_gc_next; // copy NEXT_MASK_UNREACHABLE + _PyGCHead_SET_PREV(next, prev); + + gc_list_append(gc, reachable); + gc_set_refs(gc, 1); + } + else if (gc_refs == 0) { + /* This is in move_unreachable's 'young' list, but + * the traversal hasn't yet gotten to it. All + * we need to do is tell move_unreachable that it's + * reachable. + */ + gc_set_refs(gc, 1); + } + /* Else there's nothing to do. + * If gc_refs > 0, it must be in move_unreachable's 'young' + * list, and move_unreachable will eventually get to it. + */ + else { + _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small"); + } + return 0; +} + +/* Move the unreachable objects from young to unreachable. After this, + * all objects in young don't have PREV_MASK_COLLECTING flag and + * unreachable have the flag. + * All objects in young after this are directly or indirectly reachable + * from outside the original young; and all objects in unreachable are + * not. + * + * This function restores _gc_prev pointer. young and unreachable are + * doubly linked list after this function. + * But _gc_next in unreachable list has NEXT_MASK_UNREACHABLE flag. + * So we can not gc_list_* functions for unreachable until we remove the flag. + */ +static void +move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) +{ + // previous elem in the young list, used for restore gc_prev. 
+ PyGC_Head *prev = young; + PyGC_Head *gc = GC_NEXT(young); + + /* Invariants: all objects "to the left" of us in young are reachable + * (directly or indirectly) from outside the young list as it was at entry. + * + * All other objects from the original young "to the left" of us are in + * unreachable now, and have NEXT_MASK_UNREACHABLE. All objects to the + * left of us in 'young' now have been scanned, and no objects here + * or to the right have been scanned yet. + */ + + while (gc != young) { + if (gc_get_refs(gc)) { + /* gc is definitely reachable from outside the + * original 'young'. Mark it as such, and traverse + * its pointers to find any other objects that may + * be directly reachable from it. Note that the + * call to tp_traverse may append objects to young, + * so we have to wait until it returns to determine + * the next object to visit. + */ + PyObject *op = FROM_GC(gc); + traverseproc traverse = Py_TYPE(op)->tp_traverse; + _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0, + "refcount is too small"); + // NOTE: visit_reachable may change gc->_gc_next when + // young->_gc_prev == gc. Don't do gc = GC_NEXT(gc) before! + (void) traverse(op, + visit_reachable, + (void *)young); + // relink gc_prev to prev element. + _PyGCHead_SET_PREV(gc, prev); + // gc is not COLLECTING state after here. + gc_clear_collecting(gc); + prev = gc; + } + else { + /* This *may* be unreachable. To make progress, + * assume it is. gc isn't directly reachable from + * any object we've already traversed, but may be + * reachable from an object we haven't gotten to yet. + * visit_reachable will eventually move gc back into + * young if that's so, and we'll see it again. + */ + // Move gc to unreachable. + // No need to gc->next->prev = prev because it is single linked. + prev->_gc_next = gc->_gc_next; + + // We can't use gc_list_append() here because we use + // NEXT_MASK_UNREACHABLE here. 
+ PyGC_Head *last = GC_PREV(unreachable); + // NOTE: Since all objects in unreachable set has + // NEXT_MASK_UNREACHABLE flag, we set it unconditionally. + // But this may pollute the unreachable list head's 'next' pointer + // too. That's semantically senseless but expedient here - the + // damage is repaired when this function ends. + last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc); + _PyGCHead_SET_PREV(gc, last); + gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable); + unreachable->_gc_prev = (uintptr_t)gc; + } + gc = (PyGC_Head*)prev->_gc_next; + } + // young->_gc_prev must be last element remained in the list. + young->_gc_prev = (uintptr_t)prev; + // don't let the pollution of the list head's next pointer leak + unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE; +} + +static void +untrack_tuples(PyGC_Head *head) +{ + PyGC_Head *next, *gc = GC_NEXT(head); + while (gc != head) { + PyObject *op = FROM_GC(gc); + next = GC_NEXT(gc); + if (PyTuple_CheckExact(op)) { + _PyTuple_MaybeUntrack(op); + } + gc = next; + } +} + +/* Try to untrack all currently tracked dictionaries */ +static void +untrack_dicts(PyGC_Head *head) +{ + PyGC_Head *next, *gc = GC_NEXT(head); + while (gc != head) { + PyObject *op = FROM_GC(gc); + next = GC_NEXT(gc); + if (PyDict_CheckExact(op)) { + _PyDict_MaybeUntrack(op); + } + gc = next; + } +} + +/* Return true if object has a pre-PEP 442 finalization method. */ +static int +has_legacy_finalizer(PyObject *op) +{ + return Py_TYPE(op)->tp_del != NULL; +} + +/* Move the objects in unreachable with tp_del slots into `finalizers`. + * + * This function also removes NEXT_MASK_UNREACHABLE flag + * from _gc_next in unreachable. + */ +static void +move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) +{ + PyGC_Head *gc, *next; + assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0); + + /* March over unreachable. Move objects with finalizers into + * `finalizers`. 
+ */ + for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { + PyObject *op = FROM_GC(gc); + + _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE); + gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; + next = (PyGC_Head*)gc->_gc_next; + + if (has_legacy_finalizer(op)) { + gc_clear_collecting(gc); + gc_list_move(gc, finalizers); + } + } +} + +static inline void +clear_unreachable_mask(PyGC_Head *unreachable) +{ + /* Check that the list head does not have the unreachable bit set */ + assert(((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0); + + PyGC_Head *gc, *next; + assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0); + for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { + _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE); + gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; + next = (PyGC_Head*)gc->_gc_next; + } + validate_list(unreachable, collecting_set_unreachable_clear); +} + +/* A traversal callback for move_legacy_finalizer_reachable. */ +static int +visit_move(PyObject *op, void *arg) +{ + PyGC_Head *tolist = arg; + OBJECT_STAT_INC(object_visits); + if (_PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (gc_is_collecting(gc)) { + gc_list_move(gc, tolist); + gc_clear_collecting(gc); + } + } + return 0; +} + +/* Move objects that are reachable from finalizers, from the unreachable set + * into finalizers set. + */ +static void +move_legacy_finalizer_reachable(PyGC_Head *finalizers) +{ + traverseproc traverse; + PyGC_Head *gc = GC_NEXT(finalizers); + for (; gc != finalizers; gc = GC_NEXT(gc)) { + /* Note that the finalizers list may grow during this. */ + traverse = Py_TYPE(FROM_GC(gc))->tp_traverse; + (void) traverse(FROM_GC(gc), + visit_move, + (void *)finalizers); + } +} + +/* Clear all weakrefs to unreachable objects, and if such a weakref has a + * callback, invoke it if necessary. 
Note that it's possible for such + * weakrefs to be outside the unreachable set -- indeed, those are precisely + * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for + * overview & some details. Some weakrefs with callbacks may be reclaimed + * directly by this routine; the number reclaimed is the return value. Other + * weakrefs with callbacks may be moved into the `old` generation. Objects + * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in + * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns, + * no object in `unreachable` is weakly referenced anymore. + */ +static int +handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) +{ + PyGC_Head *gc; + PyObject *op; /* generally FROM_GC(gc) */ + PyWeakReference *wr; /* generally a cast of op */ + PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */ + PyGC_Head *next; + int num_freed = 0; + + gc_list_init(&wrcb_to_call); + + /* Clear all weakrefs to the objects in unreachable. If such a weakref + * also has a callback, move it into `wrcb_to_call` if the callback + * needs to be invoked. Note that we cannot invoke any callbacks until + * all weakrefs to unreachable objects are cleared, lest the callback + * resurrect an unreachable object via a still-active weakref. We + * make another pass over wrcb_to_call, invoking callbacks, after this + * pass completes. + */ + for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { + PyWeakReference **wrlist; + + op = FROM_GC(gc); + next = GC_NEXT(gc); + + if (PyWeakref_Check(op)) { + /* A weakref inside the unreachable set must be cleared. If we + * allow its callback to execute inside delete_garbage(), it + * could expose objects that have tp_clear already called on + * them. Or, it could resurrect unreachable objects. One way + * this can happen is if some container objects do not implement + * tp_traverse. 
Then, wr_object can be outside the unreachable + * set but can be deallocated as a result of breaking the + * reference cycle. If we don't clear the weakref, the callback + * will run and potentially cause a crash. See bpo-38006 for + * one example. + */ + _PyWeakref_ClearRef((PyWeakReference *)op); + } + + if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + /* It supports weakrefs. Does it have any? + * + * This is never triggered for static types so we can avoid the + * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + */ + wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + /* `op` may have some weakrefs. March over the list, clear + * all the weakrefs, and move the weakrefs with callbacks + * that must be called into wrcb_to_call. + */ + for (wr = *wrlist; wr != NULL; wr = *wrlist) { + PyGC_Head *wrasgc; /* AS_GC(wr) */ + + /* _PyWeakref_ClearRef clears the weakref but leaves + * the callback pointer intact. Obscure: it also + * changes *wrlist. + */ + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + if (wr->wr_callback == NULL) { + /* no callback */ + continue; + } + + /* Headache time. `op` is going away, and is weakly referenced by + * `wr`, which has a callback. Should the callback be invoked? If wr + * is also trash, no: + * + * 1. There's no need to call it. The object and the weakref are + * both going away, so it's legitimate to pretend the weakref is + * going away first. The user has to ensure a weakref outlives its + * referent if they want a guarantee that the wr callback will get + * invoked. + * + * 2. It may be catastrophic to call it. If the callback is also in + * cyclic trash (CT), then although the CT is unreachable from + * outside the current generation, CT may be reachable from the + * callback. Then the callback could resurrect insane objects. 
+ * + * Since the callback is never needed and may be unsafe in this case, + * wr is simply left in the unreachable set. Note that because we + * already called _PyWeakref_ClearRef(wr), its callback will never + * trigger. + * + * OTOH, if wr isn't part of CT, we should invoke the callback: the + * weakref outlived the trash. Note that since wr isn't CT in this + * case, its callback can't be CT either -- wr acted as an external + * root to this generation, and therefore its callback did too. So + * nothing in CT is reachable from the callback either, so it's hard + * to imagine how calling it later could create a problem for us. wr + * is moved to wrcb_to_call in this case. + */ + if (gc_is_collecting(AS_GC((PyObject *)wr))) { + /* it should already have been cleared above */ + assert(wr->wr_object == Py_None); + continue; + } + + /* Create a new reference so that wr can't go away + * before we can process it again. + */ + Py_INCREF(wr); + + /* Move wr to wrcb_to_call, for the next pass. */ + wrasgc = AS_GC((PyObject *)wr); + assert(wrasgc != next); /* wrasgc is reachable, but + next isn't, so they can't + be the same */ + gc_list_move(wrasgc, &wrcb_to_call); + } + } + + /* Invoke the callbacks we decided to honor. It's safe to invoke them + * because they can't reference unreachable objects. + */ + while (! gc_list_is_empty(&wrcb_to_call)) { + PyObject *temp; + PyObject *callback; + + gc = (PyGC_Head*)wrcb_to_call._gc_next; + op = FROM_GC(gc); + _PyObject_ASSERT(op, PyWeakref_Check(op)); + wr = (PyWeakReference *)op; + callback = wr->wr_callback; + _PyObject_ASSERT(op, callback != NULL); + + /* copy-paste of weakrefobject.c's handle_callback() */ + temp = PyObject_CallOneArg(callback, (PyObject *)wr); + if (temp == NULL) { + PyErr_WriteUnraisable(callback); + } + else { + Py_DECREF(temp); + } + + /* Give up the reference we created in the first pass. 
When + * op's refcount hits 0 (which it may or may not do right now), + * op's tp_dealloc will decref op->wr_callback too. Note + * that the refcount probably will hit 0 now, and because this + * weakref was reachable to begin with, gc didn't already + * add it to its count of freed objects. Example: a reachable + * weak value dict maps some key to this reachable weakref. + * The callback removes this key->weakref mapping from the + * dict, leaving no other references to the weakref (excepting + * ours). + */ + Py_DECREF(op); + if (wrcb_to_call._gc_next == (uintptr_t)gc) { + /* object is still alive -- move it */ + gc_list_move(gc, old); + } + else { + ++num_freed; + } + } + + return num_freed; +} + +static void +debug_cycle(const char *msg, PyObject *op) +{ + PySys_FormatStderr("gc: %s <%s %p>\n", + msg, Py_TYPE(op)->tp_name, op); +} + +/* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable + * only from such cycles). + * If _PyGC_DEBUG_SAVEALL, all objects in finalizers are appended to the module + * garbage list (a Python list), else only the objects in finalizers with + * __del__ methods are appended to garbage. All objects in finalizers are + * merged into the old list regardless. + */ +static void +handle_legacy_finalizers(PyThreadState *tstate, + GCState *gcstate, + PyGC_Head *finalizers, PyGC_Head *old) +{ + assert(!_PyErr_Occurred(tstate)); + assert(gcstate->garbage != NULL); + + PyGC_Head *gc = GC_NEXT(finalizers); + for (; gc != finalizers; gc = GC_NEXT(gc)) { + PyObject *op = FROM_GC(gc); + + if ((gcstate->debug & _PyGC_DEBUG_SAVEALL) || has_legacy_finalizer(op)) { + if (PyList_Append(gcstate->garbage, op) < 0) { + _PyErr_Clear(tstate); + break; + } + } + } + + gc_list_merge(finalizers, old); +} + +/* Run first-time finalizers (if any) on all the objects in collectable. + * Note that this may remove some (or even all) of the objects from the + * list, due to refcounts falling to 0. 
+ */ +static void +finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable) +{ + destructor finalize; + PyGC_Head seen; + + /* While we're going through the loop, `finalize(op)` may cause op, or + * other objects, to be reclaimed via refcounts falling to zero. So + * there's little we can rely on about the structure of the input + * `collectable` list across iterations. For safety, we always take the + * first object in that list and move it to a temporary `seen` list. + * If objects vanish from the `collectable` and `seen` lists we don't + * care. + */ + gc_list_init(&seen); + + while (!gc_list_is_empty(collectable)) { + PyGC_Head *gc = GC_NEXT(collectable); + PyObject *op = FROM_GC(gc); + gc_list_move(gc, &seen); + if (!_PyGCHead_FINALIZED(gc) && + (finalize = Py_TYPE(op)->tp_finalize) != NULL) + { + _PyGCHead_SET_FINALIZED(gc); + Py_INCREF(op); + finalize(op); + assert(!_PyErr_Occurred(tstate)); + Py_DECREF(op); + } + } + gc_list_merge(&seen, collectable); +} + +/* Break reference cycles by clearing the containers involved. This is + * tricky business as the lists can be changing and we don't know which + * objects may be freed. It is possible I screwed something up here. 
+ */ +static void +delete_garbage(PyThreadState *tstate, GCState *gcstate, + PyGC_Head *collectable, PyGC_Head *old) +{ + assert(!_PyErr_Occurred(tstate)); + + while (!gc_list_is_empty(collectable)) { + PyGC_Head *gc = GC_NEXT(collectable); + PyObject *op = FROM_GC(gc); + + _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0, + "refcount is too small"); + + if (gcstate->debug & _PyGC_DEBUG_SAVEALL) { + assert(gcstate->garbage != NULL); + if (PyList_Append(gcstate->garbage, op) < 0) { + _PyErr_Clear(tstate); + } + } + else { + inquiry clear; + if ((clear = Py_TYPE(op)->tp_clear) != NULL) { + Py_INCREF(op); + (void) clear(op); + if (_PyErr_Occurred(tstate)) { + PyErr_FormatUnraisable("Exception ignored in tp_clear of %s", + Py_TYPE(op)->tp_name); + } + Py_DECREF(op); + } + } + if (GC_NEXT(collectable) == gc) { + /* object is still alive, move it, it may die later */ + gc_clear_collecting(gc); + gc_list_move(gc, old); + } + } +} + +/* Clear all free lists + * All free lists are cleared during the collection of the highest generation. + * Allocated items in the free list may keep a pymalloc arena occupied. + * Clearing the free lists may give back memory to the OS earlier. 
+ */ +static void +clear_freelists(PyInterpreterState *interp) +{ + _PyTuple_ClearFreeList(interp); + _PyFloat_ClearFreeList(interp); + _PyList_ClearFreeList(interp); + _PyDict_ClearFreeList(interp); + _PyAsyncGen_ClearFreeLists(interp); + _PyContext_ClearFreeList(interp); +} + +// Show stats for objects in each generations +static void +show_stats_each_generations(GCState *gcstate) +{ + char buf[100]; + size_t pos = 0; + + for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) { + pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos, + " %zd", + gc_list_size(GEN_HEAD(gcstate, i))); + } + + PySys_FormatStderr( + "gc: objects in each generation:%s\n" + "gc: objects in permanent generation: %zd\n", + buf, gc_list_size(&gcstate->permanent_generation.head)); +} + +/* Deduce which objects among "base" are unreachable from outside the list + and move them to 'unreachable'. The process consist in the following steps: + +1. Copy all reference counts to a different field (gc_prev is used to hold + this copy to save memory). +2. Traverse all objects in "base" and visit all referred objects using + "tp_traverse" and for every visited object, subtract 1 to the reference + count (the one that we copied in the previous step). After this step, all + objects that can be reached directly from outside must have strictly positive + reference count, while all unreachable objects must have a count of exactly 0. +3. Identify all unreachable objects (the ones with 0 reference count) and move + them to the "unreachable" list. This step also needs to move back to "base" all + objects that were initially marked as unreachable but are referred transitively + by the reachable objects (the ones with strictly positive reference count). + +Contracts: + + * The "base" has to be a valid list with no mask set. + + * The "unreachable" list must be uninitialized (this function calls + gc_list_init over 'unreachable'). 
+ +IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE +flag set but it does not clear it to skip unnecessary iteration. Before the +flag is cleared (for example, by using 'clear_unreachable_mask' function or +by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal +list and we can not use most gc_list_* functions for it. */ +static inline void +deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { + validate_list(base, collecting_clear_unreachable_clear); + /* Using ob_refcnt and gc_refs, calculate which objects in the + * container set are reachable from outside the set (i.e., have a + * refcount greater than 0 when all the references within the + * set are taken into account). + */ + update_refs(base); // gc_prev is used for gc_refs + subtract_refs(base); + + /* Leave everything reachable from outside base in base, and move + * everything else (in base) to unreachable. + * + * NOTE: This used to move the reachable objects into a reachable + * set instead. But most things usually turn out to be reachable, + * so it's more efficient to move the unreachable things. It "sounds slick" + * to move the unreachable objects, until you think about it - the reason it + * pays isn't actually obvious. + * + * Suppose we create objects A, B, C in that order. They appear in the young + * generation in the same order. If B points to A, and C to B, and C is + * reachable from outside, then the adjusted refcounts will be 0, 0, and 1 + * respectively. + * + * When move_unreachable finds A, A is moved to the unreachable list. The + * same for B when it's first encountered. Then C is traversed, B is moved + * _back_ to the reachable list. B is eventually traversed, and then A is + * moved back to the reachable list. + * + * So instead of not moving at all, the reachable objects B and A are moved + * twice each. Why is this a win? 
A straightforward algorithm to move the + * reachable objects instead would move A, B, and C once each. + * + * The key is that this dance leaves the objects in order C, B, A - it's + * reversed from the original order. On all _subsequent_ scans, none of + * them will move. Since most objects aren't in cycles, this can save an + * unbounded number of moves across an unbounded number of later collections. + * It can cost more only the first time the chain is scanned. + * + * Drawback: move_unreachable is also used to find out what's still trash + * after finalizers may resurrect objects. In _that_ case most unreachable + * objects will remain unreachable, so it would be more efficient to move + * the reachable objects instead. But this is a one-time cost, probably not + * worth complicating the code to speed just a little. + */ + gc_list_init(unreachable); + move_unreachable(base, unreachable); // gc_prev is pointer again + validate_list(base, collecting_clear_unreachable_clear); + validate_list(unreachable, collecting_set_unreachable_set); +} + +/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving + them to 'old_generation' and placing the rest on 'still_unreachable'. + + Contracts: + * After this function 'unreachable' must not be used anymore and 'still_unreachable' + will contain the objects that did not resurrect. + + * The "still_unreachable" list must be uninitialized (this function calls + gc_list_init over 'still_unreachable'). + +IMPORTANT: After a call to this function, the 'still_unreachable' set will have the +PREV_MARK_COLLECTING set, but the objects in this set are going to be removed so +we can skip the expense of clearing the flag to avoid extra iteration. 
*/ +static inline void +handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, + PyGC_Head *old_generation) +{ + // Remove the PREV_MASK_COLLECTING from unreachable + // to prepare it for a new call to 'deduce_unreachable' + gc_list_clear_collecting(unreachable); + + // After the call to deduce_unreachable, the 'still_unreachable' set will + // have the PREV_MASK_COLLECTING set, but the objects are going to be + // removed so we can skip the expense of clearing the flag. + PyGC_Head* resurrected = unreachable; + deduce_unreachable(resurrected, still_unreachable); + clear_unreachable_mask(still_unreachable); + + // Move the resurrected objects to the old generation for future collection. + gc_list_merge(resurrected, old_generation); +} + + +/* Invoke progress callbacks to notify clients that garbage collection + * is starting or stopping + */ +static void +invoke_gc_callback(PyThreadState *tstate, const char *phase, + int generation, Py_ssize_t collected, + Py_ssize_t uncollectable) +{ + assert(!_PyErr_Occurred(tstate)); + + /* we may get called very early */ + GCState *gcstate = &tstate->interp->gc; + if (gcstate->callbacks == NULL) { + return; + } + + /* The local variable cannot be rebound, check it for sanity */ + assert(PyList_CheckExact(gcstate->callbacks)); + PyObject *info = NULL; + if (PyList_GET_SIZE(gcstate->callbacks) != 0) { + info = Py_BuildValue("{sisnsn}", + "generation", generation, + "collected", collected, + "uncollectable", uncollectable); + if (info == NULL) { + PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); + return; + } + } + + PyObject *phase_obj = PyUnicode_FromString(phase); + if (phase_obj == NULL) { + Py_XDECREF(info); + PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); + return; + } + + PyObject *stack[] = {phase_obj, info}; + for (Py_ssize_t i=0; i < PyList_GET_SIZE(gcstate->callbacks); i++) { + PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i); + Py_INCREF(cb); /* make sure cb doesn't go
away */ + r = PyObject_Vectorcall(cb, stack, 2, NULL); + if (r == NULL) { + PyErr_WriteUnraisable(cb); + } + else { + Py_DECREF(r); + } + Py_DECREF(cb); + } + Py_DECREF(phase_obj); + Py_XDECREF(info); + assert(!_PyErr_Occurred(tstate)); +} + + +/* Find the oldest generation (highest numbered) where the count + * exceeds the threshold. Objects in that generation and + * generations younger than it will be collected. */ +static int +gc_select_generation(GCState *gcstate) +{ + for (int i = NUM_GENERATIONS-1; i >= 0; i--) { + if (gcstate->generations[i].count > gcstate->generations[i].threshold) { + /* Avoid quadratic performance degradation in number + of tracked objects (see also issue #4074): + + To limit the cost of garbage collection, there are two strategies; + - make each collection faster, e.g. by scanning fewer objects + - do less collections + This heuristic is about the latter strategy. + + In addition to the various configurable thresholds, we only trigger a + full collection if the ratio + + long_lived_pending / long_lived_total + + is above a given value (hardwired to 25%). + + The reason is that, while "non-full" collections (i.e., collections of + the young and middle generations) will always examine roughly the same + number of objects -- determined by the aforementioned thresholds --, + the cost of a full collection is proportional to the total number of + long-lived objects, which is virtually unbounded. + + Indeed, it has been remarked that doing a full collection every + <constant number> of object creations entails a dramatic performance + degradation in workloads which consist in creating and storing lots of + long-lived objects (e.g. building a large list of GC-tracked objects would + show quadratic performance, instead of linear as expected: see issue #4074).
+ + Using the above ratio, instead, yields amortized linear performance in + the total number of objects (the effect of which can be summarized + thusly: "each full garbage collection is more and more costly as the + number of objects grows, but we do fewer and fewer of them"). + + This heuristic was suggested by Martin von Löwis on python-dev in + June 2008. His original analysis and proposal can be found at: + http://mail.python.org/pipermail/python-dev/2008-June/080579.html + */ + if (i == NUM_GENERATIONS - 1 + && gcstate->long_lived_pending < gcstate->long_lived_total / 4) + { + continue; + } + return i; + } + } + return -1; +} + + +/* This is the main function. Read this to understand how the + * collection process works. */ +static Py_ssize_t +gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) +{ + int i; + Py_ssize_t m = 0; /* # objects collected */ + Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */ + PyGC_Head *young; /* the generation we are examining */ + PyGC_Head *old; /* next older generation */ + PyGC_Head unreachable; /* non-problematic unreachable trash */ + PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ + PyGC_Head *gc; + _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */ + GCState *gcstate = &tstate->interp->gc; + + // gc_collect_main() must not be called before _PyGC_Init + // or after _PyGC_Fini() + assert(gcstate->garbage != NULL); + assert(!_PyErr_Occurred(tstate)); + + int expected = 0; + if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) { + // Don't start a garbage collection if one is already in progress. + return 0; + } + + if (generation == GENERATION_AUTO) { + // Select the oldest generation that needs collecting. We will collect + // objects from that generation and all generations younger than it. + generation = gc_select_generation(gcstate); + if (generation < 0) { + // No generation needs to be collected. 
+ _Py_atomic_store_int(&gcstate->collecting, 0); + return 0; + } + } + + assert(generation >= 0 && generation < NUM_GENERATIONS); + +#ifdef Py_STATS + if (_Py_stats) { + _Py_stats->object_stats.object_visits = 0; + } +#endif + GC_STAT_ADD(generation, collections, 1); + + if (reason != _Py_GC_REASON_SHUTDOWN) { + invoke_gc_callback(tstate, "start", generation, 0, 0); + } + + if (gcstate->debug & _PyGC_DEBUG_STATS) { + PySys_WriteStderr("gc: collecting generation %d...\n", generation); + show_stats_each_generations(gcstate); + t1 = _PyTime_GetPerfCounter(); + } + + if (PyDTrace_GC_START_ENABLED()) { + PyDTrace_GC_START(generation); + } + + /* update collection and allocation counters */ + if (generation+1 < NUM_GENERATIONS) { + gcstate->generations[generation+1].count += 1; + } + for (i = 0; i <= generation; i++) { + gcstate->generations[i].count = 0; + } + + /* merge younger generations with one we are currently collecting */ + for (i = 0; i < generation; i++) { + gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation)); + } + + /* handy references */ + young = GEN_HEAD(gcstate, generation); + if (generation < NUM_GENERATIONS-1) { + old = GEN_HEAD(gcstate, generation+1); + } + else { + old = young; + } + validate_list(old, collecting_clear_unreachable_clear); + + deduce_unreachable(young, &unreachable); + + untrack_tuples(young); + /* Move reachable objects to next generation. */ + if (young != old) { + if (generation == NUM_GENERATIONS - 2) { + gcstate->long_lived_pending += gc_list_size(young); + } + gc_list_merge(young, old); + } + else { + /* We only un-track dicts in full collections, to avoid quadratic + dict build-up. See issue #14775. */ + untrack_dicts(young); + gcstate->long_lived_pending = 0; + gcstate->long_lived_total = gc_list_size(young); + } + + /* All objects in unreachable are trash, but objects reachable from + * legacy finalizers (e.g. tp_del) can't safely be deleted. 
+ */ + gc_list_init(&finalizers); + // NEXT_MASK_UNREACHABLE is cleared here. + // After move_legacy_finalizers(), unreachable is normal list. + move_legacy_finalizers(&unreachable, &finalizers); + /* finalizers contains the unreachable objects with a legacy finalizer; + * unreachable objects reachable *from* those are also uncollectable, + * and we move those into the finalizers list too. + */ + move_legacy_finalizer_reachable(&finalizers); + + validate_list(&finalizers, collecting_clear_unreachable_clear); + validate_list(&unreachable, collecting_set_unreachable_clear); + + /* Print debugging information. */ + if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) { + for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) { + debug_cycle("collectable", FROM_GC(gc)); + } + } + + /* Clear weakrefs and invoke callbacks as necessary. */ + m += handle_weakrefs(&unreachable, old); + + validate_list(old, collecting_clear_unreachable_clear); + validate_list(&unreachable, collecting_set_unreachable_clear); + + /* Call tp_finalize on objects which have one. */ + finalize_garbage(tstate, &unreachable); + + /* Handle any objects that may have resurrected after the call + * to 'finalize_garbage' and continue the collection with the + * objects that are still unreachable */ + PyGC_Head final_unreachable; + handle_resurrected_objects(&unreachable, &final_unreachable, old); + + /* Call tp_clear on objects in the final_unreachable set. This will cause + * the reference cycles to be broken. It may also cause some objects + * in finalizers to be freed. + */ + m += gc_list_size(&final_unreachable); + delete_garbage(tstate, gcstate, &final_unreachable, old); + + /* Collect statistics on uncollectable objects found and print + * debugging information. 
*/ + for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) { + n++; + if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) + debug_cycle("uncollectable", FROM_GC(gc)); + } + if (gcstate->debug & _PyGC_DEBUG_STATS) { + double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1); + PySys_WriteStderr( + "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n", + n+m, n, d); + } + + /* Append instances in the uncollectable set to a Python + * reachable list of garbage. The programmer has to deal with + * this if they insist on creating this type of structure. + */ + handle_legacy_finalizers(tstate, gcstate, &finalizers, old); + validate_list(old, collecting_clear_unreachable_clear); + + /* Clear free list only during the collection of the highest + * generation */ + if (generation == NUM_GENERATIONS-1) { + clear_freelists(tstate->interp); + } + + if (_PyErr_Occurred(tstate)) { + if (reason == _Py_GC_REASON_SHUTDOWN) { + _PyErr_Clear(tstate); + } + else { + PyErr_FormatUnraisable("Exception ignored in garbage collection"); + } + } + + /* Update stats */ + struct gc_generation_stats *stats = &gcstate->generation_stats[generation]; + stats->collections++; + stats->collected += m; + stats->uncollectable += n; + + GC_STAT_ADD(generation, objects_collected, m); +#ifdef Py_STATS + if (_Py_stats) { + GC_STAT_ADD(generation, object_visits, + _Py_stats->object_stats.object_visits); + _Py_stats->object_stats.object_visits = 0; + } +#endif + + if (PyDTrace_GC_DONE_ENABLED()) { + PyDTrace_GC_DONE(n + m); + } + + if (reason != _Py_GC_REASON_SHUTDOWN) { + invoke_gc_callback(tstate, "stop", generation, m, n); + } + + assert(!_PyErr_Occurred(tstate)); + _Py_atomic_store_int(&gcstate->collecting, 0); + return n + m; +} + +static int +referrersvisit(PyObject* obj, void *arg) +{ + PyObject *objs = arg; + Py_ssize_t i; + for (i = 0; i < PyTuple_GET_SIZE(objs); i++) { + if (PyTuple_GET_ITEM(objs, i) == obj) { + return 1; + } + } + return 0; +} + +static int 
+gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist) +{ + PyGC_Head *gc; + PyObject *obj; + traverseproc traverse; + for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) { + obj = FROM_GC(gc); + traverse = Py_TYPE(obj)->tp_traverse; + if (obj == objs || obj == resultlist) { + continue; + } + if (traverse(obj, referrersvisit, objs)) { + if (PyList_Append(resultlist, obj) < 0) { + return 0; /* error */ + } + } + } + return 1; /* no error */ +} + +PyObject * +_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs) +{ + PyObject *result = PyList_New(0); + if (!result) { + return NULL; + } + + GCState *gcstate = &interp->gc; + for (int i = 0; i < NUM_GENERATIONS; i++) { + if (!(gc_referrers_for(objs, GEN_HEAD(gcstate, i), result))) { + Py_DECREF(result); + return NULL; + } + } + return result; +} + +PyObject * +_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation) +{ + assert(generation >= -1 && generation < NUM_GENERATIONS); + GCState *gcstate = &interp->gc; + + PyObject *result = PyList_New(0); + if (result == NULL) { + return NULL; + } + + if (generation == -1) { + /* If generation is -1, get all objects from all generations */ + for (int i = 0; i < NUM_GENERATIONS; i++) { + if (append_objects(result, GEN_HEAD(gcstate, i))) { + goto error; + } + } + } + else { + if (append_objects(result, GEN_HEAD(gcstate, generation))) { + goto error; + } + } + + return result; +error: + Py_DECREF(result); + return NULL; +} + +void +_PyGC_Freeze(PyInterpreterState *interp) +{ + GCState *gcstate = &interp->gc; + for (int i = 0; i < NUM_GENERATIONS; ++i) { + gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head); + gcstate->generations[i].count = 0; + } +} + +void +_PyGC_Unfreeze(PyInterpreterState *interp) +{ + GCState *gcstate = &interp->gc; + gc_list_merge(&gcstate->permanent_generation.head, + GEN_HEAD(gcstate, NUM_GENERATIONS-1)); +} + +Py_ssize_t +_PyGC_GetFreezeCount(PyInterpreterState *interp) +{ + GCState *gcstate = 
&interp->gc; + return gc_list_size(&gcstate->permanent_generation.head); +} + +/* C API for controlling the state of the garbage collector */ +int +PyGC_Enable(void) +{ + GCState *gcstate = get_gc_state(); + int old_state = gcstate->enabled; + gcstate->enabled = 1; + return old_state; +} + +int +PyGC_Disable(void) +{ + GCState *gcstate = get_gc_state(); + int old_state = gcstate->enabled; + gcstate->enabled = 0; + return old_state; +} + +int +PyGC_IsEnabled(void) +{ + GCState *gcstate = get_gc_state(); + return gcstate->enabled; +} + +/* Public API to invoke gc.collect() from C */ +Py_ssize_t +PyGC_Collect(void) +{ + PyThreadState *tstate = _PyThreadState_GET(); + GCState *gcstate = &tstate->interp->gc; + + if (!gcstate->enabled) { + return 0; + } + + Py_ssize_t n; + PyObject *exc = _PyErr_GetRaisedException(tstate); + n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL); + _PyErr_SetRaisedException(tstate, exc); + + return n; +} + +Py_ssize_t +_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) +{ + return gc_collect_main(tstate, generation, reason); +} + +Py_ssize_t +_PyGC_CollectNoFail(PyThreadState *tstate) +{ + /* Ideally, this function is only called on interpreter shutdown, + and therefore not recursively. Unfortunately, when there are daemon + threads, a daemon thread can start a cyclic garbage collection + during interpreter shutdown (and then never finish it). + See http://bugs.python.org/issue8713#msg195178 for an example. 
+ */ + return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN); +} + +void +_PyGC_DumpShutdownStats(PyInterpreterState *interp) +{ + GCState *gcstate = &interp->gc; + if (!(gcstate->debug & _PyGC_DEBUG_SAVEALL) + && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) { + const char *message; + if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) { + message = "gc: %zd uncollectable objects at shutdown"; + } + else { + message = "gc: %zd uncollectable objects at shutdown; " \ + "use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them"; + } + /* PyErr_WarnFormat does too many things and we are at shutdown, + the warnings module's dependencies (e.g. linecache) may be gone + already. */ + if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0, + "gc", NULL, message, + PyList_GET_SIZE(gcstate->garbage))) + { + PyErr_WriteUnraisable(NULL); + } + if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) { + PyObject *repr = NULL, *bytes = NULL; + repr = PyObject_Repr(gcstate->garbage); + if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) { + PyErr_WriteUnraisable(gcstate->garbage); + } + else { + PySys_WriteStderr( + " %s\n", + PyBytes_AS_STRING(bytes) + ); + } + Py_XDECREF(repr); + Py_XDECREF(bytes); + } + } +} + + +void +_PyGC_Fini(PyInterpreterState *interp) +{ + GCState *gcstate = &interp->gc; + Py_CLEAR(gcstate->garbage); + Py_CLEAR(gcstate->callbacks); + + /* We expect that none of this interpreters objects are shared + with other interpreters. + See https://github.com/python/cpython/issues/90228. 
*/ +} + +/* for debugging */ +void +_PyGC_Dump(PyGC_Head *g) +{ + _PyObject_Dump(FROM_GC(g)); +} + + +#ifdef Py_DEBUG +static int +visit_validate(PyObject *op, void *parent_raw) +{ + PyObject *parent = _PyObject_CAST(parent_raw); + if (_PyObject_IsFreed(op)) { + _PyObject_ASSERT_FAILED_MSG(parent, + "PyObject_GC_Track() object is not valid"); + } + return 0; +} +#endif + + +/* extension modules might be compiled with GC support so these + functions must always be available */ + +void +PyObject_GC_Track(void *op_raw) +{ + PyObject *op = _PyObject_CAST(op_raw); + if (_PyObject_GC_IS_TRACKED(op)) { + _PyObject_ASSERT_FAILED_MSG(op, + "object already tracked " + "by the garbage collector"); + } + _PyObject_GC_TRACK(op); + +#ifdef Py_DEBUG + /* Check that the object is valid: validate objects traversed + by tp_traverse() */ + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void)traverse(op, visit_validate, op); +#endif +} + +void +PyObject_GC_UnTrack(void *op_raw) +{ + PyObject *op = _PyObject_CAST(op_raw); + /* Obscure: the Py_TRASHCAN mechanism requires that we be able to + * call PyObject_GC_UnTrack twice on an object. 
+ */ + if (_PyObject_GC_IS_TRACKED(op)) { + _PyObject_GC_UNTRACK(op); + } +} + +int +PyObject_IS_GC(PyObject *obj) +{ + return _PyObject_IS_GC(obj); +} + +void +_Py_ScheduleGC(PyInterpreterState *interp) +{ + _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1); +} + +void +_PyObject_GC_Link(PyObject *op) +{ + PyGC_Head *g = AS_GC(op); + assert(((uintptr_t)g & (sizeof(uintptr_t)-1)) == 0); // g must be correctly aligned + + PyThreadState *tstate = _PyThreadState_GET(); + GCState *gcstate = &tstate->interp->gc; + g->_gc_next = 0; + g->_gc_prev = 0; + gcstate->generations[0].count++; /* number of allocated GC objects */ + if (gcstate->generations[0].count > gcstate->generations[0].threshold && + gcstate->enabled && + gcstate->generations[0].threshold && + !_Py_atomic_load_int_relaxed(&gcstate->collecting) && + !_PyErr_Occurred(tstate)) + { + _Py_ScheduleGC(tstate->interp); + } +} + +void +_Py_RunGC(PyThreadState *tstate) +{ + gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP); +} + +static PyObject * +gc_alloc(size_t basicsize, size_t presize) +{ + PyThreadState *tstate = _PyThreadState_GET(); + if (basicsize > PY_SSIZE_T_MAX - presize) { + return _PyErr_NoMemory(tstate); + } + size_t size = presize + basicsize; + char *mem = PyObject_Malloc(size); + if (mem == NULL) { + return _PyErr_NoMemory(tstate); + } + ((PyObject **)mem)[0] = NULL; + ((PyObject **)mem)[1] = NULL; + PyObject *op = (PyObject *)(mem + presize); + _PyObject_GC_Link(op); + return op; +} + +PyObject * +_PyObject_GC_New(PyTypeObject *tp) +{ + size_t presize = _PyType_PreHeaderSize(tp); + PyObject *op = gc_alloc(_PyObject_SIZE(tp), presize); + if (op == NULL) { + return NULL; + } + _PyObject_Init(op, tp); + return op; +} + +PyVarObject * +_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems) +{ + PyVarObject *op; + + if (nitems < 0) { + PyErr_BadInternalCall(); + return NULL; + } + size_t presize = _PyType_PreHeaderSize(tp); + size_t size = _PyObject_VAR_SIZE(tp, nitems); + op = 
(PyVarObject *)gc_alloc(size, presize); + if (op == NULL) { + return NULL; + } + _PyObject_InitVar(op, tp, nitems); + return op; +} + +PyObject * +PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size) +{ + size_t presize = _PyType_PreHeaderSize(tp); + PyObject *op = gc_alloc(_PyObject_SIZE(tp) + extra_size, presize); + if (op == NULL) { + return NULL; + } + memset(op, 0, _PyObject_SIZE(tp) + extra_size); + _PyObject_Init(op, tp); + return op; +} + +PyVarObject * +_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems) +{ + const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems); + const size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type); + _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op)); + if (basicsize > (size_t)PY_SSIZE_T_MAX - presize) { + return (PyVarObject *)PyErr_NoMemory(); + } + char *mem = (char *)op - presize; + mem = (char *)PyObject_Realloc(mem, presize + basicsize); + if (mem == NULL) { + return (PyVarObject *)PyErr_NoMemory(); + } + op = (PyVarObject *) (mem + presize); + Py_SET_SIZE(op, nitems); + return op; +} + +void +PyObject_GC_Del(void *op) +{ + size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type); + PyGC_Head *g = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op)) { + gc_list_remove(g); +#ifdef Py_DEBUG + PyObject *exc = PyErr_GetRaisedException(); + if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0, + "gc", NULL, "Object of type %s is not untracked before destruction", + ((PyObject*)op)->ob_type->tp_name)) { + PyErr_WriteUnraisable(NULL); + } + PyErr_SetRaisedException(exc); +#endif + } + GCState *gcstate = get_gc_state(); + if (gcstate->generations[0].count > 0) { + gcstate->generations[0].count--; + } + PyObject_Free(((char *)op)-presize); +} + +int +PyObject_GC_IsTracked(PyObject* obj) +{ + if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) { + return 1; + } + return 0; +} + +int +PyObject_GC_IsFinalized(PyObject *obj) +{ + if (_PyObject_IS_GC(obj) && 
_PyGC_FINALIZED(obj)) { + return 1; + } + return 0; +} + +void +PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) +{ + size_t i; + GCState *gcstate = get_gc_state(); + int origenstate = gcstate->enabled; + gcstate->enabled = 0; + for (i = 0; i < NUM_GENERATIONS; i++) { + PyGC_Head *gc_list, *gc; + gc_list = GEN_HEAD(gcstate, i); + for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) { + PyObject *op = FROM_GC(gc); + Py_INCREF(op); + int res = callback(op, arg); + Py_DECREF(op); + if (!res) { + goto done; + } + } + } +done: + gcstate->enabled = origenstate; +} From 3c4e972d6d0945a5401377bed25b307a88b19c75 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 5 Jan 2024 21:41:19 +0000 Subject: [PATCH 68/71] GH-113568: Stop raising auditing events from pathlib ABCs (#113571) Raise auditing events in `pathlib.Path.glob()`, `rglob()` and `walk()`, but not in `pathlib._abc.PathBase` methods. Also move generation of a deprecation warning into `pathlib.Path` so it gets the right stack level. 
--- Lib/pathlib/__init__.py | 43 ++++++++++++++++++- Lib/pathlib/_abc.py | 29 ++++--------- Lib/test/test_pathlib/test_pathlib.py | 12 ++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 13 ------ ...-12-29-17-30-49.gh-issue-113568.UpWNAI.rst | 2 + 5 files changed, 64 insertions(+), 35 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-12-29-17-30-49.gh-issue-113568.UpWNAI.rst diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 79b8b4917f6cc4..6a94886040f95d 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -9,6 +9,8 @@ import ntpath import os import posixpath +import sys +import warnings try: import pwd @@ -230,7 +232,6 @@ def _unsupported(cls, method_name): def __init__(self, *args, **kwargs): if kwargs: - import warnings msg = ("support for supplying keyword arguments to pathlib.PurePath " "is deprecated and scheduled for removal in Python {remove}") warnings._deprecated("pathlib.PurePath(**kwargs)", msg, remove=(3, 14)) @@ -309,6 +310,46 @@ def _make_child_entry(self, entry): path._tail_cached = self._tail + [entry.name] return path + def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): + """Iterate over this subtree and yield all existing files (of any + kind, including directories) matching the given relative pattern. + """ + sys.audit("pathlib.Path.glob", self, pattern) + if pattern.endswith('**'): + # GH-70303: '**' only matches directories. Add trailing slash. + warnings.warn( + "Pattern ending '**' will match files and directories in a " + "future Python release. 
Add a trailing slash to match only " + "directories and remove this warning.", + FutureWarning, 2) + pattern = f'{pattern}/' + return _abc.PathBase.glob( + self, pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks) + + def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None): + """Recursively yield all existing files (of any kind, including + directories) matching the given relative pattern, anywhere in + this subtree. + """ + sys.audit("pathlib.Path.rglob", self, pattern) + if pattern.endswith('**'): + # GH-70303: '**' only matches directories. Add trailing slash. + warnings.warn( + "Pattern ending '**' will match files and directories in a " + "future Python release. Add a trailing slash to match only " + "directories and remove this warning.", + FutureWarning, 2) + pattern = f'{pattern}/' + pattern = f'**/{pattern}' + return _abc.PathBase.glob( + self, pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks) + + def walk(self, top_down=True, on_error=None, follow_symlinks=False): + """Walk the directory tree from this directory, similar to os.walk().""" + sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) + return _abc.PathBase.walk( + self, top_down=top_down, on_error=on_error, follow_symlinks=follow_symlinks) + def absolute(self): """Return an absolute version of this path No normalization or symlink resolution is performed. diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index f75b20a1d5f1e5..da8d67f624107a 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -811,18 +811,6 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): """Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. 
""" - sys.audit("pathlib.Path.glob", self, pattern) - return self._glob(pattern, case_sensitive, follow_symlinks) - - def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None): - """Recursively yield all existing files (of any kind, including - directories) matching the given relative pattern, anywhere in - this subtree. - """ - sys.audit("pathlib.Path.rglob", self, pattern) - return self._glob(f'**/{pattern}', case_sensitive, follow_symlinks) - - def _glob(self, pattern, case_sensitive, follow_symlinks): path_pattern = self.with_segments(pattern) if path_pattern.drive or path_pattern.root: raise NotImplementedError("Non-relative patterns are unsupported") @@ -833,14 +821,6 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): if pattern[-1] in (self.pathmod.sep, self.pathmod.altsep): # GH-65238: pathlib doesn't preserve trailing slash. Add it back. pattern_parts.append('') - if pattern_parts[-1] == '**': - # GH-70303: '**' only matches directories. Add trailing slash. - warnings.warn( - "Pattern ending '**' will match files and directories in a " - "future Python release. Add a trailing slash to match only " - "directories and remove this warning.", - FutureWarning, 3) - pattern_parts.append('') if case_sensitive is None: # TODO: evaluate case-sensitivity of each directory in _select_children(). @@ -895,9 +875,16 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): paths = _select_children(paths, dir_only, follow_symlinks, match) return paths + def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None): + """Recursively yield all existing files (of any kind, including + directories) matching the given relative pattern, anywhere in + this subtree. 
+ """ + return self.glob( + f'**/{pattern}', case_sensitive=case_sensitive, follow_symlinks=follow_symlinks) + def walk(self, top_down=True, on_error=None, follow_symlinks=False): """Walk the directory tree from this directory, similar to os.walk().""" - sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) paths = [self] while paths: diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 8f95c804f80e69..b64e6b59da5d9a 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1703,6 +1703,18 @@ def test_glob_above_recursion_limit(self): with set_recursion_limit(recursion_limit): list(base.glob('**/')) + def test_glob_recursive_no_trailing_slash(self): + P = self.cls + p = P(self.base) + with self.assertWarns(FutureWarning): + p.glob('**') + with self.assertWarns(FutureWarning): + p.glob('*/**') + with self.assertWarns(FutureWarning): + p.rglob('**') + with self.assertWarns(FutureWarning): + p.rglob('*/**') + @only_posix class PosixPathTest(PathTest, PurePosixPathTest): diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index e4a4e81e547cd1..a67235b4da3dd3 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1266,19 +1266,6 @@ def test_glob_long_symlink(self): bad_link.symlink_to("bad" * 200) self.assertEqual(sorted(base.glob('**/*')), [bad_link]) - def test_glob_recursive_no_trailing_slash(self): - P = self.cls - p = P(self.base) - with self.assertWarns(FutureWarning): - p.glob('**') - with self.assertWarns(FutureWarning): - p.glob('*/**') - with self.assertWarns(FutureWarning): - p.rglob('**') - with self.assertWarns(FutureWarning): - p.rglob('*/**') - - def test_readlink(self): if not self.can_symlink: self.skipTest("symlinks required") diff --git a/Misc/NEWS.d/next/Library/2023-12-29-17-30-49.gh-issue-113568.UpWNAI.rst 
b/Misc/NEWS.d/next/Library/2023-12-29-17-30-49.gh-issue-113568.UpWNAI.rst new file mode 100644 index 00000000000000..aaca5250184122 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-29-17-30-49.gh-issue-113568.UpWNAI.rst @@ -0,0 +1,2 @@ +Raise audit events from :class:`pathlib.Path` and not its private base class +``PathBase``. From bd754b93ca837aa1f239252437a211271d068b71 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 6 Jan 2024 00:12:34 +0200 Subject: [PATCH 69/71] gh-85567: Fix resouce warnings in pickle and pickletools CLIs (GH-113618) Explicitly open and close files instead of using FileType. --- Lib/pickle.py | 10 ++++-- Lib/pickletools.py | 31 +++++++++++++------ ...4-01-01-13-26-02.gh-issue-85567.K4U15m.rst | 2 ++ 3 files changed, 30 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-01-13-26-02.gh-issue-85567.K4U15m.rst diff --git a/Lib/pickle.py b/Lib/pickle.py index 988c0887341310..33c97c8c5efb28 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -1793,7 +1793,7 @@ def _test(): parser = argparse.ArgumentParser( description='display contents of the pickle files') parser.add_argument( - 'pickle_file', type=argparse.FileType('br'), + 'pickle_file', nargs='*', help='the pickle file') parser.add_argument( '-t', '--test', action='store_true', @@ -1809,6 +1809,10 @@ def _test(): parser.print_help() else: import pprint - for f in args.pickle_file: - obj = load(f) + for fn in args.pickle_file: + if fn == '-': + obj = load(sys.stdin.buffer) + else: + with open(fn, 'rb') as f: + obj = load(f) pprint.pprint(obj) diff --git a/Lib/pickletools.py b/Lib/pickletools.py index 95706e746c9870..95a77aeb2afe2a 100644 --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -2848,10 +2848,10 @@ def _test(): parser = argparse.ArgumentParser( description='disassemble one or more pickle files') parser.add_argument( - 'pickle_file', type=argparse.FileType('br'), + 'pickle_file', nargs='*', help='the pickle file') parser.add_argument( - 
'-o', '--output', default=sys.stdout, type=argparse.FileType('w'), + '-o', '--output', help='the file where the output should be written') parser.add_argument( '-m', '--memo', action='store_true', @@ -2876,15 +2876,26 @@ def _test(): if args.test: _test() else: - annotate = 30 if args.annotate else 0 if not args.pickle_file: parser.print_help() - elif len(args.pickle_file) == 1: - dis(args.pickle_file[0], args.output, None, - args.indentlevel, annotate) else: + annotate = 30 if args.annotate else 0 memo = {} if args.memo else None - for f in args.pickle_file: - preamble = args.preamble.format(name=f.name) - args.output.write(preamble + '\n') - dis(f, args.output, memo, args.indentlevel, annotate) + if args.output is None: + output = sys.stdout + else: + output = open(args.output, 'w') + try: + for arg in args.pickle_file: + if len(args.pickle_file) > 1: + name = '' if arg == '-' else arg + preamble = args.preamble.format(name=name) + output.write(preamble + '\n') + if arg == '-': + dis(sys.stdin.buffer, output, memo, args.indentlevel, annotate) + else: + with open(arg, 'rb') as f: + dis(f, output, memo, args.indentlevel, annotate) + finally: + if output is not sys.stdout: + output.close() diff --git a/Misc/NEWS.d/next/Library/2024-01-01-13-26-02.gh-issue-85567.K4U15m.rst b/Misc/NEWS.d/next/Library/2024-01-01-13-26-02.gh-issue-85567.K4U15m.rst new file mode 100644 index 00000000000000..063443e5aecc02 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-01-13-26-02.gh-issue-85567.K4U15m.rst @@ -0,0 +1,2 @@ +Fix resource warnings for unclosed files in :mod:`pickle` and +:mod:`pickletools` command line interfaces. From d99d8712253c3affc54cf7f6e71f161dea8347ce Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 6 Jan 2024 00:23:16 +0200 Subject: [PATCH 70/71] gh-113360: Fix the documentation of module's attribute __test__ (GH-113393) It can only be a dict since Python 2.4. 
--- Doc/library/doctest.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/doctest.rst b/Doc/library/doctest.rst index 8c28e4478bb70e..e4bb6f5ec958c6 100644 --- a/Doc/library/doctest.rst +++ b/Doc/library/doctest.rst @@ -280,7 +280,7 @@ searched. Objects imported into the module are not searched. In addition, there are cases when you want tests to be part of a module but not part of the help text, which requires that the tests not be included in the docstring. Doctest looks for a module-level variable called ``__test__`` and uses it to locate other -tests. If ``M.__test__`` exists and is truthy, it must be a dict, and each +tests. If ``M.__test__`` exists, it must be a dict, and each entry maps a (string) name to a function object, class object, or string. Function and class object docstrings found from ``M.__test__`` are searched, and strings are treated as if they were docstrings. In output, a key ``K`` in @@ -944,8 +944,8 @@ and :ref:`doctest-simple-testfile`. (or module :mod:`__main__` if *m* is not supplied or is ``None``), starting with ``m.__doc__``. - Also test examples reachable from dict ``m.__test__``, if it exists and is not - ``None``. ``m.__test__`` maps names (strings) to functions, classes and + Also test examples reachable from dict ``m.__test__``, if it exists. + ``m.__test__`` maps names (strings) to functions, classes and strings; function and class docstrings are searched for examples; strings are searched directly, as if they were docstrings. 
From 3375dfed400494ba5cc1b744d52f6fb8b7796059 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 5 Jan 2024 22:56:04 +0000 Subject: [PATCH 71/71] GH-113568: Stop raising deprecation warnings from pathlib ABCs (#113757) --- Lib/pathlib/__init__.py | 27 +++++++++++++++++++ Lib/pathlib/_abc.py | 21 +++------------ Lib/test/test_pathlib/test_pathlib.py | 13 +++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 7 ----- ...-01-05-21-52-59.gh-issue-113568._0FkpZ.rst | 2 ++ 5 files changed, 46 insertions(+), 24 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-05-21-52-59.gh-issue-113568._0FkpZ.rst diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 6a94886040f95d..115ccf78e3befe 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -166,6 +166,33 @@ def __ge__(self, other): return NotImplemented return self._parts_normcase >= other._parts_normcase + def relative_to(self, other, /, *_deprecated, walk_up=False): + """Return the relative path to another path identified by the passed + arguments. If the operation is not possible (because this is not + related to the other path), raise ValueError. + + The *walk_up* parameter controls whether `..` may be used to resolve + the path. + """ + if _deprecated: + msg = ("support for supplying more than one positional argument " + "to pathlib.PurePath.relative_to() is deprecated and " + "scheduled for removal in Python 3.14") + warnings.warn(msg, DeprecationWarning, stacklevel=2) + other = self.with_segments(other, *_deprecated) + return _abc.PurePathBase.relative_to(self, other, walk_up=walk_up) + + def is_relative_to(self, other, /, *_deprecated): + """Return True if the path is relative to another path or False. 
+ """ + if _deprecated: + msg = ("support for supplying more than one argument to " + "pathlib.PurePath.is_relative_to() is deprecated and " + "scheduled for removal in Python 3.14") + warnings.warn(msg, DeprecationWarning, stacklevel=2) + other = self.with_segments(other, *_deprecated) + return _abc.PurePathBase.is_relative_to(self, other) + def as_uri(self): """Return the path as a URI.""" if not self.is_absolute(): diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index da8d67f624107a..b1204e88044a3f 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -2,7 +2,6 @@ import ntpath import posixpath import sys -import warnings from _collections_abc import Sequence from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL from itertools import chain @@ -383,7 +382,7 @@ def with_suffix(self, suffix): else: raise ValueError(f"Invalid suffix {suffix!r}") - def relative_to(self, other, /, *_deprecated, walk_up=False): + def relative_to(self, other, *, walk_up=False): """Return the relative path to another path identified by the passed arguments. If the operation is not possible (because this is not related to the other path), raise ValueError. @@ -391,13 +390,7 @@ def relative_to(self, other, /, *_deprecated, walk_up=False): The *walk_up* parameter controls whether `..` may be used to resolve the path. 
""" - if _deprecated: - msg = ("support for supplying more than one positional argument " - "to pathlib.PurePath.relative_to() is deprecated and " - "scheduled for removal in Python 3.14") - warnings.warn(msg, DeprecationWarning, stacklevel=2) - other = self.with_segments(other, *_deprecated) - elif not isinstance(other, PurePathBase): + if not isinstance(other, PurePathBase): other = self.with_segments(other) for step, path in enumerate(chain([other], other.parents)): if path == self or path in self.parents: @@ -411,16 +404,10 @@ def relative_to(self, other, /, *_deprecated, walk_up=False): parts = ['..'] * step + self._tail[len(path._tail):] return self._from_parsed_parts('', '', parts) - def is_relative_to(self, other, /, *_deprecated): + def is_relative_to(self, other): """Return True if the path is relative to another path or False. """ - if _deprecated: - msg = ("support for supplying more than one argument to " - "pathlib.PurePath.is_relative_to() is deprecated and " - "scheduled for removal in Python 3.14") - warnings.warn(msg, DeprecationWarning, stacklevel=2) - other = self.with_segments(other, *_deprecated) - elif not isinstance(other, PurePathBase): + if not isinstance(other, PurePathBase): other = self.with_segments(other) return other == self or other in self.parents diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index b64e6b59da5d9a..93fe327a0d3c23 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -214,6 +214,19 @@ def test_repr_roundtrips(self): self.assertEqual(q, p) self.assertEqual(repr(q), r) + def test_relative_to_several_args(self): + P = self.cls + p = P('a/b') + with self.assertWarns(DeprecationWarning): + p.relative_to('a', 'b') + p.relative_to('a', 'b', walk_up=True) + + def test_is_relative_to_several_args(self): + P = self.cls + p = P('a/b') + with self.assertWarns(DeprecationWarning): + p.is_relative_to('a', 'b') + class 
PurePosixPathTest(PurePathTest): cls = pathlib.PurePosixPath diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index a67235b4da3dd3..3a7c036077e2a1 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -542,10 +542,6 @@ def test_relative_to_common(self): self.assertEqual(p.relative_to('a/b/c', walk_up=True), P('..')) self.assertEqual(p.relative_to(P('c'), walk_up=True), P('../a/b')) self.assertEqual(p.relative_to('c', walk_up=True), P('../a/b')) - # With several args. - with self.assertWarns(DeprecationWarning): - p.relative_to('a', 'b') - p.relative_to('a', 'b', walk_up=True) # Unrelated paths. self.assertRaises(ValueError, p.relative_to, P('c')) self.assertRaises(ValueError, p.relative_to, P('a/b/c')) @@ -607,9 +603,6 @@ def test_is_relative_to_common(self): self.assertTrue(p.is_relative_to('a/')) self.assertTrue(p.is_relative_to(P('a/b'))) self.assertTrue(p.is_relative_to('a/b')) - # With several args. - with self.assertWarns(DeprecationWarning): - p.is_relative_to('a', 'b') # Unrelated paths. self.assertFalse(p.is_relative_to(P('c'))) self.assertFalse(p.is_relative_to(P('a/b/c'))) diff --git a/Misc/NEWS.d/next/Library/2024-01-05-21-52-59.gh-issue-113568._0FkpZ.rst b/Misc/NEWS.d/next/Library/2024-01-05-21-52-59.gh-issue-113568._0FkpZ.rst new file mode 100644 index 00000000000000..4900730ddff5fa --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-05-21-52-59.gh-issue-113568._0FkpZ.rst @@ -0,0 +1,2 @@ +Raise deprecation warnings from :class:`pathlib.PurePath` and not its +private base class ``PurePathBase``.