From 38f5efefcd41c48f5f7585eb3d4f17a4376798b9 Mon Sep 17 00:00:00 2001 From: geisserml Date: Sat, 18 May 2024 23:32:34 +0200 Subject: [PATCH] PdfPage.get_objects(): don't register objects as kids This was especially problematic as weakrefs are not cleaned up when the object in question is closed/collected, so we potentially store many dead pointers. Imagine a caller invoking get_objects() repeatedly to iterate, with the page handle living for a long time afterwards - that somewhat resembles a memory leak. --- src/pypdfium2/_helpers/page.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pypdfium2/_helpers/page.py b/src/pypdfium2/_helpers/page.py index 38c7ab4be..bff335f5e 100644 --- a/src/pypdfium2/_helpers/page.py +++ b/src/pypdfium2/_helpers/page.py @@ -283,8 +283,8 @@ def get_objects(self, filter=None, max_depth=2, form=None, level=0): if raw_obj is None: raise PdfiumError("Failed to get page object.") + # Not a child object, because the lifetime of pageobjects that are part of a page is managed by pdfium. The .page reference is enough to keep the parent alive, unless the caller explicitly closes it (which may not merit storing countless weakrefs). helper_obj = PdfObject(raw_obj, page=self, pdf=self.pdf, level=level) - self._add_kid(helper_obj) if not filter or helper_obj.type in filter: yield helper_obj