From 0cfe9061f882e30182918dbcd6d414adde5c777b Mon Sep 17 00:00:00 2001 From: geisserml Date: Fri, 12 May 2023 23:37:48 +0200 Subject: [PATCH] Do NOT call bitmap.close() in renderer worker (see description!) This fixes premature buffer destruction when using the COMBINATION of foreign bitmap and zero-copy converter. --- docs/devel/changelog_staging.md | 1 + src/pypdfium2/_helpers/bitmap.py | 9 ++++++++- src/pypdfium2/_helpers/document.py | 5 ++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/docs/devel/changelog_staging.md b/docs/devel/changelog_staging.md index d15b83b2a..034d86e4b 100644 --- a/docs/devel/changelog_staging.md +++ b/docs/devel/changelog_staging.md @@ -4,3 +4,4 @@ # Changelog for next release +- In `PdfDocument.render()`, fixed a bad `bitmap.close()` call that would lead to a downstream use after free when using the combination of foreign bitmap and no-copy conversion. Using foreign bitmaps was not the default and expressly not recommended. diff --git a/src/pypdfium2/_helpers/bitmap.py b/src/pypdfium2/_helpers/bitmap.py index 789395fb6..8dae04084 100644 --- a/src/pypdfium2/_helpers/bitmap.py +++ b/src/pypdfium2/_helpers/bitmap.py @@ -38,6 +38,10 @@ class PdfBitmap (AutoCloseable): Note: All attributes of :class:`.PdfBitmapInfo` are available in this class as well. + Warning: + ``bitmap.close()``, which frees the buffer of foreign bitmaps, is not validated for safety. + A bitmap must not be closed when other objects still depend on its buffer! + Attributes: raw (FPDF_BITMAP): The underlying PDFium bitmap handle. @@ -161,7 +165,10 @@ def fill_rect(self, left, top, width, height, color): pdfium_c.FPDFBitmap_FillRect(self, left, top, width, height, c_color) - # Assumption: If the result is a view of the buffer, it holds a reference to said buffer (directly or indirectly), so that a possible finalizer is not called prematurely. This seems to hold true for numpy and PIL. + # Requirement: If the result is a view of the buffer (not a copy), it keeps the buffer's memory valid by retaining a reference to the buffer object. + # As of May 2023, this seems to hold true for numpy and PIL. + # TODO? Consider attaching a buffer keep-alive finalizer to the converted object + def to_numpy(self): """ diff --git a/src/pypdfium2/_helpers/document.py b/src/pypdfium2/_helpers/document.py index e7965300a..fe853e39c 100644 --- a/src/pypdfium2/_helpers/document.py +++ b/src/pypdfium2/_helpers/document.py @@ -550,7 +550,10 @@ def _process_page(cls, index, input_data, password, renderer, converter, pass_in info = bitmap.get_info() result = converter(bitmap) - for g in (bitmap, page, pdf): + # NOTE We MUST NOT call bitmap.close() before the converted object is serialized to the main process, otherwise we would free the buffer of a foreign bitmap prematurely if the converted object references the buffer rather than owning a copy. Confirmed by POC. + # This is not an issue when freeing the bitmap on garbage collection, provided the converted object keeps the buffer alive. + + for g in (page, pdf): g.close() return (result, info) if pass_info else result