py-pdf · pubpub-zz · Dec 19, 2024 · Dec 19, 2024
diff --git a/pypdf/filters.py b/pypdf/filters.py
@@ -446,7 +446,13 @@ def decode(
         if isinstance(data, str):
             data = data.encode()
         data = data.strip(WHITESPACES_AS_BYTES)
-        return a85decode(data, adobe=True, ignorechars=WHITESPACES_AS_BYTES)
+        try:
+            return a85decode(data, adobe=True, ignorechars=WHITESPACES_AS_BYTES)
+        except ValueError as error:
+            if error.args[0] == "Ascii85 encoded byte sequences must end with b'~>'":
+                logger_warning("Ignoring missing Ascii85 end marker.", __name__)
+                return a85decode(data, adobe=False, ignorechars=WHITESPACES_AS_BYTES)
+            raise
 
 
 class DCTDecode:

diff --git a/tests/test_filters.py b/tests/test_filters.py
@@ -594,3 +594,29 @@ def test_flate_decode_with_image_mode_1__whitespace_at_end_of_lookup():
     name = "issue2331.pdf"
     reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
     reader.pages[0].images[0]
+
+
+@pytest.mark.enable_socket
+def test_ascii85decode__invalid_end__recoverable(caplog):
+    """From #2996: a missing Ascii85 end marker is logged as a warning instead of raised."""
+    url = "https://github.com/user-attachments/files/18050808/1af7d56a-5c8c-4914-85b3-b2536a5525cd.pdf"
+    name = "issue2996.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+
+    page = reader.pages[1]
+    assert page.extract_text() == ""
+    assert "Ignoring missing Ascii85 end marker." in caplog.text
+
+
+def test_ascii85decode__non_recoverable(caplog):
+    # No `~>` end marker: without our custom handling, this would only complain about the marker being missing.
+    data = "äöüß"  # decode() encodes str input; the match below expects "Ã" (0xC3, first UTF-8 byte of "ä")
+    with pytest.raises(ValueError, match="Non-Ascii85 digit found: Ã"):
+        ASCII85Decode.decode(data)
+    assert "Ignoring missing Ascii85 end marker." in caplog.text
+    caplog.clear()
+
+    data += "~>"  # with the end marker present, the same digit error is expected but no warning
+    with pytest.raises(ValueError, match="Non-Ascii85 digit found: Ã"):
+        ASCII85Decode.decode(data)
+    assert caplog.text == ""