ROB: Handle params NullObject in decode_stream_data (#1738)

Multiple filters in a stream were not using their respective DecodeParms entries. Closes #1737. Co-authored-by: Martin Thoma <info@martin-thoma.de>
py-pdf · Mar 25, 2023 · 20ebcfd · 20ebcfd
1 parent 0acf949
commit 20ebcfd
Show file tree

Hide file tree

Showing 3 changed files with 35 additions and 13 deletions.
diff --git a/pypdf/filters.py b/pypdf/filters.py
@@ -50,7 +50,12 @@
 from .constants import LzwFilterParameters as LZW
 from .constants import StreamAttributes as SA
 from .errors import PdfReadError, PdfStreamError
-from .generic import ArrayObject, DictionaryObject, IndirectObject, NameObject
+from .generic import (
+    ArrayObject,
+    DictionaryObject,
+    IndirectObject,
+    NullObject,
+)
 
 if TYPE_CHECKING:
     try:
@@ -534,19 +539,24 @@ def decode_stream_data(stream: Any) -> Union[str, bytes]:  # utils.StreamObject
     filters = stream.get(SA.FILTER, ())
     if isinstance(filters, IndirectObject):
         filters = cast(ArrayObject, filters.get_object())
-    if len(filters) and not isinstance(filters[0], NameObject):
+    if not isinstance(filters, ArrayObject):
         # we have a single filter instance
         filters = (filters,)
+    decodparms = stream.get(SA.DECODE_PARMS, ({},) * len(filters))
+    if not isinstance(decodparms, (list, tuple)):
+        decodparms = (decodparms,)
     data: bytes = stream._data
     # If there is not data to decode we should not try to decode the data.
     if data:
-        for filter_type in filters:
+        for filter_type, params in zip(filters, decodparms):
+            if isinstance(params, NullObject):
+                params = {}
             if filter_type in (FT.FLATE_DECODE, FTA.FL):
-                data = FlateDecode.decode(data, stream.get(SA.DECODE_PARMS))
+                data = FlateDecode.decode(data, params)
             elif filter_type in (FT.ASCII_HEX_DECODE, FTA.AHx):
                 data = ASCIIHexDecode.decode(data)  # type: ignore
             elif filter_type in (FT.LZW_DECODE, FTA.LZW):
-                data = LZWDecode.decode(data, stream.get(SA.DECODE_PARMS))  # type: ignore
+                data = LZWDecode.decode(data, params)  # type: ignore
             elif filter_type in (FT.ASCII_85_DECODE, FTA.A85):
                 data = ASCII85Decode.decode(data)
             elif filter_type == FT.DCT_DECODE:
@@ -555,10 +565,9 @@ def decode_stream_data(stream: Any) -> Union[str, bytes]:  # utils.StreamObject
                 data = JPXDecode.decode(data)
             elif filter_type == FT.CCITT_FAX_DECODE:
                 height = stream.get(IA.HEIGHT, ())
-                data = CCITTFaxDecode.decode(data, stream.get(SA.DECODE_PARMS), height)
+                data = CCITTFaxDecode.decode(data, params, height)
             elif filter_type == "/Crypt":
-                decode_parms = stream.get(SA.DECODE_PARMS, {})
-                if "/Name" not in decode_parms and "/Type" not in decode_parms:
+                if "/Name" not in params and "/Type" not in params:
                     pass
                 else:
                     raise NotImplementedError(

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1,9 +1,16 @@
 """Fixtures that are available automatically for all tests."""
 
 import uuid
+from pathlib import Path
 
 import pytest
 
+from pypdf import PdfReader
+
+TESTS_ROOT = Path(__file__).parent.resolve()
+PROJECT_ROOT = TESTS_ROOT.parent
+RESOURCE_ROOT = PROJECT_ROOT / "resources"
+
 
 @pytest.fixture(scope="session")
 def pdf_file_path(tmp_path_factory):
@@ -15,3 +22,12 @@ def pdf_file_path(tmp_path_factory):
 def txt_file_path(tmp_path_factory):
     fn = tmp_path_factory.mktemp("pypdf-data") / f"{uuid.uuid4()}.txt"
     return fn
+
+
+@pytest.fixture(scope="session")
+def pdf_reader_page():
+    """Gives a page that was retrieved from a PDF via PdfReader."""
+    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
+    reader = PdfReader(pdf_path)
+    page = reader.pages[0]
+    return page
diff --git a/tests/test_generic.py b/tests/test_generic.py
@@ -770,13 +770,10 @@ def test_annotation_builder_polygon(pdf_file_path):
         writer.write(fp)
 
 
-def test_annotation_builder_polyline(pdf_file_path):
+def test_annotation_builder_polyline(pdf_file_path, pdf_reader_page):
     # Arrange
-    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
-    reader = PdfReader(pdf_path)
-    page = reader.pages[0]
     writer = PdfWriter()
-    writer.add_page(page)
+    writer.add_page(pdf_reader_page)
 
     # Act
     with pytest.raises(ValueError) as exc: