Skip to content

Commit

Permalink
ROB: Handle params NullObject in decode_stream_data (#1738)
Browse files Browse the repository at this point in the history
Multiple filters in streams are not using their DecodeParms

Closes #1737

Co-authored-by: Martin Thoma <info@martin-thoma.de>
  • Loading branch information
pubpub-zz and MartinThoma authored Mar 25, 2023
1 parent 0acf949 commit 20ebcfd
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 13 deletions.
25 changes: 17 additions & 8 deletions pypdf/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@
from .constants import LzwFilterParameters as LZW
from .constants import StreamAttributes as SA
from .errors import PdfReadError, PdfStreamError
from .generic import ArrayObject, DictionaryObject, IndirectObject, NameObject
from .generic import (
ArrayObject,
DictionaryObject,
IndirectObject,
NullObject,
)

if TYPE_CHECKING:
try:
Expand Down Expand Up @@ -534,19 +539,24 @@ def decode_stream_data(stream: Any) -> Union[str, bytes]: # utils.StreamObject
filters = stream.get(SA.FILTER, ())
if isinstance(filters, IndirectObject):
filters = cast(ArrayObject, filters.get_object())
if len(filters) and not isinstance(filters[0], NameObject):
if not isinstance(filters, ArrayObject):
# we have a single filter instance
filters = (filters,)
decodparms = stream.get(SA.DECODE_PARMS, ({},) * len(filters))
if not isinstance(decodparms, (list, tuple)):
decodparms = (decodparms,)
data: bytes = stream._data
# If there is not data to decode we should not try to decode the data.
if data:
for filter_type in filters:
for filter_type, params in zip(filters, decodparms):
if isinstance(params, NullObject):
params = {}
if filter_type in (FT.FLATE_DECODE, FTA.FL):
data = FlateDecode.decode(data, stream.get(SA.DECODE_PARMS))
data = FlateDecode.decode(data, params)
elif filter_type in (FT.ASCII_HEX_DECODE, FTA.AHx):
data = ASCIIHexDecode.decode(data) # type: ignore
elif filter_type in (FT.LZW_DECODE, FTA.LZW):
data = LZWDecode.decode(data, stream.get(SA.DECODE_PARMS)) # type: ignore
data = LZWDecode.decode(data, params) # type: ignore
elif filter_type in (FT.ASCII_85_DECODE, FTA.A85):
data = ASCII85Decode.decode(data)
elif filter_type == FT.DCT_DECODE:
Expand All @@ -555,10 +565,9 @@ def decode_stream_data(stream: Any) -> Union[str, bytes]: # utils.StreamObject
data = JPXDecode.decode(data)
elif filter_type == FT.CCITT_FAX_DECODE:
height = stream.get(IA.HEIGHT, ())
data = CCITTFaxDecode.decode(data, stream.get(SA.DECODE_PARMS), height)
data = CCITTFaxDecode.decode(data, params, height)
elif filter_type == "/Crypt":
decode_parms = stream.get(SA.DECODE_PARMS, {})
if "/Name" not in decode_parms and "/Type" not in decode_parms:
if "/Name" not in params and "/Type" not in params:
pass
else:
raise NotImplementedError(
Expand Down
16 changes: 16 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
"""Fixtures that are available automatically for all tests."""

import uuid
from pathlib import Path

import pytest

from pypdf import PdfReader

TESTS_ROOT = Path(__file__).parent.resolve()
PROJECT_ROOT = TESTS_ROOT.parent
RESOURCE_ROOT = PROJECT_ROOT / "resources"


@pytest.fixture(scope="session")
def pdf_file_path(tmp_path_factory):
Expand All @@ -15,3 +22,12 @@ def pdf_file_path(tmp_path_factory):
def txt_file_path(tmp_path_factory):
fn = tmp_path_factory.mktemp("pypdf-data") / f"{uuid.uuid4()}.txt"
return fn


@pytest.fixture(scope="session")
def pdf_reader_page():
    """Return the first page of ``crazyones.pdf`` as read by PdfReader.

    The fixture is declared with ``scope="session"``; the PDF is looked up
    under ``RESOURCE_ROOT``.
    """
    return PdfReader(RESOURCE_ROOT / "crazyones.pdf").pages[0]
7 changes: 2 additions & 5 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,13 +770,10 @@ def test_annotation_builder_polygon(pdf_file_path):
writer.write(fp)


def test_annotation_builder_polyline(pdf_file_path):
def test_annotation_builder_polyline(pdf_file_path, pdf_reader_page):
# Arrange
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
reader = PdfReader(pdf_path)
page = reader.pages[0]
writer = PdfWriter()
writer.add_page(page)
writer.add_page(pdf_reader_page)

# Act
with pytest.raises(ValueError) as exc:
Expand Down

0 comments on commit 20ebcfd

Please sign in to comment.