Skip to content

Commit

Permalink
BUG: Process CMYK in deflate images (#1977)
Browse files Browse the repository at this point in the history
Closes #1954
  • Loading branch information
pubpub-zz authored Jul 18, 2023
1 parent 1d16ca5 commit c2a741e
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 5 deletions.
18 changes: 13 additions & 5 deletions pypdf/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,7 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
return bytes(nbuff)

extension = ".png" # mime_type = "image/png"
image_format = "PNG"
lookup: Any
base: Any
hival: Any
Expand Down Expand Up @@ -794,10 +795,14 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
elif not isinstance(color_space, NullObject) and color_space[0] == "/ICCBased":
# see Table 66 - Additional Entries Specific to an ICC Profile
# Stream Dictionary
mode = _get_imagemode(color_space, colors, mode)
extension = ".png"
img = Image.frombytes(mode, size, data) # reloaded as mode may have changed
image_format = "PNG"
mode2 = _get_imagemode(color_space, colors, mode)
if mode != mode2:
img = Image.frombytes(
mode2, size, data
) # reloaded as mode may have changed
if mode == "CMYK":
extension = ".tif"
image_format = "TIFF"
return img, image_format, extension

def _handle_jpx(
Expand Down Expand Up @@ -907,7 +912,10 @@ def _handle_jpx(

# CMYK image without decode requires reverting scale (cf p243,2§ last sentence)
decode = x_object_obj.get(
IA.DECODE, ([1.0, 0.0] * 4) if img.mode == "CMYK" else None
IA.DECODE,
([1.0, 0.0] * 4)
if img.mode == "CMYK" and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE)
else None,
)
if (
isinstance(color_space, ArrayObject)
Expand Down
20 changes: 20 additions & 0 deletions tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,7 @@ def test_rgba():
@pytest.mark.enable_socket()
def test_cmyk():
"""Decode cmyk"""
# JPEG compression
try:
from Crypto.Cipher import AES # noqa: F401
except ImportError:
Expand All @@ -401,11 +402,30 @@ def test_cmyk():
BytesIO(get_pdf_from_url(url_png, name=name_png))
) # not a pdf but it works
data = reader.pages[1].images[0]
assert data.image.mode == "CMYK"
assert ".jpg" in data.name
diff = ImageChops.difference(data.image, refimg)
d = sqrt(
sum([(a * a + b * b + c * c + d * d) for a, b, c, d in diff.getdata()])
) / (diff.size[0] * diff.size[1])
assert d < 0.01
# deflate
url = "https://github.com/py-pdf/pypdf/files/12078533/cmyk2.pdf"
name = "cmyk_deflate.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
url_png = "https://github.com/py-pdf/pypdf/files/12078556/cmyk.tif.txt"
name_png = "cmyk_deflate.tif"
refimg = Image.open(
BytesIO(get_pdf_from_url(url_png, name=name_png))
) # not a pdf but it works
data = reader.pages[0].images[0]
assert data.image.mode == "CMYK"
assert ".tif" in data.name
diff = ImageChops.difference(data.image, refimg)
d = sqrt(
sum([(a * a + b * b + c * c + d * d) for a, b, c, d in diff.getdata()])
) / (diff.size[0] * diff.size[1])
assert d < 0.001 # lossless compression expected


@pytest.mark.enable_socket()
Expand Down

0 comments on commit c2a741e

Please sign in to comment.