diff --git a/pypdf/filters.py b/pypdf/filters.py index e4aa6ea4c..461411d31 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -743,6 +743,7 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes: return bytes(nbuff) extension = ".png" # mime_type = "image/png" + image_format = "PNG" lookup: Any base: Any hival: Any @@ -794,10 +795,14 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes: elif not isinstance(color_space, NullObject) and color_space[0] == "/ICCBased": # see Table 66 - Additional Entries Specific to an ICC Profile # Stream Dictionary - mode = _get_imagemode(color_space, colors, mode) - extension = ".png" - img = Image.frombytes(mode, size, data) # reloaded as mode may have change - image_format = "PNG" + mode2 = _get_imagemode(color_space, colors, mode) + if mode != mode2: + img = Image.frombytes( + mode2, size, data + ) # reloaded as mode may have changed + if mode == "CMYK": + extension = ".tif" + image_format = "TIFF" return img, image_format, extension def _handle_jpx( @@ -907,7 +912,10 @@ def _handle_jpx( # CMYK image without decode requires reverting scale (cf p243,2§ last sentence) decode = x_object_obj.get( - IA.DECODE, ([1.0, 0.0] * 4) if img.mode == "CMYK" else None + IA.DECODE, + ([1.0, 0.0] * 4) + if img.mode == "CMYK" and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE) + else None, ) if ( isinstance(color_space, ArrayObject) diff --git a/tests/test_filters.py b/tests/test_filters.py index 2eb8b58c0..7353a9bdf 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -388,6 +388,7 @@ def test_rgba(): @pytest.mark.enable_socket() def test_cmyk(): """Decode cmyk""" + # JPEG compression try: from Crypto.Cipher import AES # noqa: F401 except ImportError: @@ -401,11 +402,30 @@ def test_cmyk(): BytesIO(get_pdf_from_url(url_png, name=name_png)) ) # not a pdf but it works data = reader.pages[1].images[0] + assert data.image.mode == "CMYK" + assert ".jpg" in data.name diff = 
ImageChops.difference(data.image, refimg) d = sqrt( sum([(a * a + b * b + c * c + d * d) for a, b, c, d in diff.getdata()]) ) / (diff.size[0] * diff.size[1]) assert d < 0.01 + # deflate + url = "https://github.com/py-pdf/pypdf/files/12078533/cmyk2.pdf" + name = "cmyk_deflate.pdf" + reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name))) + url_png = "https://github.com/py-pdf/pypdf/files/12078556/cmyk.tif.txt" + name_png = "cmyk_deflate.tif" + refimg = Image.open( + BytesIO(get_pdf_from_url(url_png, name=name_png)) + ) # not a pdf but it works + data = reader.pages[0].images[0] + assert data.image.mode == "CMYK" + assert ".tif" in data.name + diff = ImageChops.difference(data.image, refimg) + d = sqrt( + sum([(a * a + b * b + c * c + d * d) for a, b, c, d in diff.getdata()]) + ) / (diff.size[0] * diff.size[1]) + assert d < 0.001 # lossless compression expected @pytest.mark.enable_socket()