Skip to content

Commit

Permalink
BUG: Fix sheared image (#2801)
Browse files Browse the repository at this point in the history
Closes #2411.
  • Loading branch information
pubpub-zz authored Aug 15, 2024
1 parent d9a8c54 commit 799630d
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 2 deletions.
2 changes: 1 addition & 1 deletion pypdf/_xobj_image_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
by = 0
bit = 8 - bits
for y in range(size[1]):
- if (bit != 0) and (bit != 8 - bits):
+ if bit != 8 - bits:
by += 1
bit = 8 - bits
for x in range(size[0]):
Expand Down
11 changes: 11 additions & 0 deletions tests/test_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,3 +462,14 @@ def test_extract_image_from_object(caplog):
co = reader.pages[0].get_contents()
co.decode_as_image()
assert "does not seem to be an Image" in caplog.text


@pytest.mark.enable_socket()
def test_4bits_images(caplog):
    """Regression test for issue #2411 (sheared image).

    Downloads the sample PDF and a reference PNG, then checks that the
    second image of the first page is identical to the reference.
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/16624406/tt.pdf",
        name="iss2411.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    reference_bytes = get_data_from_url(
        "https://github.com/user-attachments/assets/53058564-9a28-4e4a-818f-a6528013d7dc",
        name="iss2411.png",
    )
    expected_image = Image.open(BytesIO(reference_bytes))

    # images[1] is the image compared against the reference PNG.
    extracted_image = reader.pages[0].images[1].image
    assert image_similarity(extracted_image, expected_image) == 1.0
2 changes: 1 addition & 1 deletion tests/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ def test_get_metadata(url, name, expected_metadata):
("url", "name", "strict", "exception"),
[
(
- "https://corpora.tika.apache.org/base/docs/govdocs1/938/938702.pdf",
+ "https://github.com/user-attachments/files/16624503/tika-938702.pdf",
"tika-938702.pdf",
False,
None, # iss #1090 is now fixed
Expand Down

0 comments on commit 799630d

Please sign in to comment.