Skip to content

Commit

Permalink
add image replace
Browse files Browse the repository at this point in the history
  • Loading branch information
pubpub-zz committed May 20, 2023
1 parent 7a1a714 commit 2d531d0
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 6 deletions.
17 changes: 12 additions & 5 deletions pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,11 +540,18 @@ def images(self) -> List[FileImage]:
images.keys() and image.items() work
The File object properties are:
.name : name of the object
.data : bytes of the object
.image : PIL Image Object
.indirect_reference : object reference
The FileImage object:
properties:
`.name` : name of the object
`.data` : bytes of the object
`.image` : PIL Image Object
`.indirect_reference` : object reference
methods:
`.replace(new_image: PIL.Image.Image, **kwargs)` :
replace the image in the pdf with the new image
applying the saving parameters indicated (such as quality)
e.g. :
`writer.pages[0].images[0].replace(Image.open("new_image.jpg"), quality=20)`
"""
return _VirtualListImages(self._get_ids_image, self._get_image) # type: ignore

Expand Down
42 changes: 41 additions & 1 deletion pypdf/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
import warnings
from codecs import getencoder
from dataclasses import dataclass
from io import DEFAULT_BUFFER_SIZE
from io import DEFAULT_BUFFER_SIZE, BytesIO
from os import SEEK_CUR
from typing import (
IO,
Expand All @@ -45,6 +45,7 @@
Pattern,
Tuple,
Union,
cast,
overload,
)

Expand Down Expand Up @@ -508,3 +509,42 @@ class FileImage(File):

image: Optional[Any] = None # optional ; direct PIL image access
indirect_reference: Optional[IndirectObject] = None # optional ; link to PdfObject

def replace(self, new_image: Any, **kwargs: Any) -> None:
    """
    Replace the image in the PDF with a new PIL image.

    The new image is saved by PIL into an in-memory one-page PDF; the image
    object found on that page is then stored in the owning document under
    the object number of the current image. Finally ``name``, ``data`` and
    ``image`` of this object are refreshed from the newly stored object.

    Args:
        new_image: The ``PIL.Image.Image`` that replaces the current image.
        **kwargs: Extra parameters forwarded to ``Image.Image.save()``,
            such as ``quality``.

    Raises:
        TypeError: If this is an inline image (no indirect reference), if
            the image does not belong to a PdfWriter, or if ``new_image``
            is not a PIL image.
    """
    from PIL import Image

    from ._reader import PdfReader

    # to prevent circular import
    from .filters import _xobj_to_image
    from .generic import DictionaryObject, PdfObject

    if self.indirect_reference is None:
        raise TypeError("Can not update an inline image")
    # `_id_translated` is used here as the marker of a PdfWriter
    if not hasattr(self.indirect_reference.pdf, "_id_translated"):
        raise TypeError("Can not update an image not belonging to a PdfWriter")
    if not isinstance(new_image, Image.Image):
        raise TypeError("new_image shall be a PIL Image")

    # Let PIL encode the image as a PDF and read the result back.
    buffer = BytesIO()
    new_image.save(buffer, "PDF", **kwargs)
    reader = PdfReader(buffer)

    # Look the generated image up once: each access to `images[0]`
    # builds a new FileImage, so repeating it is wasted work.
    new_reference = reader.pages[0].images[0].indirect_reference
    assert new_reference is not None

    # Overwrite the slot of the current image (object numbers are 1-based)
    # and make the stored object point back to our reference.
    self.indirect_reference.pdf._objects[self.indirect_reference.idnum - 1] = (
        new_reference.get_object()
    )
    stored = self.indirect_reference.get_object()
    cast(PdfObject, stored).indirect_reference = self.indirect_reference

    # change the object attributes
    extension, byte_stream, img = _xobj_to_image(cast(DictionaryObject, stored))
    assert extension is not None
    # Swap the extension. If the current name has no "." keep it whole:
    # slicing with `rfind(".") == -1` would drop its last character.
    stem, dot, _ = self.name.rpartition(".")
    self.name = (stem if dot else self.name) + extension
    self.data = byte_stream
    self.image = img
19 changes: 19 additions & 0 deletions tests/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from re import findall

import pytest
from PIL import ImageChops

from pypdf import PdfMerger, PdfReader, PdfWriter
from pypdf.constants import PageAttributes as PG
Expand Down Expand Up @@ -934,3 +935,21 @@ def test_fields_returning_stream():
data = BytesIO(get_pdf_from_url(url, name=name))
reader = PdfReader(data, strict=False)
assert "BtchIssQATit_time" in reader.get_form_text_fields()["TimeStampData"]


def test_replace_image(tmp_path):
    """
    Check that an image of a writer can be replaced.

    The replaced image must stay close to the source image after a
    write/read round trip, and saving with a low ``quality`` must move it
    further away from the source than the default settings do.
    """

    def mean_gray_difference(first, second):
        # very simple image distance evaluation
        delta = ImageChops.difference(first, second)
        pixel_count = delta.size[0] * delta.size[1]
        return sum(delta.convert("L").getdata()) / pixel_count

    writer = PdfWriter(clone_from=RESOURCE_ROOT / "labeled-edges-center-image.pdf")
    source_reader = PdfReader(RESOURCE_ROOT / "jpeg.pdf")
    source_image = source_reader.pages[0].images[0].image

    # Replace with default saving parameters and re-read the written file.
    writer.pages[0].images[0].replace(source_image)
    output = BytesIO()
    writer.write(output)
    reread = PdfReader(output)
    default_distance = mean_gray_difference(
        reread.pages[0].images[0].image, source_image
    )
    assert default_distance < 1

    # Replace again with a low quality; compare directly from the writer.
    writer.pages[0].images[0].replace(source_image, quality=20)
    low_quality_distance = mean_gray_difference(
        writer.pages[0].images[0].image, source_image
    )
    assert low_quality_distance > default_distance

0 comments on commit 2d531d0

Please sign in to comment.