diff --git a/pypdf/_merger.py b/pypdf/_merger.py index a52a354e3..b6a830402 100644 --- a/pypdf/_merger.py +++ b/pypdf/_merger.py @@ -25,69 +25,10 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. -from io import BytesIO, FileIO, IOBase -from pathlib import Path -from types import TracebackType -from typing import ( - Any, - Dict, - Iterable, - List, - Optional, - Tuple, - Type, - Union, - cast, -) -from ._encryption import Encryption -from ._page import PageObject -from ._reader import PdfReader from ._utils import ( - StrByteType, - deprecate_with_replacement, -) -from ._writer import PdfWriter -from .constants import GoToActionArguments, TypArguments, TypFitArguments -from .constants import PagesAttributes as PA -from .generic import ( - PAGE_FIT, - ArrayObject, - Destination, - DictionaryObject, - Fit, - FloatObject, - IndirectObject, - NameObject, - NullObject, - NumberObject, - OutlineItem, - TextStringObject, - TreeObject, + deprecation_with_replacement, ) -from .pagerange import PageRange, PageRangeSpec -from .types import LayoutType, OutlineType, PagemodeType - -ERR_CLOSED_WRITER = "close() was called and thus the writer cannot be used anymore" - - -class _MergedPage: - """Collect necessary information on each page that is being merged.""" - - def __init__(self, pagedata: PageObject, src: PdfReader, id: int) -> None: - self.src = src - self.pagedata = pagedata - self.out_pagedata = None - self.id = id - - -# transfered from _utils : as this function is only required here -# and merger will be soon deprecated -def str_(b: Any) -> str: # pragma: no cover - if isinstance(b, bytes): - return b.decode("latin-1") - else: - return str(b) # will return b.__str__() if defined class PdfMerger: @@ -97,590 +38,5 @@ class PdfMerger: .. deprecated:: 5.0.0 """ - def __init__( - self, strict: bool = False, fileobj: Union[Path, StrByteType] = "" - ) -> None: - deprecate_with_replacement("PdfMerger", "PdfWriter", "5.0.0") - self.inputs: List[Tuple[Any, PdfReader]] = [] - self.pages: List[Any] = [] - self.output: Optional[PdfWriter] = PdfWriter() - self.outline: OutlineType = [] - self.named_dests: List[Any] = [] - self.id_count = 0 - self.fileobj = fileobj - self.strict = strict - - def __enter__(self) -> "PdfMerger": - # There is nothing to do. - deprecate_with_replacement("PdfMerger", "PdfWriter", "5.0.0") - return self - - def __exit__( - self, - exc_type: Optional[Type[BaseException]], - exc: Optional[BaseException], - traceback: Optional[TracebackType], - ) -> None: - """Write to the fileobj and close the merger.""" - if self.fileobj: - self.write(self.fileobj) - self.close() - - def merge( - self, - page_number: int, - fileobj: Union[Path, StrByteType, PdfReader], - outline_item: Optional[str] = None, - pages: Optional[PageRangeSpec] = None, - import_outline: bool = True, - ) -> None: - """ - Merge the pages from the given file into the output file at the - specified page number. - - Args: - page_number: The *page number* to insert this file. File will - be inserted after the given number. - fileobj: A File Object or an object that supports the standard - read and seek methods similar to a File Object. Could also be a - string representing a path to a PDF file. - outline_item: Optionally, you may specify an outline item - (previously referred to as a 'bookmark') to be applied at the - beginning of the included file by supplying the text of the outline item. - pages: can be a :class:`PageRange` - or a ``(start, stop[, step])`` tuple - to merge only the specified range of pages from the source - document into the output document. - Can also be a list of pages to merge. - import_outline: You may prevent the source document's - outline (collection of outline items, previously referred to as - 'bookmarks') from being imported by specifying this as ``False``. - """ - stream, encryption_obj = self._create_stream(fileobj) - - # Create a new PdfReader instance using the stream - # (either file or BytesIO or StringIO) created above - reader = PdfReader(stream, strict=self.strict) # type: ignore[arg-type] - self.inputs.append((stream, reader)) - if encryption_obj is not None: - reader._encryption = encryption_obj - - # Find the range of pages to merge. - if pages is None: - pages = (0, len(reader.pages)) - elif isinstance(pages, PageRange): - pages = pages.indices(len(reader.pages)) - elif isinstance(pages, list): - pass - elif not isinstance(pages, tuple): - raise TypeError('"pages" must be a tuple of (start, stop[, step])') - - srcpages = [] - - outline = [] - if import_outline: - outline = reader.outline - outline = self._trim_outline(reader, outline, pages) - - if outline_item: - outline_item_typ = OutlineItem( - TextStringObject(outline_item), - NumberObject(self.id_count), - Fit.fit(), - ) - self.outline += [outline_item_typ, outline] # type: ignore - else: - self.outline += outline - - dests = reader.named_destinations - trimmed_dests = self._trim_dests(reader, dests, pages) - self.named_dests += trimmed_dests - - # Gather all the pages that are going to be merged - for i in range(*pages): - page = reader.pages[i] - - id = self.id_count - self.id_count += 1 - - mp = _MergedPage(page, reader, id) - - srcpages.append(mp) - - self._associate_dests_to_pages(srcpages) - self._associate_outline_items_to_pages(srcpages) - - # Slice to insert the pages at the specified page_number - self.pages[page_number:page_number] = srcpages - - def _create_stream( - self, fileobj: Union[Path, StrByteType, PdfReader] - ) -> Tuple[IOBase, Optional[Encryption]]: - # If the fileobj parameter is a string, assume it is a path - # and create a file object at that location. If it is a file, - # copy the file's contents into a BytesIO stream object; if - # it is a PdfReader, copy that reader's stream into a - # BytesIO stream. - # If fileobj is none of the above types, it is not modified - encryption_obj = None - stream: IOBase - if isinstance(fileobj, (str, Path)): - stream = FileIO(fileobj, "rb") - elif isinstance(fileobj, PdfReader): - if fileobj._encryption: - encryption_obj = fileobj._encryption - orig_tell = fileobj.stream.tell() - fileobj.stream.seek(0) - stream = BytesIO(fileobj.stream.read()) - - # reset the stream to its original location - fileobj.stream.seek(orig_tell) - elif hasattr(fileobj, "seek") and hasattr(fileobj, "read"): - fileobj.seek(0) - file_content = fileobj.read() - stream = BytesIO(file_content) - else: - raise NotImplementedError( - "PdfMerger.merge requires an object that PdfReader can parse. " - "Typically, that is a Path or a string representing a Path, " - "a file object, or an object implementing .seek and .read. " - "Passing a PdfReader directly works as well." - ) - return stream, encryption_obj - - def append( - self, - fileobj: Union[StrByteType, PdfReader, Path], - outline_item: Optional[str] = None, - pages: Union[ - None, PageRange, Tuple[int, int], Tuple[int, int, int], List[int] - ] = None, - import_outline: bool = True, - ) -> None: - """ - Identical to the :meth:`merge()` method, but assumes you want to - concatenate all pages onto the end of the file instead of specifying a - position. - - Args: - fileobj: A File Object or an object that supports the standard - read and seek methods similar to a File Object. Could also be a - string representing a path to a PDF file. - outline_item: Optionally, you may specify an outline item - (previously referred to as a 'bookmark') to be applied at the - beginning of the included file by supplying the text of the outline item. - pages: can be a :class:`PageRange` - or a ``(start, stop[, step])`` tuple - to merge only the specified range of pages from the source - document into the output document. - Can also be a list of pages to append. - import_outline: You may prevent the source document's - outline (collection of outline items, previously referred to as - 'bookmarks') from being imported by specifying this as ``False``. - """ - self.merge(len(self.pages), fileobj, outline_item, pages, import_outline) - - def write(self, fileobj: Union[Path, StrByteType]) -> None: - """ - Write all data that has been merged to the given output file. - - Args: - fileobj: Output file. Can be a filename or any kind of - file-like object. - """ - if self.output is None: - raise RuntimeError(ERR_CLOSED_WRITER) - - # Add pages to the PdfWriter - # The commented out line below was replaced with the two lines below it - # to allow PdfMerger to work with PyPdf 1.13 - for page in self.pages: - self.output.add_page(page.pagedata) - pages_obj = cast(Dict[str, Any], self.output._pages.get_object()) - page.out_pagedata = self.output.get_reference( - pages_obj[PA.KIDS][-1].get_object() - ) - - # Once all pages are added, create outline items to point at those pages - self._write_dests() - self._write_outline() - - # Write the output to the file - my_file, ret_fileobj = self.output.write(fileobj) - - if my_file: - ret_fileobj.close() - - def close(self) -> None: - """Shut all file descriptors (input and output) and clear all memory usage.""" - self.pages = [] - for file_descriptor, _reader in self.inputs: - file_descriptor.close() - - self.inputs = [] - self.output = None - - def add_metadata(self, infos: Dict[str, Any]) -> None: - """ - Add custom metadata to the output. - - Args: - infos: a Python dictionary where each key is a field - and each value is your new metadata. - An example is ``{'/Title': 'My title'}`` - """ - if self.output is None: - raise RuntimeError(ERR_CLOSED_WRITER) - self.output.add_metadata(infos) - - def set_page_layout(self, layout: LayoutType) -> None: - """ - Set the page layout. - - Args: - layout: The page layout to be used - - .. list-table:: Valid ``layout`` arguments - :widths: 50 200 - - * - /NoLayout - - Layout explicitly not specified - * - /SinglePage - - Show one page at a time - * - /OneColumn - - Show one column at a time - * - /TwoColumnLeft - - Show pages in two columns, odd-numbered pages on the left - * - /TwoColumnRight - - Show pages in two columns, odd-numbered pages on the right - * - /TwoPageLeft - - Show two pages at a time, odd-numbered pages on the left - * - /TwoPageRight - - Show two pages at a time, odd-numbered pages on the right - """ - if self.output is None: - raise RuntimeError(ERR_CLOSED_WRITER) - self.output._set_page_layout(layout) - - def set_page_mode(self, mode: PagemodeType) -> None: - """ - Set the page mode. - - Args: - mode: The page mode to use. - - .. list-table:: Valid ``mode`` arguments - :widths: 50 200 - - * - /UseNone - - Do not show outline or thumbnails panels - * - /UseOutlines - - Show outline (aka bookmarks) panel - * - /UseThumbs - - Show page thumbnails panel - * - /FullScreen - - Fullscreen view - * - /UseOC - - Show Optional Content Group (OCG) panel - * - /UseAttachments - - Show attachments panel - """ - self.page_mode = mode - - @property - def page_mode(self) -> Optional[PagemodeType]: - """ - Set the page mode. - - Args: - mode: The page mode to use. - - .. list-table:: Valid ``mode`` arguments - :widths: 50 200 - - * - /UseNone - - Do not show outline or thumbnails panels - * - /UseOutlines - - Show outline (aka bookmarks) panel - * - /UseThumbs - - Show page thumbnails panel - * - /FullScreen - - Fullscreen view - * - /UseOC - - Show Optional Content Group (OCG) panel - * - /UseAttachments - - Show attachments panel - """ - if self.output is None: - raise RuntimeError(ERR_CLOSED_WRITER) - return self.output.page_mode - - @page_mode.setter - def page_mode(self, mode: PagemodeType) -> None: - if self.output is None: - raise RuntimeError(ERR_CLOSED_WRITER) - self.output.page_mode = mode - - def _trim_dests( - self, - pdf: PdfReader, - dests: Dict[str, Dict[str, Any]], - pages: Union[Tuple[int, int], Tuple[int, int, int], List[int]], - ) -> List[Dict[str, Any]]: - """ - Remove named destinations that are not a part of the specified page set. - - Args: - pdf: - dests: - pages: - """ - new_dests = [] - lst = pages if isinstance(pages, list) else list(range(*pages)) - for key, obj in dests.items(): - for j in lst: - if pdf.pages[j].get_object() == obj["/Page"].get_object(): - obj[NameObject("/Page")] = obj["/Page"].get_object() - assert str_(key) == str_(obj["/Title"]) - new_dests.append(obj) - break - return new_dests - - def _trim_outline( - self, - pdf: PdfReader, - outline: OutlineType, - pages: Union[Tuple[int, int], Tuple[int, int, int], List[int]], - ) -> OutlineType: - """ - Remove outline item entries that are not a part of the specified page set. - - Args: - pdf: - outline: - pages: - - Returns: - An outline type - """ - new_outline = [] - prev_header_added = True - lst = pages if isinstance(pages, list) else list(range(*pages)) - for i, outline_item in enumerate(outline): - if isinstance(outline_item, list): - sub = self._trim_outline(pdf, outline_item, lst) # type: ignore - if sub: - if not prev_header_added: - new_outline.append(outline[i - 1]) - new_outline.append(sub) # type: ignore - else: - prev_header_added = False - for j in lst: - if outline_item["/Page"] is None: - continue - if pdf.pages[j].get_object() == outline_item["/Page"].get_object(): - outline_item[NameObject("/Page")] = outline_item[ - "/Page" - ].get_object() - new_outline.append(outline_item) - prev_header_added = True - break - return new_outline - - def _write_dests(self) -> None: - if self.output is None: - raise RuntimeError(ERR_CLOSED_WRITER) - for named_dest in self.named_dests: - page_index = None - if "/Page" in named_dest: # deprecated - for page_index, page in enumerate(self.pages): # noqa: B007 - if page.id == named_dest["/Page"]: - named_dest[NameObject("/Page")] = page.out_pagedata - break - - if page_index is not None: # deprecated - self.output.add_named_destination_object(named_dest) - - def _write_outline( - self, - outline: Optional[Iterable[OutlineItem]] = None, - parent: Optional[TreeObject] = None, - ) -> None: - if self.output is None: - raise RuntimeError(ERR_CLOSED_WRITER) - if outline is None: - outline = self.outline # type: ignore - assert outline is not None, "hint for mypy" # TODO: is that true? - - last_added = None - for outline_item in outline: - if isinstance(outline_item, list): - self._write_outline(outline_item, last_added) - continue - - page_no = None - if "/Page" in outline_item: - for page_no, page in enumerate(self.pages): # noqa: B007 - if page.id == outline_item["/Page"]: - self._write_outline_item_on_page(outline_item, page) - break - if page_no is not None: - del outline_item["/Page"], outline_item["/Type"] - last_added = self.output.add_outline_item_dict(outline_item, parent) - - def _write_outline_item_on_page( - self, outline_item: Union[OutlineItem, Destination], page: _MergedPage - ) -> None: - oi_type = cast(str, outline_item["/Type"]) - args = [NumberObject(page.id), NameObject(oi_type)] - fit2arg_keys: Dict[str, Tuple[str, ...]] = { - TypFitArguments.FIT_H: (TypArguments.TOP,), - TypFitArguments.FIT_BH: (TypArguments.TOP,), - TypFitArguments.FIT_V: (TypArguments.LEFT,), - TypFitArguments.FIT_BV: (TypArguments.LEFT,), - TypFitArguments.XYZ: (TypArguments.LEFT, TypArguments.TOP, "/Zoom"), - TypFitArguments.FIT_R: ( - TypArguments.LEFT, - TypArguments.BOTTOM, - TypArguments.RIGHT, - TypArguments.TOP, - ), - } - for arg_key in fit2arg_keys.get(oi_type, ()): - if arg_key in outline_item and not isinstance( - outline_item[arg_key], NullObject - ): - args.append(FloatObject(outline_item[arg_key])) - else: - args.append(FloatObject(0)) - del outline_item[arg_key] - - outline_item[NameObject("/A")] = DictionaryObject( - { - NameObject(GoToActionArguments.S): NameObject("/GoTo"), - NameObject(GoToActionArguments.D): ArrayObject(args), - } - ) - - def _associate_dests_to_pages(self, pages: List[_MergedPage]) -> None: - for named_dest in self.named_dests: - page_index = None - np = named_dest["/Page"] - - if isinstance(np, NumberObject): - continue - - for page in pages: - if np.get_object() == page.pagedata.get_object(): - page_index = page.id - - if page_index is None: # deprecated - raise ValueError( - f"Unresolved named destination '{named_dest['/Title']}'" - ) - named_dest[NameObject("/Page")] = NumberObject(page_index) - - def _associate_outline_items_to_pages( - self, pages: List[_MergedPage], outline: Optional[Iterable[OutlineItem]] = None - ) -> None: - if outline is None: - outline = self.outline # type: ignore # TODO: self.bookmarks can be None! - assert outline is not None, "hint for mypy" - for outline_item in outline: - if isinstance(outline_item, list): - self._associate_outline_items_to_pages(pages, outline_item) - continue - - page_index = None - outline_item_page = outline_item["/Page"] - - if isinstance(outline_item_page, NumberObject): - continue - - for p in pages: - if outline_item_page.get_object() == p.pagedata.get_object(): - page_index = p.id - - if page_index is not None: - outline_item[NameObject("/Page")] = NumberObject(page_index) - - def find_outline_item( - self, - outline_item: Dict[str, Any], - root: Optional[OutlineType] = None, - ) -> Optional[List[int]]: - if root is None: - root = self.outline - - for i, oi_enum in enumerate(root): - if isinstance(oi_enum, list): - # oi_enum is still an inner node - # (OutlineType, if recursive types were supported by mypy) - res = self.find_outline_item(outline_item, oi_enum) # type: ignore - if res: # deprecated - return [i] + res - elif ( - oi_enum == outline_item - or cast(Dict[Any, Any], oi_enum["/Title"]) == outline_item - ): - # we found a leaf node - return [i] - - return None - - def add_outline_item( - self, - title: str, - page_number: int, - parent: Union[None, TreeObject, IndirectObject] = None, - color: Optional[Tuple[float, float, float]] = None, - bold: bool = False, - italic: bool = False, - fit: Fit = PAGE_FIT, - ) -> IndirectObject: - """ - Add an outline item (commonly referred to as a "Bookmark") to this PDF file. - - Args: - title: Title to use for this outline item. - page_number: Page number this outline item will point to. - parent: A reference to a parent outline item to create nested - outline items. - color: Color of the outline item's font as a red, green, blue tuple - from 0.0 to 1.0 - bold: Outline item font is bold - italic: Outline item font is italic - fit: The fit of the destination page. - """ - writer = self.output - if writer is None: - raise RuntimeError(ERR_CLOSED_WRITER) - return writer.add_outline_item( - title, - page_number, - parent, - None, - color, - bold, - italic, - fit, - ) - - def add_named_destination( - self, - title: str, - page_number: int, - ) -> None: - """ - Add a destination to the output. - - Args: - title: Title to use - page_number: Page number this destination points at. - """ - dest = Destination( - TextStringObject(title), - NumberObject(page_number), - Fit.fit_horizontally(top=826), - ) - self.named_dests.append(dest) + def __init__(self) -> None: + deprecation_with_replacement("PdfMerger", "PdfWriter", "5.0.0") diff --git a/pypdf/_writer.py b/pypdf/_writer.py index edcd391e4..1e6cb9e26 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -63,7 +63,7 @@ StreamType, _get_max_pdf_version_header, deprecate, - deprecate_with_replacement, + deprecation_with_replacement, logger_warning, ) from .constants import AnnotationDictionaryAttributes as AA @@ -1847,6 +1847,7 @@ def add_outline_item_dict( outline_item_object = TreeObject() outline_item_object.update(outline_item) + """code currently unreachable if "/A" in outline_item: action = DictionaryObject() a_dict = cast(DictionaryObject, outline_item["/A"]) @@ -1854,7 +1855,7 @@ def add_outline_item_dict( action[NameObject(str(k))] = v action_ref = self._add_object(action) outline_item_object[NameObject("/A")] = action_ref - + """ return self.add_outline_item_destination( outline_item_object, parent, before, is_open ) @@ -2541,7 +2542,7 @@ def _create_stream( stream = BytesIO(filecontent) else: raise NotImplementedError( - "PdfMerger.merge requires an object that PdfReader can parse. " + "Merging requires an object that PdfReader can parse. " "Typically, that is a Path or a string representing a Path, " "a file object, or an object implementing .seek and .read. " "Passing a PdfReader directly works as well." @@ -2894,14 +2895,12 @@ def add_filtered_articles( def _get_cloned_page( self, - page: Union[None, int, IndirectObject, PageObject, NullObject], + page: Union[None, IndirectObject, PageObject, NullObject], pages: Dict[int, PageObject], reader: PdfReader, ) -> Optional[IndirectObject]: if isinstance(page, NullObject): return None - if isinstance(page, int): - _i = reader.pages[page].indirect_reference elif isinstance(page, DictionaryObject) and page.get("/Type", "") == "/Page": _i = page.indirect_reference elif isinstance(page, IndirectObject): @@ -3084,13 +3083,12 @@ def find_bookmark( self, outline_item: Dict[str, Any], root: Optional[OutlineType] = None, - ) -> Optional[List[int]]: # deprecated + ) -> None: # deprecated """ .. deprecated:: 2.9.0 Use :meth:`find_outline_item` instead. """ - deprecate_with_replacement("find_bookmark", "find_outline_item", "5.0.0") - return self.find_outline_item(outline_item, root) + deprecation_with_replacement("find_bookmark", "find_outline_item", "5.0.0") def reset_translation( self, reader: Union[None, PdfReader, IndirectObject] = None diff --git a/pypdf/annotations/_markup_annotations.py b/pypdf/annotations/_markup_annotations.py index 98a222483..580b8bf58 100644 --- a/pypdf/annotations/_markup_annotations.py +++ b/pypdf/annotations/_markup_annotations.py @@ -50,7 +50,7 @@ class MarkupAnnotation(AnnotationDictionary, ABC): def __init__(self, *, title_bar: Optional[str] = None): if title_bar is not None: - self[NameObject("T")] = TextStringObject(title_bar) + self[NameObject("/T")] = TextStringObject(title_bar) class Text(MarkupAnnotation): diff --git a/pypdf/constants.py b/pypdf/constants.py index d7a8e310f..89fb55359 100644 --- a/pypdf/constants.py +++ b/pypdf/constants.py @@ -14,8 +14,6 @@ from enum import IntFlag, auto from typing import Dict, Tuple -from ._utils import classproperty, deprecate_with_replacement - class Core: """Keywords that don't quite belong anywhere else.""" @@ -162,46 +160,6 @@ class Ressources: # deprecated .. deprecated:: 5.0.0 """ - @classproperty - def EXT_G_STATE(cls) -> str: # noqa: N805 - deprecate_with_replacement("Ressources", "Resources", "5.0.0") - return "/ExtGState" # dictionary, optional - - @classproperty - def COLOR_SPACE(cls) -> str: # noqa: N805 - deprecate_with_replacement("Ressources", "Resources", "5.0.0") - return "/ColorSpace" # dictionary, optional - - @classproperty - def PATTERN(cls) -> str: # noqa: N805 - deprecate_with_replacement("Ressources", "Resources", "5.0.0") - return "/Pattern" # dictionary, optional - - @classproperty - def SHADING(cls) -> str: # noqa: N805 - deprecate_with_replacement("Ressources", "Resources", "5.0.0") - return "/Shading" # dictionary, optional - - @classproperty - def XOBJECT(cls) -> str: # noqa: N805 - deprecate_with_replacement("Ressources", "Resources", "5.0.0") - return "/XObject" # dictionary, optional - - @classproperty - def FONT(cls) -> str: # noqa: N805 - deprecate_with_replacement("Ressources", "Resources", "5.0.0") - return "/Font" # dictionary, optional - - @classproperty - def PROC_SET(cls) -> str: # noqa: N805 - deprecate_with_replacement("Ressources", "Resources", "5.0.0") - return "/ProcSet" # array, optional - - @classproperty - def PROPERTIES(cls) -> str: # noqa: N805 - deprecate_with_replacement("Ressources", "Resources", "5.0.0") - return "/Properties" # dictionary, optional - class PagesAttributes: """§7.7.3.2 of the 1.7 and 2.0 reference.""" diff --git a/pypdf/filters.py b/pypdf/filters.py index 43730cc8e..7589c8051 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -44,7 +44,6 @@ from ._utils import ( WHITESPACES_AS_BYTES, deprecate, - deprecate_with_replacement, deprecation_no_replacement, logger_warning, ord_, @@ -118,9 +117,6 @@ def decode( Raises: PdfReadError: """ - if "decodeParms" in kwargs: # deprecated - deprecate_with_replacement("decodeParms", "parameters", "4.0.0") - decode_parms = kwargs["decodeParms"] if isinstance(decode_parms, ArrayObject): raise DeprecationError("decode_parms as ArrayObject is depreciated") @@ -611,9 +607,6 @@ def decode( **kwargs: Any, ) -> bytes: # decode_parms is unused here - if "decodeParms" in kwargs: # deprecated - deprecate_with_replacement("decodeParms", "parameters", "4.0.0") - decode_parms = kwargs["decodeParms"] if isinstance(decode_parms, ArrayObject): # deprecated deprecation_no_replacement( "decode_parms being an ArrayObject", removed_in="3.15.5" @@ -729,12 +722,6 @@ def decode_stream_data(stream: Any) -> bytes: # utils.StreamObject return data -def decodeStreamData(stream: Any) -> Union[str, bytes]: # deprecated - """Deprecated. Use decode_stream_data.""" - deprecate_with_replacement("decodeStreamData", "decode_stream_data", "4.0.0") - return decode_stream_data(stream) - - def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes, Any]: """ Users need to have the pillow package installed. diff --git a/pypdf/generic/__init__.py b/pypdf/generic/__init__.py index 48045e0a6..63ccf1bdc 100644 --- a/pypdf/generic/__init__.py +++ b/pypdf/generic/__init__.py @@ -29,9 +29,11 @@ __author__ = "Mathieu Fenniak" __author_email__ = "biziqe@mathieu.fenniak.net" -from typing import Dict, List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Union -from .._utils import StreamType, deprecate_with_replacement +from .._utils import ( + deprecation_with_replacement, +) from ..constants import OutlineFontFlag from ._base import ( BooleanObject, @@ -69,41 +71,10 @@ ) from ._viewerpref import ViewerPreferences - -def readHexStringFromStream( - stream: StreamType, -) -> Union["TextStringObject", "ByteStringObject"]: # deprecated - """Deprecated, use read_hex_string_from_stream.""" - deprecate_with_replacement( - "readHexStringFromStream", "read_hex_string_from_stream", "4.0.0" - ) - return read_hex_string_from_stream(stream) - - -def readStringFromStream( - stream: StreamType, - forced_encoding: Union[None, str, List[str], Dict[int, str]] = None, -) -> Union["TextStringObject", "ByteStringObject"]: # deprecated - """Deprecated, use read_string_from_stream.""" - deprecate_with_replacement( - "readStringFromStream", "read_string_from_stream", "4.0.0" - ) - return read_string_from_stream(stream, forced_encoding) - - -def createStringObject( - string: Union[str, bytes], - forced_encoding: Union[None, str, List[str], Dict[int, str]] = None, -) -> Union[TextStringObject, ByteStringObject]: # deprecated - """Deprecated, use create_string_object.""" - deprecate_with_replacement("createStringObject", "create_string_object", "4.0.0") - return create_string_object(string, forced_encoding) - - PAGE_FIT = Fit.fit() -class AnnotationBuilder: +class AnnotationBuilder: # deprecated """ The AnnotationBuilder is deprecated. @@ -121,26 +92,10 @@ def text( text: str, open: bool = False, flags: int = 0, - ) -> DictionaryObject: - """ - Add text annotation. - - Args: - rect: array of four integers ``[xLL, yLL, xUR, yUR]`` - specifying the clickable rectangular area - text: The text that is added to the document - open: - flags: - - Returns: - A dictionary object representing the annotation. - """ - deprecate_with_replacement( + ) -> None: + deprecation_with_replacement( "AnnotationBuilder.text", "pypdf.annotations.Text", "4.0.0" ) - from ..annotations import Text - - return Text(rect=rect, text=text, open=open, flags=flags) @staticmethod def free_text( @@ -153,43 +108,10 @@ def free_text( font_color: str = "000000", border_color: Optional[str] = "000000", background_color: Optional[str] = "ffffff", - ) -> DictionaryObject: - """ - Add text in a rectangle to a page. - - Args: - text: Text to be added - rect: array of four integers ``[xLL, yLL, xUR, yUR]`` - specifying the clickable rectangular area - font: Name of the Font, e.g. 'Helvetica' - bold: Print the text in bold - italic: Print the text in italic - font_size: How big the text will be, e.g. '14pt' - font_color: Hex-string for the color, e.g. cdcdcd - border_color: Hex-string for the border color, e.g. cdcdcd. - Use ``None`` for no border. - background_color: Hex-string for the background of the annotation, - e.g. cdcdcd. Use ``None`` for transparent background. - - Returns: - A dictionary object representing the annotation. - """ - deprecate_with_replacement( + ) -> None: + deprecation_with_replacement( "AnnotationBuilder.free_text", "pypdf.annotations.FreeText", "4.0.0" ) - from ..annotations import FreeText - - return FreeText( - text=text, - rect=rect, - font=font, - bold=bold, - italic=italic, - font_size=font_size, - font_color=font_color, - background_color=background_color, - border_color=border_color, - ) @staticmethod def popup( @@ -198,34 +120,10 @@ def popup( flags: int = 0, parent: Optional[DictionaryObject] = None, open: bool = False, - ) -> DictionaryObject: - """ - Add a popup to the document. - - Args: - rect: - Specifies the clickable rectangular area as `[xLL, yLL, xUR, yUR]` - flags: - 1 - invisible, 2 - hidden, 3 - print, 4 - no zoom, - 5 - no rotate, 6 - no view, 7 - read only, 8 - locked, - 9 - toggle no view, 10 - locked contents - open: - Whether the popup should be shown directly (default is False). - parent: - The contents of the popup. Create this via the AnnotationBuilder. - - Returns: - A dictionary object representing the annotation. - """ - deprecate_with_replacement( + ) -> None: + deprecation_with_replacement( "AnnotationBuilder.popup", "pypdf.annotations.Popup", "4.0.0" ) - from ..annotations import Popup - - popup = Popup(rect=rect, open=open, parent=parent) - popup.flags = flags # type: ignore - - return popup @staticmethod def line( @@ -234,74 +132,27 @@ def line( rect: Union[RectangleObject, Tuple[float, float, float, float]], text: str = "", title_bar: Optional[str] = None, - ) -> DictionaryObject: - """ - Draw a line on the PDF. - - Args: - p1: First point - p2: Second point - rect: array of four integers ``[xLL, yLL, xUR, yUR]`` - specifying the clickable rectangular area - text: Text to be displayed as the line annotation - title_bar: Text to be displayed in the title bar of the - annotation; by convention this is the name of the author - - Returns: - A dictionary object representing the annotation. - """ - deprecate_with_replacement( + ) -> None: + deprecation_with_replacement( "AnnotationBuilder.line", "pypdf.annotations.Line", "4.0.0" ) - from ..annotations import Line - - return Line(p1=p1, p2=p2, rect=rect, text=text, title_bar=title_bar) @staticmethod def polyline( vertices: List[Tuple[float, float]], - ) -> DictionaryObject: - """ - Draw a polyline on the PDF. - - Args: - vertices: Array specifying the vertices (x, y) coordinates of the poly-line. - - Returns: - A dictionary object representing the annotation. - """ - deprecate_with_replacement( + ) -> None: + deprecation_with_replacement( "AnnotationBuilder.polyline", "pypdf.annotations.PolyLine", "4.0.0" ) - from ..annotations import PolyLine - - return PolyLine(vertices=vertices) @staticmethod def rectangle( rect: Union[RectangleObject, Tuple[float, float, float, float]], interiour_color: Optional[str] = None, - ) -> DictionaryObject: - """ - Draw a rectangle on the PDF. - - This method uses the /Square annotation type of the PDF format. - - Args: - rect: array of four integers ``[xLL, yLL, xUR, yUR]`` - specifying the clickable rectangular area - interiour_color: None or hex-string for the color, e.g. cdcdcd - If None is used, the interiour is transparent. - - Returns: - A dictionary object representing the annotation. - """ - deprecate_with_replacement( + ) -> None: + deprecation_with_replacement( "AnnotationBuilder.rectangle", "pypdf.annotations.Rectangle", "4.0.0" ) - from ..annotations import Rectangle - - return Rectangle(rect=rect, interiour_color=interiour_color) @staticmethod def highlight( @@ -310,65 +161,25 @@ def highlight( quad_points: ArrayObject, highlight_color: str = "ff0000", printing: bool = False, - ) -> DictionaryObject: - """ - Add a highlight annotation to the document. - - Args: - rect: Array of four integers ``[xLL, yLL, xUR, yUR]`` - specifying the highlighted area - quad_points: An ArrayObject of 8 FloatObjects. Must match a word or - a group of words, otherwise no highlight will be shown. - highlight_color: The color used for the highlight. - printing: Whether to print out the highlight annotation when the page - is printed. - - Returns: - A dictionary object representing the annotation. - """ - deprecate_with_replacement( + ) -> None: + deprecation_with_replacement( "AnnotationBuilder.highlight", "pypdf.annotations.Highlight", "4.0.0" ) - from ..annotations import Highlight - - return Highlight( - rect=rect, quad_points=quad_points, highlight_color=highlight_color, printing=printing - ) @staticmethod def ellipse( rect: Union[RectangleObject, Tuple[float, float, float, float]], interiour_color: Optional[str] = None, - ) -> DictionaryObject: - """ - Draw an ellipse on the PDF. - - This method uses the /Circle annotation type of the PDF format. - - Args: - rect: array of four integers ``[xLL, yLL, xUR, yUR]`` specifying - the bounding box of the ellipse - interiour_color: None or hex-string for the color, e.g. cdcdcd - If None is used, the interiour is transparent. - - Returns: - A dictionary object representing the annotation. - """ - deprecate_with_replacement( + ) -> None: + deprecation_with_replacement( "AnnotationBuilder.ellipse", "pypdf.annotations.Ellipse", "4.0.0" ) - from ..annotations import Ellipse - - return Ellipse(rect=rect, interiour_color=interiour_color) @staticmethod - def polygon(vertices: List[Tuple[float, float]]) -> DictionaryObject: - deprecate_with_replacement( + def polygon(vertices: List[Tuple[float, float]]) -> None: + deprecation_with_replacement( "AnnotationBuilder.polygon", "pypdf.annotations.Polygon", "4.0.0" ) - from ..annotations import Polygon - - return Polygon(vertices=vertices) from ._fit import DEFAULT_FIT @@ -379,45 +190,10 @@ def link( url: Optional[str] = None, target_page_index: Optional[int] = None, fit: Fit = DEFAULT_FIT, - ) -> DictionaryObject: - """ - Add a link to the document. - - The link can either be an external link or an internal link. - - An external link requires the URL parameter. - An internal link requires the target_page_index, fit, and fit args. - - Args: - rect: array of four integers ``[xLL, yLL, xUR, yUR]`` - specifying the clickable rectangular area - border: if provided, an array describing border-drawing - properties. See the PDF spec for details. No border will be - drawn if this argument is omitted. - - horizontal corner radius, - - vertical corner radius, and - - border width - - Optionally: Dash - url: Link to a website (if you want to make an external link) - target_page_index: index of the page to which the link should go - (if you want to make an internal link) - fit: Page fit or 'zoom' option. - - Returns: - A dictionary object representing the annotation. - """ - deprecate_with_replacement( + ) -> None: + deprecation_with_replacement( "AnnotationBuilder.link", "pypdf.annotations.Link", "4.0.0" ) - from ..annotations import Link - - return Link( - rect=rect, - border=border, - url=url, - target_page_index=target_page_index, - fit=fit, - ) __all__ = [ diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 08bc2806d..215f2c75e 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -52,8 +52,8 @@ from .._utils import ( WHITESPACES, StreamType, - deprecate_no_replacement, - deprecate_with_replacement, + deprecation_no_replacement, + deprecation_with_replacement, logger_warning, read_non_whitespace, read_until_regex, @@ -207,7 +207,7 @@ def write_to_stream( self, stream: StreamType, encryption_key: Union[None, str, bytes] = None ) -> None: if encryption_key is not None: # deprecated - deprecate_no_replacement( + deprecation_no_replacement( "the encryption_key parameter of write_to_stream", "5.0.0" ) stream.write(b"[") @@ -464,7 +464,7 @@ def write_to_stream( self, stream: StreamType, encryption_key: Union[None, str, bytes] = None ) -> None: if encryption_key is not None: # deprecated - deprecate_no_replacement( + deprecation_no_replacement( "the encryption_key parameter of write_to_stream", "5.0.0" ) stream.write(b"<<\n") @@ -634,10 +634,6 @@ def __init__(self, dct: Optional[DictionaryObject] = None) -> None: if dct: self.update(dct) - def hasChildren(self) -> bool: # deprecated - deprecate_with_replacement("hasChildren", "has_children", "4.0.0") - return self.has_children() - def has_children(self) -> bool: return "/First" in self @@ -827,10 +823,6 @@ def remove_from_tree(self) -> None: else: cast("TreeObject", self["/Parent"]).remove_child(self) - def emptyTree(self) -> None: # deprecated - deprecate_with_replacement("emptyTree", "empty_tree", "4.0.0") - self.empty_tree() - def empty_tree(self) -> None: for child in self: child_obj = child.get_object() @@ -921,7 +913,7 @@ def write_to_stream( self, stream: StreamType, encryption_key: Union[None, str, bytes] = None ) -> None: if encryption_key is not None: # deprecated - deprecate_no_replacement( + deprecation_no_replacement( "the encryption_key parameter of write_to_stream", "5.0.0" ) self[NameObject(SA.LENGTH)] = NumberObject(len(self._data)) @@ -932,13 +924,10 @@ def write_to_stream( stream.write(b"\nendstream") @staticmethod - def initializeFromDictionary( - data: Dict[str, Any] - ) -> Union["EncodedStreamObject", "DecodedStreamObject"]: - deprecate_with_replacement( + def initializeFromDictionary(data: Dict[str, Any]) -> None: + deprecation_with_replacement( "initializeFromDictionary", "initialize_from_dictionary", "5.0.0" ) # pragma: no cover - return StreamObject.initialize_from_dictionary(data) # pragma: no cover @staticmethod def initialize_from_dictionary( @@ -1041,7 +1030,7 @@ def get_data(self) -> bytes: return decoded.get_data() # This overrides the parent method: - def set_data(self, data: bytes) -> None: # deprecated + def set_data(self, data: bytes) -> None: from ..filters import FlateDecode if self.get(SA.FILTER, "") in (FT.FLATE_DECODE, [FT.FLATE_DECODE]): @@ -1561,7 +1550,7 @@ def write_to_stream( self, stream: StreamType, encryption_key: Union[None, str, bytes] = None ) -> None: if encryption_key is not None: # deprecated - deprecate_no_replacement( + deprecation_no_replacement( "the encryption_key parameter of write_to_stream", "5.0.0" ) stream.write(b"<<\n") diff --git a/resources/Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf b/resources/Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf index 8a04001dd..99da2bb86 100644 Binary files a/resources/Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf and b/resources/Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf differ diff --git a/tests/test_annotations.py b/tests/test_annotations.py index 2064ed402..f6d14c5e4 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -2,8 +2,23 @@ from pathlib import Path +import pytest + from pypdf import PdfReader, PdfWriter -from pypdf.annotations import Ellipse, FreeText, Rectangle, Text +from pypdf.annotations import ( + AnnotationDictionary, + Ellipse, + FreeText, + Highlight, + Line, + Link, + Polygon, + PolyLine, + Popup, + Rectangle, + Text, +) +from pypdf.generic import ArrayObject, FloatObject, NumberObject TESTS_ROOT = Path(__file__).parent.resolve() PROJECT_ROOT = TESTS_ROOT.parent @@ -19,6 +34,12 @@ def test_ellipse_annotation(pdf_file_path): writer.add_page(page) # Act + with pytest.warns(DeprecationWarning): + ellipse_annotation = Ellipse( + rect=(50, 550, 500, 650), + interiour_color="ff0000", + ) + ellipse_annotation = Ellipse( rect=(50, 550, 500, 650), interior_color="ff0000", @@ -109,3 +130,250 @@ def test_free_text_annotation(pdf_file_path): # Assert: You need to inspect the file manually with open(pdf_file_path, "wb") as fp: writer.write(fp) + + +def test_annotationdictionary(): + a = AnnotationDictionary() + a.flags = 123 + assert a.flags == 123 + + +def test_polygon(pdf_file_path): + # Arrange + pdf_path = RESOURCE_ROOT / "crazyones.pdf" + reader = PdfReader(pdf_path) + page = reader.pages[0] + writer = PdfWriter() + writer.add_page(page) + + with pytest.raises(ValueError): + Polygon( + vertices=[], + ) + + annotation = Polygon( + vertices=[(50, 550), (200, 650), (70, 750), (50, 700)], + ) + writer.add_annotation(0, annotation) + + # Assert: You need to inspect the file manually + with open(pdf_file_path, "wb") as fp: + writer.write(fp) + + +def test_polyline(pdf_file_path): + # Arrange + pdf_path = RESOURCE_ROOT / "crazyones.pdf" + reader = PdfReader(pdf_path) + page = reader.pages[0] + writer = PdfWriter() + writer.add_page(page) + + with pytest.raises(ValueError): + PolyLine( + vertices=[], + ) + + annotation = PolyLine( + vertices=[(50, 550), (200, 650), (70, 750), (50, 700)], + ) + writer.add_annotation(0, annotation) + + # Assert: You need to inspect the file manually + with open(pdf_file_path, "wb") as fp: + writer.write(fp) + + +def test_line(pdf_file_path): + # Arrange + pdf_path = RESOURCE_ROOT / "crazyones.pdf" + reader = PdfReader(pdf_path) + page = reader.pages[0] + writer = PdfWriter() + writer.add_page(page) + + # Act + line_annotation = Line( + text="Hello World\nLine2", + rect=(50, 550, 200, 650), + p1=(50, 550), + p2=(200, 650), + ) + writer.add_annotation(0, line_annotation) + + # Assert: You need to inspect the file manually + with open(pdf_file_path, "wb") as fp: + writer.write(fp) + + +def test_square(pdf_file_path): + # Arrange + pdf_path = RESOURCE_ROOT / "crazyones.pdf" + reader = PdfReader(pdf_path) + page = reader.pages[0] + writer = PdfWriter() + writer.add_page(page) + + # Act + with pytest.warns(DeprecationWarning): + square_annotation = Rectangle( + rect=(50, 550, 200, 650), interiour_color="ff0000" + ) + + square_annotation = Rectangle(rect=(50, 550, 200, 650), interior_color="ff0000") + writer.add_annotation(0, square_annotation) + + square_annotation = Rectangle( + rect=(40, 400, 150, 450), + ) + writer.add_annotation(0, square_annotation) + + # Assert: You need to inspect the file manually + with open(pdf_file_path, "wb") as fp: + writer.write(fp) + + +def test_highlight(pdf_file_path): + # Arrange + pdf_path = RESOURCE_ROOT / "crazyones.pdf" + reader = PdfReader(pdf_path) + page = reader.pages[0] + writer = PdfWriter() + writer.add_page(page) + + # Act + highlight_annotation = Highlight( + rect=(95.79332, 704.31777, 138.55779, 724.6855), + highlight_color="ff0000", + quad_points=ArrayObject( + [ + FloatObject(100.060779), + FloatObject(723.55398), + FloatObject(134.29033), + FloatObject(723.55398), + FloatObject(100.060779), + FloatObject(705.4493), + FloatObject(134.29033), + FloatObject(705.4493), + ] + ), + printing=False, + ) + writer.add_annotation(0, highlight_annotation) + for annot in writer.pages[0]["/Annots"]: + obj = annot.get_object() + subtype = obj["/Subtype"] + if subtype == "/Highlight": + assert "/F" not in obj or obj["/F"] == NumberObject(0) + + writer.add_page(page) + # Act + highlight_annotation = Highlight( + rect=(95.79332, 704.31777, 138.55779, 724.6855), + highlight_color="ff0000", + quad_points=ArrayObject( + [ + FloatObject(100.060779), + FloatObject(723.55398), + FloatObject(134.29033), + FloatObject(723.55398), + FloatObject(100.060779), + FloatObject(705.4493), + FloatObject(134.29033), + FloatObject(705.4493), + ] + ), + printing=True, + ) + writer.add_annotation(1, highlight_annotation) + for annot in writer.pages[1]["/Annots"]: + obj = annot.get_object() + subtype = obj["/Subtype"] + if subtype == "/Highlight": + assert obj["/F"] == NumberObject(4) + + # Assert: You need to inspect the file manually + with open(pdf_file_path, "wb") as fp: + writer.write(fp) + + +def test_link(pdf_file_path): + # Arrange + pdf_path = RESOURCE_ROOT / "outline-without-title.pdf" + reader = PdfReader(pdf_path) + page = reader.pages[0] + writer = PdfWriter() + writer.add_page(page) + + # Act + # Part 1: Too many args + with pytest.raises(ValueError): + Link( + rect=(50, 550, 200, 650), + url="https://martin-thoma.com/", + target_page_index=3, + ) + + # Part 2: Too few args + with pytest.raises(ValueError): + Link( + rect=(50, 550, 200, 650), + ) + + # Part 3: External Link + link_annotation = Link( + rect=(50, 50, 100, 100), + url="https://martin-thoma.com/", + border=[1, 0, 6, [3, 2]], + ) + writer.add_annotation(0, link_annotation) + + # Part 4: Internal Link + link_annotation = Link( + rect=(100, 100, 300, 200), + target_page_index=1, + border=[50, 10, 4], + ) + writer.add_annotation(0, link_annotation) + + for page in reader.pages[1:]: + writer.add_page(page) + + # Assert: You need to inspect the file manually + with open(pdf_file_path, "wb") as fp: + writer.write(fp) + + +def test_popup(caplog): + # Arrange + pdf_path = RESOURCE_ROOT / "outline-without-title.pdf" + reader = PdfReader(pdf_path) + page = reader.pages[0] + writer = PdfWriter() + writer.add_page(page) + + # Act + text_annotation = Text( + title_bar="hello world", + text="Hello World\nThis is the second line!", + rect=(50, 550, 200, 650), + open=True, + ) + ta = writer.add_annotation(0, text_annotation) + popup_annotation = Popup( + rect=(50, 550, 200, 650), + open=True, + parent=ta, # prefer to use for evolutivity + ) + writer.add_annotation(writer.pages[0], popup_annotation) + + Popup( + rect=(50, 550, 200, 650), + open=True, + parent=True, # broken parameter # type: ignore + ) + assert "Unregistered Parent object : No Parent field set" in caplog.text + + target = "annotated-pdf-popup.pdf" + writer.write(target) + Path(target).unlink() # comment this out for manual inspection diff --git a/tests/test_encryption.py b/tests/test_encryption.py index 39ee17453..f5c494cb9 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -171,7 +171,7 @@ def test_read_page_from_encrypted_file_aes_256(pdffile, password): @pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_merge_encrypted_pdfs(names): """Encrypted PDFs can be merged after decryption.""" - merger = pypdf.PdfMerger() + merger = pypdf.PdfWriter() files = [RESOURCE_ROOT / "encryption" / x for x in names] pdfs = [pypdf.PdfReader(x) for x in files] for pdf in pdfs: diff --git a/tests/test_generic.py b/tests/test_generic.py index 190bb25e6..a13aa7b09 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -5,15 +5,13 @@ from copy import deepcopy from io import BytesIO from pathlib import Path -from unittest.mock import patch import pytest -from pypdf import PdfMerger, PdfReader, PdfWriter +from pypdf import PdfReader, PdfWriter from pypdf.constants import CheckboxRadioButtonAttributes from pypdf.errors import PdfReadError, PdfStreamError from pypdf.generic import ( - AnnotationBuilder, ArrayObject, BooleanObject, ByteStringObject, @@ -701,387 +699,27 @@ def test_bool_repr(tmp_path): @pytest.mark.enable_socket() -@patch("pypdf._reader.logger_warning") -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_issue_997(mock_logger_warning, pdf_file_path): +def test_issue_997(pdf_file_path): url = ( "https://github.com/py-pdf/pypdf/files/8908874/" "Exhibit_A-2_930_Enterprise_Zone_Tax_Credits_final.pdf" ) name = "gh-issue-997.pdf" - merger = PdfMerger() + merger = PdfWriter() merger.append(BytesIO(get_data_from_url(url, name=name))) # here the error raises with open(pdf_file_path, "wb") as f: merger.write(f) merger.close() - mock_logger_warning.assert_called_with("Overwriting cache for 0 4", "pypdf._reader") - # Strict - merger = PdfMerger(strict=True) - with pytest.raises(PdfReadError) as exc: - merger.append( - BytesIO(get_data_from_url(url, name=name)) - ) # here the error raises - assert exc.value.args[0] == "Could not find object." + merger = PdfWriter() + merger.append(BytesIO(get_data_from_url(url, name=name))) # here the error raises with open(pdf_file_path, "wb") as f: merger.write(f) merger.close() -def test_annotation_builder_free_text(pdf_file_path): - # Arrange - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] - writer = PdfWriter() - writer.add_page(page) - - # Act - with pytest.warns(DeprecationWarning): - free_text_annotation = AnnotationBuilder.free_text( - "Hello World - bold and italic\nThis is the second line!", - rect=(50, 550, 200, 650), - font="Arial", - bold=True, - italic=True, - font_size="20pt", - font_color="00ff00", - border_color=None, - background_color=None, - ) - writer.add_annotation(0, free_text_annotation) - - with pytest.warns(DeprecationWarning): - free_text_annotation = AnnotationBuilder.free_text( - "Another free text annotation (not bold, not italic)", - rect=(500, 550, 200, 650), - font="Arial", - bold=False, - italic=False, - font_size="20pt", - font_color="00ff00", - border_color="0000ff", - background_color="cdcdcd", - ) - writer.add_annotation(0, free_text_annotation) - - # Assert: You need to inspect the file manually - with open(pdf_file_path, "wb") as fp: - writer.write(fp) - - -def test_annotation_builder_polygon(pdf_file_path): - # Arrange - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] - writer = PdfWriter() - writer.add_page(page) - - # Act - with pytest.warns(DeprecationWarning), pytest.raises(ValueError) as exc: - AnnotationBuilder.polygon( - vertices=[], - ) - assert exc.value.args[0] == "A polygon needs at least 1 vertex with two coordinates" - - with pytest.warns(DeprecationWarning): - annotation = AnnotationBuilder.polygon( - vertices=[(50, 550), (200, 650), (70, 750), (50, 700)], - ) - writer.add_annotation(0, annotation) - - # Assert: You need to inspect the file manually - with open(pdf_file_path, "wb") as fp: - writer.write(fp) - - -def test_annotation_builder_polyline(pdf_file_path, pdf_reader_page): - # Arrange - writer = PdfWriter() - writer.add_page(pdf_reader_page) - - # Act - with pytest.warns(DeprecationWarning), pytest.raises(ValueError) as exc: - AnnotationBuilder.polyline( - vertices=[], - ) - assert exc.value.args[0] == "A polygon needs at least 1 vertex with two coordinates" - - with pytest.warns(DeprecationWarning): - annotation = AnnotationBuilder.polyline( - vertices=[(50, 550), (200, 650), (70, 750), (50, 700)], - ) - writer.add_annotation(0, annotation) - - # Assert: You need to inspect the file manually - with open(pdf_file_path, "wb") as fp: - writer.write(fp) - - -def test_annotation_builder_line(pdf_file_path): - # Arrange - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] - writer = PdfWriter() - writer.add_page(page) - - # Act - with pytest.warns(DeprecationWarning): - line_annotation = AnnotationBuilder.line( - text="Hello World\nLine2", - rect=(50, 550, 200, 650), - p1=(50, 550), - p2=(200, 650), - ) - writer.add_annotation(0, line_annotation) - - # Assert: You need to inspect the file manually - with open(pdf_file_path, "wb") as fp: - writer.write(fp) - - -def test_annotation_builder_square(pdf_file_path): - # Arrange - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] - writer = PdfWriter() - writer.add_page(page) - - # Act - with pytest.warns(DeprecationWarning): - square_annotation = AnnotationBuilder.rectangle( - rect=(50, 550, 200, 650), interiour_color="ff0000" - ) - writer.add_annotation(0, square_annotation) - - with pytest.warns(DeprecationWarning): - square_annotation = AnnotationBuilder.rectangle( - rect=(40, 400, 150, 450), - ) - writer.add_annotation(0, square_annotation) - - # Assert: You need to inspect the file manually - with open(pdf_file_path, "wb") as fp: - writer.write(fp) - - -def test_annotation_builder_highlight(pdf_file_path): - # Arrange - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] - writer = PdfWriter() - writer.add_page(page) - - # Act - with pytest.warns(DeprecationWarning): - highlight_annotation = AnnotationBuilder.highlight( - rect=(95.79332, 704.31777, 138.55779, 724.6855), - highlight_color="ff0000", - quad_points=ArrayObject( - [ - FloatObject(100.060779), - FloatObject(723.55398), - FloatObject(134.29033), - FloatObject(723.55398), - FloatObject(100.060779), - FloatObject(705.4493), - FloatObject(134.29033), - FloatObject(705.4493), - ] - ), - printing=False, - ) - writer.add_annotation(0, highlight_annotation) - for annot in writer.pages[0]["/Annots"]: - obj = annot.get_object() - subtype = obj["/Subtype"] - if subtype == "/Highlight": - assert "/F" not in obj or obj["/F"] == NumberObject(0) - - writer.add_page(page) - # Act - with pytest.warns(DeprecationWarning): - highlight_annotation = AnnotationBuilder.highlight( - rect=(95.79332, 704.31777, 138.55779, 724.6855), - highlight_color="ff0000", - quad_points=ArrayObject( - [ - FloatObject(100.060779), - FloatObject(723.55398), - FloatObject(134.29033), - FloatObject(723.55398), - FloatObject(100.060779), - FloatObject(705.4493), - FloatObject(134.29033), - FloatObject(705.4493), - ] - ), - printing=True, - ) - writer.add_annotation(1, highlight_annotation) - for annot in writer.pages[1]["/Annots"]: - obj = annot.get_object() - subtype = obj["/Subtype"] - if subtype == "/Highlight": - assert obj["/F"] == NumberObject(4) - - # Assert: You need to inspect the file manually - with open(pdf_file_path, "wb") as fp: - writer.write(fp) - - -def test_annotation_builder_circle(pdf_file_path): - # Arrange - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] - writer = PdfWriter() - writer.add_page(page) - - # Act - with pytest.warns(DeprecationWarning): - circle_annotation = AnnotationBuilder.ellipse( - rect=(50, 550, 200, 650), interiour_color="ff0000" - ) - writer.add_annotation(0, circle_annotation) - - diameter = 100 - with pytest.warns(DeprecationWarning): - circle_annotation = AnnotationBuilder.ellipse( - rect=(110, 500, 110 + diameter, 500 + diameter), - ) - writer.add_annotation(0, circle_annotation) - - # Assert: You need to inspect the file manually - with open(pdf_file_path, "wb") as fp: - writer.write(fp) - - -def test_annotation_builder_link(pdf_file_path): - # Arrange - pdf_path = RESOURCE_ROOT / "outline-without-title.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] - writer = PdfWriter() - writer.add_page(page) - - # Act - # Part 1: Too many args - with pytest.warns(DeprecationWarning), pytest.raises(ValueError) as exc: - AnnotationBuilder.link( - rect=(50, 550, 200, 650), - url="https://martin-thoma.com/", - target_page_index=3, - ) - assert exc.value.args[0] == ( - "Either 'url' or 'target_page_index' have to be provided. " - "url='https://martin-thoma.com/', target_page_index=3" - ) - - # Part 2: Too few args - with pytest.warns(DeprecationWarning), pytest.raises(ValueError) as exc: - AnnotationBuilder.link( - rect=(50, 550, 200, 650), - ) - assert ( - exc.value.args[0] - == "Either 'url' or 'target_page_index' have to be provided. Both were None." - ) - - # Part 3: External Link - with pytest.warns(DeprecationWarning): - link_annotation = AnnotationBuilder.link( - rect=(50, 50, 100, 100), - url="https://martin-thoma.com/", - border=[1, 0, 6, [3, 2]], - ) - writer.add_annotation(0, link_annotation) - - # Part 4: Internal Link - with pytest.warns(DeprecationWarning): - link_annotation = AnnotationBuilder.link( - rect=(100, 100, 300, 200), - target_page_index=1, - border=[50, 10, 4], - ) - writer.add_annotation(0, link_annotation) - - for page in reader.pages[1:]: - writer.add_page(page) - - # Assert: You need to inspect the file manually - with open(pdf_file_path, "wb") as fp: - writer.write(fp) - - -def test_annotation_builder_text(pdf_file_path): - # Arrange - pdf_path = RESOURCE_ROOT / "outline-without-title.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] - writer = PdfWriter() - writer.add_page(page) - - # Act - with pytest.warns(DeprecationWarning): - text_annotation = AnnotationBuilder.text( - text="Hello World\nThis is the second line!", - rect=(50, 550, 500, 650), - open=True, - ) - writer.add_annotation(0, text_annotation) - - # Assert: You need to inspect the file manually - with open(pdf_file_path, "wb") as fp: - writer.write(fp) - - -def test_annotation_builder_popup(caplog): - # Arrange - pdf_path = RESOURCE_ROOT / "outline-without-title.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] - writer = PdfWriter() - writer.add_page(page) - - # Act - with pytest.warns(DeprecationWarning): - text_annotation = AnnotationBuilder.text( - text="Hello World\nThis is the second line!", - rect=(50, 550, 200, 650), - open=True, - ) - ta = writer.add_annotation(0, text_annotation) - - with pytest.warns(DeprecationWarning): - popup_annotation = AnnotationBuilder.popup( - rect=(50, 550, 200, 650), - open=True, - parent=ta, # prefer to use for evolutivity - ) - - assert caplog.text == "" - with pytest.warns(DeprecationWarning): - AnnotationBuilder.popup( - rect=(50, 550, 200, 650), - open=True, - parent=True, # broken parameter # type: ignore - ) - assert "Unregistered Parent object : No Parent field set" in caplog.text - - writer.add_annotation(writer.pages[0], popup_annotation) - - target = "annotated-pdf-popup.pdf" - writer.write(target) - Path(target).unlink() # comment this out for manual inspection - - def test_checkboxradiobuttonattributes_opt(): assert "/Opt" in CheckboxRadioButtonAttributes.attributes_dict() @@ -1325,6 +963,13 @@ def test_encodedstream_set_data(): assert str(cc["/DecodeParms"]) == "[NullObject, NullObject, NullObject]" assert cc[NameObject("/Test")] == "/MyTest" + with pytest.raises(TypeError): + aa.set_data("toto") + + aa[NameObject("/Filter")] = NameObject("/JPXEncode") + with pytest.raises(PdfReadError): + aa.set_data(b"toto") + @pytest.mark.enable_socket() def test_set_data_2(): diff --git a/tests/test_merger.py b/tests/test_merger.py index 3d7917902..c9112eae3 100644 --- a/tests/test_merger.py +++ b/tests/test_merger.py @@ -7,6 +7,7 @@ import pypdf from pypdf import PdfMerger, PdfReader, PdfWriter +from pypdf.errors import DeprecationError from pypdf.generic import Destination, Fit from . import get_data_from_url @@ -36,7 +37,7 @@ def merger_operate(merger): data = fp.read() merger.append(data) assert exc.value.args[0].startswith( - "PdfMerger.merge requires an object that PdfReader can parse. " + "Merging requires an object that PdfReader can parse. " "Typically, that is a Path" ) @@ -156,21 +157,6 @@ def check_outline(tmp_path): tmp_filename = "dont_commit_merged.pdf" -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_merger_operations_by_traditional_usage(tmp_path): - # Arrange - merger = PdfMerger() - merger_operate(merger) - path = tmp_path / tmp_filename - - # Act - merger.write(path) - merger.close() - - # Assert - check_outline(path) - - def test_merger_operations_by_traditional_usage_with_writer(tmp_path): # Arrange merger = PdfWriter() @@ -184,19 +170,6 @@ def test_merger_operations_by_traditional_usage_with_writer(tmp_path): check_outline(path) -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_merger_operations_by_semi_traditional_usage(tmp_path): - path = tmp_path / tmp_filename - - with PdfMerger() as merger: - merger_operate(merger) - merger.write(path) # Act - - # Assert - assert Path(path).is_file() - check_outline(path) - - def test_merger_operations_by_semi_traditional_usage_with_writer(tmp_path): path = tmp_path / tmp_filename @@ -209,16 +182,6 @@ def test_merger_operations_by_semi_traditional_usage_with_writer(tmp_path): check_outline(path) -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_merger_operation_by_new_usage(tmp_path): - path = tmp_path / tmp_filename - with PdfMerger(fileobj=path) as merger: - merger_operate(merger) - # Assert - assert Path(path).is_file() - check_outline(path) - - def test_merger_operation_by_new_usage_with_writer(tmp_path): path = tmp_path / tmp_filename with PdfWriter(fileobj=path) as merger: @@ -229,16 +192,6 @@ def test_merger_operation_by_new_usage_with_writer(tmp_path): check_outline(path) -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_merge_page_exception(): - merger = pypdf.PdfMerger() - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - with pytest.raises(TypeError) as exc: - merger.merge(0, pdf_path, pages="a:b") - assert exc.value.args[0] == '"pages" must be a tuple of (start, stop[, step])' - merger.close() - - def test_merge_page_exception_with_writer(): merger = pypdf.PdfWriter() pdf_path = RESOURCE_ROOT / "crazyones.pdf" @@ -251,14 +204,6 @@ def test_merge_page_exception_with_writer(): merger.close() -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_merge_page_tuple(): - merger = pypdf.PdfMerger() - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - merger.merge(0, pdf_path, pages=(0, 1)) - merger.close() - - def test_merge_page_tuple_with_writer(): merger = pypdf.PdfWriter() pdf_path = RESOURCE_ROOT / "crazyones.pdf" @@ -266,44 +211,6 @@ def test_merge_page_tuple_with_writer(): merger.close() -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_merge_write_closed_fh(): - merger = pypdf.PdfMerger() - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - merger.append(pdf_path) - - err_closed = "close() was called and thus the writer cannot be used anymore" - - merger.close() - with pytest.raises(RuntimeError) as exc: - merger.write("test_merge_write_closed_fh.pdf") - assert exc.value.args[0] == err_closed - - with pytest.raises(RuntimeError) as exc: - merger.add_metadata({"author": "Martin Thoma"}) - assert exc.value.args[0] == err_closed - - with pytest.raises(RuntimeError) as exc: - merger.set_page_layout("/SinglePage") - assert exc.value.args[0] == err_closed - - with pytest.raises(RuntimeError) as exc: - merger.page_mode = "/UseNone" - assert exc.value.args[0] == err_closed - - with pytest.raises(RuntimeError) as exc: - merger._write_outline() - assert exc.value.args[0] == err_closed - - with pytest.raises(RuntimeError) as exc: - merger.add_outline_item("An outline item", 0) - assert exc.value.args[0] == err_closed - - with pytest.raises(RuntimeError) as exc: - merger._write_dests() - assert exc.value.args[0] == err_closed - - def test_merge_write_closed_fh_with_writer(pdf_file_path): merger = pypdf.PdfWriter() pdf_path = RESOURCE_ROOT / "crazyones.pdf" @@ -317,18 +224,6 @@ def test_merge_write_closed_fh_with_writer(pdf_file_path): merger.add_outline_item("An outline item", 0) -@pytest.mark.enable_socket() -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_trim_outline_list(pdf_file_path): - url = "https://corpora.tika.apache.org/base/docs/govdocs1/995/995175.pdf" - name = "tika-995175.pdf" - reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) - merger = PdfMerger() - merger.append(reader) - merger.write(pdf_file_path) - merger.close() - - @pytest.mark.enable_socket() def test_trim_outline_list_with_writer(pdf_file_path): url = "https://corpora.tika.apache.org/base/docs/govdocs1/995/995175.pdf" @@ -336,18 +231,7 @@ def test_trim_outline_list_with_writer(pdf_file_path): reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) merger = PdfWriter() merger.append(reader) - merger.write(pdf_file_path) - merger.close() - - -@pytest.mark.enable_socket() -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_zoom(pdf_file_path): - url = "https://corpora.tika.apache.org/base/docs/govdocs1/994/994759.pdf" - name = "tika-994759.pdf" - reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) - merger = PdfMerger() - merger.append(reader) + merger.add_outline_item_dict(merger.outline[0]) merger.write(pdf_file_path) merger.close() @@ -365,12 +249,13 @@ def test_zoom_with_writer(pdf_file_path): @pytest.mark.enable_socket() @pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_zoom_xyz_no_left(pdf_file_path): +def test_zoom_xyz_no_left_with_add_page(pdf_file_path): url = "https://corpora.tika.apache.org/base/docs/govdocs1/933/933322.pdf" name = "tika-933322.pdf" reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) - merger = PdfMerger() - merger.append(reader) + merger = PdfWriter() + for p in reader.pages: + merger.add_page(p) merger.write(pdf_file_path) merger.close() @@ -386,18 +271,6 @@ def test_zoom_xyz_no_left_with_writer(pdf_file_path): merger.close() -@pytest.mark.enable_socket() -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_outline_item(pdf_file_path): - url = "https://corpora.tika.apache.org/base/docs/govdocs1/997/997511.pdf" - name = "tika-997511.pdf" - reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) - merger = PdfMerger() - merger.append(reader) - merger.write(pdf_file_path) - merger.close() - - @pytest.mark.enable_socket() @pytest.mark.slow() def test_outline_item_with_writer(pdf_file_path): @@ -410,19 +283,6 @@ def test_outline_item_with_writer(pdf_file_path): merger.close() -@pytest.mark.enable_socket() -@pytest.mark.slow() -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_trim_outline(pdf_file_path): - url = "https://corpora.tika.apache.org/base/docs/govdocs1/982/982336.pdf" - name = "tika-982336.pdf" - reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) - merger = PdfMerger() - merger.append(reader) - merger.write(pdf_file_path) - merger.close() - - @pytest.mark.enable_socket() @pytest.mark.slow() def test_trim_outline_with_writer(pdf_file_path): @@ -435,19 +295,6 @@ def test_trim_outline_with_writer(pdf_file_path): merger.close() -@pytest.mark.enable_socket() -@pytest.mark.slow() -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test1(pdf_file_path): - url = "https://corpora.tika.apache.org/base/docs/govdocs1/923/923621.pdf" - name = "tika-923621.pdf" - reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) - merger = PdfMerger() - merger.append(reader) - merger.write(pdf_file_path) - merger.close() - - @pytest.mark.enable_socket() @pytest.mark.slow() def test1_with_writer(pdf_file_path): @@ -460,23 +307,6 @@ def test1_with_writer(pdf_file_path): merger.close() -@pytest.mark.enable_socket() -@pytest.mark.slow() -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_sweep_recursion1(pdf_file_path): - # TODO: This test looks like an infinite loop. - url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924546.pdf" - name = "tika-924546.pdf" - reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) - merger = PdfMerger() - merger.append(reader) - merger.write(pdf_file_path) - merger.close() - - reader2 = PdfReader(pdf_file_path) - reader2.pages - - @pytest.mark.enable_socket() @pytest.mark.slow() def test_sweep_recursion1_with_writer(pdf_file_path): @@ -493,34 +323,6 @@ def test_sweep_recursion1_with_writer(pdf_file_path): reader2.pages -@pytest.mark.enable_socket() -@pytest.mark.slow() -@pytest.mark.parametrize( - ("url", "name"), - [ - ( - # TODO: This test looks like an infinite loop. - "https://corpora.tika.apache.org/base/docs/govdocs1/924/924794.pdf", - "tika-924794.pdf", - ), - ( - "https://corpora.tika.apache.org/base/docs/govdocs1/924/924546.pdf", - "tika-924546.pdf", - ), - ], -) -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_sweep_recursion2(url, name, pdf_file_path): - reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) - merger = PdfMerger() - merger.append(reader) - merger.write(pdf_file_path) - merger.close() - - reader2 = PdfReader(pdf_file_path) - reader2.pages - - @pytest.mark.enable_socket() @pytest.mark.slow() @pytest.mark.parametrize( @@ -548,22 +350,6 @@ def test_sweep_recursion2_with_writer(url, name, pdf_file_path): reader2.pages -@pytest.mark.enable_socket() -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_sweep_indirect_list_newobj_is_none(caplog, pdf_file_path): - url = "https://corpora.tika.apache.org/base/docs/govdocs1/906/906769.pdf" - name = "tika-906769.pdf" - reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) - merger = PdfMerger() - merger.append(reader) - merger.write(pdf_file_path) - merger.close() - # used to be: assert "Object 21 0 not defined." in caplog.text - - reader2 = PdfReader(pdf_file_path) - reader2.pages - - @pytest.mark.enable_socket() def test_sweep_indirect_list_newobj_is_none_with_writer(caplog, pdf_file_path): url = "https://corpora.tika.apache.org/base/docs/govdocs1/906/906769.pdf" @@ -579,17 +365,6 @@ def test_sweep_indirect_list_newobj_is_none_with_writer(caplog, pdf_file_path): reader2.pages -@pytest.mark.enable_socket() -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_iss1145(): - # issue with FitH destination with null param - url = "https://github.com/py-pdf/pypdf/files/9164743/file-0.pdf" - name = "iss1145.pdf" - merger = PdfMerger() - merger.append(PdfReader(BytesIO(get_data_from_url(url, name=name)))) - merger.close() - - @pytest.mark.enable_socket() def test_iss1145_with_writer(): # issue with FitH destination with null param @@ -600,22 +375,6 @@ def test_iss1145_with_writer(): merger.close() -@pytest.mark.enable_socket() -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_iss1344(caplog): - url = "https://github.com/py-pdf/pypdf/files/9549001/input.pdf" - name = "iss1344.pdf" - m = PdfMerger() - m.append(PdfReader(BytesIO(get_data_from_url(url, name=name)))) - b = BytesIO() - m.write(b) - r = PdfReader(b) - p = r.pages[0] - assert "/DIJMAC+Arial Black" in p._debug_for_extract() - assert "adresse où le malade peut être visité" in p.extract_text() - assert r.threads is None - - @pytest.mark.enable_socket() def test_iss1344_with_writer(caplog): url = "https://github.com/py-pdf/pypdf/files/9549001/input.pdf" @@ -643,5 +402,10 @@ def test_articles_with_writer(caplog): def test_deprecate_pdfmerger(): - with pytest.warns(DeprecationWarning), PdfMerger() as merger: + with pytest.raises(DeprecationError), PdfMerger() as merger: merger.append(RESOURCE_ROOT / "crazyones.pdf") + + +def test_get_reference(): + writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf") + assert writer.get_reference(writer.pages[0]) == writer.pages[0].indirect_reference diff --git a/tests/test_workflows.py b/tests/test_workflows.py index f307271e7..77451ef99 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -14,7 +14,7 @@ from PIL import Image, ImageChops from PIL import __version__ as pil_version -from pypdf import PdfMerger, PdfReader, PdfWriter +from pypdf import PdfReader, PdfWriter from pypdf.constants import PageAttributes as PG from pypdf.errors import PdfReadError, PdfReadWarning from pypdf.generic import ( @@ -356,7 +356,7 @@ def test_overlay(pdf_file_path, base_path, overlay_path): def test_merge_with_warning(tmp_path, url, name): data = BytesIO(get_data_from_url(url, name=name)) reader = PdfReader(data) - merger = PdfMerger() + merger = PdfWriter() merger.append(reader) # This could actually be a performance bottleneck: merger.write(tmp_path / "tmp.merged.pdf") @@ -376,7 +376,7 @@ def test_merge_with_warning(tmp_path, url, name): def test_merge(tmp_path, url, name): data = BytesIO(get_data_from_url(url, name=name)) reader = PdfReader(data) - merger = PdfMerger() + merger = PdfWriter() merger.append(reader) merger.write(tmp_path / "tmp.merged.pdf") @@ -585,7 +585,6 @@ def test_scale_rectangle_indirect_object(): page.scale(sx=2, sy=3) -@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_merge_output(caplog): # Arrange base = RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR.pdf" @@ -593,10 +592,8 @@ def test_merge_output(caplog): expected = RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf" # Act - merger = PdfMerger(strict=True) + merger = PdfWriter() merger.append(base) - msg = "Xref table not zero-indexed. ID numbers for objects will be corrected." - assert normalize_warnings(caplog.text) == [msg] merger.merge(1, crazy) stream = BytesIO() merger.write(stream) diff --git a/tests/test_writer.py b/tests/test_writer.py index e06db389b..a31c6f6bb 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -14,7 +14,6 @@ ImageType, ObjectDeletionFlag, PageObject, - PdfMerger, PdfReader, PdfWriter, Transformation, @@ -840,7 +839,7 @@ def test_sweep_indirect_references_nullobject_exception(pdf_file_path): url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf" name = "tika-924666.pdf" reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) - merger = PdfMerger() + merger = PdfWriter() merger.append(reader) merger.write(pdf_file_path) @@ -864,11 +863,6 @@ def test_sweep_indirect_references_nullobject_exception(pdf_file_path): @pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_some_appends(pdf_file_path, url, name): reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) - # PdfMerger - merger = PdfMerger() - merger.append(reader) - merger.write(pdf_file_path) - # PdfWriter merger = PdfWriter() merger.append(reader) merger.write(pdf_file_path)