From 7ef8b8638f73a87d695c9f405cff8f230546c858 Mon Sep 17 00:00:00 2001 From: francois Date: Tue, 2 Jan 2024 16:58:45 +0100 Subject: [PATCH 01/19] handling name without leading / --- pypdf/generic/_base.py | 9 ++++++--- tests/test_generic.py | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index 5a2757295..ef3f7c6cf 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -587,10 +587,13 @@ def write_to_stream( stream.write(self.renumber()) def renumber(self) -> bytes: - out = self[0].encode("utf-8") - if out != b"/": + out = self.surfix + val = self[:] + if val[0].encode("utf-8") != self.surfix: logger_warning(f"Incorrect first char in NameObject:({self})", __name__) - for c in self[1:]: + else: + val = val[1:] + for c in val: if c > "~": for x in c.encode("utf-8"): out += f"#{x:02X}".encode() diff --git a/tests/test_generic.py b/tests/test_generic.py index 0e0fff677..ce6342298 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -219,7 +219,7 @@ def test_name_object(caplog): caplog.clear() b = BytesIO() NameObject("hello").write_to_stream(b) - assert bytes(b.getbuffer()) == b"hello" + assert bytes(b.getbuffer()) == b"/hello" assert "Incorrect first char" in caplog.text caplog.clear() From 6cf707532ed969b4761eff6a8d469746cfdc3a3f Mon Sep 17 00:00:00 2001 From: francois Date: Fri, 19 Jan 2024 23:37:42 +0100 Subject: [PATCH 02/19] init --- pypdf/generic/_base.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index ef3f7c6cf..d982d3bfe 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -565,6 +565,11 @@ class NameObject(str, PdfObject): # noqa: SLOT000 **{chr(i): f"#{i:02X}".encode() for i in range(33)}, } + def __init__(self, value="", encoding=None, errors="strict"): + if value[0] != "/": + value = "/" + value + super.__init__(value, encoding, errors) + def clone( self, pdf_dest: Any, @@ -587,13 +592,10 @@ def write_to_stream( stream.write(self.renumber()) def renumber(self) -> bytes: - out = self.surfix - val = self[:] - if val[0].encode("utf-8") != self.surfix: + out = self[0].encode("utf-8") + if out != b"/": logger_warning(f"Incorrect first char in NameObject:({self})", __name__) - else: - val = val[1:] - for c in val: + for c in self[1:]: if c > "~": for x in c.encode("utf-8"): out += f"#{x:02X}".encode() From 15fbfca33f110d78120d7b45ad4a80d6cdad8f96 Mon Sep 17 00:00:00 2001 From: francois Date: Fri, 19 Jan 2024 23:41:25 +0100 Subject: [PATCH 03/19] type --- pypdf/generic/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index d982d3bfe..e98e3b0f8 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -565,10 +565,10 @@ class NameObject(str, PdfObject): # noqa: SLOT000 **{chr(i): f"#{i:02X}".encode() for i in range(33)}, } - def __init__(self, value="", encoding=None, errors="strict"): + def __init__(self, value: str = "", encoding: str = None, errors: str = "strict"): if value[0] != "/": value = "/" + value - super.__init__(value, encoding, errors) + super().__init__(value, encoding, errors) def clone( self, From 6a34b8d423bd28833aa9affb4688b2384fd37134 Mon Sep 17 00:00:00 2001 From: francois Date: Fri, 19 Jan 2024 23:50:32 +0100 Subject: [PATCH 04/19] optional --- pypdf/generic/_base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index e98e3b0f8..26563c437 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -565,7 +565,9 @@ class NameObject(str, PdfObject): # noqa: SLOT000 **{chr(i): f"#{i:02X}".encode() for i in range(33)}, } - def __init__(self, value: str = "", encoding: str = None, errors: str = "strict"): + def __init__( + self, value: str = "", encoding: str | None = None, errors: str = "strict" + ): if value[0] != "/": value = "/" + value super().__init__(value, encoding, errors) From ae5ec5dc232fa6bc47f813dea2ac65b38544ec02 Mon Sep 17 00:00:00 2001 From: francois Date: Sat, 20 Jan 2024 18:08:10 +0100 Subject: [PATCH 05/19] exception --- pypdf/generic/_base.py | 9 +-------- tests/test_generic.py | 8 +++++--- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index 26563c437..f5f09ee9d 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -565,13 +565,6 @@ class NameObject(str, PdfObject): # noqa: SLOT000 **{chr(i): f"#{i:02X}".encode() for i in range(33)}, } - def __init__( - self, value: str = "", encoding: str | None = None, errors: str = "strict" - ): - if value[0] != "/": - value = "/" + value - super().__init__(value, encoding, errors) - def clone( self, pdf_dest: Any, @@ -596,7 +589,7 @@ def write_to_stream( def renumber(self) -> bytes: out = self[0].encode("utf-8") if out != b"/": - logger_warning(f"Incorrect first char in NameObject:({self})", __name__) + raise Exception(f"Incorrect first char in NameObject:({self})", __name__) for c in self[1:]: if c > "~": for x in c.encode("utf-8"): diff --git a/tests/test_generic.py b/tests/test_generic.py index ce6342298..44b3c354a 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -218,9 +218,11 @@ def test_name_object(caplog): caplog.clear() b = BytesIO() - NameObject("hello").write_to_stream(b) - assert bytes(b.getbuffer()) == b"/hello" - assert "Incorrect first char" in caplog.text + try: + NameObject("hello").write_to_stream(b) + assert False + except Exception: + pass caplog.clear() b = BytesIO() From 89f964acfb4cda7e1933fd8d4909bcc0c0a1b6b2 Mon Sep 17 00:00:00 2001 From: francois Date: Sat, 20 Jan 2024 18:16:02 +0100 Subject: [PATCH 06/19] test --- pypdf/generic/_base.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index f5f09ee9d..608300192 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -43,7 +43,12 @@ read_until_regex, str_, ) -from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError +from ..errors import ( + STREAM_TRUNCATED_PREMATURELY, + PdfReadError, + PdfStreamError, + PyPdfError, +) __author__ = "Mathieu Fenniak" __author_email__ = "biziqe@mathieu.fenniak.net" @@ -589,7 +594,7 @@ def write_to_stream( def renumber(self) -> bytes: out = self[0].encode("utf-8") if out != b"/": - raise Exception(f"Incorrect first char in NameObject:({self})", __name__) + raise PyPdfError(f"Incorrect first char in NameObject:({self})", __name__) for c in self[1:]: if c > "~": for x in c.encode("utf-8"): From 8bedb272a1048810fc9d1d90ace851a085890761 Mon Sep 17 00:00:00 2001 From: francois Date: Sat, 20 Jan 2024 18:18:11 +0100 Subject: [PATCH 07/19] test --- tests/test_generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index 44b3c354a..2ac530fb7 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -7,7 +7,7 @@ from pypdf import PdfMerger, PdfReader, PdfWriter from pypdf.constants import CheckboxRadioButtonAttributes -from pypdf.errors import PdfReadError, PdfStreamError +from pypdf.errors import PdfReadError, PdfStreamError, PyPdfError from pypdf.generic import ( AnnotationBuilder, ArrayObject, @@ -220,8 +220,8 @@ def test_name_object(caplog): b = BytesIO() try: NameObject("hello").write_to_stream(b) - assert False - except Exception: + pytest.fail() + except PyPdfError: pass caplog.clear() From 9364c7e21cf36dd1b778b4a9117d3d8f5304e482 Mon Sep 17 00:00:00 2001 From: francois Date: Sat, 20 Jan 2024 18:19:30 +0100 Subject: [PATCH 08/19] test --- tests/test_generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index 2ac530fb7..d8eb1db12 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -220,7 +220,7 @@ def test_name_object(caplog): b = BytesIO() try: NameObject("hello").write_to_stream(b) - pytest.fail() + pytest.fail("Corrupted name object") except PyPdfError: pass From 1317068c0b6d4ae1eccf4b288870ce811b6e5407 Mon Sep 17 00:00:00 2001 From: francois Date: Sat, 20 Jan 2024 20:33:33 +0100 Subject: [PATCH 09/19] missing / --- pypdf/annotations/_markup_annotations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/annotations/_markup_annotations.py b/pypdf/annotations/_markup_annotations.py index c5e0a2fed..587ed67cd 100644 --- a/pypdf/annotations/_markup_annotations.py +++ b/pypdf/annotations/_markup_annotations.py @@ -290,7 +290,7 @@ def __init__( NameObject("/Type"): NameObject("/Annot"), NameObject("/Subtype"): NameObject("/Polygon"), NameObject("/Vertices"): ArrayObject(coord_list), - NameObject("/IT"): NameObject("PolygonCloud"), + NameObject("/IT"): NameObject("/PolygonCloud"), NameObject("/Rect"): RectangleObject(_get_bounding_rectangle(vertices)), } ) From 000341ff4e4d9570dfed7e23bfaa7dc68564a79f Mon Sep 17 00:00:00 2001 From: francois Date: Sat, 20 Jan 2024 20:43:22 +0100 Subject: [PATCH 10/19] wrong name value --- pypdf/annotations/_markup_annotations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pypdf/annotations/_markup_annotations.py b/pypdf/annotations/_markup_annotations.py index 587ed67cd..9b6d3253d 100644 --- a/pypdf/annotations/_markup_annotations.py +++ b/pypdf/annotations/_markup_annotations.py @@ -165,8 +165,8 @@ def __init__( ), NameObject("/LE"): ArrayObject( [ - NameObject(None), - NameObject(None), + NameObject("/None"), + NameObject("/None"), ] ), NameObject("/IC"): ArrayObject( From dcdb4811ad2dcb8307539f21a3e32f5199f06773 Mon Sep 17 00:00:00 2001 From: francois Date: Sat, 20 Jan 2024 21:09:53 +0100 Subject: [PATCH 11/19] wrong object --- pypdf/annotations/_markup_annotations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pypdf/annotations/_markup_annotations.py b/pypdf/annotations/_markup_annotations.py index 9b6d3253d..23c641e1e 100644 --- a/pypdf/annotations/_markup_annotations.py +++ b/pypdf/annotations/_markup_annotations.py @@ -325,9 +325,9 @@ def __init__( border_arr: BorderArrayType if border is not None: - border_arr = [NameObject(n) for n in border[:3]] + border_arr = [NumberObject(n) for n in border[:3]] if len(border) == 4: - dash_pattern = ArrayObject([NameObject(n) for n in border[3]]) + dash_pattern = ArrayObject([NumberObject(n) for n in border[3]]) border_arr.append(dash_pattern) else: border_arr = [NumberObject(0)] * 3 From d971211cf75baa9941a0fe401e71675b9aba9f46 Mon Sep 17 00:00:00 2001 From: francois Date: Sat, 20 Jan 2024 21:27:38 +0100 Subject: [PATCH 12/19] wrong object --- pypdf/_writer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index fb3049ef0..258e5cab8 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -1898,15 +1898,15 @@ def add_uri( border_arr: BorderArrayType if border is not None: - border_arr = [NameObject(n) for n in border[:3]] + border_arr = [NumberObject(n) for n in border[:3]] if len(border) == 4: - dash_pattern = ArrayObject([NameObject(n) for n in border[3]]) + dash_pattern = ArrayObject([NumberObject(n) for n in border[3]]) border_arr.append(dash_pattern) else: border_arr = [NumberObject(2), NumberObject(2), NumberObject(2)] if isinstance(rect, str): - rect = NameObject(rect) + rect = NumberObject(rect) elif isinstance(rect, RectangleObject): pass else: From 462cf1adfb8774f52e02d57b66e18af38a96273c Mon Sep 17 00:00:00 2001 From: Rak424 Date: Sat, 20 Jan 2024 21:37:02 +0100 Subject: [PATCH 13/19] Update tests/test_generic.py Co-authored-by: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> --- tests/test_generic.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index d8eb1db12..6cfe1eaf1 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -218,11 +218,8 @@ def test_name_object(caplog): caplog.clear() b = BytesIO() - try: + with pytest.raises(PyPdfError) as exc:: NameObject("hello").write_to_stream(b) - pytest.fail("Corrupted name object") - except PyPdfError: - pass caplog.clear() b = BytesIO() From f2674af4a5ef74544ae4df4d2a32ee9bdc13489c Mon Sep 17 00:00:00 2001 From: francois Date: Sat, 20 Jan 2024 22:47:11 +0100 Subject: [PATCH 14/19] indent --- tests/test_generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index 6cfe1eaf1..52ba404e1 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -218,7 +218,7 @@ def test_name_object(caplog): caplog.clear() b = BytesIO() - with pytest.raises(PyPdfError) as exc:: + with pytest.raises(PyPdfError) as exc: NameObject("hello").write_to_stream(b) caplog.clear() From f9a1c4330b58f3662b0b702c825cf08c67d0d0d2 Mon Sep 17 00:00:00 2001 From: francois Date: Mon, 26 Feb 2024 13:20:19 +0100 Subject: [PATCH 15/19] deprecate --- pypdf/generic/_base.py | 5 ++--- tests/test_generic.py | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index 2da8d4d34..3f9721043 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -41,13 +41,12 @@ logger_warning, read_non_whitespace, read_until_regex, - str_, + str_, deprecate, ) from ..errors import ( STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError, - PyPdfError, ) __author__ = "Mathieu Fenniak" @@ -620,7 +619,7 @@ def write_to_stream( def renumber(self) -> bytes: out = self[0].encode("utf-8") if out != b"/": - raise PyPdfError(f"Incorrect first char in NameObject:({self})", __name__) + deprecate(f"Incorrect first char in NameObject, should start with '/':({self})") for c in self[1:]: if c > "~": for x in c.encode("utf-8"): diff --git a/tests/test_generic.py b/tests/test_generic.py index 4b179e2c4..2adb3c90a 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -8,7 +8,7 @@ from pypdf import PdfMerger, PdfReader, PdfWriter from pypdf.constants import CheckboxRadioButtonAttributes -from pypdf.errors import PdfReadError, PdfStreamError, PyPdfError +from pypdf.errors import PdfReadError, PdfStreamError from pypdf.generic import ( AnnotationBuilder, ArrayObject, @@ -219,8 +219,8 @@ def test_name_object(caplog): caplog.clear() b = BytesIO() - with pytest.raises(PyPdfError) as exc: - NameObject("hello").write_to_stream(b) + # with pytest.raises(PyPdfError) as exc: TODO + # NameObject("hello").write_to_stream(b) caplog.clear() b = BytesIO() From da3a58d548010d81d7e39edb70a681dc494357f1 Mon Sep 17 00:00:00 2001 From: francois Date: Mon, 26 Feb 2024 14:16:25 +0100 Subject: [PATCH 16/19] ruff --- pypdf/generic/_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index 3f9721043..6e43f9408 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -37,11 +37,12 @@ from .._utils import ( StreamType, b_, + deprecate, deprecate_no_replacement, logger_warning, read_non_whitespace, read_until_regex, - str_, deprecate, + str_, ) from ..errors import ( STREAM_TRUNCATED_PREMATURELY, From d176b2e509387ad778e792ee2389f29b96bc9153 Mon Sep 17 00:00:00 2001 From: francois Date: Mon, 26 Feb 2024 17:29:38 +0100 Subject: [PATCH 17/19] deprecate_no_replacement --- pypdf/generic/_base.py | 3 +-- tests/test_generic.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index 6e43f9408..92d35db7b 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -37,7 +37,6 @@ from .._utils import ( StreamType, b_, - deprecate, deprecate_no_replacement, logger_warning, read_non_whitespace, @@ -620,7 +619,7 @@ def write_to_stream( def renumber(self) -> bytes: out = self[0].encode("utf-8") if out != b"/": - deprecate(f"Incorrect first char in NameObject, should start with '/':({self})") + deprecate_no_replacement(f"Incorrect first char in NameObject, should start with '/':({self})","5.0.0") for c in self[1:]: if c > "~": for x in c.encode("utf-8"): diff --git a/tests/test_generic.py b/tests/test_generic.py index 2adb3c90a..f77b018f8 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -219,8 +219,8 @@ def test_name_object(caplog): caplog.clear() b = BytesIO() - # with pytest.raises(PyPdfError) as exc: TODO - # NameObject("hello").write_to_stream(b) + with pytest.raises(DeprecationWarning): + NameObject("hello").write_to_stream(b) caplog.clear() b = BytesIO() From adc27a03034a70776a566eee0141874b7ff147fc Mon Sep 17 00:00:00 2001 From: francois Date: Tue, 27 Feb 2024 07:22:24 +0100 Subject: [PATCH 18/19] cleaning --- pypdf/generic/_base.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index 92d35db7b..a1164c411 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -43,11 +43,7 @@ read_until_regex, str_, ) -from ..errors import ( - STREAM_TRUNCATED_PREMATURELY, - PdfReadError, - PdfStreamError, -) +from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError __author__ = "Mathieu Fenniak" __author_email__ = "biziqe@mathieu.fenniak.net" From c777d9f55f66acb7f7381dc75300d07124174f51 Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Tue, 27 Feb 2024 10:25:00 +0100 Subject: [PATCH 19/19] fix formatting and version --- pypdf/generic/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index a1164c411..3fde174b0 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -615,7 +615,7 @@ def write_to_stream( def renumber(self) -> bytes: out = self[0].encode("utf-8") if out != b"/": - deprecate_no_replacement(f"Incorrect first char in NameObject, should start with '/':({self})","5.0.0") + deprecate_no_replacement(f"Incorrect first char in NameObject, should start with '/': ({self})", "6.0.0") for c in self[1:]: if c > "~": for x in c.encode("utf-8"):