diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 91f59f746..3ed891a00 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -828,16 +828,30 @@ def flate_encode(self) -> "EncodedStreamObject":
         if SA.FILTER in self:
             f = self[SA.FILTER]
             if isinstance(f, ArrayObject):
-                f.insert(0, NameObject(FT.FLATE_DECODE))
+                f = ArrayObject([NameObject(FT.FLATE_DECODE), *f])
+                # Prepend a null entry for the new filter so that the parameters
+                # stay aligned with the filter array.
+                old_parms = self.get(SA.DECODE_PARMS, ArrayObject())
+                if isinstance(old_parms, ArrayObject):
+                    parms = ArrayObject([NullObject(), *old_parms])
+                else:
+                    # Not an array (e.g. a lone dictionary): keep it as a single
+                    # entry. Unpacking a dictionary-like object would spread its
+                    # keys without raising TypeError.
+                    parms = ArrayObject([NullObject(), old_parms])
             else:
-                newf = ArrayObject()
-                newf.append(NameObject("/FlateDecode"))
-                newf.append(f)
-                f = newf
+                f = ArrayObject([NameObject(FT.FLATE_DECODE), f])
+                parms = ArrayObject(
+                    [NullObject(), self.get(SA.DECODE_PARMS, NullObject())]
+                )
         else:
-            f = NameObject("/FlateDecode")
+            f = NameObject(FT.FLATE_DECODE)
+            parms = None
         retval = EncodedStreamObject()
+        retval.update(self)
         retval[NameObject(SA.FILTER)] = f
+        if parms is not None:
+            retval[NameObject(SA.DECODE_PARMS)] = parms
         retval._data = FlateDecode.encode(self._data)
         return retval
 
@@ -894,7 +908,34 @@ def getData(self) -> Union[None, str, bytes]:  # deprecated
         return self.get_data()
 
     def set_data(self, data: Any) -> None:  # deprecated
-        raise PdfReadError("Creating EncodedStreamObject is not currently supported")
+        """
+        Replace the stream content, keeping the decoded and encoded data in sync.
+
+        Only streams whose filter is exactly ``/FlateDecode`` are supported.
+
+        Args:
+            data: The new, decoded content of the stream.
+
+        Raises:
+            TypeError: If ``data`` is not ``bytes``.
+            PdfReadError: If the stream uses any filter other than a single
+                ``/FlateDecode``.
+        """
+        from ..filters import FlateDecode
+
+        if self.get(SA.FILTER, "") != FT.FLATE_DECODE:
+            raise PdfReadError(
+                "Streams encoded with a filter other than FlateDecode are not supported"
+            )
+        if not isinstance(data, bytes):
+            raise TypeError("data must be bytes")
+        if self.decoded_self is None:
+            # No decoded copy cached yet; get_data() is expected to create it.
+            self.get_data()
+        if self.decoded_self is None:
+            raise PdfReadError("Stream data could not be decoded")
+        self.decoded_self._data = data
+        self._data = FlateDecode.encode(data)
 
     def setData(self, data: Any) -> None:  # deprecated
         deprecation_with_replacement("setData", "set_data", "3.0.0")
diff --git a/tests/test_generic.py b/tests/test_generic.py
index 2302767f9..80f604f8b 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -1169,3 +1169,46 @@ def test_destination_withoutzoom():
     name = "2021_book_security.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     reader.outline
+
+
+def test_encodedstream_set_data():
+    """
+    EncodedStreamObject.set_data can replace the data of a FlateDecode stream.
+
+    Also checks flate_encode on streams that are already encoded.
+    """
+    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
+    reader = PdfReader(pdf_path)
+    content = reader.pages[0]["/Contents"][0].get_object()
+    content.set_data(b"%hello\n" + content.get_data())
+    assert b"hello" in content.get_data()
+
+    # The modified stream must survive a write/read round trip.
+    buf = BytesIO()
+    content.write_to_stream(buf)
+    buf.seek(0)
+    reread = read_object(buf, None)
+    assert b"hello" in reread.get_data()
+    assert reread["/Filter"] == "/FlateDecode"
+    assert "/DecodeParms" not in reread
+
+    # Single filter name -> array of two filters with matching null parameters.
+    twice = reread.flate_encode()
+    assert b"hello" in twice.get_data()
+    assert twice["/Filter"] == ["/FlateDecode", "/FlateDecode"]
+    assert str(twice["/DecodeParms"]) == "[NullObject, NullObject]"
+
+    # Filter array -> one more filter; other keys are copied and the source
+    # stream is left untouched.
+    twice[NameObject("/Test")] = NameObject("/MyTest")
+    thrice = twice.flate_encode()
+    assert twice["/Filter"] == ["/FlateDecode", "/FlateDecode"]
+    assert b"hello" in thrice.get_data()
+    assert thrice["/Filter"] == ["/FlateDecode", "/FlateDecode", "/FlateDecode"]
+    assert str(thrice["/DecodeParms"]) == "[NullObject, NullObject, NullObject]"
+    assert thrice[NameObject("/Test")] == "/MyTest"
+
+    # set_data must also work on a stream that has not been decoded yet.
+    fresh = PdfReader(pdf_path).pages[0]["/Contents"][0].get_object()
+    fresh.set_data(b"%fresh\n")
+    assert fresh.get_data() == b"%fresh\n"