From f999fb2bd985ad56a392e2fc40ed63861a223ee5 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Mon, 22 May 2023 16:32:56 +0200 Subject: [PATCH] ENH : add set_data to EncodedStreamObject add set_data() for encoded streams also, complete FlateEncode to get all required attributes Ease data manipulation without going through ContentStream (slow) closes #656 --- pypdf/generic/_data_structures.py | 37 +++++++++++++++++++++++++------ tests/test_generic.py | 30 +++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 7 deletions(-) diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 91f59f746..df50aae29 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -828,16 +828,29 @@ def flate_encode(self) -> "EncodedStreamObject": if SA.FILTER in self: f = self[SA.FILTER] if isinstance(f, ArrayObject): - f.insert(0, NameObject(FT.FLATE_DECODE)) + f = ArrayObject([NameObject(FT.FLATE_DECODE), *f]) + try: + parms = ArrayObject( + [NullObject(), *self.get(SA.DECODE_PARMS, ArrayObject())] + ) + except TypeError: + # case of error where the * operator is not working (not an array + parms = ArrayObject( + [NullObject(), self.get(SA.DECODE_PARMS, ArrayObject())] + ) else: - newf = ArrayObject() - newf.append(NameObject("/FlateDecode")) - newf.append(f) - f = newf + f = ArrayObject([NameObject(FT.FLATE_DECODE), f]) + parms = ArrayObject( + [NullObject(), self.get(SA.DECODE_PARMS, NullObject())] + ) else: - f = NameObject("/FlateDecode") + f = NameObject(FT.FLATE_DECODE) + parms = None retval = EncodedStreamObject() + retval.update(self) retval[NameObject(SA.FILTER)] = f + if parms is not None: + retval[NameObject(SA.DECODE_PARMS)] = parms retval._data = FlateDecode.encode(self._data) return retval @@ -894,7 +907,17 @@ def getData(self) -> Union[None, str, bytes]: # deprecated return self.get_data() def set_data(self, data: Any) -> None: # deprecated - raise 
PdfReadError("Creating EncodedStreamObject is not currently supported") + from ..filters import FlateDecode + + if self.get(SA.FILTER, "") == FT.FLATE_DECODE: + if not isinstance(data, bytes): + raise TypeError("data must be bytes") + self.decoded_self._data = data + self._data = FlateDecode.encode(data) + else: + raise PdfReadError( + "Streams encoded with different filter from only FlateDecode is not supported" + ) def setData(self, data: Any) -> None: # deprecated deprecation_with_replacement("setData", "set_data", "3.0.0") diff --git a/tests/test_generic.py b/tests/test_generic.py index 5e464460d..e1f0a3167 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -1168,3 +1168,33 @@ def test_destination_withoutzoom(): name = "2021_book_security.pdf" reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name))) reader.outline + + +def test_encodedstream_set_data(): + """ + test the encoded stream set_data to extend data stream + checks also the flate_encode + """ + pdf_path = RESOURCE_ROOT / "crazyones.pdf" + reader = PdfReader(pdf_path) + co = reader.pages[0]["/Contents"][0].get_object() + co.set_data(b"%hello\n" + co.get_data()) + assert b"hello" in co.get_data() + b = BytesIO() + co.write_to_stream(b) + b.seek(0) + aa = read_object(b, None) + assert b"hello" in aa.get_data() + assert aa["/Filter"] == "/FlateDecode" + assert "/DecodeParms" not in aa + bb = aa.flate_encode() + assert b"hello" in bb.get_data() + assert bb["/Filter"] == ["/FlateDecode", "/FlateDecode"] + assert str(bb["/DecodeParms"]) == "[NullObject, NullObject]" + bb[NameObject("/Test")] = NameObject("/MyTest") + cc = bb.flate_encode() + assert bb["/Filter"] == ["/FlateDecode", "/FlateDecode"] + assert b"hello" in cc.get_data() + assert cc["/Filter"] == ["/FlateDecode", "/FlateDecode", "/FlateDecode"] + assert str(cc["/DecodeParms"]) == "[NullObject, NullObject, NullObject]" + assert cc[NameObject("/Test")] == "/MyTest"