Skip to content

Commit

Permalink
ENH : add set_data to EncodedStreamObject
Browse files Browse the repository at this point in the history
add set_data() for encoded streams
also, complete FlateEncode to get all required attributes
Ease data manipulation without going through ContentStream (slow)
closes py-pdf#656
  • Loading branch information
pubpub-zz committed May 22, 2023
1 parent 0096c99 commit f999fb2
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 7 deletions.
37 changes: 30 additions & 7 deletions pypdf/generic/_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -828,16 +828,29 @@ def flate_encode(self) -> "EncodedStreamObject":
if SA.FILTER in self:
f = self[SA.FILTER]
if isinstance(f, ArrayObject):
f.insert(0, NameObject(FT.FLATE_DECODE))
f = ArrayObject([NameObject(FT.FLATE_DECODE), *f])
try:
parms = ArrayObject(
[NullObject(), *self.get(SA.DECODE_PARMS, ArrayObject())]
)
except TypeError:
# error case: the * operator fails because DecodeParms is not an array
parms = ArrayObject(
[NullObject(), self.get(SA.DECODE_PARMS, ArrayObject())]
)
else:
newf = ArrayObject()
newf.append(NameObject("/FlateDecode"))
newf.append(f)
f = newf
f = ArrayObject([NameObject(FT.FLATE_DECODE), f])
parms = ArrayObject(
[NullObject(), self.get(SA.DECODE_PARMS, NullObject())]
)
else:
f = NameObject("/FlateDecode")
f = NameObject(FT.FLATE_DECODE)
parms = None
retval = EncodedStreamObject()
retval.update(self)
retval[NameObject(SA.FILTER)] = f
if parms is not None:
retval[NameObject(SA.DECODE_PARMS)] = parms
retval._data = FlateDecode.encode(self._data)
return retval

Expand Down Expand Up @@ -894,7 +907,17 @@ def getData(self) -> Union[None, str, bytes]: # deprecated
return self.get_data()

def set_data(self, data: Any) -> None:
    """
    Replace the content of a FlateDecode-encoded stream.

    The raw bytes are stored on the cached decoded object and the
    Flate-compressed form is stored on this object.

    Args:
        data: The new, unencoded stream content; must be ``bytes``.

    Raises:
        PdfReadError: If the stream's filter is anything other than a
            single FlateDecode filter.
        TypeError: If ``data`` is not ``bytes``.
    """
    # Imported locally, as in the original code (presumably to avoid a
    # circular import between the generic and filters modules).
    from ..filters import FlateDecode

    # Only a single FlateDecode name is accepted; an array of filters or any
    # other filter name does not compare equal and is rejected.
    if self.get(SA.FILTER, "") != FT.FLATE_DECODE:
        raise PdfReadError(
            "Streams encoded with different filter from only FlateDecode is not supported"
        )
    if not isinstance(data, bytes):
        raise TypeError("data must be bytes")

    # NOTE(review): assumes self.decoded_self has already been populated
    # (the accompanying test calls get_data() before set_data()) -- confirm
    # the behavior when set_data() is called first.
    self.decoded_self._data = data
    self._data = FlateDecode.encode(data)

def setData(self, data: Any) -> None: # deprecated
deprecation_with_replacement("setData", "set_data", "3.0.0")
Expand Down
30 changes: 30 additions & 0 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1168,3 +1168,33 @@ def test_destination_withoutzoom():
name = "2021_book_security.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
reader.outline


def test_encodedstream_set_data():
    """
    Check set_data() on an encoded stream, and flate_encode().

    A comment line is prepended to the first content stream of
    crazyones.pdf, the stream is written out and parsed back, and
    flate_encode() is then applied twice, verifying the /Filter and
    /DecodeParms entries after each step.
    """
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    stream = reader.pages[0]["/Contents"][0].get_object()
    stream.set_data(b"%hello\n" + stream.get_data())
    assert b"hello" in stream.get_data()

    # Serialize the modified stream and read it back.
    buffer = BytesIO()
    stream.write_to_stream(buffer)
    buffer.seek(0)
    reread = read_object(buffer, None)
    assert b"hello" in reread.get_data()
    assert reread["/Filter"] == "/FlateDecode"
    assert "/DecodeParms" not in reread

    # One extra encoding level: the single filter name becomes an array.
    twice_encoded = reread.flate_encode()
    assert b"hello" in twice_encoded.get_data()
    assert twice_encoded["/Filter"] == ["/FlateDecode", "/FlateDecode"]
    assert str(twice_encoded["/DecodeParms"]) == "[NullObject, NullObject]"

    # A second extra level, starting from an array of filters; the custom
    # key must be carried over to the new object.
    twice_encoded[NameObject("/Test")] = NameObject("/MyTest")
    thrice_encoded = twice_encoded.flate_encode()
    # The object flate_encode() was called on must be left unchanged.
    assert twice_encoded["/Filter"] == ["/FlateDecode", "/FlateDecode"]
    assert b"hello" in thrice_encoded.get_data()
    assert thrice_encoded["/Filter"] == [
        "/FlateDecode",
        "/FlateDecode",
        "/FlateDecode",
    ]
    assert (
        str(thrice_encoded["/DecodeParms"])
        == "[NullObject, NullObject, NullObject]"
    )
    assert thrice_encoded[NameObject("/Test")] == "/MyTest"

0 comments on commit f999fb2

Please sign in to comment.