Skip to content

Commit

Permalink
ENH: Automatic access to pointed object for IndirectObject (#2464)
Browse files Browse the repository at this point in the history
alternative solution to #2460
fixes #2287
  • Loading branch information
pubpub-zz authored Feb 25, 2024
1 parent cd705f9 commit 03af2c2
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 17 deletions.
14 changes: 2 additions & 12 deletions pypdf/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,21 +362,11 @@ def b_(s: Union[str, bytes]) -> bytes:
return r


@overload
def str_(b: str) -> str:
...


@overload
def str_(b: bytes) -> str:
...


def str_(b: Union[str, bytes]) -> str:
def str_(b: Any) -> str:
if isinstance(b, bytes):
return b.decode("latin-1")
else:
return b
return str(b) # will return b.__str__() if defined


@overload
Expand Down
34 changes: 30 additions & 4 deletions pypdf/generic/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,10 +281,36 @@ def indirect_reference(self) -> "IndirectObject": # type: ignore[override]
return self

def get_object(self) -> Optional["PdfObject"]:
obj = self.pdf.get_object(self)
if obj is None:
return None
return obj.get_object()
return self.pdf.get_object(self)

def __deepcopy__(self, memo: Any) -> "IndirectObject":
    """
    Build a new reference with this one's ``idnum``, ``generation`` and ``pdf``.

    ``memo`` is accepted to satisfy the ``copy.deepcopy`` protocol but is
    not used: the three fields are handed to the constructor as they are.
    """
    number, gen, owner = self.idnum, self.generation, self.pdf
    return IndirectObject(number, gen, owner)

def _get_object_with_check(self) -> Optional["PdfObject"]:
    """
    Resolve this reference and reject a target that is itself a reference.

    The check is kept out of ``get_object()`` on purpose, so that
    ``get_object()`` is not slowed down by it.

    Returns:
        Whatever ``get_object()`` returned.

    Raises:
        PdfStreamError: if the resolved target is an ``IndirectObject``.
    """
    target = self.get_object()
    if not isinstance(target, IndirectObject):
        return target
    raise PdfStreamError(
        f"{self.__repr__()} references an IndirectObject {target.__repr__()}"
    )

def __getattr__(self, name: str) -> Any:
    """
    Fall back to the pointed object for attributes this reference lacks.

    Python calls this hook only after normal attribute lookup on the
    reference has failed.

    Args:
        name: the attribute being looked up.

    Returns:
        The attribute called ``name`` on the pointed object.

    Raises:
        AttributeError: if ``name`` is one of the reference's own fields
            and is missing, or if the pointed object does not provide it.
        PdfStreamError: propagated from ``_get_object_with_check()`` when
            the target is itself an ``IndirectObject``.
    """
    message = f"No attribute {name} found in IndirectObject or pointed object"
    # These are the reference's own fields. Reaching this hook for one of
    # them means the instance is only half-built (e.g. ``copy``/``pickle``
    # probing for ``__setstate__`` before the state is restored). Resolving
    # the target reads ``self.pdf``, so delegating here would re-enter this
    # hook until RecursionError; fail with a plain AttributeError instead.
    if name in ("pdf", "idnum", "generation"):
        raise AttributeError(message)
    try:
        return getattr(self._get_object_with_check(), name)
    except AttributeError as exc:
        # Chain explicitly so the underlying lookup failure stays visible.
        raise AttributeError(message) from exc

def __getitem__(self, key: Any) -> Any:
    """
    Index into the pointed object instead of the reference.

    The target is obtained through ``_get_object_with_check()``, so any
    error raised there propagates to the caller.
    """
    target = self._get_object_with_check()
    return target[key]  # type: ignore

def __str__(self) -> str:
    """Return the string form of the pointed object, not of the reference."""
    target = self.get_object()
    return target.__str__()

def __repr__(self) -> str:
    """
    Describe the reference itself rather than the object it points to.

    The result shows ``idnum``, ``generation`` and the ``id()`` of ``pdf``.
    """
    number, gen = self.idnum, self.generation
    owner_id = id(self.pdf)
    return f"IndirectObject({number!r}, {gen!r}, {owner_id})"
Expand Down
32 changes: 32 additions & 0 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1246,3 +1246,35 @@ def test_encodedstream_set_data():
assert cc["/Filter"] == ["/FlateDecode", "/FlateDecode", "/FlateDecode"]
assert str(cc["/DecodeParms"]) == "[NullObject, NullObject, NullObject]"
assert cc[NameObject("/Test")] == "/MyTest"


@pytest.mark.enable_socket()
def test_calling_indirect_objects():
    """Cope with cases where attributes/items are called from IndirectObject"""
    # The sample is fetched through get_data_from_url, hence the socket marker
    # (same as the other URL-based test in this module).
    url = (
        "https://raw.githubusercontent.com/xrkk/tmpppppp/main/"
        "2021%20----%20book%20-%20Security%20of%20biquitous%20Computing%20Systems.pdf"
    )
    name = "2021_book_security.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    # Item access and method calls chained through references: these two
    # statements only have to run without raising.
    reader.trailer.get("/Info")["/Creator"]
    reader.pages[0]["/Contents"][0].get_data()
    writer = PdfWriter(clone_from=reader)
    # Attribute access on the reference is forwarded to the pointed object.
    ind = writer._add_object(writer)
    assert ind.fileobj == writer.fileobj
    with pytest.raises(AttributeError):
        ind.not_existing_attribute
    # create an IndirectObject referencing an IndirectObject.
    writer._objects.append(writer.pages[0].indirect_reference)
    ind = IndirectObject(len(writer._objects), 0, writer)
    with pytest.raises(PdfStreamError):
        ind["/Type"]


@pytest.mark.enable_socket()
def test_indirect_object_page_dimensions():
    """Check the first page's mediabox of the sample cached as issue2287.pdf."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/13302338/Zymeworks_Corporate.Presentation_FINAL1101.pdf.pdf",
        name="issue2287.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_bytes), strict=False).pages[0]
    assert first_page.mediabox == RectangleObject((0, 0, 792, 612))
3 changes: 2 additions & 1 deletion tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -915,8 +915,9 @@ def test_write_dict_stream_object(pdf_file_path):

for k, v in page_object.items():
if k == "/Test":
assert str(v) != str(stream_object)
assert repr(v) != repr(stream_object)
assert isinstance(v, IndirectObject)
assert str(v) == str(stream_object) # expansion of IndirectObjects
assert str(v.get_object()) == str(stream_object)
break
else:
Expand Down

0 comments on commit 03af2c2

Please sign in to comment.