Skip to content

Commit

Permalink
ENH : automatic access to pointed object
Browse files Browse the repository at this point in the history
alternative solution to py-pdf#2460
fixes py-pdf#2287
  • Loading branch information
pubpub-zz committed Feb 20, 2024
1 parent cc306ad commit 56dbd86
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 13 deletions.
14 changes: 2 additions & 12 deletions pypdf/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,21 +362,11 @@ def b_(s: Union[str, bytes]) -> bytes:
return r


@overload
def str_(b: str) -> str:
...


@overload
def str_(b: bytes) -> str:
...


def str_(b: Any) -> str:
    """
    Return a text representation of ``b``.

    Args:
        b: The value to convert. ``bytes`` are decoded as latin-1, which maps
            every byte to the code point of the same value and therefore
            never fails. Any other object is converted with ``str()``.

    Returns:
        The decoded or stringified value.
    """
    if isinstance(b, bytes):
        return b.decode("latin-1")
    # str() is the idiomatic spelling of b.__str__() and also copes with
    # objects (such as classes) on which the bare dunder call would fail.
    return str(b)


@overload
Expand Down
22 changes: 22 additions & 0 deletions pypdf/generic/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,28 @@ def get_object(self) -> Optional["PdfObject"]:
return None
return obj.get_object()

def __deepcopy__(self, memo: Any) -> "IndirectObject":
    """
    Build the copy returned by ``copy.deepcopy`` for this reference.

    The copy is a new IndirectObject created from this reference's
    ``idnum``, ``generation`` and ``pdf``; ``pdf`` is passed through as-is
    and ``memo`` is not consulted.
    """
    duplicate = IndirectObject(self.idnum, self.generation, self.pdf)
    return duplicate

def __getattr__(self, name: str) -> Any:
    """
    Look up an attribute missing on the reference in the pointed object.

    Python only calls ``__getattr__`` after normal attribute lookup has
    failed, so attributes defined on the IndirectObject itself never reach
    this method.

    Args:
        name: Name of the attribute being looked up.

    Returns:
        The attribute of the same name on ``self.get_object()``.

    Raises:
        AttributeError: If ``name`` is ``"pdf"`` or the pointed object has
            no such attribute.
    """
    # "pdf" is never delegated. Presumably get_object() relies on self.pdf,
    # so forwarding it would recurse while the attribute is unset - TODO
    # confirm against get_object().
    if name == "pdf":
        raise AttributeError(f"No attribute {name} found in IndirectObject")
    # Resolve once, outside the try, so errors raised while resolving the
    # reference are not mistaken for a missing attribute.
    target = self.get_object()
    try:
        return getattr(target, name)
    except AttributeError as exc:
        raise AttributeError(
            f"No attribute {name} found in IndirectObject or pointed object"
        ) from exc

def __getitem__(self, key: Any) -> Any:
    """
    Return ``self.get_object()[key]``.

    Subscripting the reference is forwarded to the pointed object, so any
    exception raised by that object's own subscript operation propagates.
    """
    pointed = self.get_object()
    return pointed[key]  # type: ignore

def __str__(self) -> str:
    """
    Return the string form of the pointed object.

    The text describes the data behind the reference, not the reference
    itself.
    """
    pointed = self.get_object()
    return pointed.__str__()

def __repr__(self) -> str:
    """
    Describe the reference itself rather than the pointed object.

    The text contains the repr of ``idnum`` and ``generation`` followed by
    ``id(self.pdf)``.
    """
    return f"IndirectObject({self.idnum!r}, {self.generation!r}, {id(self.pdf)})"

Expand Down
22 changes: 22 additions & 0 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1235,3 +1235,25 @@ def test_encodedstream_set_data():
assert cc["/Filter"] == ["/FlateDecode", "/FlateDecode", "/FlateDecode"]
assert str(cc["/DecodeParms"]) == "[NullObject, NullObject, NullObject]"
assert cc[NameObject("/Test")] == "/MyTest"


@pytest.mark.enable_socket()
def test_calling_indirect_objects():
    """Cope with attributes/items being accessed through an IndirectObject."""
    # The sample is fetched by URL, so the test carries the same
    # enable_socket marker as the other URL-based test in this module.
    url = (
        "https://raw.githubusercontent.com/xrkk/tmpppppp/main/"
        "2021%20----%20book%20-%20Security%20of%20biquitous%20Computing%20Systems.pdf"
    )
    name = "2021_book_security.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    # No assertions on the values: the test passes as long as neither
    # chained item/attribute access raises.
    reader.trailer.get("/Info")["/Creator"]
    reader.pages[0]["/Contents"][0].get_data()


@pytest.mark.enable_socket()
def test_indirect_object_page_dimensions():
    """Check the media box of the first page of the issue2287 sample file."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/13302338/Zymeworks_Corporate.Presentation_FINAL1101.pdf.pdf",
        name="issue2287.pdf",
    )
    # The file is read in non-strict mode.
    reader = PdfReader(BytesIO(pdf_bytes), strict=False)
    first_page = reader.pages[0]
    assert first_page.mediabox == RectangleObject((0, 0, 792, 612))
3 changes: 2 additions & 1 deletion tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -914,8 +914,9 @@ def test_write_dict_stream_object(pdf_file_path):

for k, v in page_object.items():
if k == "/Test":
assert str(v) != str(stream_object)
assert repr(v) != repr(stream_object)
assert isinstance(v, IndirectObject)
assert str(v) == str(stream_object) # expansion of IndirectObjects
assert str(v.get_object()) == str(stream_object)
break
else:
Expand Down

0 comments on commit 56dbd86

Please sign in to comment.