Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Handle IndirectObject in media boxes #2460

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion pypdf/generic/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,16 @@ def get_object(self) -> Optional["PdfObject"]:
return None
return obj.get_object()

@staticmethod
def fully_unwrap(obj: Optional["PdfObject"]) -> Optional["PdfObject"]:
    """
    Resolve a possibly indirect object down to its final target.

    Args:
        obj: A PdfObject, an IndirectObject, or None.

    Returns:
        ``obj`` itself when it is not an IndirectObject (this includes
        None). Otherwise the result of repeatedly calling ``get_object()``
        until the value obtained is no longer an IndirectObject.
    """
    # Guard clause: anything that is not a reference is already resolved.
    if not isinstance(obj, IndirectObject):
        return obj
    # The target of a reference may itself be a reference, so recurse.
    target = obj.get_object()
    return IndirectObject.fully_unwrap(target)

def __repr__(self) -> str:
    """
    Return a debugging representation of this reference.

    The text contains the repr of ``self.idnum`` and ``self.generation``
    followed by ``id(self.pdf)``, i.e. the identity of the associated pdf
    object rather than its contents.
    """
    return f"IndirectObject({self.idnum!r}, {self.generation!r}, {id(self.pdf)})"

Expand Down Expand Up @@ -348,7 +358,7 @@ def __new__(
try:
value = float(str_(value))
return float.__new__(cls, value)
except Exception as e:
except ValueError as e:
# If this isn't a valid decimal (happens in malformed PDFs)
# fallback to 0
logger_warning(
Expand Down
3 changes: 2 additions & 1 deletion pypdf/generic/_rectangle.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Any, Tuple, Union

from ._base import FloatObject, NumberObject
from ._base import FloatObject, IndirectObject, NumberObject
from ._data_structures import ArrayObject


Expand All @@ -26,6 +26,7 @@ def __init__(
ArrayObject.__init__(self, [self._ensure_is_number(x) for x in arr]) # type: ignore

def _ensure_is_number(self, value: Any) -> Union[FloatObject, NumberObject]:
    """
    Coerce one rectangle coordinate to a numeric PDF object.

    Args:
        value: The raw coordinate; it may be an IndirectObject.

    Returns:
        The resolved value unchanged if it is already a NumberObject or
        FloatObject, otherwise a FloatObject built from the resolved value.
    """
    # Follow indirect references first so the type check sees the real value.
    resolved = IndirectObject.fully_unwrap(value)
    if isinstance(resolved, (NumberObject, FloatObject)):
        return resolved
    return FloatObject(resolved)
Expand Down
24 changes: 22 additions & 2 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,26 @@ def indirect_reference(self):
return self


def test_float_object_exception(caplog):
def test_float_object_invalid_format_warning(caplog):
    """FloatObject built from a non-numeric string equals 0 and logs something."""
    parsed = FloatObject("abc")
    assert parsed == 0
    # Any captured log output counts; the exact message is not pinned here.
    assert caplog.text != ""


def test_number_object_exception(caplog):
def test_number_object_invalid_format_warning(caplog):
    """NumberObject built from the string "0,0" equals 0 and logs something."""
    parsed = NumberObject("0,0")
    assert parsed == 0
    # Any captured log output counts; the exact message is not pinned here.
    assert caplog.text != ""


def test_float_object_indirect_object_exception(caplog):
    """Passing an IndirectObject straight to FloatObject raises TypeError."""
    unresolved = IndirectObject(0, 0, None)
    with pytest.raises(TypeError):
        FloatObject(unresolved)


def test_number_object_indirect_object_exception(caplog):
    """Passing an IndirectObject straight to NumberObject raises TypeError."""
    unresolved = IndirectObject(0, 0, None)
    with pytest.raises(TypeError):
        NumberObject(unresolved)


def test_number_object_no_exception():
    """Constructing a NumberObject from a very large int must not raise."""
    huge_int = 2**100_000_000
    NumberObject(huge_int)

Expand Down Expand Up @@ -1038,6 +1048,16 @@ def test_name_object_invalid_decode():
NameObject.read_from_stream(stream, ReaderDummy(strict=False))


@pytest.mark.enable_socket()
def test_indirect_object_page_dimensions():
    """The first page's mediabox of the issue2287 sample PDF is (0, 0, 792, 612)."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/13302338/Zymeworks_Corporate.Presentation_FINAL1101.pdf.pdf",
        name="issue2287.pdf",
    )
    # Non-strict mode, as in the original test.
    reader = PdfReader(BytesIO(pdf_bytes), strict=False)
    first_page = reader.pages[0]
    assert first_page.mediabox == RectangleObject((0, 0, 792, 612))


def test_indirect_object_invalid_read():
stream = BytesIO(b"0 1 s")
with pytest.raises(PdfReadError) as exc:
Expand Down
Loading