Skip to content

Commit

Permalink
IndirectObject.fully_unwrap infinite recursion guard
Browse files Browse the repository at this point in the history
Ensures we only go to a maximum depth, so a malicious PDF cannot
get us indefinitely stuck. Also introduces unit tests.
  • Loading branch information
SamStephens committed Feb 19, 2024
1 parent ea85c75 commit 323e162
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 2 deletions.
12 changes: 10 additions & 2 deletions pypdf/generic/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@

__author__ = "Mathieu Fenniak"
__author_email__ = "[email protected]"
# Depth limit for following chained IndirectObjects: once the unwrap loop's
# hop counter exceeds this value a PdfReadError is raised instead of continuing.
MAX_INDIRECT_OBJECT_NESTING_DEPTH = 10


class PdfObject(PdfObjectProtocol):
Expand Down Expand Up @@ -292,8 +293,15 @@ def fully_unwrap(obj: Optional["PdfObject"]) -> Optional["PdfObject"]:
Given a PdfObject that may be an IndirectObject, recursively unwrap that IndirectObject until a None or
PdfObject that is not an IndirectObject is returned.
"""
if isinstance(obj, IndirectObject):
return IndirectObject.fully_unwrap(obj.get_object())
depth = 0
while isinstance(obj, IndirectObject):
if depth > MAX_INDIRECT_OBJECT_NESTING_DEPTH:
raise PdfReadError(
"IndirectObject nested too deep. "
"If required, consider increasing MAX_INDIRECT_OBJECT_NESTING_DEPTH."
)
depth += 1
obj = obj.get_object()
return obj

def __repr__(self) -> str:
Expand Down
67 changes: 67 additions & 0 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Test the pypdf.generic module."""
from io import BytesIO
from pathlib import Path
from typing import Optional
from unittest.mock import patch

import pytest
Expand Down Expand Up @@ -1058,6 +1059,72 @@ def test_indirect_object_page_dimensions():
assert mediabox == RectangleObject((0, 0, 792, 612))


def test_indirect_object_fully_unwrap():
    """fully_unwrap follows a two-level chain of IndirectObjects to the final value."""
    final_value = NumberObject(123)

    class ResolvedHolder:
        """Wrapper handed back by the fake PDF; get_object() yields the stored target."""

        def __init__(self, result):
            self.result = result

        def get_object(self) -> Optional[PdfObject]:
            return self.result

    class FakePdf:
        """Minimal PDF double that resolves a reference via the ``targets`` table."""

        def get_object(self, indirect_reference: IndirectObject) -> Optional[PdfObject]:
            target = targets[indirect_reference.idnum]
            return ResolvedHolder(target)

    pdf_double = FakePdf()
    outer_ref = IndirectObject(0, 0, pdf_double)
    inner_ref = IndirectObject(1, 0, pdf_double)

    # Two layers of indirection, keyed by idnum:
    # outer_ref resolves to inner_ref, which resolves to final_value.
    targets = {0: inner_ref, 1: final_value}

    # Sanity-check the fixture one hop at a time before exercising fully_unwrap.
    assert outer_ref.get_object() == inner_ref
    assert inner_ref.get_object() == final_value

    # The actual behaviour under test: both hops are followed in one call.
    assert IndirectObject.fully_unwrap(outer_ref) == final_value


def test_indirect_object_fully_unwrap_depth_limit():
    """fully_unwrap raises PdfReadError when a reference keeps resolving to itself."""
    expected_message = (
        "IndirectObject nested too deep. If required, consider increasing MAX_INDIRECT_OBJECT_NESTING_DEPTH."
    )

    class ResolvedHolder:
        """Wrapper handed back by the fake PDF; get_object() yields the stored target."""

        def __init__(self, result):
            self.result = result

        def get_object(self) -> Optional[PdfObject]:
            return self.result

    class SelfReferencingPdf:
        """
        PDF double whose lookup wraps the very IndirectObject it was asked to resolve,
        so unwrapping never reaches a non-indirect object and the depth guard must fire.
        """

        def get_object(self, indirect_reference: IndirectObject) -> Optional[PdfObject]:
            return ResolvedHolder(indirect_reference)

    looping_ref = IndirectObject(0, 0, SelfReferencingPdf())

    with pytest.raises(PdfReadError) as exc:
        IndirectObject.fully_unwrap(looping_ref)

    # Checked after the context manager exits: a statement following the raising
    # call inside the ``with`` body would never run.
    assert exc.value.args[0] == expected_message


def test_indirect_object_invalid_read():
stream = BytesIO(b"0 1 s")
with pytest.raises(PdfReadError) as exc:
Expand Down

0 comments on commit 323e162

Please sign in to comment.