Skip to content

Commit

Permalink
ROB: Relax flate decoding for too many lookup values (#2331)
Browse files Browse the repository at this point in the history
When handling flate objects with a lookup table and image mode `1`, we previously raised a generic `AssertionError` if the number of lookup values did not match the expected count.

Cases where too many values are specified are now considered a warning only.
Additionally, this PR adds a more meaningful error message.
  • Loading branch information
stefan6419846 authored Dec 10, 2023
1 parent 5e59160 commit 6dad92a
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 2 deletions.
10 changes: 8 additions & 2 deletions pypdf/_xobj_image_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from io import BytesIO
from typing import Any, List, Tuple, Union, cast

from ._utils import logger_warning
from ._utils import WHITESPACES, logger_warning
from .constants import ColorSpaces
from .errors import PdfReadError
from .generic import (
Expand Down Expand Up @@ -195,7 +195,13 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
else:
if img.mode == "1":
# Two values ("high" and "low").
assert len(lookup) == 2 * nb, len(lookup)
expected_count = 2 * nb
if len(lookup) != expected_count:
if len(lookup) < expected_count:
raise PdfReadError(f"Not enough lookup values: Expected {expected_count}, got {len(lookup)}.")
lookup = lookup[:expected_count]
if not all(_value in WHITESPACES for _value in lookup[expected_count:]):
raise PdfReadError(f"Too many lookup values: Expected {expected_count}, got {len(lookup)}.")
colors_arr = [lookup[:nb], lookup[nb:]]
arr = b"".join(
[
Expand Down
9 changes: 9 additions & 0 deletions tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,3 +589,12 @@ def test_flate_decode_with_image_mode_1():
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
for image in reader.pages[7].images:
_ = image


@pytest.mark.enable_socket()
def test_flate_decode_with_image_mode_1__whitespace_at_end_of_lookup():
    """
    Regression test from #2331.

    Fetch the sample PDF and access the first image on its first page.
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/13611048/out1.pdf",
        name="issue2331.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    # Accessing the image is the check itself; no value is asserted.
    first_page.images[0]

0 comments on commit 6dad92a

Please sign in to comment.