Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Byte math errors for decoding bitmap PNGs #1067

Merged
merged 9 commits into from
Jul 9, 2022
48 changes: 40 additions & 8 deletions PyPDF2/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import math
import struct
import zlib
from io import BytesIO, StringIO
from io import BytesIO
from typing import Any, Dict, Optional, Tuple, Union

from .generic import ArrayObject, DictionaryObject, NameObject
Expand Down Expand Up @@ -108,36 +108,51 @@ def decode(
if predictor != 1:
# The /Columns parameter defaults to 1; see ISO 32000,
# §7.4.4.3 LZWDecode and FlateDecode Parameters, Table 8
DEFAULT_BITS_PER_COMPONENT = 8
if isinstance(decode_parms, ArrayObject):
columns = 1
bits_per_component = DEFAULT_BITS_PER_COMPONENT
for decode_parm in decode_parms:
if "/Columns" in decode_parm:
columns = decode_parm["/Columns"]
if LZW.BITS_PER_COMPONENT in decode_parm:
bits_per_component = decode_parm[LZW.BITS_PER_COMPONENT]
else:
columns = (
1 if decode_parms is None else decode_parms.get(LZW.COLUMNS, 1)
)
bits_per_component = (
decode_parms.get(LZW.BITS_PER_COMPONENT, DEFAULT_BITS_PER_COMPONENT)
if decode_parms
else DEFAULT_BITS_PER_COMPONENT
)

# The PNG predictor can vary by row, so each row starts with a lead byte that selects it
rowlength = (
math.ceil(columns * bits_per_component / 8) + 1
) # number of bytes

# PNG prediction:
if 10 <= predictor <= 15:
str_data = FlateDecode._decode_png_prediction(str_data, columns) # type: ignore
str_data = FlateDecode._decode_png_prediction(str_data, columns, rowlength) # type: ignore
else:
# unsupported predictor
raise PdfReadError(f"Unsupported flatedecode predictor {predictor!r}")
return str_data

@staticmethod
def _decode_png_prediction(data: str, columns: int) -> str:
output = StringIO()
def _decode_png_prediction(data: str, columns: int, rowlength: int) -> bytes:
output = BytesIO()
# PNG prediction can vary from row to row
rowlength = columns + 1
assert len(data) % rowlength == 0
if len(data) % rowlength != 0:
raise PdfReadError("Image data is not rectangular")
prev_rowdata = (0,) * rowlength
for row in range(len(data) // rowlength):
rowdata = [
ord_(x) for x in data[(row * rowlength) : ((row + 1) * rowlength)]
]
filter_byte = rowdata[0]

if filter_byte == 0:
pass
elif filter_byte == 1:
Expand All @@ -162,7 +177,7 @@ def _decode_png_prediction(data: str, columns: int) -> str:
# unsupported PNG filter
raise PdfReadError(f"Unsupported PNG filter {filter_byte!r}")
prev_rowdata = tuple(rowdata)
output.write("".join([chr(x) for x in rowdata[1:]]))
output.write(bytearray(rowdata[1:]))
return output.getvalue()

@staticmethod
Expand Down Expand Up @@ -544,15 +559,32 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes]:

size = (x_object_obj[IA.WIDTH], x_object_obj[IA.HEIGHT])
data = x_object_obj.get_data() # type: ignore
if x_object_obj[IA.COLOR_SPACE] == ColorSpaces.DEVICE_RGB:
if (
IA.COLOR_SPACE in x_object_obj
and x_object_obj[IA.COLOR_SPACE] == ColorSpaces.DEVICE_RGB
):
mode: Literal["RGB", "P"] = "RGB"
else:
mode = "P"
extension = None
if SA.FILTER in x_object_obj:
if x_object_obj[SA.FILTER] == FT.FLATE_DECODE:
extension = ".png"
color_space = None
if "/ColorSpace" in x_object_obj:
color_space = x_object_obj["/ColorSpace"].get_object()
if (
isinstance(color_space, ArrayObject)
and color_space[0] == "/Indexed"
):
color_space, base, hival, lookup = (
value.get_object() for value in color_space
)

img = Image.frombytes(mode, size, data)
if color_space == "/Indexed":
img.putpalette(lookup.get_data())
img = img.convert("RGB")
if G.S_MASK in x_object_obj: # add alpha channel
alpha = Image.frombytes("L", size, x_object_obj[G.S_MASK].get_data())
img.putalpha(alpha)
Expand Down