py-pdf · MartinThoma · Jul 9, 2022 · Jul 6, 2022 · Jul 7, 2022 · Jul 7, 2022
diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py
@@ -37,7 +37,7 @@
 import math
 import struct
 import zlib
-from io import BytesIO, StringIO
+from io import BytesIO
 from typing import Any, Dict, Optional, Tuple, Union
 
 from .generic import ArrayObject, DictionaryObject, NameObject
@@ -108,36 +108,51 @@ def decode(
         if predictor != 1:
             # The /Columns param. has 1 as the default value; see ISO 32000,
             # §7.4.4.3 LZWDecode and FlateDecode Parameters, Table 8
+            DEFAULT_BITS_PER_COMPONENT = 8
             if isinstance(decode_parms, ArrayObject):
                 columns = 1
+                bits_per_component = DEFAULT_BITS_PER_COMPONENT
                 for decode_parm in decode_parms:
                     if "/Columns" in decode_parm:
                         columns = decode_parm["/Columns"]
+                    if LZW.BITS_PER_COMPONENT in decode_parm:
+                        bits_per_component = decode_parm[LZW.BITS_PER_COMPONENT]
             else:
                 columns = (
                     1 if decode_parms is None else decode_parms.get(LZW.COLUMNS, 1)
                 )
+                bits_per_component = (
+                    decode_parms.get(LZW.BITS_PER_COMPONENT, DEFAULT_BITS_PER_COMPONENT)
+                    if decode_parms
+                    else DEFAULT_BITS_PER_COMPONENT
+                )
+
+            # PNG predictors can vary per row, so each row has a leading predictor byte
+            rowlength = (
+                math.ceil(columns * bits_per_component / 8) + 1
+            )  # number of bytes
 
             # PNG prediction:
             if 10 <= predictor <= 15:
-                str_data = FlateDecode._decode_png_prediction(str_data, columns)  # type: ignore
+                str_data = FlateDecode._decode_png_prediction(str_data, columns, rowlength)  # type: ignore
             else:
                 # unsupported predictor
                 raise PdfReadError(f"Unsupported flatedecode predictor {predictor!r}")
         return str_data
 
     @staticmethod
-    def _decode_png_prediction(data: str, columns: int) -> str:
-        output = StringIO()
+    def _decode_png_prediction(data: str, columns: int, rowlength: int) -> bytes:
+        output = BytesIO()
         # PNG prediction can vary from row to row
-        rowlength = columns + 1
-        assert len(data) % rowlength == 0
+        if len(data) % rowlength != 0:
+            raise PdfReadError("Image data is not rectangular")
         prev_rowdata = (0,) * rowlength
         for row in range(len(data) // rowlength):
             rowdata = [
                 ord_(x) for x in data[(row * rowlength) : ((row + 1) * rowlength)]
             ]
             filter_byte = rowdata[0]
+
             if filter_byte == 0:
                 pass
             elif filter_byte == 1:
@@ -162,7 +177,7 @@ def _decode_png_prediction(data: str, columns: int) -> str:
                 # unsupported PNG filter
                 raise PdfReadError(f"Unsupported PNG filter {filter_byte!r}")
             prev_rowdata = tuple(rowdata)
-            output.write("".join([chr(x) for x in rowdata[1:]]))
+            output.write(bytearray(rowdata[1:]))
         return output.getvalue()
 
     @staticmethod
@@ -544,15 +559,32 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes]:
 
     size = (x_object_obj[IA.WIDTH], x_object_obj[IA.HEIGHT])
     data = x_object_obj.get_data()  # type: ignore
-    if x_object_obj[IA.COLOR_SPACE] == ColorSpaces.DEVICE_RGB:
+    if (
+        IA.COLOR_SPACE in x_object_obj
+        and x_object_obj[IA.COLOR_SPACE] == ColorSpaces.DEVICE_RGB
+    ):
         mode: Literal["RGB", "P"] = "RGB"
     else:
         mode = "P"
     extension = None
     if SA.FILTER in x_object_obj:
         if x_object_obj[SA.FILTER] == FT.FLATE_DECODE:
             extension = ".png"
+            color_space = None
+            if "/ColorSpace" in x_object_obj:
+                color_space = x_object_obj["/ColorSpace"].get_object()
+                if (
+                    isinstance(color_space, ArrayObject)
+                    and color_space[0] == "/Indexed"
+                ):
+                    color_space, base, hival, lookup = (
+                        value.get_object() for value in color_space
+                    )
+
             img = Image.frombytes(mode, size, data)
+            if color_space == "/Indexed":
+                img.putpalette(lookup.get_data())
+                img = img.convert("RGB")
             if G.S_MASK in x_object_obj:  # add alpha channel
                 alpha = Image.frombytes("L", size, x_object_obj[G.S_MASK].get_data())
                 img.putalpha(alpha)