TST: Add test for FlateDecode (#823)

Full credit to 9f628b3, who added the test to PyPDF4 in 2018. Co-authored-by: Acsor <[email protected]>
py-pdf · Apr 25, 2022 · 39ffc1d · 39ffc1d
1 parent 10ccbae
commit 39ffc1d
Show file tree

Hide file tree

Showing 5 changed files with 71 additions and 20 deletions.
diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # Copyright (c) 2006, Mathieu Fenniak
 # All rights reserved.
 #
@@ -127,8 +128,15 @@ def compress(data):
 class FlateDecode(object):
     @staticmethod
     def decode(data, decodeParms):
+        """
+        :param data: flate-encoded data.
+        :param decodeParms: a dictionary of values; only the
+            "/Predictor":<int> and "/Columns":<int> keys are understood
+        :return: the flate-decoded data.
+        """
         data = decompress(data)
         predictor = 1
+
         if decodeParms:
             try:
                 from PyPDF2.generic import ArrayObject
@@ -139,12 +147,15 @@ def decode(data, decodeParms):
                 else:
                     predictor = decodeParms.get("/Predictor", 1)
             except AttributeError:
-                pass  # usually an array with a null object was read
+                pass  # Usually an array with a null object was read
         # predictor 1 == no predictor
         if predictor != 1:
-            columns = decodeParms[LZW.COLUMNS]
+            # The /Columns param. has 1 as the default value; see ISO 32000,
+            # §7.4.4.3 LZWDecode and FlateDecode Parameters, Table 8
+            columns = decodeParms.get(LZW.COLUMNS, 1)
+
             # PNG prediction:
-            if predictor >= 10 and predictor <= 15:
+            if 10 <= predictor <= 15:
                 data = FlateDecode._decode_png_prediction(data, columns)
             else:
                 # unsupported predictor

diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # Copyright (c) 2006, Mathieu Fenniak
 # All rights reserved.
 #
@@ -417,7 +418,6 @@ class ByteStringObject(utils.bytes_type, PdfObject):  # type: ignore
     /O) is clearly not text, but is still stored in a "String" object.
     """
 
-    ##
     # For compatibility with TextStringObject.original_bytes.  This method
     #  self.
     original_bytes = property(lambda self: self)
@@ -442,7 +442,6 @@ class TextStringObject(utils.string_type, PdfObject):  # type: ignore
     autodetect_pdfdocencoding = False
     autodetect_utf16 = False
 
-    ##
     # It is occasionally possible that a text string object gets created where
     # a byte string object was expected due to the autodetection mechanism --
     # if that occurs, this "original_bytes" property can be used to
@@ -538,15 +537,16 @@ def setdefault(self, key, value=None):
     def __getitem__(self, key):
         return dict.__getitem__(self, key).getObject()
 
-    ##
-    # Retrieves XMP (Extensible Metadata Platform) data relevant to the
-    # this object, if available.
-    # <p>
-    # Stability: Added in v1.12, will exist for all future v1.x releases.
-    # @return Returns a {@link #xmp.XmpInformation XmlInformation} instance
-    # that can be used to access XMP metadata from the document.  Can also
-    # return None if no metadata was found on the document root.
     def getXmpMetadata(self):
+        """
+        Retrieves XMP (Extensible Metadata Platform) data relevant to
+        this object, if available.
+
+        Stability: Added in v1.12, will exist for all future v1.x releases.
+        @return Returns a {@link #xmp.XmpInformation XmlInformation} instance
+        that can be used to access XMP metadata from the document.  Can also
+        return None if no metadata was found on the document root.
+        """
         metadata = self.get("/Metadata", None)
         if metadata is None:
             return None
@@ -557,7 +557,6 @@ def getXmpMetadata(self):
             self[NameObject("/Metadata")] = metadata
         return metadata
 
-    ##
     # Read-only property that accesses the {@link
     # #DictionaryObject.getXmpData getXmpData} function.
     # <p>

diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py
@@ -1305,7 +1305,7 @@ def getPage(self, pageNumber):
         :return: a :class:`PageObject<pdf.PageObject>` instance.
         :rtype: :class:`PageObject<pdf.PageObject>`
         """
-        ## ensure that we're not trying to access an encrypted PDF
+        # ensure that we're not trying to access an encrypted PDF
         # assert not self.trailer.has_key(TK.ENCRYPT)
         if self.flattenedPages is None:
             self._flatten()

diff --git a/Tests/test_filters.py b/Tests/test_filters.py
@@ -1,8 +1,48 @@
-from PyPDF2.filters import ASCIIHexDecode
 import string
-from PyPDF2.errors import PdfStreamError
+from itertools import product as cartesian_product
+
 import pytest
 
+from PyPDF2.errors import PdfReadError, PdfStreamError
+from PyPDF2.filters import ASCIIHexDecode, FlateDecode
+
+filter_inputs = (
+    # "", '', """""",
+    string.ascii_lowercase,
+    string.ascii_uppercase,
+    string.ascii_letters,
+    string.digits,
+    string.hexdigits,
+    string.punctuation,
+    string.whitespace,  # Add more...
+)
+
+
+@pytest.mark.parametrize("predictor, s", list(cartesian_product([1], filter_inputs)))
+def test_FlateDecode(predictor, s):
+    """
+    Tests FlateDecode decode() and encode() methods.
+    """
+    codec = FlateDecode()
+    s = s.encode()
+    encoded = codec.encode(s)
+    assert codec.decode(encoded, {"/Predictor": predictor}) == s
+
+
+def test_FlateDecode_unsupported_predictor():
+    """
+    Inputs an unsupported predictor (outside the [10, 15] range) checking
+    that PdfReadError() is raised. Once this predictor support is updated
+    in the future, this test case may be removed.
+    """
+    codec = FlateDecode()
+    predictors = (-10, -1, 0, 9, 16, 20, 100)
+
+    for predictor, s in cartesian_product(predictors, filter_inputs):
+        s = s.encode()
+        with pytest.raises(PdfReadError):
+            codec.decode(codec.encode(s), {"/Predictor": predictor})
+
 
 @pytest.mark.parametrize(
     "input,expected",
@@ -40,7 +80,7 @@
     ],
 )
 @pytest.mark.no_py27
-def test_expected_results(input, expected):
+def test_ASCIIHexDecode(input, expected):
     """
     Feeds a bunch of values to ASCIIHexDecode.decode() and ensures the
     correct output is returned.
@@ -52,7 +92,7 @@ def test_expected_results(input, expected):
     assert ASCIIHexDecode.decode(input) == expected
 
 
-def test_no_eod():
+def test_ASCIIHexDecode_no_eod():
     """Ensuring an exception is raised when no EOD character is present"""
     with pytest.raises(PdfStreamError) as exc:
         ASCIIHexDecode.decode("")

diff --git a/Tests/test_papersizes.py b/Tests/test_papersizes.py
@@ -1,6 +1,7 @@
-from PyPDF2 import papersizes
 import pytest
 
+from PyPDF2 import papersizes
+
 
 def test_din_a0():
     dim = papersizes.PaperSize.A0