From 39ffc1d6265e1b710d87cd6fc1a5f6b270978090 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Mon, 25 Apr 2022 22:38:54 +0200 Subject: [PATCH] TST: Add test for FlateDecode (#823) Full credit to https://github.com/py-pdf/PyPDF2/pull/817/commits/9f628b3989b2f9714db9eb850bed323329a61922 who added the test in 2018 to PyPDF4 Co-authored-by: Acsor --- PyPDF2/filters.py | 17 +++++++++++--- PyPDF2/generic.py | 21 +++++++++--------- PyPDF2/pdf.py | 2 +- Tests/test_filters.py | 48 ++++++++++++++++++++++++++++++++++++---- Tests/test_papersizes.py | 3 ++- 5 files changed, 71 insertions(+), 20 deletions(-) diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py index 44f8b92d9..2a08dfb59 100644 --- a/PyPDF2/filters.py +++ b/PyPDF2/filters.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Copyright (c) 2006, Mathieu Fenniak # All rights reserved. # @@ -127,8 +128,15 @@ def compress(data): class FlateDecode(object): @staticmethod def decode(data, decodeParms): + """ + :param data: flate-encoded data. + :param decodeParms: a dictionary of values, understanding the + "/Predictor": key only + :return: the flate-decoded data. + """ data = decompress(data) predictor = 1 + if decodeParms: try: from PyPDF2.generic import ArrayObject @@ -139,12 +147,15 @@ def decode(data, decodeParms): else: predictor = decodeParms.get("/Predictor", 1) except AttributeError: - pass # usually an array with a null object was read + pass # Usually an array with a null object was read # predictor 1 == no predictor if predictor != 1: - columns = decodeParms[LZW.COLUMNS] + # The /Columns param. 
has 1 as the default value; see ISO 32000, + # §7.4.4.3 LZWDecode and FlateDecode Parameters, Table 8 + columns = decodeParms.get(LZW.COLUMNS, 1) + # PNG prediction: - if predictor >= 10 and predictor <= 15: + if 10 <= predictor <= 15: data = FlateDecode._decode_png_prediction(data, columns) else: # unsupported predictor diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py index 903ef7f97..bed824696 100644 --- a/PyPDF2/generic.py +++ b/PyPDF2/generic.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Copyright (c) 2006, Mathieu Fenniak # All rights reserved. # @@ -417,7 +418,6 @@ class ByteStringObject(utils.bytes_type, PdfObject): # type: ignore /O) is clearly not text, but is still stored in a "String" object. """ - ## # For compatibility with TextStringObject.original_bytes. This method # self. original_bytes = property(lambda self: self) @@ -442,7 +442,6 @@ class TextStringObject(utils.string_type, PdfObject): # type: ignore autodetect_pdfdocencoding = False autodetect_utf16 = False - ## # It is occasionally possible that a text string object gets created where # a byte string object was expected due to the autodetection mechanism -- # if that occurs, this "original_bytes" property can be used to @@ -538,15 +537,16 @@ def setdefault(self, key, value=None): def __getitem__(self, key): return dict.__getitem__(self, key).getObject() - ## - # Retrieves XMP (Extensible Metadata Platform) data relevant to the - # this object, if available. - #

- # Stability: Added in v1.12, will exist for all future v1.x releases. - # @return Returns a {@link #xmp.XmpInformation XmlInformation} instance - # that can be used to access XMP metadata from the document. Can also - # return None if no metadata was found on the document root. def getXmpMetadata(self): + """ + Retrieves XMP (Extensible Metadata Platform) data relevant to + this object, if available. + + Stability: Added in v1.12, will exist for all future v1.x releases. + @return Returns a {@link #xmp.XmpInformation XmlInformation} instance + that can be used to access XMP metadata from the document. Can also + return None if no metadata was found on the document root. + """ metadata = self.get("/Metadata", None) if metadata is None: return None @@ -557,7 +557,6 @@ def getXmpMetadata(self): self[NameObject("/Metadata")] = metadata return metadata - ## # Read-only property that accesses the {@link # #DictionaryObject.getXmpData getXmpData} function. #

diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py index bc9ae02f2..cfe2b6d16 100644 --- a/PyPDF2/pdf.py +++ b/PyPDF2/pdf.py @@ -1305,7 +1305,7 @@ def getPage(self, pageNumber): :return: a :class:`PageObject` instance. :rtype: :class:`PageObject` """ - ## ensure that we're not trying to access an encrypted PDF + # ensure that we're not trying to access an encrypted PDF # assert not self.trailer.has_key(TK.ENCRYPT) if self.flattenedPages is None: self._flatten() diff --git a/Tests/test_filters.py b/Tests/test_filters.py index ada1db4a2..4c1ff8e78 100644 --- a/Tests/test_filters.py +++ b/Tests/test_filters.py @@ -1,8 +1,48 @@ -from PyPDF2.filters import ASCIIHexDecode import string -from PyPDF2.errors import PdfStreamError +from itertools import product as cartesian_product + import pytest +from PyPDF2.errors import PdfReadError, PdfStreamError +from PyPDF2.filters import ASCIIHexDecode, FlateDecode + +filter_inputs = ( + # "", '', """""", + string.ascii_lowercase, + string.ascii_uppercase, + string.ascii_letters, + string.digits, + string.hexdigits, + string.punctuation, + string.whitespace, # Add more... +) + + +@pytest.mark.parametrize("predictor, s", list(cartesian_product([1], filter_inputs))) +def test_FlateDecode(predictor, s): + """ + Tests FlateDecode decode() and encode() methods. + """ + codec = FlateDecode() + s = s.encode() + encoded = codec.encode(s) + assert codec.decode(encoded, {"/Predictor": predictor}) == s + + +def test_FlateDecode_unsupported_predictor(): + """ + Inputs an unsupported predictor (outside the [10, 15] range) checking + that PdfReadError() is raised. Once this predictor support is updated + in the future, this test case may be removed. 
+ """ + codec = FlateDecode() + predictors = (-10, -1, 0, 9, 16, 20, 100) + + for predictor, s in cartesian_product(predictors, filter_inputs): + s = s.encode() + with pytest.raises(PdfReadError): + codec.decode(codec.encode(s), {"/Predictor": predictor}) + @pytest.mark.parametrize( "input,expected", @@ -40,7 +80,7 @@ ], ) @pytest.mark.no_py27 -def test_expected_results(input, expected): +def test_ASCIIHexDecode(input, expected): """ Feeds a bunch of values to ASCIIHexDecode.decode() and ensures the correct output is returned. @@ -52,7 +92,7 @@ def test_expected_results(input, expected): assert ASCIIHexDecode.decode(input) == expected -def test_no_eod(): +def test_ASCIIHexDecode_no_eod(): """Ensuring an exception is raised when no EOD character is present""" with pytest.raises(PdfStreamError) as exc: ASCIIHexDecode.decode("") diff --git a/Tests/test_papersizes.py b/Tests/test_papersizes.py index 605678e76..aeb4e1acd 100644 --- a/Tests/test_papersizes.py +++ b/Tests/test_papersizes.py @@ -1,6 +1,7 @@ -from PyPDF2 import papersizes import pytest +from PyPDF2 import papersizes + def test_din_a0(): dim = papersizes.PaperSize.A0