Skip to content

Commit

Permalink
TST: Add test for FlateDecode (#823)
Browse files Browse the repository at this point in the history
Full credit to
9f628b3
who added the test in 2018 to PyPDF4

Co-authored-by: Acsor <[email protected]>
  • Loading branch information
MartinThoma and acsor authored Apr 25, 2022
1 parent 10ccbae commit 39ffc1d
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 20 deletions.
17 changes: 14 additions & 3 deletions PyPDF2/filters.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
Expand Down Expand Up @@ -127,8 +128,15 @@ def compress(data):
class FlateDecode(object):
@staticmethod
def decode(data, decodeParms):
"""
:param data: flate-encoded data.
:param decodeParms: a dictionary of values, understanding the
"/Predictor":<int> key only
:return: the flate-decoded data.
"""
data = decompress(data)
predictor = 1

if decodeParms:
try:
from PyPDF2.generic import ArrayObject
Expand All @@ -139,12 +147,15 @@ def decode(data, decodeParms):
else:
predictor = decodeParms.get("/Predictor", 1)
except AttributeError:
pass # usually an array with a null object was read
pass # Usually an array with a null object was read
# predictor 1 == no predictor
if predictor != 1:
columns = decodeParms[LZW.COLUMNS]
# The /Columns param. has 1 as the default value; see ISO 32000,
# §7.4.4.3 LZWDecode and FlateDecode Parameters, Table 8
columns = decodeParms.get(LZW.COLUMNS, 1)

# PNG prediction:
if predictor >= 10 and predictor <= 15:
if 10 <= predictor <= 15:
data = FlateDecode._decode_png_prediction(data, columns)
else:
# unsupported predictor
Expand Down
21 changes: 10 additions & 11 deletions PyPDF2/generic.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
Expand Down Expand Up @@ -417,7 +418,6 @@ class ByteStringObject(utils.bytes_type, PdfObject): # type: ignore
/O) is clearly not text, but is still stored in a "String" object.
"""

##
# For compatibility with TextStringObject.original_bytes. This method
# self.
original_bytes = property(lambda self: self)
Expand All @@ -442,7 +442,6 @@ class TextStringObject(utils.string_type, PdfObject): # type: ignore
autodetect_pdfdocencoding = False
autodetect_utf16 = False

##
# It is occasionally possible that a text string object gets created where
# a byte string object was expected due to the autodetection mechanism --
# if that occurs, this "original_bytes" property can be used to
Expand Down Expand Up @@ -538,15 +537,16 @@ def setdefault(self, key, value=None):
def __getitem__(self, key):
return dict.__getitem__(self, key).getObject()

##
# Retrieves XMP (Extensible Metadata Platform) data relevant to the
# this object, if available.
# <p>
# Stability: Added in v1.12, will exist for all future v1.x releases.
# @return Returns a {@link #xmp.XmpInformation XmlInformation} instance
# that can be used to access XMP metadata from the document. Can also
# return None if no metadata was found on the document root.
def getXmpMetadata(self):
"""
Retrieves XMP (Extensible Metadata Platform) data relevant to the
this object, if available.
Stability: Added in v1.12, will exist for all future v1.x releases.
@return Returns a {@link #xmp.XmpInformation XmlInformation} instance
that can be used to access XMP metadata from the document. Can also
return None if no metadata was found on the document root.
"""
metadata = self.get("/Metadata", None)
if metadata is None:
return None
Expand All @@ -557,7 +557,6 @@ def getXmpMetadata(self):
self[NameObject("/Metadata")] = metadata
return metadata

##
# Read-only property that accesses the {@link
# #DictionaryObject.getXmpData getXmpData} function.
# <p>
Expand Down
2 changes: 1 addition & 1 deletion PyPDF2/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1305,7 +1305,7 @@ def getPage(self, pageNumber):
:return: a :class:`PageObject<pdf.PageObject>` instance.
:rtype: :class:`PageObject<pdf.PageObject>`
"""
## ensure that we're not trying to access an encrypted PDF
# ensure that we're not trying to access an encrypted PDF
# assert not self.trailer.has_key(TK.ENCRYPT)
if self.flattenedPages is None:
self._flatten()
Expand Down
48 changes: 44 additions & 4 deletions Tests/test_filters.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,48 @@
from PyPDF2.filters import ASCIIHexDecode
import string
from PyPDF2.errors import PdfStreamError
from itertools import product as cartesian_product

import pytest

from PyPDF2.errors import PdfReadError, PdfStreamError
from PyPDF2.filters import ASCIIHexDecode, FlateDecode

# Sample plaintexts shared by the FlateDecode tests below: one entry per
# character class exposed by the standard ``string`` module.
filter_inputs = (
    # "", '', """""",
    string.ascii_lowercase,
    string.ascii_uppercase,
    string.ascii_letters,
    string.digits,
    string.hexdigits,
    string.punctuation,
    string.whitespace,  # Add more...
)


@pytest.mark.parametrize("predictor, s", list(cartesian_product([1], filter_inputs)))
def test_FlateDecode(predictor, s):
    """
    Round-trips each sample through FlateDecode.encode() and
    FlateDecode.decode() and checks that the original bytes come back.
    """
    raw = s.encode()
    flate = FlateDecode()
    compressed = flate.encode(raw)
    restored = flate.decode(compressed, {"/Predictor": predictor})
    assert restored == raw


def test_FlateDecode_unsupported_predictor():
    """
    Checks that FlateDecode.decode() rejects unsupported predictors.

    Every predictor tried here is neither 1 ("no predictor") nor inside the
    PNG range [10, 15]; decode() is expected to raise PdfReadError for each
    of them. Once predictor support is extended in the future, this test
    case may need to be updated or removed.
    """
    codec = FlateDecode()
    predictors = (-10, -1, 0, 9, 16, 20, 100)

    for predictor, s in cartesian_product(predictors, filter_inputs):
        # Encode outside the ``raises`` block so that only decode() can
        # satisfy the expected exception; a failure while encoding must
        # surface as a test error rather than a pass.
        encoded = codec.encode(s.encode())
        with pytest.raises(PdfReadError):
            codec.decode(encoded, {"/Predictor": predictor})


@pytest.mark.parametrize(
"input,expected",
Expand Down Expand Up @@ -40,7 +80,7 @@
],
)
@pytest.mark.no_py27
def test_expected_results(input, expected):
def test_ASCIIHexDecode(input, expected):
"""
Feeds a bunch of values to ASCIIHexDecode.decode() and ensures the
correct output is returned.
Expand All @@ -52,7 +92,7 @@ def test_expected_results(input, expected):
assert ASCIIHexDecode.decode(input) == expected


def test_no_eod():
def test_ASCIIHexDecode_no_eod():
"""Ensuring an exception is raised when no EOD character is present"""
with pytest.raises(PdfStreamError) as exc:
ASCIIHexDecode.decode("")
Expand Down
3 changes: 2 additions & 1 deletion Tests/test_papersizes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from PyPDF2 import papersizes
import pytest

from PyPDF2 import papersizes


def test_din_a0():
dim = papersizes.PaperSize.A0
Expand Down

0 comments on commit 39ffc1d

Please sign in to comment.