Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MAINT: Use grouped constants instead of literals #745

Merged
merged 10 commits into from
Apr 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/github-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
pip install .
- name: Test with flake8
run: |
flake8 . --ignore=E203,W503,W504,E,F403,F405
flake8 . --ignore=E203,W503,W504,E,F403,F405 --exclude build
if: matrix.python-version != '2.7'
- name: Test with pytest
run: |
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ clean:
rm -rf Tests/__pycache__ PyPDF2/__pycache__ Image9.png htmlcov docs/_build dist dont_commit_merged.pdf dont_commit_writer.pdf PyPDF2.egg-info PyPDF2_pdfLocation.txt

test:
pytest Tests --cov --cov-report term-missing -vv --cov-report html
pytest Tests --cov --cov-report term-missing -vv --cov-report html --durations=3 --timeout=30

mutation-test:
mutmut run
Expand Down
4 changes: 2 additions & 2 deletions PyPDF2/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .pdf import PdfFileReader, PdfFileWriter
from ._version import __version__
from .merger import PdfFileMerger
from .pagerange import PageRange, parse_filename_page_ranges
from ._version import __version__
from .pdf import PdfFileReader, PdfFileWriter

__all__ = [
"__version__",
Expand Down
186 changes: 186 additions & 0 deletions PyPDF2/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
"""
See Portable Document Format Reference Manual, 1993. ISBN 0-201-62628-4.

See https://ia802202.us.archive.org/8/items/pdfy-0vt8s-egqFwDl7L2/PDF%20Reference%201.0.pdf

PDF Reference, third edition, Version 1.4, 2001. ISBN 0-201-75839-3.

PDF Reference, sixth edition, Version 1.7, 2006.
"""


class PagesAttributes:
    """Dictionary keys of a pages node (Table 6.2, page 52)."""

    # name, required -- the value must be /Pages
    TYPE = "/Type"
    # array, required -- list of indirect references
    KIDS = "/Kids"
    # integer, required -- the number of all nodes under this node
    COUNT = "/Count"
    # dictionary, required -- indirect reference to a pages object
    PARENT = "/Parent"


class PageAttributes:
    """Dictionary keys of a single page object (Table 6.3, page 53)."""

    # name, required -- the value must be /Page
    TYPE = "/Type"
    # array, required -- rectangle that specifies the page size
    MEDIABOX = "/MediaBox"
    # dictionary, required -- a pages object
    PARENT = "/Parent"
    # dictionary, required if there are any
    RESOURCES = "/Resources"
    # stream or array, optional
    CONTENTS = "/Contents"
    # array, optional -- rectangle
    CROPBOX = "/CropBox"
    # integer, optional -- page rotation in degrees
    ROTATE = "/Rotate"
    # stream, optional -- indirect reference to an image of the page
    THUMB = "/Thumb"
    # array, optional -- an array of annotations
    ANNOTS = "/Annots"


class Ressources:
    """
    Keys of a resources dictionary (Chapter 6.8).

    The class name is a misspelling of "Resources". It is kept unchanged so
    existing imports continue to work; new code should prefer the
    ``Resources`` alias defined directly below the class.
    """

    PROCSET = "/ProcSet"  # Chapter 6.8.1
    FONT = "/Font"  # Chapter 6.8.2
    # TODO: encoding
    # TODO: font descriptors (Chapter 6.8.4)
    COLOR_SPACE = "/ColorSpace"  # Chapter 6.8.5
    XOBJECT = "/XObject"  # Chapter 6.8.6


# Correctly spelled, backward-compatible alias for the class above.
Resources = Ressources


class StreamAttributes:
    """Keys of a stream dictionary (Table 4.2)."""

    # integer, required
    LENGTH = "/Length"
    # name or array of names, optional
    FILTER = "/Filter"
    # variable, optional -- the key is spelled "DecodeParms";
    # "DecodeParams" is wrong
    DECODE_PARMS = "/DecodeParms"


class FilterTypes:
    """
    Names of the stream filters.

    Table 4.3 of the 1.4 Manual

    Page 354 of the 1.7 Manual

    ``JPX_DECODE`` and ``CRYPT`` are included so that callers which dispatch
    on the filter name do not have to fall back to string literals for
    these two filters.
    """

    ASCII_HEX_DECODE = "/ASCIIHexDecode"  # abbreviation: AHx
    ASCII_85_DECODE = "/ASCII85Decode"  # abbreviation: A85
    LZW_DECODE = "/LZWDecode"  # abbreviation: LZW
    FLATE_DECODE = "/FlateDecode"  # abbreviation: Fl, PDF 1.2
    RUN_LENGTH_DECODE = "/RunLengthDecode"  # abbreviation: RL
    CCITT_FAX_DECODE = "/CCITTFaxDecode"  # abbreviation: CCF
    DCT_DECODE = "/DCTDecode"  # abbreviation: DCT
    JPX_DECODE = "/JPXDecode"  # no abbreviation listed in this module
    CRYPT = "/Crypt"  # no abbreviation listed in this module


class FilterTypeAbbreviations:
    """
    Abbreviated filter names.

    Table 4.44 of the 1.7 Manual (page 353ff)
    """

    # short form of ASCIIHexDecode
    AHx = "/AHx"
    # short form of ASCII85Decode
    A85 = "/A85"
    # short form of LZWDecode
    LZW = "/LZW"
    # short form of FlateDecode
    FL = "/Fl"
    # short form of RunLengthDecode
    RL = "/RL"
    # short form of CCITTFaxDecode
    CCF = "/CCF"
    # short form of DCTDecode
    DCT = "/DCT"


class LzwFilterParameters:
    """Decode-parameter keys listed in Table 4.4; every value is an integer."""

    PREDICTOR = "/Predictor"
    COLUMNS = "/Columns"
    COLORS = "/Colors"
    BITS_PER_COMPONENT = "/BitsPerComponent"
    EARLY_CHANGE = "/EarlyChange"


class CcittFaxDecodeParameters:
    """Decode-parameter keys listed in Table 4.5."""

    # integer
    K = "/K"
    # boolean
    END_OF_LINE = "/EndOfLine"
    # boolean
    ENCODED_BYTE_ALIGN = "/EncodedByteAlign"
    # integer
    COLUMNS = "/Columns"
    # integer
    ROWS = "/Rows"
    # boolean
    END_OF_BLOCK = "/EndOfBlock"
    # boolean
    BLACK_IS_1 = "/BlackIs1"
    # integer
    DAMAGED_ROWS_BEFORE_ERROR = "/DamagedRowsBeforeError"


class ImageAttributes:
    """Keys of an image XObject dictionary (Table 6.20)."""

    # name, required -- the value must be /XObject
    TYPE = "/Type"
    # name, required -- the value must be /Image
    SUBTYPE = "/Subtype"
    # name, required
    NAME = "/Name"
    # integer, required
    WIDTH = "/Width"
    # integer, required
    HEIGHT = "/Height"
    # integer, required
    BITS_PER_COMPONENT = "/BitsPerComponent"
    # name, required
    COLOR_SPACE = "/ColorSpace"
    # array, optional
    DECODE = "/Decode"
    # boolean, optional
    INTERPOLATE = "/Interpolate"
    # boolean, optional
    IMAGE_MASK = "/ImageMask"


class ColorSpaces:
    # Color space names; these are compared against the value stored under
    # the /ColorSpace key.
    DEVICE_RGB = "/DeviceRGB"
    DEVICE_CMYK = "/DeviceCMYK"
    DEVICE_GRAY = "/DeviceGray"


class TypArguments:
    """Edge names; see Table 8.2 of the PDF 1.7 reference."""

    LEFT = "/Left"
    RIGHT = "/Right"
    BOTTOM = "/Bottom"
    TOP = "/Top"


class TypFitArguments:
    """Fit-type names; see Table 8.2 of the PDF 1.7 reference."""

    FIT = "/Fit"
    FIT_V = "/FitV"
    FIT_BV = "/FitBV"
    FIT_B = "/FitB"
    FIT_H = "/FitH"
    FIT_BH = "/FitBH"
    FIT_R = "/FitR"


class PageLayouts:
    """Page layout names (page 84 of the PDF 1.4 reference)."""

    SINGLE_PAGE = "/SinglePage"
    ONE_COLUMN = "/OneColumn"
    TWO_COLUMN_LEFT = "/TwoColumnLeft"
    TWO_COLUMN_RIGHT = "/TwoColumnRight"


class GraphicsStateParameters:
    """Graphics state parameter keys (Table 4.8 of the 1.7 reference)."""

    # name, optional
    TYPE = "/Type"
    # number, optional
    LW = "/LW"
    # TODO: the table has many more entries than are listed here.
    # array, optional
    FONT = "/Font"
    # dictionary or name, optional
    S_MASK = "/SMask"


class CatalogDictionary:
    """Catalog dictionary keys (Table 3.25 in the 1.7 reference)."""

    # name, required -- the value must be /Catalog
    TYPE = "/Type"
    # TODO: the table has many more entries than are listed here.


# Collection of the constant-group classes defined in this module.
# NOTE(review): FilterTypeAbbreviations, CcittFaxDecodeParameters and
# ColorSpaces are defined in this module but are not listed here -- confirm
# whether the omission is intentional before relying on this list as a
# complete registry.
PDF_KEYS = [
PagesAttributes,
PageAttributes,
Ressources,
ImageAttributes,
StreamAttributes,
FilterTypes,
LzwFilterParameters,
TypArguments,
TypFitArguments,
PageLayouts,
GraphicsStateParameters,
CatalogDictionary,
]
69 changes: 41 additions & 28 deletions PyPDF2/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,23 @@
__author_email__ = "[email protected]"

import math
from sys import version_info

from PyPDF2.constants import CcittFaxDecodeParameters as CCITT
from PyPDF2.constants import ColorSpaces
from PyPDF2.constants import FilterTypeAbbreviations as FTA
from PyPDF2.constants import FilterTypes as FT
from PyPDF2.constants import ImageAttributes as IA
from PyPDF2.constants import LzwFilterParameters as LZW
from PyPDF2.constants import StreamAttributes as SA

from .utils import PdfReadError, ord_, paethPredictor
from sys import version_info

if version_info < ( 3, 0 ):
from cStringIO import StringIO
else:
from io import StringIO

import struct

try:
Expand Down Expand Up @@ -110,13 +120,13 @@ def decode(data, decodeParms):
predictor = 1
if decodeParms:
try:
predictor = decodeParms.get("/Predictor", 1)
predictor = decodeParms.get(LZW.PREDICTOR, 1)
except AttributeError:
pass # usually an array with a null object was read

# predictor 1 == no predictor
if predictor != 1:
columns = decodeParms["/Columns"]
columns = decodeParms[LZW.COLUMNS]
# PNG prediction:
if predictor >= 10 and predictor <= 15:
output = StringIO()
Expand Down Expand Up @@ -261,7 +271,7 @@ def decode(self):
return baos

@staticmethod
def decode(data,decodeParams=None):
def decode(data, decodeParms=None):
return LZWDecode.decoder(data).decode()


Expand Down Expand Up @@ -363,7 +373,7 @@ def decode(data, decodeParms=None, height=0):
else:
CCITTgroup = 3

width = decodeParms["/Columns"]
width = decodeParms[CCITT.COLUMNS]
imgSize = len(data)
tiff_header_struct = '<2shlh' + 'hhll' * 8 + 'h'
tiffHeader = struct.pack(tiff_header_struct,
Expand All @@ -388,7 +398,7 @@ def decode(data, decodeParms=None, height=0):

def decodeStreamData(stream):
from .generic import NameObject
filters = stream.get("/Filter", ())
filters = stream.get(SA.FILTER, ())

if len(filters) and not isinstance(filters[0], NameObject):
# we have a single filter instance
Expand All @@ -397,24 +407,24 @@ def decodeStreamData(stream):
# If there is not data to decode we should not try to decode the data.
if data:
for filterType in filters:
if filterType == "/FlateDecode" or filterType == "/Fl":
data = FlateDecode.decode(data, stream.get("/DecodeParms"))
elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
if filterType == FT.FLATE_DECODE or filterType == FTA.FL:
data = FlateDecode.decode(data, stream.get(SA.DECODE_PARMS))
elif filterType == FT.ASCII_HEX_DECODE or filterType == FTA.AHx:
data = ASCIIHexDecode.decode(data)
elif filterType == "/LZWDecode" or filterType == "/LZW":
data = LZWDecode.decode(data, stream.get("/DecodeParms"))
elif filterType == "/ASCII85Decode" or filterType == "/A85":
elif filterType == FT.LZW_DECODE or filterType == FTA.LZW:
data = LZWDecode.decode(data, stream.get(SA.DECODE_PARMS))
elif filterType == FT.ASCII_85_DECODE or filterType == FTA.A85:
data = ASCII85Decode.decode(data)
elif filterType == "/DCTDecode":
elif filterType == FT.DCT_DECODE:
data = DCTDecode.decode(data)
elif filterType == "/JPXDecode":
data = JPXDecode.decode(data)
elif filterType == "/CCITTFaxDecode":
height = stream.get("/Height", ())
data = CCITTFaxDecode.decode(data, stream.get("/DecodeParms"), height)
elif filterType == FT.CCITT_FAX_DECODE:
height = stream.get(IA.HEIGHT, ())
data = CCITTFaxDecode.decode(data, stream.get(SA.DECODE_PARMS), height)
elif filterType == "/Crypt":
decodeParams = stream.get("/DecodeParams", {})
if "/Name" not in decodeParams and "/Type" not in decodeParams:
decodeParms = stream.get(SA.DECODE_PARMS, {})
if "/Name" not in decodeParms and "/Type" not in decodeParms:
pass
else:
raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
Expand All @@ -434,34 +444,37 @@ def _xobj_to_image(x_object_obj):
:return: Tuple[file extension, bytes]
"""
import io

from PIL import Image

size = (x_object_obj["/Width"], x_object_obj["/Height"])
from PyPDF2.constants import GraphicsStateParameters as G

size = (x_object_obj[IA.WIDTH], x_object_obj[IA.HEIGHT])
data = x_object_obj.getData()
if x_object_obj["/ColorSpace"] == "/DeviceRGB":
if x_object_obj[IA.COLOR_SPACE] == ColorSpaces.DEVICE_RGB:
mode = "RGB"
else:
mode = "P"
extension = None
if "/Filter" in x_object_obj:
if x_object_obj["/Filter"] == "/FlateDecode":
if SA.FILTER in x_object_obj:
if x_object_obj[SA.FILTER] == FT.FLATE_DECODE:
extension = ".png"
img = Image.frombytes(mode, size, data)
if "/SMask" in x_object_obj: # add alpha channel
alpha = Image.frombytes("L", size, x_object_obj["/SMask"].getData())
if G.S_MASK in x_object_obj: # add alpha channel
alpha = Image.frombytes("L", size, x_object_obj[G.S_MASK].getData())
img.putalpha(alpha)
img_byte_arr = io.BytesIO()
img.save(img_byte_arr, format="PNG")
data = img_byte_arr.getvalue()
elif x_object_obj["/Filter"] in (["/LZWDecode"], ['/ASCII85Decode'], ['/CCITTFaxDecode']):
elif x_object_obj[SA.FILTER] in ([FT.LZW_DECODE], [FT.ASCII_85_DECODE], [FT.CCITT_FAX_DECODE]):
from PyPDF2.utils import b_
extension = ".png"
data = b_(data)
elif x_object_obj["/Filter"] == "/DCTDecode":
elif x_object_obj[SA.FILTER] == FT.DCT_DECODE:
extension = ".jpg"
elif x_object_obj["/Filter"] == "/JPXDecode":
elif x_object_obj[SA.FILTER] == "/JPXDecode":
extension = ".jp2"
elif x_object_obj["/Filter"] == "/CCITTFaxDecode":
elif x_object_obj[SA.FILTER] == FT.CCITT_FAX_DECODE:
extension = ".tiff"
else:
extension = ".png"
Expand Down
Loading