Skip to content

Commit

Permalink
STY: Fix style issues (py-pdf#743)
Browse files Browse the repository at this point in the history
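* Explicitly export PdfFileReader and PdfFileWriter via __all__
* Simplify string concatenation (merge adjacent literals, drop redundant line-continuation backslashes)
* Don't leave open file handles (use `with` blocks so files are always closed)
* Apply hints from flake8-simplify
* Only import what is used (remove unused imports)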
  • Loading branch information
MartinThoma authored Apr 13, 2022
1 parent e45e66b commit 38d5ec4
Show file tree
Hide file tree
Showing 20 changed files with 86 additions and 90 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/unit-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
- name: Test with flake8
run: |
flake8 . --ignore=E203,W503,E501,F405,E226,E128,E225,F403,E201,E202,E231,W504,E241,F401,E261,E302,E211,E701,E228,E111,F841,E117,E127,E251,E266,E
flake8 . --ignore=E203,W503,W504,E,F403,F405
if: matrix.python-version != '2.7'

- name: Test with pytest
Expand Down
10 changes: 9 additions & 1 deletion PyPDF2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,12 @@
from .pagerange import PageRange, parse_filename_page_ranges
from ._version import __version__

__all__ = ["pdf", "PdfFileMerger"]
__all__ = [
"__version__",
"PageRange",
"parse_filename_page_ranges",
"pdf",
"PdfFileMerger",
"PdfFileReader",
"PdfFileWriter",
]
6 changes: 3 additions & 3 deletions PyPDF2/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def compress(data):
# Unable to import zlib. Attempt to use the System.IO.Compression
# library from the .NET framework. (IronPython only)
import System
from System import IO, Collections, Array
from System import IO, Array

def _string_to_bytearr(buf):
retval = Array.CreateInstance(System.Byte, len(buf))
Expand Down Expand Up @@ -275,7 +275,7 @@ def decode(data, decodeParms=None):
x = 0
hitEod = False
# remove all whitespace from data
data = [y for y in data if not (y in ' \n\r\t')]
data = [y for y in data if y not in ' \n\r\t']
while not hitEod:
c = data[x]
if len(retval) == 0 and c == "<" and data[x+1] == "~":
Expand Down Expand Up @@ -363,7 +363,7 @@ def decode(data, decodeParms=None, height=0):

width = decodeParms["/Columns"]
imgSize = len(data)
tiff_header_struct = '<' + '2s' + 'h' + 'l' + 'h' + 'hhll' * 8 + 'h'
tiff_header_struct = '<2shlh' + 'hhll' * 8 + 'h'
tiffHeader = struct.pack(tiff_header_struct,
b'II', # Byte order indication: Little endian
42, # Version number (always 42)
Expand Down
8 changes: 4 additions & 4 deletions PyPDF2/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ def readStringFromStream(stream):
# break occurs. If it's a multi-char EOL, consume the
# second character:
tok = stream.read(1)
if not tok in b_("\n\r"):
if tok not in b_("\n\r"):
stream.seek(-1, 1)
# Then don't add anything to the actual string, since this
# line break was escaped:
Expand Down Expand Up @@ -483,10 +483,10 @@ def readFromStream(stream, pdf):
try:
try:
ret=name.decode('utf-8')
except (UnicodeEncodeError, UnicodeDecodeError) as e:
except (UnicodeEncodeError, UnicodeDecodeError):
ret=name.decode('gbk')
return NameObject(ret)
except (UnicodeEncodeError, UnicodeDecodeError) as e:
except (UnicodeEncodeError, UnicodeDecodeError):
# Name objects should represent irregular characters
# with a '#' followed by the symbol's hex number
if not pdf.strict:
Expand Down Expand Up @@ -843,7 +843,7 @@ def getData(self):

decoded._data = filters.decodeStreamData(self)
for key, value in list(self.items()):
if not key in ("/Length", "/Filter", "/DecodeParms"):
if key not in ("/Length", "/Filter", "/DecodeParms"):
decoded[key] = value
self.decodedSelf = decoded
return decoded._data
Expand Down
5 changes: 2 additions & 3 deletions PyPDF2/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,6 @@ def _trim_dests(self, pdf, dests, pages):
page set.
"""
new_dests = []
prev_header_added = True
for k, o in list(dests.items()):
for j in range(*pages):
if pdf.getPage(j).getObject() == o['/Page'].getObject():
Expand Down Expand Up @@ -356,7 +355,7 @@ def _write_dests(self):
if p.id == v['/Page']:
v[NameObject('/Page')] = p.out_pagedata
pageno = i
pdf = p.src
pdf = p.src # noqa: F841
break
if pageno is not None:
self.output.addNamedDestinationObject(v)
Expand Down Expand Up @@ -429,7 +428,7 @@ def _write_bookmarks(self, bookmarks=None, parent=None):
b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})

pageno = i
pdf = p.src
pdf = p.src # noqa: F841
break
if pageno is not None:
del b['/Page'], b['/Type']
Expand Down
5 changes: 2 additions & 3 deletions PyPDF2/pagerange.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,7 @@ def __init__(self, arg):
@staticmethod
def valid(input):
""" True if input is a valid initializer for a PageRange. """
return isinstance(input, slice) or \
isinstance(input, PageRange) or \
return isinstance(input, (slice, PageRange)) or \
(isString(input)
and bool(re.match(PAGE_RANGE_RE, input)))

Expand Down Expand Up @@ -144,7 +143,7 @@ def parse_filename_page_ranges(args):
for arg in args + [None]:
if PageRange.valid(arg):
if not pdf_filename:
raise ValueError("The first argument must be a filename, " \
raise ValueError("The first argument must be a filename, "
"not a page range.")

pairs.append( (pdf_filename, PageRange(arg)) )
Expand Down
71 changes: 28 additions & 43 deletions PyPDF2/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
__maintainer__ = "Phaseit, Inc."
__maintainer_email = "[email protected]"

import string
import math
import struct
import sys
Expand All @@ -57,7 +56,6 @@
else:
from io import BytesIO

from . import filters
from . import utils
import warnings
import codecs
Expand Down Expand Up @@ -543,7 +541,6 @@ def _sweepIndirectReferences(self, externMap, data):
if debug: print((data, "TYPE", data.__class__.__name__))
if isinstance(data, DictionaryObject):
for key, value in list(data.items()):
origvalue = value
value = self._sweepIndirectReferences(externMap, value)
if isinstance(value, StreamObject):
# a dictionary value is a stream. streams must be indirect
Expand Down Expand Up @@ -794,6 +791,11 @@ def removeImages(self, ignoreByteStringObject=False):
to ignore ByteString Objects.
"""
pages = self.getObject(self._pages)['/Kids']
jump_operators = [
b_('cm'), b_('w'), b_('J'), b_('j'), b_('M'), b_('d'), b_('ri'), b_('i'),
b_('gs'), b_('W'), b_('b'), b_('s'), b_('S'), b_('f'), b_('F'), b_('n'), b_('m'), b_('l'),
b_('c'), b_('v'), b_('y'), b_('h'), b_('B'), b_('Do'), b_('sh')
]
for j in range(len(pages)):
page = pages[j]
pageRef = self.getObject(page)
Expand All @@ -804,36 +806,29 @@ def removeImages(self, ignoreByteStringObject=False):
_operations = []
seq_graphics = False
for operands, operator in content.operations:
if operator == b_('Tj'):
text = operands[0]
if ignoreByteStringObject:
if not isinstance(text, TextStringObject):
operands[0] = TextStringObject()
elif operator == b_("'"):
if operator in [b_('Tj'), b_("'")]:
text = operands[0]
if ignoreByteStringObject:
if not isinstance(text, TextStringObject):
operands[0] = TextStringObject()
elif operator == b_('"'):
text = operands[2]
if ignoreByteStringObject:
if not isinstance(text, TextStringObject):
operands[2] = TextStringObject()
if ignoreByteStringObject and not isinstance(text, TextStringObject):
operands[2] = TextStringObject()
elif operator == b_("TJ"):
for i in range(len(operands[0])):
if ignoreByteStringObject:
if not isinstance(operands[0][i], TextStringObject):
operands[0][i] = TextStringObject()
if (
ignoreByteStringObject
and not isinstance(operands[0][i], TextStringObject)
):
operands[0][i] = TextStringObject()

if operator == b_('q'):
seq_graphics = True
if operator == b_('Q'):
seq_graphics = False
if seq_graphics:
if operator in [b_('cm'), b_('w'), b_('J'), b_('j'), b_('M'), b_('d'), b_('ri'), b_('i'),
b_('gs'), b_('W'), b_('b'), b_('s'), b_('S'), b_('f'), b_('F'), b_('n'), b_('m'), b_('l'),
b_('c'), b_('v'), b_('y'), b_('h'), b_('B'), b_('Do'), b_('sh')]:
continue
if seq_graphics and operator in jump_operators:
continue
if operator == b_('re'):
continue
_operations.append((operands, operator))
Expand All @@ -856,41 +851,29 @@ def removeText(self, ignoreByteStringObject=False):
if not isinstance(content, ContentStream):
content = ContentStream(content, pageRef)
for operands,operator in content.operations:
if operator == b_('Tj'):
if operator in [b_('Tj'), b_("'")]:
text = operands[0]
if not ignoreByteStringObject:
if isinstance(text, TextStringObject):
operands[0] = TextStringObject()
else:
if isinstance(text, TextStringObject) or \
isinstance(text, ByteStringObject):
operands[0] = TextStringObject()
elif operator == b_("'"):
text = operands[0]
if not ignoreByteStringObject:
if isinstance(text, TextStringObject):
operands[0] = TextStringObject()
else:
if isinstance(text, TextStringObject) or \
isinstance(text, ByteStringObject):
if isinstance(text, (TextStringObject, ByteStringObject)):
operands[0] = TextStringObject()
elif operator == b_('"'):
text = operands[2]
if not ignoreByteStringObject:
if isinstance(text, TextStringObject):
operands[2] = TextStringObject()
else:
if isinstance(text, TextStringObject) or \
isinstance(text, ByteStringObject):
if isinstance(text, (TextStringObject, ByteStringObject)):
operands[2] = TextStringObject()
elif operator == b_("TJ"):
for i in range(len(operands[0])):
if not ignoreByteStringObject:
if isinstance(operands[0][i], TextStringObject):
operands[0][i] = TextStringObject()
else:
if isinstance(operands[0][i], TextStringObject) or \
isinstance(operands[0][i], ByteStringObject):
if isinstance(operands[0][i], (TextStringObject, ByteStringObject)):
operands[0][i] = TextStringObject()

pageRef.__setitem__(NameObject('/Contents'), content)
Expand Down Expand Up @@ -1172,9 +1155,8 @@ def _showwarning(message, category, filename, lineno, file=warndest, line=None):
if hasattr(stream, 'mode') and 'b' not in stream.mode:
warnings.warn("PdfFileReader stream/file object is not in binary mode. It may not be read correctly.", utils.PdfReadWarning)
if isString(stream):
fileobj = open(stream, 'rb')
stream = BytesIO(b_(fileobj.read()))
fileobj.close()
with open(stream, 'rb') as fileobj:
stream = BytesIO(b_(fileobj.read()))
self.read(stream)
self.stream = stream

Expand Down Expand Up @@ -1729,7 +1711,7 @@ def getObject(self, indirectReference):
return retval

def _decryptObject(self, obj, key):
if isinstance(obj, ByteStringObject) or isinstance(obj, TextStringObject):
if isinstance(obj, (ByteStringObject, TextStringObject)):
obj = createStringObject(utils.RC4_encrypt(key, obj.original_bytes))
elif isinstance(obj, StreamObject):
obj._data = utils.RC4_encrypt(key, obj._data)
Expand All @@ -1752,7 +1734,10 @@ def readObjectHeader(self, stream):
idnum = readUntilWhitespace(stream)
extra |= utils.skipOverWhitespace(stream); stream.seek(-1, 1)
generation = readUntilWhitespace(stream)
obj = stream.read(3)

# The result is unused, but the read itself may still be needed:
# it advances the stream by 3 bytes before readNonWhitespace() runs.
_obj = stream.read(3) # noqa: F841

readNonWhitespace(stream)
stream.seek(-1, 1)
if (extra and self.strict):
Expand Down Expand Up @@ -1938,8 +1923,8 @@ def used_before(num, generation):
# The rest of the elements depend on the xref_type
if xref_type == 0:
# linked list of free objects
next_free_object = getEntry(1)
next_generation = getEntry(2)
next_free_object = getEntry(1) # noqa: F841
next_generation = getEntry(2) # noqa: F841
elif xref_type == 1:
# objects that are in use but are not compressed
byte_offset = getEntry(1)
Expand Down
11 changes: 5 additions & 6 deletions PyPDF2/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,10 @@ def markLocation(stream):
# Mainly for debugging
RADIUS = 5000
stream.seek(-RADIUS, 1)
outputDoc = open('PyPDF2_pdfLocation.txt', 'wb')
outputDoc.write(stream.read(RADIUS))
outputDoc.write(b'HERE')
outputDoc.write(stream.read(RADIUS))
outputDoc.close()
with open('PyPDF2_pdfLocation.txt', 'wb') as outputDoc:
outputDoc.write(stream.read(RADIUS))
outputDoc.write(b'HERE')
outputDoc.write(stream.read(RADIUS))
stream.seek(-RADIUS, 1)


Expand Down Expand Up @@ -242,7 +241,7 @@ def b_(s):
if len(s) < 2:
bc[s] = r
return r
except Exception as e:
except Exception:
print(s)
r = s.encode('utf-8')
if len(s) < 2:
Expand Down
1 change: 0 additions & 1 deletion PyPDF2/xmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import datetime
import decimal
from .generic import PdfObject
from xml.dom import getDOMImplementation
from xml.dom.minidom import parseString
from .utils import u_

Expand Down
2 changes: 1 addition & 1 deletion Scripts/2-up.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def main():
print("usage: python 2-up.py input_file output_file")
sys.exit(1)
print("2-up input " + sys.argv[1])
reader = PdfFileReader(open(sys.argv[1], "rb"))
reader = PdfFileReader(sys.argv[1])
writer = PdfFileWriter()
for iter in range(0, reader.getNumPages() - 1, 2):
lhs = reader.getPage(iter)
Expand Down
13 changes: 7 additions & 6 deletions Scripts/booklet.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#!/usr/bin/env python

"""
Layout the pages from a PDF file to print a booklet or brochure.
Layout the pages from a PDF file to print a booklet or brochure.
The resulting media size is twice the size of the first page
of the source document. If you print the resulting PDF in duplex
(short edge), you get a center fold brochure that you can staple
together and read as a booklet.
The resulting media size is twice the size of the first page
of the source document. If you print the resulting PDF in duplex
(short edge), you get a center fold brochure that you can staple
together and read as a booklet.
"""

from __future__ import division, print_function
Expand Down Expand Up @@ -63,7 +63,8 @@ def mergePageByNumber(dstPage, pageNumber, xOffset):
mergePageByNumber(page, i, offsets[0])
mergePageByNumber(page, virtualPages - i - 1, offsets[1])

writer.write(open(args.output, "wb"))
with open(args.output, "wb") as fp:
writer.write(fp)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion Scripts/pdf-image-extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


def main(pdf: str):
reader = PyPDF2.PdfFileReader(open(pdf, "rb"))
reader = PyPDF2.PdfFileReader(pdf)
page = reader.pages[30]

if "/XObject" in page["/Resources"]:
Expand Down
4 changes: 2 additions & 2 deletions Tests/test_basic_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
def test_basic_features():
output = PdfFileWriter()
document1 = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
input1 = PdfFileReader(open(document1, "rb"))
input1 = PdfFileReader(document1)

# print how many pages input1 has:
print("document1.pdf has %d pages." % input1.getNumPages())
Expand All @@ -32,7 +32,7 @@ def test_basic_features():
# add the first page of input1 once more, but first merge a watermark onto it
# (the watermark is read from the same PDF file as input1):
page4 = input1.getPage(0)
watermark_pdf = document1
watermark = PdfFileReader(open(watermark_pdf, "rb"))
watermark = PdfFileReader(watermark_pdf)
page4.mergePage(watermark.getPage(0))
output.addPage(page4)

Expand Down
Loading

0 comments on commit 38d5ec4

Please sign in to comment.