Skip to content

Commit

Permalink
STY: Apply fixes suggested by pylint (#999)
Browse files Browse the repository at this point in the history
* Use f-strings: they are IMHO more readable and faster than %-formatting or str.format()
* Reduce code duplication
* Improvements in type annotations
* Add PyPDF2 module docstring for help text
  • Loading branch information
MartinThoma authored Jun 16, 2022
1 parent 6ce36f7 commit faebc9d
Show file tree
Hide file tree
Showing 13 changed files with 178 additions and 210 deletions.
5 changes: 0 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,6 @@ repos:
hooks:
- id: black
args: [--target-version, py36]
# - repo: https://github.com/asottile/pyupgrade
# rev: v2.31.1
# hooks:
# - id: pyupgrade
# args: [--py36-plus]
- repo: https://github.com/asottile/blacken-docs
rev: v1.12.1
hooks:
Expand Down
9 changes: 9 additions & 0 deletions PyPDF2/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
"""
PyPDF2 is a free and open-source pure-Python PDF library capable of splitting,
merging, cropping, and transforming the pages of PDF files. It can also add
custom data, viewing options, and passwords to PDF files. PyPDF2 can retrieve
text and metadata from PDFs as well.

You can read the full docs at https://pypdf2.readthedocs.io/.
"""

from ._merger import PdfFileMerger, PdfMerger
from ._page import PageObject, Transformation
from ._reader import DocumentInformation, PdfFileReader, PdfReader
Expand Down
4 changes: 2 additions & 2 deletions PyPDF2/_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,8 @@ def parse_to_unicode(
) # join is here as some cases where the code was split
int_entry.append(int(lst[0], 16))
lst = lst[2:]
for a in map_dict:
if map_dict[a] == " ":
for a, value in map_dict.items():
if value == " ":
space_code = a
return map_dict, space_code, int_entry

Expand Down
49 changes: 14 additions & 35 deletions PyPDF2/_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,16 @@
NumberObject,
TextStringObject,
TreeObject,
createStringObject,
_create_bookmark,
)
from .pagerange import PageRange, PageRangeSpec
from .types import (
BookmarkTypes,
FitType,
LayoutType,
OutlinesType,
PagemodeType,
ZoomArgsType,
ZoomArgType,
)

ERR_CLOSED_WRITER = "close() was called and thus the writer cannot be used anymore"
Expand Down Expand Up @@ -564,9 +564,7 @@ def _associate_dests_to_pages(self, pages: List[_MergedPage]) -> None:
if pageno is not None:
nd[NameObject("/Page")] = NumberObject(pageno)
else:
raise ValueError(
"Unresolved named destination '{}'".format(nd["/Title"])
)
raise ValueError(f"Unresolved named destination '{nd['/Title']}'")

def _associate_bookmarks_to_pages(
self, pages: List[_MergedPage], bookmarks: Optional[Iterable[Bookmark]] = None
Expand All @@ -592,7 +590,7 @@ def _associate_bookmarks_to_pages(
if pageno is not None:
b[NameObject("/Page")] = NumberObject(pageno)
else:
raise ValueError("Unresolved bookmark '{}'".format(b["/Title"]))
raise ValueError(f"Unresolved bookmark '{b['/Title']}'")

def find_bookmark(
self,
Expand Down Expand Up @@ -623,8 +621,8 @@ def addBookmark(
color: Optional[Tuple[float, float, float]] = None,
bold: bool = False,
italic: bool = False,
fit: str = "/Fit",
*args: ZoomArgType,
fit: FitType = "/Fit",
*args: ZoomArgsType,
) -> IndirectObject: # pragma: no cover
"""
.. deprecated:: 1.28.0
Expand All @@ -643,8 +641,8 @@ def add_bookmark(
color: Optional[Tuple[float, float, float]] = None,
bold: bool = False,
italic: bool = False,
fit: str = "/Fit",
*args: ZoomArgType,
fit: FitType = "/Fit",
*args: ZoomArgsType,
) -> IndirectObject:
"""
Add a bookmark to this PDF file.
Expand All @@ -658,7 +656,7 @@ def add_bookmark(
:param bool bold: Bookmark is bold
:param bool italic: Bookmark is italic
:param str fit: The fit of the destination page. See
:meth:`addLink()<addLin>` for details.
:meth:`addLink()<addLink>` for details.
"""
if self.output is None:
raise RuntimeError(ERR_CLOSED_WRITER)
Expand Down Expand Up @@ -689,32 +687,13 @@ def add_bookmark(
if parent is None:
parent = outline_ref

bookmark = TreeObject()
bookmark = _create_bookmark(action_ref, title, color, italic, bold)

bookmark.update(
{
NameObject("/A"): action_ref,
NameObject("/Title"): createStringObject(title),
}
)

if color is not None:
bookmark.update(
{NameObject("/C"): ArrayObject([FloatObject(c) for c in color])}
)

format_flag = 0
if italic:
format_flag += 1
if bold:
format_flag += 2
if format_flag:
bookmark.update({NameObject("/F"): NumberObject(format_flag)})

bookmark_ref = self.output._add_object(bookmark)
parent = cast(Bookmark, parent.get_object())
assert parent is not None, "hint for mypy"
parent.add_child(bookmark_ref, self.output)
bookmark_ref = self.output._add_object(bookmark)
parent_obj = cast(Bookmark, parent.get_object())
assert parent_obj is not None, "hint for mypy"
parent_obj.add_child(bookmark_ref, self.output)

return bookmark_ref

Expand Down
2 changes: 1 addition & 1 deletion PyPDF2/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ def _content_stream_rename(
if isinstance(op, NameObject):
operands[i] = rename.get(op, op)
else:
raise KeyError("type of operands is %s" % type(operands))
raise KeyError(f"type of operands is {type(operands)}")
return stream

@staticmethod
Expand Down
76 changes: 33 additions & 43 deletions PyPDF2/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ def _build_outline(self, node: DictionaryObject) -> Optional[Destination]:
outline = self._namedDests[dest]
outline[NameObject("/Title")] = title # type: ignore
else:
raise PdfReadError("Unexpected destination %r" % dest)
raise PdfReadError(f"Unexpected destination {dest!r}")
return outline

@property
Expand Down Expand Up @@ -993,13 +993,14 @@ def _get_object_from_stream(
# Stream object cannot be read. Normally, a critical error, but
# Adobe Reader doesn't complain, so continue (in strict mode?)
warnings.warn(
"Invalid stream (index %d) within object %d %d: %s"
% (i, indirect_reference.idnum, indirect_reference.generation, exc),
f"Invalid stream (index {i}) within object "
f"{indirect_reference.idnum} {indirect_reference.generation}: "
f"{exc}",
PdfReadWarning,
)

if self.strict:
raise PdfReadError("Can't read object stream: %s" % exc)
raise PdfReadError(f"Can't read object stream: {exc}")
# Replace with null. Hopefully it's nothing important.
obj = NullObject()
return obj
Expand Down Expand Up @@ -1030,26 +1031,18 @@ def get_object(self, indirect_reference: IndirectObject) -> Optional[PdfObject]:
# Xref table probably had bad indexes due to not being zero-indexed
if self.strict:
raise PdfReadError(
"Expected object ID (%d %d) does not match actual (%d %d); xref table not zero-indexed."
% (
indirect_reference.idnum,
indirect_reference.generation,
idnum,
generation,
)
f"Expected object ID ({indirect_reference.idnum} {indirect_reference.generation}) "
f"does not match actual ({idnum} {generation}); "
"xref table not zero-indexed."
)
else:
pass # xref table is corrected in non-strict mode
elif idnum != indirect_reference.idnum and self.strict:
# some other problem
raise PdfReadError(
"Expected object ID (%d %d) does not match actual (%d %d)."
% (
indirect_reference.idnum,
indirect_reference.generation,
idnum,
generation,
)
f"Expected object ID ({indirect_reference.idnum} "
f"{indirect_reference.generation}) does not match actual "
f"({idnum} {generation})."
)
if self.strict:
assert generation == indirect_reference.generation
Expand All @@ -1070,8 +1063,8 @@ def get_object(self, indirect_reference: IndirectObject) -> Optional[PdfObject]:
retval = self._decrypt_object(retval, key) # type: ignore
else:
warnings.warn(
"Object %d %d not defined."
% (indirect_reference.idnum, indirect_reference.generation),
f"Object {indirect_reference.idnum} {indirect_reference.generation} "
"not defined.",
PdfReadWarning,
)
if self.strict:
Expand Down Expand Up @@ -1144,8 +1137,7 @@ def read_object_header(self, stream: StreamType) -> Tuple[int, int]:
stream.seek(-1, 1)
if extra and self.strict:
warnings.warn(
"Superfluous whitespace found in object header %s %s"
% (idnum, generation), # type: ignore
f"Superfluous whitespace found in object header {idnum} {generation}", # type: ignore
PdfReadWarning,
)
return int(idnum), int(generation)
Expand Down Expand Up @@ -1212,9 +1204,8 @@ def read(self, stream: StreamType) -> None:
header_byte = stream.read(5)
if header_byte != b"%PDF-":
raise PdfReadError(
"PDF starts with '{}', but '%PDF-' expected".format(
header_byte.decode("utf8")
)
f"PDF starts with '{header_byte.decode('utf8')}', "
"but '%PDF-' expected"
)
stream.seek(0, os.SEEK_END)
last_mb = stream.tell() - 1024 * 1024 + 1 # offset of last MB of stream
Expand All @@ -1237,8 +1228,8 @@ def read(self, stream: StreamType) -> None:
)

# read all cross reference tables and their trailers
self.xref: Dict[Any, Any] = {}
self.xref_objStm: Dict[Any, Any] = {}
self.xref: Dict[int, Dict[Any, Any]] = {}
self.xref_objStm: Dict[int, Tuple[Any, Any]] = {}
self.trailer = DictionaryObject()
while True:
# load the xref table
Expand Down Expand Up @@ -1311,13 +1302,13 @@ def read(self, stream: StreamType) -> None:
# if not zero-indexed, verify that the table is correct; change it if necessary
if self.xref_index and not self.strict:
loc = stream.tell()
for gen in self.xref:
for gen, xref_entry in self.xref.items():
if gen == 65535:
continue
for id in self.xref[gen]:
stream.seek(self.xref[gen][id], 0)
for id in xref_entry:
stream.seek(xref_entry[id], 0)
try:
pid, pgen = self.read_object_header(stream)
pid, _pgen = self.read_object_header(stream)
except ValueError:
break
if pid == id - self.xref_index:
Expand Down Expand Up @@ -1431,7 +1422,7 @@ def _read_pdf15_xref_stream(
entry_sizes = cast(Dict[Any, Any], xrefstream.get("/W"))
assert len(entry_sizes) >= 3
if self.strict and len(entry_sizes) > 3:
raise PdfReadError("Too many entry sizes: %s" % entry_sizes)
raise PdfReadError(f"Too many entry sizes: {entry_sizes}")

def get_entry(i: int) -> Union[int, Tuple[int, ...]]:
# Reads the correct number of bytes for each entry. See the
Expand All @@ -1449,7 +1440,7 @@ def get_entry(i: int) -> Union[int, Tuple[int, ...]]:

def used_before(num: int, generation: Union[int, Tuple[int, ...]]) -> bool:
# We move backwards through the xrefs, don't replace any.
return num in self.xref.get(generation, []) or num in self.xref_objStm
return num in self.xref.get(generation, []) or num in self.xref_objStm # type: ignore

# Iterate through each subsection
self._read_xref_subsections(idx_pairs, get_entry, used_before)
Expand Down Expand Up @@ -1528,9 +1519,9 @@ def _read_xref_subsections(
byte_offset = get_entry(1)
generation = get_entry(2)
if generation not in self.xref:
self.xref[generation] = {}
self.xref[generation] = {} # type: ignore
if not used_before(num, generation):
self.xref[generation][num] = byte_offset
self.xref[generation][num] = byte_offset # type: ignore
elif xref_type == 2:
# compressed objects
objstr_num = get_entry(1)
Expand All @@ -1539,7 +1530,7 @@ def _read_xref_subsections(
if not used_before(num, generation):
self.xref_objStm[num] = (objstr_num, obstr_idx)
elif self.strict:
raise PdfReadError("Unknown xref type: %s" % xref_type)
raise PdfReadError(f"Unknown xref type: {xref_type}")

def _zero_xref(self, generation: int) -> None:
self.xref[generation] = {
Expand Down Expand Up @@ -1568,11 +1559,11 @@ def read_next_end_line(
if stream.tell() < 2:
raise PdfReadError("EOL marker not found")
stream.seek(-2, 1)
if x == b"\n" or x == b"\r": # \n = LF; \r = CR
if x in (b"\n", b"\r"): # \n = LF; \r = CR
crlf = False
while x == b"\n" or x == b"\r":
while x in (b"\n", b"\r"):
x = stream.read(1)
if x == b"\n" or x == b"\r": # account for CR+LF
if x in (b"\n", b"\r"): # account for CR+LF
stream.seek(-1, 1)
crlf = True
if stream.tell() < 2:
Expand Down Expand Up @@ -1657,8 +1648,7 @@ def _decrypt(self, password: Union[str, bytes]) -> int:
encrypt_v = cast(int, encrypt["/V"])
if encrypt_v not in (1, 2):
raise NotImplementedError(
"only algorithm code 1 and 2 are supported. This PDF uses code %s"
% encrypt_v
f"only algorithm code 1 and 2 are supported. This PDF uses code {encrypt_v}"
)
user_password, key = self._authenticate_user_password(password)
if user_password:
Expand All @@ -1678,8 +1668,8 @@ def _decrypt(self, password: Union[str, bytes]) -> int:
val = real_O
for i in range(19, -1, -1):
new_key = b""
for l in range(len(key)):
new_key += b_(chr(ord_(key[l]) ^ i))
for key_char in key:
new_key += b_(chr(ord_(key_char) ^ i))
val = RC4_encrypt(new_key, val)
userpass = val
owner_password, key = self._authenticate_user_password(userpass)
Expand Down
8 changes: 4 additions & 4 deletions PyPDF2/_security.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ def _alg33(owner_pwd: str, user_pwd: str, rev: int, keylen: int) -> bytes:
if rev >= 3:
for i in range(1, 20):
new_key = ""
for l in range(len(key)):
new_key += chr(ord_(key[l]) ^ i)
for key_char in key:
new_key += chr(ord_(key_char) ^ i)
val = RC4_encrypt(new_key, val)
# 8. Store the output from the final invocation of the RC4 as the value of
# the /O entry in the encryption dictionary.
Expand Down Expand Up @@ -233,10 +233,10 @@ def RC4_encrypt(key: Union[str, bytes], plaintext: bytes) -> bytes:
S[i], S[j] = S[j], S[i]
i, j = 0, 0
retval = []
for x in range(len(plaintext)):
for plaintext_char in plaintext:
i = (i + 1) % 256
j = (j + S[i]) % 256
S[i], S[j] = S[j], S[i]
t = S[(S[i] + S[j]) % 256]
retval.append(b_(chr(ord_(plaintext[x]) ^ t)))
retval.append(b_(chr(ord_(plaintext_char) ^ t)))
return b"".join(retval)
Loading

0 comments on commit faebc9d

Please sign in to comment.