diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9b473c459c..eaa043f596 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,11 +33,6 @@ repos: hooks: - id: black args: [--target-version, py36] -# - repo: https://github.com/asottile/pyupgrade -# rev: v2.31.1 -# hooks: -# - id: pyupgrade -# args: [--py36-plus] - repo: https://github.com/asottile/blacken-docs rev: v1.12.1 hooks: diff --git a/PyPDF2/__init__.py b/PyPDF2/__init__.py index 09076ba8df..f7fcd92b90 100644 --- a/PyPDF2/__init__.py +++ b/PyPDF2/__init__.py @@ -1,3 +1,12 @@ +""" +PyPDF2 is a free and open-source pure-python PDF library capable of splitting, +merging, cropping, and transforming the pages of PDF files. It can also add +custom data, viewing options, and passwords to PDF files. PyPDF2 can retrieve +text and metadata from PDFs as well. + +You can read the full docs at https://pypdf2.readthedocs.io/. +""" + from ._merger import PdfFileMerger, PdfMerger from ._page import PageObject, Transformation from ._reader import DocumentInformation, PdfFileReader, PdfReader diff --git a/PyPDF2/_reader.py b/PyPDF2/_reader.py index 8f64e6c2a3..e319cfae99 100644 --- a/PyPDF2/_reader.py +++ b/PyPDF2/_reader.py @@ -1317,7 +1317,7 @@ def read(self, stream: StreamType) -> None: for id in self.xref[gen]: stream.seek(self.xref[gen][id], 0) try: - pid, pgen = self.read_object_header(stream) + pid, _pgen = self.read_object_header(stream) except ValueError: break if pid == id - self.xref_index: @@ -1678,8 +1678,8 @@ def _decrypt(self, password: Union[str, bytes]) -> int: val = real_O for i in range(19, -1, -1): new_key = b"" - for l in range(len(key)): - new_key += b_(chr(ord_(key[l]) ^ i)) + for key_char in key: + new_key += b_(chr(ord_(key_char) ^ i)) val = RC4_encrypt(new_key, val) userpass = val owner_password, key = self._authenticate_user_password(userpass) diff --git a/PyPDF2/_security.py b/PyPDF2/_security.py index 041c0e6241..e79c6d891a 100644 --- a/PyPDF2/_security.py 
+++ b/PyPDF2/_security.py @@ -119,8 +119,8 @@ def _alg33(owner_pwd: str, user_pwd: str, rev: int, keylen: int) -> bytes: if rev >= 3: for i in range(1, 20): new_key = "" - for l in range(len(key)): - new_key += chr(ord_(key[l]) ^ i) + for key_char in key: + new_key += chr(ord_(key_char) ^ i) val = RC4_encrypt(new_key, val) # 8. Store the output from the final invocation of the RC4 as the value of # the /O entry in the encryption dictionary. @@ -233,10 +233,10 @@ def RC4_encrypt(key: Union[str, bytes], plaintext: bytes) -> bytes: S[i], S[j] = S[j], S[i] i, j = 0, 0 retval = [] - for x in range(len(plaintext)): + for plaintext_char in plaintext: i = (i + 1) % 256 j = (j + S[i]) % 256 S[i], S[j] = S[j], S[i] t = S[(S[i] + S[j]) % 256] - retval.append(b_(chr(ord_(plaintext[x]) ^ t))) + retval.append(b_(chr(ord_(plaintext_char) ^ t))) return b"".join(retval) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 9cc5bc0125..b2b4251dfb 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -224,7 +224,7 @@ def get_page(self, pageNumber: int) -> PageObject: # TODO: PEP8 :rtype: :class:`PageObject` """ pages = cast(Dict[str, Any], self.get_object(self._pages)) - # XXX: crude hack + # TODO: crude hack return pages[PA.KIDS][pageNumber].get_object() def getPage(self, pageNumber: int) -> PageObject: # pragma: no cover @@ -347,7 +347,7 @@ def add_js(self, javascript: str) -> None: { NameObject(PA.TYPE): NameObject("/Action"), NameObject("/S"): NameObject("/JavaScript"), - NameObject("/JS"): NameObject("(%s)" % javascript), + NameObject("/JS"): NameObject(f"({javascript})"), } ) js_indirect_object = self._add_object(js) @@ -402,30 +402,30 @@ def add_attachment(self, filename: str, data: Union[str, bytes]) -> None: # * The file's name, which goes in the Catalog # The entry for the file - """ Sample: - 8 0 obj - << - /Length 12 - /Type /EmbeddedFile - >> - stream - Hello world! 
- endstream - endobj - """ + # Sample: + # 8 0 obj + # << + # /Length 12 + # /Type /EmbeddedFile + # >> + # stream + # Hello world! + # endstream + # endobj + file_entry = DecodedStreamObject() file_entry.set_data(data) file_entry.update({NameObject(PA.TYPE): NameObject("/EmbeddedFile")}) # The Filespec entry - """ Sample: - 7 0 obj - << - /Type /Filespec - /F (hello.txt) - /EF << /F 8 0 R >> - >> - """ + # Sample: + # 7 0 obj + # << + # /Type /Filespec + # /F (hello.txt) + # /EF << /F 8 0 R >> + # >> + ef_entry = DictionaryObject() ef_entry.update({NameObject("/F"): file_entry}) @@ -441,17 +441,16 @@ def add_attachment(self, filename: str, data: Union[str, bytes]) -> None: ) # Then create the entry for the root, as it needs a reference to the Filespec - """ Sample: - 1 0 obj - << - /Type /Catalog - /Outlines 2 0 R - /Pages 3 0 R - /Names << /EmbeddedFiles << /Names [(hello.txt) 7 0 R] >> >> - >> - endobj + # Sample: + # 1 0 obj + # << + # /Type /Catalog + # /Outlines 2 0 R + # /Pages 3 0 R + # /Names << /EmbeddedFiles << /Names [(hello.txt) 7 0 R] >> >> + # >> + # endobj - """ embedded_files_names_dictionary = DictionaryObject() embedded_files_names_dictionary.update( { @@ -693,10 +692,8 @@ def write(self, stream: StreamType) -> None: """ if hasattr(stream, "mode") and "b" not in stream.mode: warnings.warn( - ( - "File <{}> to write to is not in binary mode. " # type: ignore - "It may not be written to correctly." - ).format(stream.name) + f"File <{stream.name}> to write to is not in binary mode. " + "It may not be written to correctly." ) if not self._root: @@ -712,8 +709,7 @@ def write(self, stream: StreamType) -> None: # we sweep for indirect references. This forces self-page-referencing # trees to reference the correct new object location, rather than # copying in a new copy of the page object. 
- for obj_index in range(len(self._objects)): - obj = self._objects[obj_index] + for obj_index, obj in enumerate(self._objects): if isinstance(obj, PageObject) and obj.indirect_ref is not None: data = obj.indirect_ref if data.pdf not in external_reference_map: @@ -731,13 +727,13 @@ def write(self, stream: StreamType) -> None: object_positions = self._write_header(stream) xref_location = self._write_xref_table(stream, object_positions) self._write_trailer(stream) - stream.write(b_("\nstartxref\n%s\n%%%%EOF\n" % (xref_location))) # eof + stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof def _write_header(self, stream: StreamType) -> List[int]: object_positions = [] stream.write(self._header + b"\n") stream.write(b"%\xE2\xE3\xCF\xD3\n") - for i in range(len(self._objects)): + for i, obj in enumerate(self._objects): obj = self._objects[i] # If the obj is None we can't write anything if obj is not None: @@ -760,9 +756,9 @@ def _write_xref_table(self, stream: StreamType, object_positions: List[int]) -> xref_location = stream.tell() stream.write(b"xref\n") stream.write(b_("0 %s\n" % (len(self._objects) + 1))) - stream.write(b_("%010d %05d f \n" % (0, 65535))) + stream.write(b_(f"{0:0>10} {65535:0>5} f \n")) for offset in object_positions: - stream.write(b_("%010d %05d n \n" % (offset, 0))) + stream.write(b_(f"{offset:0>10} {0:0>5} n \n")) return xref_location def _write_trailer(self, stream: StreamType) -> None: @@ -873,9 +869,8 @@ def _sweep_indirect_references( except (ValueError, RecursionError): # Unable to resolve the Object, returning NullObject instead. 
warnings.warn( - "Unable to resolve [{}: {}], returning NullObject instead".format( - data.__class__.__name__, data - ) + f"Unable to resolve [{data.__class__.__name__}: {data}], " + "returning NullObject instead" ) return NullObject() return newobj @@ -1236,8 +1231,7 @@ def remove_images(self, ignore_byte_string_object: bool = False) -> None: b"Do", b"sh", ) - for j in range(len(pages)): - page = pages[j] + for page in pages: page_ref = cast(DictionaryObject, self.get_object(page)) content = page_ref["/Contents"].get_object() if not isinstance(content, ContentStream): @@ -1298,8 +1292,7 @@ def remove_text(self, ignore_byte_string_object: bool = False) -> None: """ pg_dict = cast(DictionaryObject, self.get_object(self._pages)) pages = cast(List[IndirectObject], pg_dict[PA.KIDS]) - for j in range(len(pages)): - page = pages[j] + for page in pages: page_ref = cast(Dict[str, Any], self.get_object(page)) content = page_ref["/Contents"].get_object() if not isinstance(content, ContentStream): @@ -1591,7 +1584,7 @@ def _set_page_layout(self, layout: Union[NameObject, LayoutType]) -> None: if not isinstance(layout, NameObject): if layout not in self._valid_layouts: warnings.warn( - "Layout should be one of: {}".format(", ".join(self._valid_layouts)) + f"Layout should be one of: {', '.join(self._valid_layouts)}" ) layout = NameObject(layout) self._root_object.update({NameObject("/PageLayout"): layout}) @@ -1690,9 +1683,7 @@ def set_page_mode(self, mode: PagemodeType) -> None: mode_name: NameObject = mode else: if mode not in self._valid_modes: - warnings.warn( - "Mode should be one of: {}".format(", ".join(self._valid_modes)) - ) + warnings.warn(f"Mode should be one of: {', '.join(self._valid_modes)}") mode_name = NameObject(mode) self._root_object.update({NameObject("/PageMode"): mode_name}) diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py index ca21f4aec9..76320d74ca 100644 --- a/PyPDF2/filters.py +++ b/PyPDF2/filters.py @@ -232,8 +232,7 @@ def next_code(self) -> int: 
return -1 nextbits = ord_(self.data[self.bytepos]) bitsfromhere = 8 - self.bitpos - if bitsfromhere > fillbits: - bitsfromhere = fillbits + bitsfromhere = min(bitsfromhere, fillbits) value |= ( (nextbits >> (8 - self.bitpos - bitsfromhere)) & (0xFF >> (8 - bitsfromhere)) diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py index 7b0c692c72..da5d93a05c 100644 --- a/PyPDF2/generic.py +++ b/PyPDF2/generic.py @@ -1312,7 +1312,7 @@ def ensureIsNumber( return self._ensure_is_number(value) def __repr__(self) -> str: - return "RectangleObject(%s)" % repr(list(self)) + return f"RectangleObject({repr(list(self))})" @property def left(self) -> FloatObject: @@ -1887,8 +1887,7 @@ def createStringObject( elif isinstance(forced_encoding, str): if forced_encoding == "bytes": return ByteStringObject(string) - else: - return TextStringObject(string.decode(forced_encoding)) + return TextStringObject(string.decode(forced_encoding)) else: try: if string.startswith(codecs.BOM_UTF16_BE):